I have a data.frame, given below, that I am trying to reshape from long format to wide format, spreading on the date column. Using the spread function from the tidyr
package presents a twofold problem:
- The data is filled with NA
- The months get ordered by alphabetic order
So how do I go from
30-Apr-2015 632.95
28-May-2015 532.95
25-Jun-2015 232.95
to
30-Apr-2015 28-May-2015 25-Jun-2015
632.95 532.95 232.95
Instead, I end up with
30-Apr-2015 25-Jun-2015 28-May-2015
632.95 NA NA
NA 232.95 NA
NA NA 532.95
The actual dates don't matter, but their relative ordering does, i.e. the nearest month's data should go in the first column, followed by the data for the other two months in successive order. This is necessary because I'm using rbind
on the result.
The code I've tried
# Attempted reshape (reproduces the problem described above).
# spread() keeps every column other than the key/value pair as a row
# identifier, so rows whose remaining columns differ are not collapsed and
# the new date columns are padded with NA. EXPIRY_DT is a character column,
# so the new columns come out in alphabetical, not chronological, order.
data <- tidyr::spread(data, key = EXPIRY_DT, value = CHG_IN_OI)
# Relabel columns 3 to 5 with date-independent names.
# NOTE(review): with the full 12-column data shown below, the spread columns
# would not sit at positions 3:5 -- presumably this was run on a narrower
# subset; confirm.
names(data)[3:5] <- c("Month1", "Month2", "Month3")
The data.frame is as given below:
# Sample input in long format: one row per SYMBOL and expiry date
# (4 symbols x 3 expiries). EXPIRY_DT and TIMESTAMP are plain character
# strings, not Date objects. Row names 40:51 are kept from the source data.
data <- data.frame(
  SYMBOL = rep(c("A", "B", "C", "D"), each = 3),
  EXPIRY_DT = rep(c("30-Apr-2015", "28-May-2015", "25-Jun-2015"), times = 4),
  OPEN = c(1750, 1789, 0, 1627.5, 1653.3, 0,
           632.95, 644.1, 0, 317.8, 319.5, 0),
  HIGH = c(1788.05, 1795, 0, 1656.5, 1653.3, 0,
           646.4, 650.5, 0, 324.6, 326.65, 0),
  LOW = c(1746, 1760, 0, 1627.5, 1645.45, 0,
          629.65, 635, 0, 315.85, 318.4, 0),
  CLOSE = c(1782.3, 1791.85, 1695.1, 1642.95, 1646.75, 1613.9,
            640.85, 644.35, 614.6, 320.55, 322.35, 310.85),
  SETTLE_PR = c(1782.3, 1791.85, 1804.8, 1642.95, 1653.85, 1664.35,
                640.85, 644.35, 649.1, 320.55, 322.35, 325.35),
  CONTRACTS = c(1469L, 78L, 0L, 2638L, 14L, 0L,
                4964L, 181L, 0L, 3416L, 82L, 0L),
  VALUE = c(6496.96, 347.91, 0, 10830.05, 57.68, 0,
            15869.41, 583.38, 0, 10969.31, 264.93, 0),
  OPEN_INT = c(1353750L, 8500L, 0L, 1377250L, 17000L, 0L,
               6264000L, 98000L, 0L, 8228000L, 216000L, 0L),
  CHG_IN_OI = c(15250L, 1250L, 0L, -21000L, 1500L, 0L,
                73500L, 6000L, 0L, -192000L, 13000L, 0L),
  TIMESTAMP = rep("10-APR-2015", 12),
  row.names = 40:51,
  stringsAsFactors = FALSE
)
Thanks for reading.
Edit:
Following comments from @akrun, I am adding the expected output. Because the values differ for each date, I need the data for each month placed one after another, with the column names suffixed with 'Month1', 'Month2' or 'Month3' instead of the actual date. Hope that helps.
# Expected result in wide format: one row per SYMBOL, with each measure
# repeated per expiry and suffixed .Month1/.Month2/.Month3 (nearest expiry
# first). Values are kept exactly as posted: TIMESTAMP is spelled
# "10-Apr-15" here (the input uses "10-APR-2015"), and the all-zero Month3
# columns are integer even where the matching input column is double.
output <- data.frame(
  SYMBOL = c("A", "B", "C", "D"),
  TIMESTAMP = rep("10-Apr-15", 4),
  # Month1: nearest expiry
  OPEN.Month1 = c(1750, 1627.5, 632.95, 317.8),
  HIGH.Month1 = c(1788.05, 1656.5, 646.4, 324.6),
  LOW.Month1 = c(1746, 1627.5, 629.65, 315.85),
  CLOSE.Month1 = c(1782.3, 1642.95, 640.85, 320.55),
  SETTLE_PR.Month1 = c(1782.3, 1642.95, 640.85, 320.55),
  CONTRACTS.Month1 = c(1469L, 2638L, 4964L, 3416L),
  VALUE.Month1 = c(6496.96, 10830.05, 15869.41, 10969.31),
  OPEN_INT.Month1 = c(1353750L, 1377250L, 6264000L, 8228000L),
  CHG_IN_OI.Month1 = c(15250L, -21000L, 73500L, -192000L),
  # Month2: second expiry
  OPEN.Month2 = c(1789, 1653.3, 644.1, 319.5),
  HIGH.Month2 = c(1795, 1653.3, 650.5, 326.65),
  LOW.Month2 = c(1760, 1645.45, 635, 318.4),
  CLOSE.Month2 = c(1791.85, 1646.75, 644.35, 322.35),
  SETTLE_PR.Month2 = c(1791.85, 1653.85, 644.35, 322.35),
  CONTRACTS.Month2 = c(78L, 14L, 181L, 82L),
  VALUE.Month2 = c(347.91, 57.68, 583.38, 264.93),
  OPEN_INT.Month2 = c(8500L, 17000L, 98000L, 216000L),
  CHG_IN_OI.Month2 = c(1250L, 1500L, 6000L, 13000L),
  # Month3: third expiry
  OPEN.Month3 = rep(0L, 4),
  HIGH.Month3 = rep(0L, 4),
  LOW.Month3 = rep(0L, 4),
  CLOSE.Month3 = c(1695.1, 1613.9, 614.6, 310.85),
  SETTLE_PR.Month3 = c(1804.8, 1664.35, 649.1, 325.35),
  CONTRACTS.Month3 = rep(0L, 4),
  VALUE.Month3 = rep(0L, 4),
  OPEN_INT.Month3 = rep(0L, 4),
  CHG_IN_OI.Month3 = rep(0L, 4),
  stringsAsFactors = FALSE
)