I have a data frame in R like this:
Company | Date | Revenues
c1 | d1 | r1
c1 | d2 | r2
c1 | d3 | r3
c2 | d1 | r4
c2 | d2 | r5
c2 | d3 | r6
c3 | d2 | r7
c3 | d3 | r8
And I want to change the format to this:
Company|Date|Revs|YOY|Company|Date|Revs|YOY|Company|Date|Revenues|YOY
c1 |d1 |r1 |y1 | c2 |d1 |r4 |y4 | c3 |NA |NA |NA
c1 |d2 |r2 |y2 | c2 |d2 |r5 |y5 | c3 |d2 |r7 |y7
c1 |d3 |r3 |y3 | c2 |d3 |r6 |y6 | c3 |d3 |r8 |y8
That is, I want to change the data frame so that each set of company data is grouped and set next to each other, and the year-over-year differences in revenues are calculated and inserted into a new column after the Revenues column.
The observations are each quarter/month, but some data is missing, so I'd like to insert NA into those fields that are missing, and line up the dates as indicated in the second table.
I'd include some code, but after much searching, I still don't know how to even approach this. I've looked at reshape, but that doesn't seem to do what I need to. I was thinking perhaps a loop is the right way to approach this?
Here is a set of sample input data from dput, that includes some missing values:
structure(list(X = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 7L, 54L, 55L,
56L, 57L, 58L, 59L, 60L, 61L, 82L, 83L, 84L, 85L, 86L, 87L, 88L,
89L, 596L, 597L, 598L, 599L, 600L, 601L), Company = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Company 1",
"Company 2", "Company 3", "Company 4"), class = "factor"), Date = structure(c(8L,
7L, 6L, 5L, 4L, 3L, 2L, 1L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, 8L,
7L, 6L, 5L, 4L, 3L, 2L, 1L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("2014-Q1",
"2014-Q2", "2014-Q3", "2014-Q4", "2015-Q1", "2015-Q2", "2015-Q3",
"2015-Q4"), class = "factor"), Revenue = structure(c(16L, 11L,
12L, 7L, 10L, 14L, 9L, 8L, 15L, 2L, 4L, 1L, 30L, 6L, 3L, 5L,
13L, 26L, 27L, 21L, 23L, 24L, 25L, 22L, 29L, 19L, 20L, 17L, 28L,
18L), .Label = c("$1,009,306", "$1,129,899", "$1,173,698", "$1,290,262",
"$1,329,210", "$1,338,107", "$1,342,401", "$1,455,988", "$1,632,968",
"$1,697,235", "$1,716,398", "$1,756,648", "$17,215,900", "$2,405,874",
"$216,536", "$422,063", "$47,665,398", "$52,772,667", "$53,941,124",
"$54,059,612", "$54,548,057", "$54,946,768", "$55,735,568", "$58,099,615",
"$59,753,619", "$59,955,413", "$60,655,988", "$66,236,339", "$79,135,033",
"$962,366"), class = "factor")), .Names = c("X", "Company", "Date",
"Revenue"), class = "data.frame", row.names = c(NA, -30L))
and here is what I'd like the data to look like after transforming:
structure(list(Company = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "Company 1", class = "factor"), Date = structure(c(8L,
7L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("2014-Q1", "2014-Q2",
"2014-Q3", "2014-Q4", "2015-Q1", "2015-Q2", "2015-Q3", "2015-Q4"
), class = "factor"), Revenue = structure(c(8L, 5L, 6L, 1L, 4L,
7L, 3L, 2L), .Label = c("$134", "$146", "$163", "$170", "$172",
"$176", "$241", "$42"), class = "factor"), YOY = structure(c(2L,
1L, 4L, 3L, NA, NA, NA, NA), .Label = c("-29%", "-75%", "-8%",
"8%"), class = "factor"), Company.1 = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), .Label = "Company 2", class = "factor"),
Date.1 = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("2014-Q1",
"2014-Q2", "2014-Q3", "2014-Q4", "2015-Q1", "2015-Q2", "2015-Q3",
"2015-Q4"), class = "factor"), Revenue.1 = structure(c(7L,
2L, 4L, 1L, 8L, 6L, 3L, 5L), .Label = c("$101", "$113", "$117",
"$129", "$133", "$134", "$22", "$96"), class = "factor"),
YOY.1 = structure(c(3L, 1L, 4L, 2L, NA, NA, NA, NA), .Label = c("-16%",
"-24%", "-77%", "10%"), class = "factor"), Company.2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Company 3", class = "factor"),
Date.2 = structure(c(8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("2014-Q1",
"2014-Q2", "2014-Q3", "2014-Q4", "2015-Q1", "2015-Q2", "2015-Q3",
"2015-Q4"), class = "factor"), Revenue.2 = structure(c(1L,
7L, 8L, 2L, 4L, 5L, 6L, 3L), .Label = c("$1,722", "$5,455",
"$5,495", "$5,574", "$5,810", "$5,975", "$5,996", "$6,066"
), class = "factor"), YOY.2 = structure(c(2L, 4L, 3L, 1L,
NA, NA, NA, NA), .Label = c("-1%", "-69%", "2%", "3%"), class = "factor"),
Company.3 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Company 4", class = "factor"),
Date.3 = structure(c(NA, NA, 6L, 5L, 4L, 3L, 2L, 1L), .Label = c("2014-Q1",
"2014-Q2", "2014-Q3", "2014-Q4", "2015-Q1", "2015-Q2"), class = "factor"),
Revenue.3 = structure(c(NA, NA, 6L, 3L, 4L, 1L, 5L, 2L), .Label = c("$4,767",
"$5,277", "$5,394", "$5,406", "$6,624", "$7,914"), class = "factor"),
YOY.3 = structure(c(NA, NA, 1L, 2L, NA, NA, NA, NA), .Label = c("19%",
"2%"), class = "factor"), X = c(NA, NA, NA, NA, NA, NA, NA,
NA)), .Names = c("Company", "Date", "Revenue", "YOY", "Company.1",
"Date.1", "Revenue.1", "YOY.1", "Company.2", "Date.2", "Revenue.2",
"YOY.2", "Company.3", "Date.3", "Revenue.3", "YOY.3", "X"), class = "data.frame", row.names = c(NA,
-8L))
Thanks!