I need to reorganize a large dataset into a specific format for further analysis. Right now the data are in long format, with multiple records through time for each point. I need to reshape the data so that each point has a single record, but it will add many new columns of the time-specific data. I’ve looked at previous similar posts but I need to ultimately convert several of the current variables into columns, and I can’t find an example of such. Is there a way to accomplish this in a single reshape, or will I have to do several and then concatenate the new columns back together? Another wrinkle before I post the example is that not all points were sampled at each time-step, so I need those values to show up as NA. For example, (see data below) SitePoint A1 was not sampled at all in 2012, SitePoint A10 was not sampled during the first round in 2012, but K83 was sampled all nine times.
mydatain <- structure(list(SitePoint = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L), .Label = c("A1", "A10", "K145", "K83", "T15",
"T213"), class = "factor"), Year_Rotation = structure(c(1L, 2L,
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 1L, 2L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 1L, 7L), .Label = c("2010_1", "2010_2",
"2010_3", "2011_1", "2011_2", "2011_3", "2012_1", "2012_2", "2012_3"
), class = "factor"), MR_Fire = structure(c(5L, 6L, 6L, 2L, 9L,
9L, 5L, 6L, 6L, 2L, 9L, 9L, 7L, 8L, 16L, 17L, 21L, 22L, 23L,
25L, 3L, 4L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 1L,
2L, 2L, 5L, 6L, 6L, 11L, 11L, 12L, 7L, 24L), .Label = c("0",
"1", "10", "11", "12", "13", "14", "15", "2", "23", "24", "25",
"35", "36", "37", "39", "40", "47", "48", "49", "51", "52", "53",
"8", "9"), class = "factor"), fire_seas = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L), .Label = c("dry", "fire", "wet"
), class = "factor"), OptTSF = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L,
0L, 1L, 1L)), .Names = c("SitePoint", "Year_Rotation", "MR_Fire",
"fire_seas", "OptTSF"), row.names = c(31L, 32L, 33L, 34L, 35L,
36L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 10543L, 10544L,
10545L, 10546L, 10547L, 10548L, 10549L, 10550L, 14988L, 14989L,
14990L, 14991L, 14992L, 14993L, 14994L, 14995L, 14996L, 17370L,
17371L, 17372L, 17373L, 17374L, 17375L, 17376L, 17377L, 17378L,
19353L, 19354L), class = "data.frame")
Ultimately I need something like this:
myfinal <- structure(list(SitePoint = structure(1:6, .Label = c("A1", "A10",
"K145", "K83", "T15", "T213"), class = "factor"), MR_Fire_2010_1 = c(12L,
12L, 39L, 23L, 0L, 14L), MR_Fire_2010_2 = c(13L, 13L, 40L, 24L,
1L, NA), MR_Fire_2010_3 = c(13L, 13L, NA, 25L, 1L, NA), MR_Fire_2011_1 = c(1L,
1L, 51L, 35L, 12L, NA), MR_Fire_2011_2 = c(2L, 2L, 52L, 36L,
13L, NA), MR_Fire_2011_3 = c(2L, 2L, 53L, 37L, 13L, NA), MR_Fire_2012_1 = c(NA,
NA, 9L, 47L, 24L, 8L), MR_Fire_2012_2 = c(NA, 14L, 10L, 48L,
24L, NA), MR_Fire_2012_3 = c(NA, 15L, 11L, 49L, 25L, NA), season_2010_1 = structure(c(2L,
2L, 1L, 2L, 2L, 1L), .Label = c("dry", "fire"), class = "factor"),
season_2010_2 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2010_3 = structure(c(1L,
1L, NA, 1L, 1L, NA), .Label = "fire", class = "factor"),
season_2011_1 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2011_2 = structure(c(2L,
2L, 1L, 2L, 2L, NA), .Label = c("dry", "fire"), class = "factor"),
season_2011_3 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry",
"fire"), class = "factor"), season_2012_1 = structure(c(NA,
NA, 2L, 1L, 1L, 2L), .Label = c("fire", "wet"), class = "factor"),
season_2012_2 = structure(c(NA, 1L, 2L, 1L, 1L, NA), .Label = c("fire",
"wet"), class = "factor"), season_2012_3 = structure(c(NA,
1L, 2L, 1L, 1L, NA), .Label = c("fire", "wet"), class = "factor"),
OptTSF_2010_1 = c(1L, 1L, 0L, 1L, 1L, 1L), OptTSF_2010_2 = c(1L,
1L, 0L, 1L, 1L, NA), OptTSF_2010_3 = c(1L, 1L, NA, 1L, 1L,
NA), OptTSF_2011_1 = c(1L, 1L, 0L, 0L, 1L, NA), OptTSF_2011_2 = c(1L,
1L, 0L, 0L, 1L, NA), OptTSF_2011_3 = c(1L, 1L, 0L, 0L, 1L,
NA), OptTSF_2012_1 = c(NA, NA, 1L, 0L, 0L, 1L), OptTSF_2012_2 = c(NA,
1L, 1L, 0L, 0L, NA), OptTSF_2012_3 = c(NA, 1L, 1L, 0L, 0L,
NA)), .Names = c("SitePoint", "MR_Fire_2010_1", "MR_Fire_2010_2",
"MR_Fire_2010_3", "MR_Fire_2011_1", "MR_Fire_2011_2", "MR_Fire_2011_3",
"MR_Fire_2012_1", "MR_Fire_2012_2", "MR_Fire_2012_3", "season_2010_1",
"season_2010_2", "season_2010_3", "season_2011_1", "season_2011_2",
"season_2011_3", "season_2012_1", "season_2012_2", "season_2012_3",
"OptTSF_2010_1", "OptTSF_2010_2", "OptTSF_2010_3", "OptTSF_2011_1",
"OptTSF_2011_2", "OptTSF_2011_3", "OptTSF_2012_1", "OptTSF_2012_2",
"OptTSF_2012_3"), class = "data.frame", row.names = c(NA, -6L
))
The actual dataset is about 23656 records X 15 variables, so doing it by hand is likely to cause major headaches and potential for mistakes. Any help or suggestions are appreciated. If this has been answered elsewhere, apologies. I couldn’t find anything directly applicable; everything seemed to related to three columns and only one of those being extracted as new variables. Thanks.
SP