1

I need to reorganize a large dataset into a specific format for further analysis. Right now the data are in long format, with multiple records through time for each point. I need to reshape the data so that each point has a single record, but it will add many new columns of the time-specific data. I’ve looked at previous similar posts but I need to ultimately convert several of the current variables into columns, and I can’t find an example of such. Is there a way to accomplish this in a single reshape, or will I have to do several and then concatenate the new columns back together? Another wrinkle before I post the example is that not all points were sampled at each time-step, so I need those values to show up as NA. For example, (see data below) SitePoint A1 was not sampled at all in 2012, SitePoint A10 was not sampled during the first round in 2012, but K83 was sampled all nine times.

mydatain <- structure(list(SitePoint = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 6L, 6L), .Label = c("A1", "A10", "K145", "K83", "T15", 
"T213"), class = "factor"), Year_Rotation = structure(c(1L, 2L, 
3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 1L, 2L, 4L, 5L, 
6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 1L, 7L), .Label = c("2010_1", "2010_2", 
"2010_3", "2011_1", "2011_2", "2011_3", "2012_1", "2012_2", "2012_3"
), class = "factor"), MR_Fire = structure(c(5L, 6L, 6L, 2L, 9L, 
9L, 5L, 6L, 6L, 2L, 9L, 9L, 7L, 8L, 16L, 17L, 21L, 22L, 23L, 
25L, 3L, 4L, 10L, 11L, 12L, 13L, 14L, 15L, 18L, 19L, 20L, 1L, 
2L, 2L, 5L, 6L, 6L, 11L, 11L, 12L, 7L, 24L), .Label = c("0", 
"1", "10", "11", "12", "13", "14", "15", "2", "23", "24", "25", 
"35", "36", "37", "39", "40", "47", "48", "49", "51", "52", "53", 
"8", "9"), class = "factor"), fire_seas = structure(c(2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
1L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 1L, 3L), .Label = c("dry", "fire", "wet"
), class = "factor"), OptTSF = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 
1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 
0L, 1L, 1L)), .Names = c("SitePoint", "Year_Rotation", "MR_Fire", 
"fire_seas", "OptTSF"), row.names = c(31L, 32L, 33L, 34L, 35L, 
36L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 10543L, 10544L, 
10545L, 10546L, 10547L, 10548L, 10549L, 10550L, 14988L, 14989L, 
14990L, 14991L, 14992L, 14993L, 14994L, 14995L, 14996L, 17370L, 
17371L, 17372L, 17373L, 17374L, 17375L, 17376L, 17377L, 17378L, 
19353L, 19354L), class = "data.frame")

Ultimately I need something like this:

myfinal <- structure(list(SitePoint = structure(1:6, .Label = c("A1", "A10", 
"K145", "K83", "T15", "T213"), class = "factor"), MR_Fire_2010_1 = c(12L, 
12L, 39L, 23L, 0L, 14L), MR_Fire_2010_2 = c(13L, 13L, 40L, 24L, 
1L, NA), MR_Fire_2010_3 = c(13L, 13L, NA, 25L, 1L, NA), MR_Fire_2011_1 = c(1L, 
1L, 51L, 35L, 12L, NA), MR_Fire_2011_2 = c(2L, 2L, 52L, 36L, 
13L, NA), MR_Fire_2011_3 = c(2L, 2L, 53L, 37L, 13L, NA), MR_Fire_2012_1 = c(NA, 
NA, 9L, 47L, 24L, 8L), MR_Fire_2012_2 = c(NA, 14L, 10L, 48L, 
24L, NA), MR_Fire_2012_3 = c(NA, 15L, 11L, 49L, 25L, NA), season_2010_1 = structure(c(2L, 
2L, 1L, 2L, 2L, 1L), .Label = c("dry", "fire"), class = "factor"), 
    season_2010_2 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry", 
    "fire"), class = "factor"), season_2010_3 = structure(c(1L, 
    1L, NA, 1L, 1L, NA), .Label = "fire", class = "factor"), 
    season_2011_1 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry", 
    "fire"), class = "factor"), season_2011_2 = structure(c(2L, 
    2L, 1L, 2L, 2L, NA), .Label = c("dry", "fire"), class = "factor"), 
    season_2011_3 = structure(c(2L, 2L, 1L, 2L, 2L, NA), .Label = c("dry", 
    "fire"), class = "factor"), season_2012_1 = structure(c(NA, 
    NA, 2L, 1L, 1L, 2L), .Label = c("fire", "wet"), class = "factor"), 
    season_2012_2 = structure(c(NA, 1L, 2L, 1L, 1L, NA), .Label = c("fire", 
    "wet"), class = "factor"), season_2012_3 = structure(c(NA, 
    1L, 2L, 1L, 1L, NA), .Label = c("fire", "wet"), class = "factor"), 
    OptTSF_2010_1 = c(1L, 1L, 0L, 1L, 1L, 1L), OptTSF_2010_2 = c(1L, 
    1L, 0L, 1L, 1L, NA), OptTSF_2010_3 = c(1L, 1L, NA, 1L, 1L, 
    NA), OptTSF_2011_1 = c(1L, 1L, 0L, 0L, 1L, NA), OptTSF_2011_2 = c(1L, 
    1L, 0L, 0L, 1L, NA), OptTSF_2011_3 = c(1L, 1L, 0L, 0L, 1L, 
    NA), OptTSF_2012_1 = c(NA, NA, 1L, 0L, 0L, 1L), OptTSF_2012_2 = c(NA, 
    1L, 1L, 0L, 0L, NA), OptTSF_2012_3 = c(NA, 1L, 1L, 0L, 0L, 
    NA)), .Names = c("SitePoint", "MR_Fire_2010_1", "MR_Fire_2010_2", 
"MR_Fire_2010_3", "MR_Fire_2011_1", "MR_Fire_2011_2", "MR_Fire_2011_3", 
"MR_Fire_2012_1", "MR_Fire_2012_2", "MR_Fire_2012_3", "season_2010_1", 
"season_2010_2", "season_2010_3", "season_2011_1", "season_2011_2", 
"season_2011_3", "season_2012_1", "season_2012_2", "season_2012_3", 
"OptTSF_2010_1", "OptTSF_2010_2", "OptTSF_2010_3", "OptTSF_2011_1", 
"OptTSF_2011_2", "OptTSF_2011_3", "OptTSF_2012_1", "OptTSF_2012_2", 
"OptTSF_2012_3"), class = "data.frame", row.names = c(NA, -6L
))

The actual dataset is about 23656 records X 15 variables, so doing it by hand is likely to cause major headaches and potential for mistakes. Any help or suggestions are appreciated. If this has been answered elsewhere, apologies. I couldn’t find anything directly applicable; everything seemed to related to three columns and only one of those being extracted as new variables. Thanks.

SP

A.Birdman
  • 161
  • 1
  • 2
  • 12

2 Answers2

2

dcast from the devel version of data.table i.e., v1.9.5 can cast multiple columns simultaneously. It can be installed from here.

library(data.table) ## v1.9.5+
dcast(setDT(mydatain), SitePoint~Year_Rotation,
         value.var=c('MR_Fire', 'fire_seas', 'OptTSF'))
Arun
  • 116,683
  • 26
  • 284
  • 387
akrun
  • 874,273
  • 37
  • 540
  • 662
  • 1
    This looks like just the thing I was hoping for. I'm downloading/updating now to try it out. Thanks much. – A.Birdman Mar 31 '15 at 20:27
  • @A.Birdman Thanks for you comments. I hope it works for you. – akrun Mar 31 '15 at 20:32
  • I finally got everything updated (needed to happen anyway) and installed v1.9.5 of data.table and it works like a charm with the limited data set I uploaded. Thanks again as you've made my life immeasurably more pleasant! – A.Birdman Apr 01 '15 at 15:30
  • @A.Birdman Thank you for notifying me. Also, check the `melt` in data.table (doing the opposite) which can do the same thing with multiple columns – akrun Apr 01 '15 at 15:31
1

You can use reshape to change the structure of your dataframe from long to wide using the following code:

reshape(mydatain,timevar="Year_Rotation",idvar="SitePoint",direction="wide")
User7598
  • 1,658
  • 1
  • 15
  • 28