0

I have a dataset like this (df1)

ID  2   4   6   8   10  12  14  16  18  20  22  24   Day
1   0   0   0   0   2   0   0   0   1   0   1   0    Sunday
1   0   0   0   0   0   4   0   0   0   0   0   0   Monday
1   0   0   0   0   0   0   0   0   2   0   0   0   Tuesday
1   0   0   0   0   0   0   2   0   0   0   0   0   Wednesday
1   0   0   0   0   0   0   0   2   0   0   0   0   Thursday
1   0   0   0   0   0   0   0   0   2   0   0   0   Friday
1   0   0   0   0   0   0   0   0   0   2   0   0   Saturday
2   0   0   0   0   0   0   0   0   0   0   0   0   Sunday
2   0   0   0   0   0   1   0   0   0   0   0   0   Monday
2   0   0   0   0   0   0   1   0   0   0   1   0   Tuesday
2   0   0   0   0   0   0   0   1   0   0   0   0   Wednesday
2   0   0   0   0   0   0   0   0   1   0   0   0   Thursday
2   0   0   0   0   0   2   0   0   0   1   0   0   Friday
2   0   0   0   0   0   0   0   0   0   0   0   0   Saturday
3   0   0   0   0   0   0   0   0   0   0   0   0   Sunday
3   0   0   0   0   0   0   2   0   0   0   0   0   Monday
3   0   0   0   0   0   1   0   0   2   0   0   0   Tuesday
3   0   0   0   0   0   0   0   0   0   0   0   0   Wednesday
3   0   0   0   0   0   0   0   2   0   0   0   0   Thursday
3   0   0   0   0   0   0   0   0   0   0   0   0   Friday
3   0   0   0   0   0   0   2   0   0   0   0   0   Saturday
3   0   0   0   0   0   0   0   2   0   0   0   0   Sunday

and I have an ID checklist like this:

ID
1
2
3

I want to convert the df1 into this kind of output:

ID  Var1    Var2    Var3    Var4    Var5 ...... Var82   Var83 Var84
1   0         0      0         0     2             2      0     0
2
3

where Var1 represents 'Sunday 2' (in the first dataframe) and var84 represents the 'Saturday24'. I want to export my result as a .csv file.

I am doing this by using a for loop (shown below) because there are too many ID's. However, the problem is these codes are running very slowly. Is there any faster way to get the same result?

library(dplyr)
library(reshape2)
for (i in ID_checklist$ID) {

  x= filter(df1$ID %in% i)
  x$Day = NULL
  df.melted = melt(t(x[,-1]), id.vars = NULL)
  myNewDF = data.frame(i, t(df.melted[,3]))
  write.table(myNewDF,file="my12x7.csv", append=TRUE,sep=",",col.names=FALSE,row.names=FALSE)
}
Matt Sandgren
  • 476
  • 1
  • 4
  • 10
Meixu Chen
  • 61
  • 4

1 Answers1

0

I think this is what you want:

library(reshape2)

# this may be unnecessary depending on your data
# it will make sure the weekday columns come in the same order
# as the weekdays appear in your original data
df1$Day = factor(df1$Day, levels = unique(df1$Day))

# convert to a fully long format
df_long = melt(df1, id.var = c("ID", "Day"))

# convert to the wide format you want
result = dcast(data = df_long, ID ~ Day + variable, fun.aggregate = sum)

This will append the day names with the current variable. If you'd rather have them as Var1 Var2 Var3, use paste() and rename the columns.

We can look at the first few columns to verify:

result[, 1:6]
#   ID Sunday_X2 Sunday_X4 Sunday_X6 Sunday_X8 Sunday_X10
# 1  1         0         0         0         0          2
# 2  2         0         0         0         0          0
# 3  3         0         0         0         0          0
Gregor Thomas
  • 136,190
  • 20
  • 167
  • 294