0

I'm trying to reshape my data.table in R.

I've tried using the melt function, but I can't seem to get it into the format I need.

This is my input:

structure(list(Name = c("Fred", "Peter"), first.sale = c("3/01/2019", 
"10/08/2018"), first.result = c(352L, 209L), second.sale = c("5/12/2018", 
"20/06/2018"), second.result = c(953L, 987L), third.sale = c("2/10/2018", 
"21/02/2018"), third.result = c(965L, 618L), fourth.sale = c("29/08/2018", 
"16/07/2018"), fourth.result = c(125L, 902L), fifth.sale = c("26/04/2018", 
"5/07/2018"), fifth.result = c(264L, 71L)), .Names = c("Name", 
"first.sale", "first.result", "second.sale", "second.result", 
"third.sale", "third.result", "fourth.sale", "fourth.result", 
"fifth.sale", "fifth.result"), row.names = c(NA, -2L), class = c("data.table", 
"data.frame"))

And this is how I'd like my output to look:

structure(list(Name = c("Fred", "Fred", "Fred", "Fred", "Fred", 
"Peter", "Peter", "Peter", "Peter", "Peter", "Frank", "Frank"
), Sale = c("first.sale", "second.sale", "third.sale", "fourth.sale", 
"fifth.sale", "first.sale", "second.sale", "third.sale", "fourth.sale", 
"fifth.sale", "first.sale", "second.sale"), Result = c(352L, 
953L, 965L, 125L, 264L, 209L, 987L, 618L, 902L, 71L, 848L, 410L
), SaleDate = c("3/01/2019", "5/12/2018", "2/10/2018", "29/08/2018", 
"26/04/2018", "10/08/2018", "20/06/2018", "21/02/2018", "16/07/2018", 
"5/07/2018", "10/08/2018", "5/12/2018")), .Names = c("Name", 
"Sale", "Result", "SaleDate"), class = "data.frame", row.names = c(NA, 
-12L))

But this is what I get when I try to use melt:

structure(list(Name = c("Fred", "Peter", "Fred", "Peter", "Fred", 
"Peter", "Fred", "Peter", "Fred", "Peter"), first.sale = c("3/01/2019", 
"10/08/2018", "3/01/2019", "10/08/2018", "3/01/2019", "10/08/2018", 
"3/01/2019", "10/08/2018", "3/01/2019", "10/08/2018"), second.sale = c("5/12/2018", 
"20/06/2018", "5/12/2018", "20/06/2018", "5/12/2018", "20/06/2018", 
"5/12/2018", "20/06/2018", "5/12/2018", "20/06/2018"), third.sale = c("2/10/2018", 
"21/02/2018", "2/10/2018", "21/02/2018", "2/10/2018", "21/02/2018", 
"2/10/2018", "21/02/2018", "2/10/2018", "21/02/2018"), fourth.sale = c("29/08/2018", 
"16/07/2018", "29/08/2018", "16/07/2018", "29/08/2018", "16/07/2018", 
"29/08/2018", "16/07/2018", "29/08/2018", "16/07/2018"), fifth.sale = c("26/04/2018", 
"5/07/2018", "26/04/2018", "5/07/2018", "26/04/2018", "5/07/2018", 
"26/04/2018", "5/07/2018", "26/04/2018", "5/07/2018"), variable = structure(c(1L, 
1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L), class = "factor", .Label = c("first.result", 
"second.result", "third.result", "fourth.result", "fifth.result"
)), value = c(352L, 209L, 953L, 987L, 965L, 618L, 125L, 902L, 
264L, 71L)), .Names = c("Name", "first.sale", "second.sale", 
"third.sale", "fourth.sale", "fifth.sale", "variable", "value"
), row.names = c(NA, -10L), class = c("data.table", "data.frame"
))

If anyone can please point me in the right direction, I'd be forever grateful.

I think my issue is that each variable has two values (a sale date and a result), but I can't work out how to group them.

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
nicshah
  • 345
  • 1
  • 8
  • I've already worked through those examples, but still can't get it to shape correctly. I think it's because I have 2 values for the variable? But I don't seem to understand how to group these together. Any tips would be great. – nicshah May 10 '19 at 00:48

1 Answer

1

You can use melt like this:

library(data.table)

# Make sure df is a proper data.table (converted by reference), then melt the
# "*.sale" and "*.result" columns in parallel: each pattern fills one value
# column, and `variable` numbers the matched column pairs.
setDT(df)
melt(
  df,
  id.vars = "Name",
  measure.vars = patterns("sale$", "result$"),
  value.name = c("SaleDate", "Result")
)


#     Name variable   SaleDate Result
# 1:  Fred        1  3/01/2019    352
# 2: Peter        1 10/08/2018    209
# 3:  Fred        2  5/12/2018    953
# 4: Peter        2 20/06/2018    987
# 5:  Fred        3  2/10/2018    965
# 6: Peter        3 21/02/2018    618
# 7:  Fred        4 29/08/2018    125
# 8: Peter        4 16/07/2018    902
# 9:  Fred        5 26/04/2018    264
#10: Peter        5  5/07/2018     71

To get the variable names correct, based on this answer, we can do:

# Ordinal prefixes ("first", "second", ...): take every column name except the
# first one (Name), cut each at its first dot, and keep one copy of each prefix
# in column order.
suff <- unique(sub("\\..*", "", names(df)[-1]))

# Melt the "*.sale" columns into SaleDate and the "*.result" columns into
# Result, keeping Name as the identifier.
setDT(df)
B2 <- melt(
  df,
  id.vars = "Name",
  measure.vars = patterns("sale$", "result$"),
  value.name = c("SaleDate", "Result")
)

# Relabel the levels of `variable` by reference with the ordinal prefixes.
setattr(B2$variable, "levels", suff)

B2
#     Name variable   SaleDate Result
# 1:  Fred    first  3/01/2019    352
# 2: Peter    first 10/08/2018    209
# 3:  Fred   second  5/12/2018    953
# 4: Peter   second 20/06/2018    987
# 5:  Fred    third  2/10/2018    965
# 6: Peter    third 21/02/2018    618
# 7:  Fred   fourth 29/08/2018    125
# 8: Peter   fourth 16/07/2018    902
# 9:  Fred    fifth 26/04/2018    264
#10: Peter    fifth  5/07/2018     71

Or the tidyverse way would be:

library(tidyverse)

# Reshape to one row per Name and sale ordinal with tidyr::pivot_longer()
# (tidyr >= 1.0.0), which supersedes gather()/spread().
# Each column name is split at the dot: the first part ("first", "second", ...)
# goes to the Sale column, and the ".value" sentinel turns the second part
# ("sale" / "result") into separate output columns. Because dates and results
# are never stacked into one shared column, Result stays integer instead of
# being coerced to character.
df %>%
  pivot_longer(
    cols = -Name,
    names_to = c("Sale", ".value"),
    names_sep = "\\."
  ) %>%
  # Use the column names requested in the question.
  rename(SaleDate = sale, Result = result)
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213