1

I have multiple cols and would like to reformat dataframe to have less cols.

Here is my cucrent df:


# Dataframe
df <- data.frame(
    ~Location, ~Product_Name, ~Category, ~Machine1, ~Machine2 ~Machine1_adds, ~Machine2_adds, ~Sales1, ~Saless2, Spoils1, Spoils2
    A,  "Snickers",   Candy,  0, 1,  $2.5, $3, 2, 1
    A,  "Kitcat",   Candy,  0, 1,  $3, $3, 2, 1
    A,  "Pepsi",   Bev,  1, 1,  $5, $4, 3, 0
    B,  "Coke",   Bev,  1, 0, $5,  $6.45, 1, 1
    B,  "Gatoraid",   Bev,  0, 1, $4,  $4.45, 1, 0
    B,  "Sprite",   Bev,  1, 1,  $8, $6, 1, 0
)
df

I would like to reformat data-frame to where machine is one col, sales is another, and spoils is the last with each value row adjusted to new col. Reference output:

# Dataframe
new_df <- data.frame(
    ~Location, ~Product_Name, ~Category, ~Machine, ~Machine_adds, ~Sales, Spoils
    A,  "Snickers",   Candy, 1,  0,  $2.5, 2
    A,  "Kitcat",   Candy, 1,  0,  $3, 2
    A,  "Pepsi",   Bev,  1, 1,  $5, 3
    B,  "Coke",   Bev,  2, 1, $6.45, 1
    B,  "Gatoraid",   Bev, 2,  0,  $4.45, 1
    B,  "Sprite",   Bev,  2, 1,  $8, 0
)
new_df


I implemented the melt function but I am not getting the 'Machine' Column to represent which machine I am pulling from (either Machine 1, 2, 3 etc..). 

Dinho
  • 704
  • 4
  • 15
  • 1
    yourr example is giving errors – akrun Nov 18 '19 at 19:20
  • can you paste the `dput(df)` here. – Onyambu Nov 18 '19 at 19:37
  • Does this answer your question? [Reshaping multiple sets of measurement columns (wide format) into single columns (long format)](https://stackoverflow.com/questions/12466493/reshaping-multiple-sets-of-measurement-columns-wide-format-into-single-columns) – camille Nov 18 '19 at 21:13
  • It's going to be easier to help if your example data is in a format that can be run directly. You set this up as a call to `tribble`, but with syntax that wouldn't actually work, e.g. unquoted strings, `$2.5` – camille Nov 18 '19 at 21:14

2 Answers2

1

Here is an option with melt from data.table

library(data.table)
melt(setDT(df), measure = patterns("^Machine", "^Sales", "^Spoils"), 
      value.name = c("Machine_adds", "Sales", "Spoils"))[, variable := NULL][]
#    Location Product_Name Category Machine_adds Sales Spoils
# 1:        A     Snickers    Candy       0  $2.5      2
# 2:        A       Kitcat    Candy       0    $3      2
# 3:        A        Pepsi      Bev       1    $5      3
# 4:        B         Coke      Bev       1    $5      1
# 5:        B     Gatoraid      Bev       0    $4      1
# 6:        B       Sprite      Bev       1    $8      1
# 7:        A     Snickers    Candy       1    $3      1
# 8:        A       Kitcat    Candy       1    $3      1
# 9:        A        Pepsi      Bev       1    $4      0
#10:        B         Coke      Bev       0 $6.45      1
#11:        B     Gatoraid      Bev       1 $4.45      0
#12:        B       Sprite      Bev       1    $6      0

Update

Based on the OP's updated example, if there are 'Machine' and Machine_adds' columns, we can slightly changed the patterns to

# creating new columns in the dataset
df[c('Machine1', 'Machine2')] <- df[c("Machine1_adds", "Machine2_adds")]
melt(setDT(df), measure = patterns("^Machine\\d+$", 
         "^Machine\\d+_adds$", "^Sales", "^Spoils"), 
   value.name = c("Machine", "Machine_adds", "Sales", "Spoils"))[, 
           variable := NULL][]

Or using pivot_longer from tidyr

library(dplyr)
library(tidyr)
library(stringr)
df %>% 
     rename_at(3:ncol(.), ~
           str_replace(., "(\\d+)_?.*", "_\\1")) %>% 
     pivot_longer(cols =  matches("^(Machine|Sales|Spoils)"),
        names_to = c(".value", "group"), names_sep = "_") %>%
     select(-group)
# A tibble: 12 x 6
#   Location Product_Name Category Machine Sales Spoils
#   <chr>    <chr>        <chr>      <dbl> <chr>  <dbl>
# 1 A        Snickers     Candy          0 $2.5       2
# 2 A        Snickers     Candy          1 $3         1
# 3 A        Kitcat       Candy          0 $3         2
# 4 A        Kitcat       Candy          1 $3         1
# 5 A        Pepsi        Bev            1 $5         3
# 6 A        Pepsi        Bev            1 $4         0
# 7 B        Coke         Bev            1 $5         1
# 8 B        Coke         Bev            0 $6.45      1
# 9 B        Gatoraid     Bev            0 $4         1
#10 B        Gatoraid     Bev            1 $4.45      0
#11 B        Sprite       Bev            1 $8         1
#12 B        Sprite       Bev            1 $6         0

Update

df %>%
   rename_at(vars(matches('^Machine.*adds$')), ~ 
         str_replace(., '(\\d+)_(\\w+)$', '_\\2\\1')) %>% 
   rename_at(3:ncol(.), ~ str_replace(., "(\\d+)_?.*", ":\\1")) %>%
   pivot_longer(cols =  matches("^(Machine|Sales|Spoils)"),
        names_to = c(".value", "group"), names_sep = ":") %>%
   select(-group)

data

df <- structure(list(Location = c("A", "A", "A", "B", "B", "B"), 
     Product_Name = c("Snickers", 
"Kitcat", "Pepsi", "Coke", "Gatoraid", "Sprite"), Category = c("Candy", 
"Candy", "Bev", "Bev", "Bev", "Bev"), Machine1_adds = c(0, 0, 
1, 1, 0, 1), Machine2_adds = c(1, 1, 1, 0, 1, 1), Sales1 = c("$2.5", 
"$3", "$5", "$5", "$4", "$8"), Sales2 = c("$3", "$3", "$4", "$6.45", 
"$4.45", "$6"), Spoils1 = c(2, 2, 3, 1, 1, 1), Spoils2 = c(1, 
1, 0, 1, 0, 0)), row.names = c(NA, -6L), class = c("tbl_df", 
"tbl", "data.frame"))
akrun
  • 874,273
  • 37
  • 540
  • 662
  • ``` t <- melt(setDT(test), measure = patterns("^Machine.Label","^Machine.Counts", "^Sales.Units", "^Sales.Dollars", "^Spoils"), value.name = c("Machine", "Sales_units", "Total_Sales", "Spoils"))[, Variable := NULL][] ``` – Dinho Nov 18 '19 at 22:55
  • Everything is working besides getting the machine label (either 1, 2 or 3.)..It just is showing variable. – Dinho Nov 18 '19 at 22:56
  • 1
    @Dinho It is `variable` instead of `Variable` – akrun Nov 18 '19 at 22:57
  • @Dinho Got it, I think you have both `Machine` and `Machine_adds`. I used an example with only `Machine_adds` column – akrun Nov 18 '19 at 22:58
  • @Dinho You can change the code to `melt(setDT(df), measure = patterns("^Machine\\d+$", "^Machine\\d+_adds$", "^Sales", "^Spoils"), value.name = c("Machine", "Machine_adds", "Sales", "Spoils"))[, variable := NULL][]` – akrun Nov 18 '19 at 23:05
0

according to what you have, you are looking for the reshape function whith the variables aligned as list(c(4,5),c(6,7),c(8,9)). You could use:

reshape(df,t(matrix(4:ncol(df),2)),idvar = 1:3,dir="long")

or

reshape(df,list(c(4,5),c(6,7),c(8,9)),idvar = 1:3,dir="long")

To get the names you have, I will use the v.names argument

reshape(df,list(c(4,5),c(6,7),c(8,9)),idvar = 1:3,dir="long",
         v.names = c("Machine_adds","Sales","Spoils"))[-4]# -4 removes the time variable.
                   Location Product_Name Category Machine_adds Sales Spoils
A.Snickers.Candy.1        A     Snickers    Candy            0  $2.5      2
A.Kitcat.Candy.1          A       Kitcat    Candy            0    $3      2
A.Pepsi.Bev.1             A        Pepsi      Bev            1    $5      3
B.Coke.Bev.1              B         Coke      Bev            1    $5      1
B.Gatoraid.Bev.1          B     Gatoraid      Bev            0    $4      1
B.Sprite.Bev.1            B       Sprite      Bev            1    $8      1
A.Snickers.Candy.2        A     Snickers    Candy            1    $3      1
A.Kitcat.Candy.2          A       Kitcat    Candy            1    $3      1
A.Pepsi.Bev.2             A        Pepsi      Bev            1    $4      0
B.Coke.Bev.2              B         Coke      Bev            0 $6.45      1
B.Gatoraid.Bev.2          B     Gatoraid      Bev            1 $4.45      0
B.Sprite.Bev.2            B       Sprite      Bev            1    $6      0
Onyambu
  • 67,392
  • 3
  • 24
  • 53