1

Link to original post

In my previous post (see link above), I wanted to know how to combine columns that have the same data and change the column name to reflect the range. I started with a function that produced enter image description here

and the accepted answer produced my desired output enter image description here

I applied that same function

library(dplyr)
library(flextable)
library(stringr)
library(tidyverse)

Shop_fcn <- function(data){
  data %>%
    group_by(Day) %>%
    mutate(N_nam = n_distinct(Names)) %>%
    group_by(Names, Day, N_nam, Store, Item) %>%
    summarize(n_item = n()) %>%
    group_by(Day, N_nam, Store, Item) %>%
    summarize(n_nam = n(),
              n_item = sum(n_item))%>%
    mutate(pct = round(n_nam/N_nam*100,digits = 1),
           txt = paste0( n_nam, " (", pct, "%)"),
           Day_n = (paste0("Day ", Day," (N=",  N_nam, ")")))%>%
    ungroup %>% 
    select(Day_n , Store, Item, txt) %>%
    group_by(Store, Item, txt) %>%
    summarise(Day_n = if(n() > 1) 
      sprintf('Day %s %s', paste(range(readr::parse_number(unique(Day_n))), 
                                 collapse=' - '), 
              str_remove(first(Day_n), '^[^(]+')) else Day_n) %>%
    pivot_wider(values_from = txt, names_from = Day_n) %>%
    mutate_at(vars(starts_with(c("Day"))), ~if_else(is.na(.), "", .)) %>%
    arrange(Store, Item) %>% 
    group_by(store2 = Store) %>% 
    mutate(Store = if_else(row_number() != 1, "", Store))%>%
    ungroup() %>%
    select(Store, Item, str_sort(names(.)[-(1:2)], numeric = TRUE), -store2)
  
}


to a larger data set

Names <- as.character(c('Adam','Morticia','Morticia','Morticia','Morticia','Morticia','Morticia','Morticia',
'Morticia','Morticia','Morticia','Morticia','Mickey','Minnie','Minnie','Minnie','Minnie','Minnie',
'Lucy', 'Lucy','Lucy','Morticia','Morticia','Morticia','Adam','Gomez','Olive','Olive','Olive',  
'Ricky','Morticia','Adam','Eve','Ricky','Morticia','Morticia','Minnie','Adam','Lucy','Ricky',
'Ricky','Ricky','Ricky','Ricky','Minnie','Adam','Adam', 'Morticia', 'Adam', 'Adam', 'Adam', 'Adam', 
'Adam','Lucy','Olive','Eve','Gomez','Morticia','Mickey','Olive'))

Day <- as.numeric(c(1,1,2,3,6,8,9,10,11,12,13,14,1,1,2,5,6,14,1,1,14,4,4,4,2,1,1,1,14,1,5,2,    
       1,1,4,5,3,2,1,1,14,14,14,14,4,2,2,4,2,2,2,2,14,1,1,14,14,7,14,1))

Store <- as.character(c('None','None','None','None','None','None','None','None','None','None',
'None','None','None','None','None','None','None','None','ACE','ACE','ACE','ACE','Amazon','Amazon',
'Best Buy','CVS','Hobby Lobby','Hobby Lobby','Hobby Lobby','Home Depot','Home Depot',   
'Ikea','Ikea','Ikea','Ikea','Ikea','Ikea','Lowes','Lowes','Petco','Petco','Petco','Petco',
'Petco','Petco','Target','Target','Target','Walgreens','Walgreens','Walgreens','Walgreens',
'Walgreens','Walgreens','Walgreens','Walmart','Walgreens','Walgreens','Walgreens','Walgreens'))

Item <- as.character(c('None','None','None','None','None','None','None','None','None','None','None','None',
'None','None', 'None','None','None','None', 'Hammer','Nails','Plywood', 'Bricks','Frame','Batteries','TV','Advil',
'Brush','Paint','Paint','Level','Wrench','Pillow',  'Blanket','Lamp','Vase','Table','Chair','Screwdriver','Plunger','Cat food',  
'Cat litter','Goldfish','Dog food','Dog treat','Hamster','Rug','Vacuum',
 'Gloves','Tylenol','Napkins','Benadryl','Soap','Soap','Shampoo','Conditioner','Lotion',    
'Lotion','Foil','Lotion','Foil'))


Shop_list <- as.data.frame(cbind(Names, Day, Store, Item), stringsAsFactors=FALSE)
Shop_day<- Shop_list %>%
  bind_rows() %>%
  Shop_fcn ()

flextable(Shop_day)

and got the following enter image description here

Days 1- 14 and Days 3 - 5 should not have been combined

Applying my original function gets me closer to my desired output,


Shop_fcn <- function(data){
  data %>%
    group_by(Day) %>%
    mutate(N_nam = n_distinct(Names)) %>%
    group_by(Names, Day, N_nam, Store, Item) %>%
    summarize(n_item = n()) %>%
    group_by(Day, N_nam, Store, Item) %>%
    summarize(n_nam = n(),
              n_item = sum(n_item))%>%
    mutate(pct = round(n_nam/N_nam*100,digits = 1),
           txt = paste0( n_nam, " (", pct, "%)"),
           Day_n = (paste0("Day ", Day," (N=",  N_nam, ")")))%>%
    ungroup %>% select(Day_n , Store, Item, txt) %>%
    pivot_wider(values_from = txt, names_from = Day_n) %>%
    mutate_at(vars(starts_with(c("Day"))), ~if_else(is.na(.), "", .)) %>%
    arrange(Store, Item) %>% 
    group_by(store2 = Store) %>% 
    mutate(Store = if_else(row_number() != 1, "", Store))%>%
    ungroup() %>% select(-store2)
}
Shop_day<- Shop_list %>%
  bind_rows() %>%
  Shop_fcn ()

flextable(Shop_day)

enter image description here however, I'm now stuck with the same problem of combining identical days (specifically, columns Days 8-13) and the new issue of the Days not being ordered 1-14.

I'm not sure if the best solution would be to modify the function, or to apply a new function to the flextable to combine columns and the respective column names.

I tried to remove the duplicate columns, but still couldn't come up with a solution on how to preserve the names of the duplicated columns appear as a range or how to get the columns in the proper order.

Shop_nodup <- Shop_day[!duplicated(as.list(Shop_day))]
flextable(Shop_nodup)

enter image description here

L.Sobble
  • 49
  • 1
  • 10
  • 1
    As for the ordering, see the [FAQ about ordering mixed alphanumeric strings](https://stackoverflow.com/q/12806128/903061). A function like `gtools::mixedorder()` will help you get and keep things in a sensical order. – Gregor Thomas May 13 '21 at 03:57
  • Hmmm, though I see you're already using `str_sort`, which should be taking care of that... – Gregor Thomas May 13 '21 at 03:59

1 Answers1

4
  • The reason why the column names are not in order is because Day column is of type character instead of numeric. Turning it to numeric class will bring them in required order. Numbers turn into characters because in your data generation code you are using as.data.frame(cbind(....)) where cbind converts data to matrix and as matrix can have data of only type it would turn numbers into character. Instead you should have used data.frame(....) which would have kept the type of classes intact.

  • To combine day columns together with similar values I use rleid after creating a unique key from the values in each day.

The function that you can use is -

library(tidyverse)
library(data.table)
library(flextable)

Shop_fcn <- function(data){
  Shop_list %>%
    group_by(Day = as.numeric(Day)) %>%
    mutate(N_nam = n_distinct(Names)) %>%
    group_by(Names, Day, N_nam, Store, Item) %>%
    summarize(n_item = n()) %>%
    group_by(Day, N_nam, Store, Item) %>%
    summarize(n_nam = n(),
              n_item = sum(n_item)) %>%
    ungroup -> tmp
  
  tmp %>%
    group_by(Day) %>%
    summarise(txt = paste(n_nam, n_item, Store, Item, sep = '-', collapse = ',')) %>%
    mutate(grp = rleid(txt)) %>%
    select(-txt) %>%
    left_join(tmp, by = 'Day') %>%
    group_by(grp) %>%
    mutate(pct = round(n_nam/N_nam*100,digits = 1),
           txt = paste0( n_nam, " (", pct, "%)"),
           Day_n = if(n_distinct(Day) > 1) sprintf('Day %s - %s (N = %s)', first(Day), last(Day), N_nam) else sprintf('Day %s (N=%s)', Day, N_nam)) %>% 
    ungroup %>% 
    select(Day_n, Store, Item, txt) %>%
    pivot_wider(values_from = txt, names_from = Day_n, values_fn = first, values_fill = '') %>%
    arrange(Store, Item) %>% 
    group_by(Store) %>% 
    mutate(Store = if_else(row_number() != 1, "", Store)) %>%
    ungroup()
}

For the data in your previous post this returns -

Shop_day<- Shop_list %>% Shop_fcn
flextable(Shop_day)

enter image description here

For the data in this post it returns -

enter image description here

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213