3

Here's a sample data set to give context to my problem:

# Sample data: three factor columns (x1, x2, x3), each holding 50 draws with
# replacement from the letters a-f. The levels are fixed to a-f so a letter
# that happens not to be drawn is still a level of the factor.
df <- tibble(
  x1 = factor(sample(letters[1:6], 50, replace = TRUE), levels = letters[1:6]),
  x2 = factor(sample(letters[1:6], 50, replace = TRUE), levels = letters[1:6]),
  x3 = factor(sample(letters[1:6], 50, replace = TRUE), levels = letters[1:6])
)

# Preview the first rows.
head(df)

I wrote the following function to generate a new tibble that I want to use in a ggplot2::ggplot() call:


#' Summarise one column of a data frame for a percentage bar chart
#'
#' @param dsn A data frame or tibble.
#' @param pvar The column to summarise, given as a bare (unquoted) name.
#' @return An ungrouped tibble with one row per value of `pvar` and the
#'   columns `n` (row count), `pct` (share of all rows, in percent),
#'   `pct_lab` (`pct` formatted as text with a trailing "%") and `pct_pos`
#'   (`pct + 0.6`, used as the y position of the label).
plot_data_prep <- function(dsn, pvar) {
  pvar <- enquo(pvar)

  dsn %>%
    # Drop any grouping the caller left on `dsn` so the percentages are
    # always taken over the whole table.
    ungroup() %>%
    # Pass the column to count() itself: count() only applies `.drop` to
    # columns it receives through `...`, so `.drop = FALSE` after a separate
    # group_by() would not keep factor levels that have no rows.
    count(!!pvar, .drop = FALSE) %>%
    mutate(
      pct = n / sum(n) * 100,
      pct_lab = paste0(format(pct, digits = 1), "%"),
      pct_pos = pct + 0.6
    )
}

plot_data_prep(df, x3)

I can use this user-defined function when I make individual plots:


# Bar chart of the x3 percentages, with each percentage label drawn at
# pct_pos (slightly above the top of its bar).
plot_data_prep(df, x3) %>%
  ggplot(aes(x = x3, y = pct)) +
  geom_col() +
  geom_text(aes(y = pct_pos, label = pct_lab))

but when I try to loop through the variables 'x1', 'x2', and 'x3' in the dataframe, 'df', using my plot_data_prep() function to generate separate plots for each variable, I get an error: 'Error: Column i is unknown'


# Column names of df, as character strings.
df_names <- names(df)

# Attempt to draw one bar chart per column of df. As written, this loop is
# the code that produces 'Error: Column i is unknown'.
for (i in df_names){
    # NOTE(review): plot_data_prep() captures its second argument with
    # enquo(), so it receives the symbol `i` rather than the string stored
    # in `i`; group_by() then looks for a column literally named "i".
    plot <- ggplot(plot_data_prep(df, i), mapping = aes_string(x=i, y='n')) +
      geom_bar(stat = 'identity') +
      # NOTE(review): aes(x = i) presumably maps x to the string held in `i`
      # rather than to the column of that name -- confirm.
      geom_text(aes(x = i, y = pct_pos, label = pct_lab ))

  print(plot)
}

I'm hoping someone might be able to help me understand why my for loop index variable 'i' is not resolving the way I need it to in order to make my code work.

Thanks.

HumanityFirst
  • 305
  • 1
  • 8

1 Answer

3

Change your function to accept string input instead.

library(dplyr)
library(ggplot2)
library(rlang)

#' Summarise one column of a data frame for a percentage bar chart
#'
#' @param dsn A data frame or tibble.
#' @param pvar The name of the column to summarise, as a single string.
#' @return An ungrouped tibble with one row per value of the column and the
#'   columns `n` (row count), `pct` (share of all rows, in percent),
#'   `pct_lab` (`pct` formatted as text with a trailing "%") and `pct_pos`
#'   (`pct + 0.6`, used as the y position of the label).
plot_data_prep <- function(dsn, pvar) {
  # Fail early with a readable message instead of a tidy-eval error.
  stopifnot(is.character(pvar), length(pvar) == 1L, pvar %in% names(dsn))

  dsn %>%
    # Drop any grouping the caller left on `dsn` so the percentages are
    # always taken over the whole table.
    ungroup() %>%
    # Pass the column to count() itself: count() only applies `.drop` to
    # columns it receives through `...`, so `.drop = FALSE` after a separate
    # group_by() would not keep factor levels that have no rows.
    count(!!sym(pvar), .drop = FALSE) %>%
    mutate(
      pct = n / sum(n) * 100,
      pct_lab = paste0(format(pct, digits = 1), "%"),
      pct_pos = pct + 0.6
    )
}

Confirm it works:

# Quick check: summarise column x1, passing its name as a string.
plot_data_prep(dsn = df, pvar = "x1")

Now you can loop over the names and store the plots in a list.

df_names <- names(df)

# One slot per column, preallocated and named after the column, so a plot
# can be fetched by position (plot_list[[1]]) or by name (plot_list[["x1"]]).
plot_list <- vector("list", length(df_names))
names(plot_list) <- df_names

for (i in seq_along(df_names)) {
  col_name <- df_names[i]
  # Turn the column name into a symbol so it can be unquoted inside aes().
  col_sym <- sym(col_name)

  # Bars are drawn at `pct` (not `n`) so they share a scale with the label
  # position `pct_pos`, which is defined as pct + 0.6.
  col_plot <- ggplot(plot_data_prep(df, col_name), aes(x = !!col_sym, y = pct)) +
    geom_col() +
    geom_text(aes(y = pct_pos, label = pct_lab))

  plot_list[[i]] <- col_plot
  print(col_plot)
}

Individual plots can be accessed with plot_list[[1]], plot_list[[2]], etc.

Ronak Shah
  • 377,200
  • 20
  • 156
  • 213