1

I have a dataframe containing results measured through Matthews correlation coefficient, F1 score, and accuracy, and I would like to generate barcharts for each of these metrics in R using ggplot2. I am trying to loop over these three rates, but R does not let me use a variable (this_rate) to select the proper dataframe column for my plot.

Here's my (not working) code:

library("ggplot2")

# Random run identifier, used later when naming the output files
num_to_return <- 1
exe_num <- sample(seq_len(10000), num_to_return)

# Example results: one row per cancer type, one column per metric
data_table <- data.frame(
  cancer_type = c(
    "'aaa", "bbb", "ccc", "ddd", "eee", "fff",
    "ggg", "hhh", "iii", "jjj", "kkk", "lll"
  ),
  MCC = c(
    0.085, 0.051, 0.013, 0.124, 0.156, 0.124,
    0.573, 0.091, 0.513, -0.074, 0.388, 0.305
  ),
  accuracy = c(
    0.095, 0.021, 0.023, 0.224, 0.256, 0.124,
    0.576, 0.051, 0.543, -0.374, 0.588, 0.205
  ),
  F1_score = c(
    0.065, 0.051, 0.073, 0.274, 0.276, 0.127,
    0.577, 0.057, 0.547, -0.574, 0.588, 0.605
  )
)

# Names of the metric columns (every column except cancer_type)
list_of_rates <- names(data_table)[2:4]

# Attempt to draw and save one bar chart per metric column.
# `this_rate` takes each value of `list_of_rates` in turn, i.e. it holds a
# column NAME as a character string, not the column's values.
# NOTE: this loop is the failing reproducer; it stops at the ggplot() call
# below with "Error in -this_rate : invalid argument to unary operator".
for(this_rate in list_of_rates){

    # Fixed y-axis limits, identical for every metric
    ylim_low <- 0
    ylim_upp <- 1
    
    # Intended to sort the rows by the current metric, largest value first
    data_table <- data_table[order(-data_table[c(this_rate)]),]
    # Freeze the current row order as the factor levels of cancer_type
    data_table$"cancer_type_factor" <- factor(data_table$"cancer_type", levels = data_table$"cancer_type")
    data_table$"cancer_type" <- data_table$"cancer_type_factor"
    
    # FAILING LINE: inside aes(), `this_rate` is the character string itself
    # rather than the data column it names, so `-this_rate` applies unary
    # minus to a string and `y=this_rate` does not select the metric column.
    p_this_rate_plot <- ggplot(data_table, aes(x=reorder(cancer_type, -this_rate), y=this_rate, fill=cancer_type)) + geom_bar(stat="identity", color="black",  position=position_dodge())  + ylab(paste0("mean ", this_rate)) + xlab("") + ggtitle("survival binary prediction")  +  theme(plot.title = element_text(hjust = 0.5), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())
    
    # Restrict the y axis to [ylim_low, ylim_upp]
    p_this_rate_plot <- p_this_rate_plot + scale_y_continuous(limits=c(ylim_low, ylim_upp)) #, breaks=c(1:10)) 
    
    # The file name is built from exe_num, which is set once before the loop,
    # so every iteration produces the same file name.
    pdfThisPlotFile <- paste0("this_barplot_", exe_num,".pdf")
    cat("We're going to save the ", pdfThisPlotFile, "file\n")
    # ggsave() is called without a plot argument here
    ggsave(pdfThisPlotFile)
}

And here's the error message:

Error in -this_rate : invalid argument to unary operator

The problem happens with the two occurrences of the this_rate variable in the ggplot() function call: it's apparently impossible to use it to select the right columns in the dataframe. How can I solve this issue?

Thank you

DavideChicco.it
  • 3,318
  • 13
  • 56
  • 84

1 Answer

1

When you pass column names as character strings, use the .data pronoun to subset them inside aes(), e.g. .data[[this_rate]]. The code below works and creates a separate PDF for each entry in list_of_rates.

library(data.table)
library(ggplot2)

# Draw one bar chart per metric column named in `list_of_rates` and save each
# chart to its own PDF file ("this_barplot_<metric>.pdf").
# Expects `data_table` (with a `cancer_type` column plus the metric columns)
# and `list_of_rates` (character vector of metric column names) to exist.

# Freeze the current row order of cancer_type as its factor levels. This is
# done once, before the loop, so the levels do not change between plots.
data_table$cancer_type_factor <- factor(
  data_table$cancer_type,
  levels = data_table$cancer_type
)
data_table$cancer_type <- data_table$cancer_type_factor

# Fixed y-axis limits shared by every plot
ylim_low <- 0
ylim_upp <- 1

for (this_rate in list_of_rates) {
  # `[[` extracts the metric as a plain numeric vector, so unary minus and
  # order() work; rows end up sorted by the metric, largest value first.
  data_table <- data_table[order(-data_table[[this_rate]]), ]

  # `this_rate` is a string, so the column is looked up through the `.data`
  # pronoun instead of being written as a bare name inside aes().
  p_this_rate_plot <- ggplot(
    data_table,
    aes(
      x = reorder(cancer_type, -.data[[this_rate]]),
      y = .data[[this_rate]],
      fill = cancer_type
    )
  ) +
    geom_col(color = "black", position = position_dodge()) +
    ylab(paste0("mean ", this_rate)) +
    xlab("") +
    ggtitle("survival binary prediction") +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    ) +
    # NOTE(review): the example data contains negative values; confirm how
    # values outside these limits should be displayed.
    scale_y_continuous(limits = c(ylim_low, ylim_upp))

  # One file per metric: the metric name is part of the file name
  pdfThisPlotFile <- paste0("this_barplot_", this_rate, ".pdf")
  cat("We're going to save the ", pdfThisPlotFile, "file\n")
  # Pass the plot explicitly rather than relying on the last plot created
  ggsave(pdfThisPlotFile, plot = p_this_rate_plot)
}
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • Thanks, that's exactly what I needed! Another question: do you know how I can keep the colors of the elements consistent through the plots? Now the order of the colors goes from light pink to purple for each plot, but I would like the same elements to keep the same color in each plot. How can I do it? Thanks – DavideChicco.it May 27 '21 at 13:10
  • You can create a named vector that assigns a fixed color to each value and use it for the fill scale in every plot. See this post https://stackoverflow.com/questions/42891307/how-can-i-maintain-a-color-scheme-across-ggplots-while-dropping-unused-levels-i – Ronak Shah May 27 '21 at 13:18