0

I have a data frame of nine columns. Each column represents a different climate model. Now I would like to compare the frequency distributions between the single model outputs.

Here is some code for an reproducible example:

library(ggplot2)
library(gridExtra)

## create random sample data
# create an empty data frame
df <- as.data.frame(matrix(ncol= 9, nrow= 4000))

# fill the data frame with random values
for (i in 1:ncol(df)) {
  df[,i] <- sample(x = c(-3:3), size = 4000, replace = TRUE)
}

# give colnames to the data frame
colnames(df) <- c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6", "Model 7", "Model 8", "Model 9")

In the next step I perform the grid plot by using a for loop:

## histogram plots
hist1.list <- list() # create empty list 

# fill the list with the single histograms
for (i in 1:ncol(df)) {
  hist1.list[[i]] <- ggplot(df, aes(df[,i])) +
    geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
    xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
    ggtitle(colnames(df[i])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))
}

# create the grid plot
do.call(grid.arrange, hist1.list) ############## plots nine times the same data

The output looks as follows: enter image description here

So the same data, that of the last column of the data frame, is displayed in every single histogram. This seems very strange since the indexing in the plot titles worked.

Has anybody an idea, what the problem is here?

Just to make clear what the result is supposed to look like, here is some code with the single steps manually instead of a loop:

#################################### manually without loop
# create the single plot objects
p1 <- ggplot(df, aes(df[,1])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[1])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p2 <- ggplot(df, aes(df[,2])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[2])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p3 <- ggplot(df, aes(df[,3])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[3])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p4 <- ggplot(df, aes(df[,4])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[4])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p5 <- ggplot(df, aes(df[,5])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[5])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p6 <- ggplot(df, aes(df[,6])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[6])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p7 <- ggplot(df, aes(df[,7])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[7])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p8 <- ggplot(df, aes(df[,8])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[8])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

p9 <- ggplot(df, aes(df[,9])) +
  geom_histogram(aes(y = stat(count) / sum(count)), binwidth= 0.4, color= "black", fill= "grey") + 
  xlim(-3.7,3.7) + ylim(0,0.2) + xlab("Index Value") + ylab("Relative Frequency") + 
  ggtitle(colnames(df[9])) + theme_bw() + theme(plot.title = element_text(hjust = 0.5))

hist2.list <- list(p1,p2,p3,p4,p5,p6,p7,p8,p9) # combine the single plot objects to a list

# plot
do.call(grid.arrange, hist2.list)  # does what it is supposed to do

This leads to the following output: enter image description here

This is how it is supposed to look like, nine different histograms.

I cannot see where the error is in the for loop above.

climsaver
  • 341
  • 2
  • 15
  • 1
    Use lapply instead of forloop to create the ggplots. – zx8754 Sep 01 '21 at 10:01
  • 2
    The issue arises because of ggplot2's lazy evaluation. See https://stackoverflow.com/questions/26235825/for-loop-only-adds-the-final-ggplot-layer for more detailed explanation. – stefan Sep 01 '21 at 10:02
  • Relevant post: https://stackoverflow.com/a/31994539/680068 – zx8754 Sep 01 '21 at 10:04
  • Thanks stefan for your comment. By viewing that question I found a super simple solution for the problem: use ```aes_()``` instead of ```aes()``` in the loop, then it overcomes the ggplot2's lazy evaluation, which I did not know about it exists. – climsaver Sep 01 '21 at 10:11

0 Answers0