1

I use this example dataset :

gene smp1_A smp1_B smp2_A smp2_B smp3_A smp3_B smp4_A smp4_B
geneA 10 12 30 33 26 22 44 42
geneB 15 13 11 16 15 16 21 26

I would like to plot smp1_A vs smp1_B, smp2_A vs smp2_B... = 4 plots
I would like to have a PDF with 2 pages: plot1 and plot2 on the first page, and plot3 and plot4 on the second page.
(Of course, I have a lot more plots in my real dataset).

library(ggplot2)
library(ggpubr)

# Read the example table; header=T takes the column names from the first line.
data = read.table('test_data.txt',header=T)
# Sample prefixes; "_A" / "_B" is appended below to form the column names.
samples = list('smp1','smp2','smp3','smp4')

# Build one scatter plot per sample and store it in a variable named plot<i>.
for (i in 1:length(samples)){ 

    # Single-bracket indexing returns a one-element list, not a string;
    # paste() below still produces names such as "smp1_A".
    smp = samples[i]
    smpA = paste(smp,"A",sep="_")
    smpB = paste(smp,"B",sep="_")
        
    # NOTE(review): aes() captures data[,smpA] / data[,smpB] as expressions;
    # they are presumably only evaluated when the plot is drawn, i.e. after
    # the loop has left smpA / smpB set to the last sample. This is the likely
    # cause of the identical plots described below -- confirm against the
    # ggplot2 documentation.
    plot = ggplot(data, aes(x=data[,smpA], y=data[,smpB])) + geom_point()

    # I can't add the plot to a PDF in a loop, I have to generate it at the end
    # so I need to create a new variable each iteration to not overwrite the previous one 
    # I do it with assign

    nam <- paste("plot", i, sep = "")
    assign(nam, plot)
}

# at this point, if I try to plot my 4 plots separately, it's working fine.
# I have this 4 variables in my env : plot1, plot2, plot3, plot4 

# But now when I try to create my PDF I get 4 times the same plot and I can't figure out which one is it. 
# Arrange two plots side by side per page.
page1 = ggarrange(plot1,plot2, ncol=2, nrow=1)
page2 = ggarrange(plot3,plot4, ncol=2, nrow=1)
plots = list(page1, page2)
pdf('test_plots.pdf')
# NOTE(review): the bare `plots` relies on top-level auto-printing to draw the
# pages; under source() or inside a function nothing would be drawn -- verify
# how this script is run.
plots
dev.off()

As I said in the comments in my code, when I print my plots separately it works, but when I combine them in a PDF I get the same plot 4 times.
I don't understand where my mistake is.

Elysire
  • 693
  • 10
  • 23
  • 1
    In general, try to avoid using for loops and assign; this is why you get the same plot. https://stackoverflow.com/questions/26235825/for-loop-only-adds-the-final-ggplot-layer – StupidWolf Sep 25 '20 at 12:54

3 Answers3

2

I would suggest two approaches. You can reshape your data to long format and use facets, or you can split your reshaped data and use a function to create the plots in the desired order. Here is the code for both options. The first option uses facets:

library(tidyverse)
#Code option 1
# Draws one facet per sample (x = type A/B, y = value, colored by gene) and
# saves the figure to Myplot.pdf.
# NOTE(review): `df` is not defined in this snippet; presumably it is the
# question's table (one `gene` column plus <sample>_<type> columns) -- confirm.
#Reshape data
facet_plot <- df %>%
  pivot_longer(-gene) %>%
  #Separate sample type
  separate(name, into = c('sample', 'type'), sep = '_') %>%
  ggplot(aes(x = type, y = value, color = gene)) +
  geom_point() +
  facet_wrap(. ~ sample, scales = 'free') +
  theme_bw()
# Show the figure, as the original top-level expression did.
print(facet_plot)
# ggsave() is an ordinary function call, not a plot layer: adding it to the
# plot with `+` is rejected by recent ggplot2 releases, so call it separately
# and pass the plot explicitly.
ggsave(filename = 'Myplot.pdf', plot = facet_plot,
       width = 35, height = 18, units = 'cm')

The output would be this and saved in the pdf Myplot.pdf:

enter image description here

The second option is to process the data and create a key based on how many plots you want on each page. Here is the code:

#Code option 2
# Reshapes the table to long format, assigns each sample to a page (Key) and
# splits the data into one data frame per page.
# NOTE(review): `df` is not defined in this snippet; presumably it is the
# question's table -- confirm.
#Process data
dfp <- df %>%
  pivot_longer(-gene) %>%
  #Separate sample type
  separate(name, into = c('sample', 'type'), sep = '_')
#Keys
# Number of samples (facets) to place on each page.
plots_per_page <- 2L
dfk <- data.frame(sample = unique(dfp$sample))
# Consecutive samples share a key: 1, 1, 2, 2, ... This works for any number
# of samples, not only the four in the example.
dfk$Key <- (seq_len(nrow(dfk)) - 1L) %/% plots_per_page + 1L
#Match
# Name the join column explicitly instead of relying on the implicit
# common-column match (which also emits a message).
dfp <- dfp %>% left_join(dfk, by = 'sample')
#Create list
List <- split(dfp, dfp$Key)
#Function for plot
# myplot(x): builds a scatter plot of `value` against `type`, colored by
# `gene`, with one free-scaled facet per `sample`. `x` is one element of
# `List` (the long-format rows belonging to a single page).
myplot <- function(x) {
  ggplot(x, aes(x = type, y = value, color = gene)) +
    geom_point() +
    facet_wrap(. ~ sample, scales = 'free') +
    theme_bw()
}
#Apply
# One plot object per page, in the same order (and with the same names) as List.
List2 <- lapply(List, myplot)

The slides in the final pdf can be obtained with this:

#Export
# Writes every element of List2 to its own page of Myexample.pdf.
pdf('Myexample.pdf', width = 14)
# Iterate over the list itself rather than 1:length(List2): with an empty
# list, 1:0 would yield c(1, 0) and the subscript would fail.
for (page in List2) {
  # Plots must be printed explicitly inside a loop to be drawn on the device.
  print(page)
}
dev.off()

And it will look like this:

enter image description here

enter image description here

The resulting PDF will have two pages.

Duck
  • 39,058
  • 13
  • 42
  • 84
1

You can try keeping the plots in a list using lapply:

# Simulated stand-in for the question's data: 8 numeric columns of 10 random
# draws each (column k is drawn with mean k). vapply() fixes the result shape,
# unlike sapply().
samples <- c('smp1', 'smp2', 'smp3', 'smp4')
data <- data.frame(vapply(1:8, function(m) rnorm(10, mean = m), numeric(10)))
colnames(data) <- paste(rep(samples, 2),
                        rep(c("A", "B"), each = length(samples)),
                        sep = "_")

# One scatter plot (<sample>_A vs <sample>_B) per sample, returned as a list.
# Each plot is built inside its own function call, so its column names stay
# fixed instead of being overwritten by later iterations.
plts <- lapply(samples, function(smp) {
  smpA <- paste(smp, "A", sep = "_")
  smpB <- paste(smp, "B", sep = "_")

  # sym() turns the column-name string into a symbol and !! injects it into
  # aes(). ensym() is meant for capturing function arguments, not for
  # converting a local string.
  ggplot(data, aes(x = !!sym(smpA), y = !!sym(smpB))) +
    geom_point()
})

names(plts) <- paste0("plot", seq_along(plts))

# Two plots side by side per page.
page1 <- ggarrange(plts[[1]], plts[[2]], ncol = 2, nrow = 1)
page2 <- ggarrange(plts[[3]], plts[[4]], ncol = 2, nrow = 1)
pdf('test_plots.pdf')
# Explicit print() draws each page on the device however the code is run.
print(page1)
print(page2)
dev.off()
StupidWolf
  • 45,075
  • 17
  • 40
  • 72
1

If you create your plot this way:

# Same ggplot() call as in the question's loop, plus a title built from the
# two column names so that each plot can be told apart.
plot = ggplot(data, aes(x=data[,smpA], y=data[,smpB])) + 
  geom_point() + 
  ggtitle(paste(smpA, "vs", smpB))

You will see that each plot is different, even if they look the same.


However, I believe your code could be adjusted a bit.

I suggest the following:

# your data
# Inline copy of the question's table: one row per gene, one column per
# <sample>_<letter> measurement; the first line supplies the column names.
data <- read.table(
  header = TRUE,
  text = "gene smp1_A smp1_B smp2_A smp2_B smp3_A smp3_B smp4_A smp4_B
geneA 10 12 30 33 26 22 44 42
geneB 15 13 11 16 15 16 21 26"
)


# libraries
library(ggplot2)
library(patchwork)
library(dplyr)
library(tidyr)


# set up data
# Go long (one row per gene / column name), split names such as "smp1_A" into
# sample and letter, then spread the letters back out so that every
# gene / sample row carries an A value and a B value.
data <- data %>%
  pivot_longer(cols = -gene) %>%
  separate(col = name, into = c("smp", "letter")) %>%
  pivot_wider(
    names_from = letter,
    values_from = value
  )

# create plots
# One row per sample. Inside summarise(), `data` is that sample's nested data
# frame (created by nest_by()), and the list-column `plot` holds its A-vs-B
# scatter plot titled with the sample name.
df_plots <- data %>%
  nest_by(smp) %>%
  summarise(
    plot = list(
      ggplot(data) +
        geom_point(aes(x = A, y = B)) +
        ggtitle(smp)
    ),
    .groups = "drop"
  )

# create custom groups of plots
# Page index per plot: 1, 1, 2, 2, ... (two consecutive plots share a page).
df_plots[["n"]] <- rep(
  seq_len(nrow(df_plots)),
  each = 2,
  length.out = nrow(df_plots)
)

# combine plots together
# Within each page, fold the plots together with `+`; patchwork gives `+` on
# plots the meaning "place side by side in one layout".
df_plots <- summarise(
  group_by(df_plots, n),
  plot = list(Reduce(`+`, plot)),
  .groups = "drop"
)

# print pdf
# Writes each combined page stored in df_plots$plot to its own PDF page.
pdf('test_plots.pdf')
# Print every page explicitly. A bare `pull(df_plots, plot)` only draws through
# top-level auto-printing, which does not happen under source() or inside a
# function and would leave the PDF empty.
for (page in pull(df_plots, plot)) {
  print(page)
}
dev.off()

The solution is flexible no matter how many pages or plots you have. If you want 3 or more plots per page, you just need to replace each = 2 with the number you want.

Edo
  • 7,567
  • 2
  • 9
  • 19