Adapting Forest Plot R Script

Question

I am a novice coder and have been trying to understand the code posted here: Forest plot with table ggplot coding

I am hoping to use the script to display my own univariate analysis results for a project. I want the script to read the data from a CSV file with the columns "Predictor", "N", "rr", "rrlow", "rrhigh", and "arr". There are 19 variables ("Predictors") in total that I need to display. I have altered the script to read the values into a single data frame (rather than having a separate forestdf and fplottable as in the linked thread). However, I am getting multiple "replacement has x rows, data has y" errors.

Here is the code in question:

###dataframe
# Reads univariate results from a CSV and draws a text table (data_table)
# next to a forest plot (p), arranged side by side at the end.
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)

# NOTE(review): `T` is an ordinary variable that can be reassigned; `TRUE` is
# the safe spelling.
forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
# Uses every Predictor value as its own level, which keeps the file order.
# NOTE(review): in the console log further down, this assignment fails with
# "replacement has 0 rows, data has 19" and levels() returns NULL, so
# forestdf$Predictor was presumably missing when it ran - check
# names(forestdf) for the name the first column was actually read under.
forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
# Prints the levels as a quick check of the line above.
levels(forestdf$Predictor)  
# Alternating stripe colours; the row count is hard-coded to 19.
forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
# Forest plot: point = rr, horizontal range = rrlow..rrhigh, one row per
# Predictor.
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
  # NOTE(review): yintercept is the constant 1, so every row's line is drawn
  # at the same y position rather than one line per Predictor.
  geom_hline(aes(yintercept = 1, colour = colour), size = 7) + 
  geom_pointrange(shape = 22, fill = "black") +
  # Dotted reference line at x = 1.
  geom_vline(xintercept = 1, linetype = 3) +
  # NOTE(review): the x axis carries rr but is titled "Variable"; the y title
  # set here is removed again by the theme() call that ends this chain.
  xlab("Variable") +
  ylab("Hazard Ratio with 95% Confidence Interval") +
  theme_classic() +
  # Use the literal colour names stored in forestdf$colour.
  scale_colour_identity() +
  # Reverse the row order relative to the data frame.
  scale_y_discrete(limits = rev(forestdf$Predictor)) +
  # Log-scaled x axis fixed to 0.25-4 with no padding at either end.
  scale_x_log10(limits = c(0.25, 4), 
                breaks = c(0.25, 0.5, 1, 2, 4), 
                labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())

# Reverse the level order before building the table panel.
forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
# Same value as assigned to forestdf$colour above.
forestdf$colour <- rep(c("white", "gray95"), length.out=19)

# Table panel: Predictor left-aligned at x = 0, N centred at x = 5 and arr
# right-aligned at x = 7, with no axes or background (theme_void).
data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
  # NOTE(review): constant yintercept again, see the forest plot above.
  geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
  geom_text(aes(x = 0, label = Predictor), hjust = 0) +
  geom_text(aes(x = 5, label = N)) +
  geom_text(aes(x = 7, label = arr), hjust = 1) +
  scale_colour_identity() +
  theme_void() + 
  # Bottom margin of 35; presumably leaves room matching the forest plot's
  # x axis - TODO confirm.
  theme(plot.margin = margin(5, 0, 35, 0))

# Table on the left, forest plot on the right.
grid.arrange(data_table,p, ncol = 2)

And the errors I have been receiving:

> ###dataframe
> library(ggplot2)
> library(tidyr)
> library(grid)
> library(gridExtra)
> library(forcats)
> 
> forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
> forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) : 
  replacement has 0 rows, data has 19
> levels(forestdf$Predictor)  
NULL
> forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
> p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
+   geom_hline(aes(yintercept = 1, colour = colour), size = 7) + 
+   geom_pointrange(shape = 22, fill = "black") +
+   geom_vline(xintercept = 1, linetype = 3) +
+   xlab("Variable") +
+   ylab("Hazard Ratio with 95% Confidence Interval") +
+   theme_classic() +
+   scale_colour_identity() +
+   scale_y_discrete(limits = rev(forestdf$Predictor)) +
+   scale_x_log10(limits = c(0.25, 4), 
+                 breaks = c(0.25, 0.5, 1, 2, 4), 
+                 labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
+   theme(axis.text.y = element_blank(), axis.title.y = element_blank())
> 
> forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) : 
  replacement has 0 rows, data has 19
> forestdf$colour <- rep(c("white", "gray95"), length.out=19)
> 
> data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
+   geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
+   geom_text(aes(x = 0, label = Predictor), hjust = 0) +
+   geom_text(aes(x = 5, label = N)) +
+   geom_text(aes(x = 7, label = arr), hjust = 1) +
+   scale_colour_identity() +
+   theme_void() + 
+   theme(plot.margin = margin(5, 0, 35, 0))
> 
> grid.arrange(data_table,p, ncol = 2)
Error in FUN(X[[i]], ...) : object 'Predictor' not found

I greatly appreciate any help or suggestions you may provide.

Thanks!

EDIT:

### dataframe
# Draws a text table (left) beside a forest plot (right) from a single CSV.
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)

# Load the results and force the first column's name to "Predictor".
forestdf <- read.csv("UnivariateAnalysis2.csv", header = TRUE)
names(forestdf)[1] <- "Predictor"
forestdf$Predictor <- factor(forestdf$Predictor)

# Alternating stripe colours, one entry per distinct predictor.
n_predictors <- length(unique(forestdf[["Predictor"]]))
forestdf$colour <- rep(c("white", "gray95"), length.out = n_predictors)

# Tick positions of the log axis; they also define its limits and labels.
hr_ticks <- c(0.25, 0.5, 1, 2, 4)

# Forest plot: point = rr, horizontal range = rrlow..rrhigh.
p <- ggplot(
  forestdf,
  aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)
) +
  geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
  geom_pointrange(shape = 22, fill = "black") +
  geom_vline(xintercept = 1, linetype = "dotted", colour = "red") +
  labs(x = "Hazard Ratio", y = "Hazard Ratio with 95% Confidence Interval") +
  theme_classic() +
  scale_colour_identity() +
  scale_y_discrete(limits = rev(forestdf$Predictor)) +
  scale_x_log10(
    limits = range(hr_ticks),
    breaks = hr_ticks,
    labels = as.character(hr_ticks),
    expand = c(0, 0)
  ) +
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())

# Flip the level order before the table panel is built.
forestdf$Predictor <- factor(
  forestdf$Predictor,
  levels = rev(levels(forestdf$Predictor))
)

# Table panel: Predictor at x = 0 (left-aligned), N at x = 3, arr at x = 7
# (right-aligned).
data_table <- ggplot(forestdf, aes(y = Predictor)) +
  geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
  geom_text(aes(x = 0, label = Predictor), hjust = 0) +
  geom_text(aes(x = 3, label = N)) +
  geom_text(aes(x = 7, label = arr), hjust = 1) +
  scale_colour_identity() +
  theme_void() +
  theme(plot.margin = margin(t = 5, r = 0, b = 35, l = 0))

# Table on the left, forest plot on the right.
grid.arrange(data_table, p, ncol = 2)

I have made some changes as suggested by IRTFM (thank you!), and the script now produces a plot and a table. I'm not sure why, but it wasn't reading the CSV correctly. My main issues now are the following:

The alternating grey and white bars do not alternate correctly on the table side.
The column headers do not show up on the table.
The table is not aligned with the forest plot (i.e., the top row's forest plot is not the correct one for Albumin). Example Plot

EDIT2:

I was able to fix the alternating colours and the alignment with the forest plot. My issue now is that the column titles I've added are cut off: New Plot. Also, how would I go about bolding only the values that have an asterisk?

### dataframe
# Forest plot (right) with an aligned text table (left), both drawn from one
# CSV. Columns used: Predictor, N, rr, rrlow, rrhigh, arr and PVALUE.
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)

forestdf <- read.csv("UnivariateAnalysis2.csv", header = TRUE)
names(forestdf)[1] <- "Predictor"

# Reverse the factor LEVELS, not the values. ggplot2 draws the first level at
# the bottom, so this puts the first CSV row on top while every label stays on
# the same row as its own N / rr / CI / p-value. Calling rev() on the column
# itself would pair each label with another row's numbers.
forestdf$Predictor <- factor(
  forestdf$Predictor,
  levels = rev(unique(as.character(forestdf$Predictor)))
)

# One stripe colour per row, alternating white and light grey.
forestdf$colour <- rep(c("white", "gray95"), length.out = nrow(forestdf))

# Both panels use the same y limits so that their rows line up. The extra top
# slot holds the column headers; giving the headers a real position on the
# scale keeps their text fully inside the panel.
header_row <- "__header__"
y_limits <- c(levels(forestdf$Predictor), header_row)

# Returns "bold" for labels that contain an asterisk and "plain" otherwise.
star_face <- function(x) {
  ifelse(grepl("*", as.character(x), fixed = TRUE), "bold", "plain")
}

# Forest plot: point = rr, horizontal range = rrlow..rrhigh. No y title is
# set because the theme() call at the end removes it anyway.
p <- ggplot(
  forestdf,
  aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)
) +
  geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
  geom_pointrange(shape = 22, fill = "black") +
  geom_vline(xintercept = 1, linetype = 3, colour = "red") +
  xlab("Hazard Ratio") +
  theme_classic() +
  scale_colour_identity() +
  # breaks excludes the header slot so it gets no tick mark.
  scale_y_discrete(limits = y_limits, breaks = levels(forestdf$Predictor)) +
  scale_x_log10(
    limits = c(0.25, 4),
    breaks = c(0.25, 0.5, 1, 2, 4),
    labels = c("0.25", "0.5", "1", "2", "4"),
    expand = c(0, 0)
  ) +
  theme(axis.text.y = element_blank(), axis.title.y = element_blank())

# Table panel. Headers are drawn once each with annotate(); a geom_text()
# layer with constant aesthetics would repeat them for every data row. Each
# header uses the same x position and justification as the column below it.
data_table <- ggplot(forestdf, aes(y = Predictor)) +
  geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
  geom_text(aes(x = 0, label = Predictor), hjust = 0) +
  geom_text(aes(x = 3, label = N)) +
  geom_text(aes(x = 5.5, label = arr, fontface = star_face(arr)), hjust = 1) +
  geom_text(
    aes(x = 7, label = PVALUE, fontface = star_face(PVALUE)),
    hjust = 1
  ) +
  annotate("text", x = 0, y = header_row, label = "Predictor", hjust = 0) +
  annotate("text", x = 3, y = header_row, label = "N") +
  annotate("text", x = 5.5, y = header_row, label = "95% CI", hjust = 1) +
  annotate("text", x = 7, y = header_row, label = "P Value", hjust = 1) +
  scale_colour_identity() +
  scale_y_discrete(limits = y_limits) +
  theme_void() +
  theme(plot.margin = margin(5, 0, 35, 0))

# Table on the left, forest plot on the right.
grid.arrange(data_table, p, ncol = 2)

Thanks!

It would be unusual that this would get processed correctly: `factor(forestdf$Predictor,levels = forestdf$Predictor)`. If you are not going to have different levels than the unique values in a vector, then just do this: `factor(forestdf$Predictor)`. The levels will be automatically set. You might want to look at the `forestdf` object carefully. Make sure there is a `Predictor` column spelled exactly that way. — IRTFM, May 21 '21 at 06:43
Thanks @IRTFM. It doesn't seem like it was reading the csv file correctly. I was able to create a plot, but I'm now having issues with aligning the table with the plot. — NewAtStats, May 21 '21 at 12:06
I've fixed the alignment with the table and plot so it is correct now. I can't seem to get the margins to include the column headings though (see edit2). Also, how would I go about only bolding the values that have an asterisk? — NewAtStats, May 21 '21 at 15:08

Adapting Forest Plot R Script

0 Answers0