I am a novice coder and have been trying to understand the code posted here: Forest plot with table ggplot coding
I am hoping to use the script to display my own univariate analysis results for a project. I want the script to read the data from a csv file with the columns: "Predictor", "N", "rr", "rrlow", "rrhigh", and "arr". There are in total 19 variables ("Predictors") that I need to display. I have altered the script to read in the values into a single dataframe (rather than having a separate forestdf and fplottable like in the linked thread). However, I am getting multiple "replacement has x rows, data has y".
Here is the code in question:
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
levels(forestdf$Predictor)
forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3) +
xlab("Variable") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = rev(forestdf$Predictor)) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
forestdf$colour <- rep(c("white", "gray95"), length.out=19)
data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), hjust = 0) +
geom_text(aes(x = 5, label = N)) +
geom_text(aes(x = 7, label = arr), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
And the errors I have been receiving:
> ###dataframe
> library(ggplot2)
> library(tidyr)
> library(grid)
> library(gridExtra)
> library(forcats)
>
> forestdf<- read.csv("UnivariateAnalysis2.csv",header=T)
> forestdf$Predictor <- factor(forestdf$Predictor,levels = forestdf$Predictor)
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) :
replacement has 0 rows, data has 19
> levels(forestdf$Predictor)
NULL
> forestdf$colour <- rep(c("white", "gray95"), length.out = 19)
> p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
+ geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
+ geom_pointrange(shape = 22, fill = "black") +
+ geom_vline(xintercept = 1, linetype = 3) +
+ xlab("Variable") +
+ ylab("Hazard Ratio with 95% Confidence Interval") +
+ theme_classic() +
+ scale_colour_identity() +
+ scale_y_discrete(limits = rev(forestdf$Predictor)) +
+ scale_x_log10(limits = c(0.25, 4),
+ breaks = c(0.25, 0.5, 1, 2, 4),
+ labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
+ theme(axis.text.y = element_blank(), axis.title.y = element_blank())
>
> forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
Error in `$<-.data.frame`(`*tmp*`, Predictor, value = integer(0)) :
replacement has 0 rows, data has 19
> forestdf$colour <- rep(c("white", "gray95"), length.out=19)
>
> data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
+ geom_hline(aes(yintercept = 1, colour = colour), size = 7) +
+ geom_text(aes(x = 0, label = Predictor), hjust = 0) +
+ geom_text(aes(x = 5, label = N)) +
+ geom_text(aes(x = 7, label = arr), hjust = 1) +
+ scale_colour_identity() +
+ theme_void() +
+ theme(plot.margin = margin(5, 0, 35, 0))
>
> grid.arrange(data_table,p, ncol = 2)
Error in FUN(X[[i]], ...) : object 'Predictor' not found
I greatly appreciate any help or suggestions you may provide.
Thanks!
EDIT:
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=TRUE)
names(forestdf)[1]<-"Predictor"
forestdf$Predictor <- factor(forestdf$Predictor)
forestdf$colour <- rep(c("white", "gray95"), length.out = length(unique(unlist(forestdf[c("Predictor")]))))
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3, colour = "red") +
xlab("Hazard Ratio") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = rev(forestdf$Predictor)) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
data_table <- ggplot(data = forestdf, aes(y = Predictor)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), hjust = 0) +
geom_text(aes(x = 3, label = N)) +
geom_text(aes(x = 7, label = arr), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
I have made some changes as per IRTFM (thank you!) and it now produces a plot and table. I'm not sure why but it wasn't reading the csv correctly. My main issues now are the following:
- The alternating grey and white bars do not alternate correctly on the table side
- The header for the columns does not show up on the table
- The table is not aligned with the forestplot (ie. top row's forest plot is not the correct forest plot for Albumin) Example Plot
EDIT2:
I was able to fix the alternating colours and alignment with the forestplot. My issue now is that the column titles I've made are now cut off: New Plot. Also, how would I go about only bolding the values with an asterisk?
###dataframe
library(ggplot2)
library(tidyr)
library(grid)
library(gridExtra)
library(forcats)
forestdf<- read.csv("UnivariateAnalysis2.csv",header=TRUE)
names(forestdf)[1]<-"Predictor"
forestdf$Predictor <- rev(factor(forestdf$Predictor))
forestdf$colour <- rep(c("white", "gray95"), length.out = length(unique(unlist(forestdf[c("Predictor")]))))
p <- ggplot(forestdf, aes(x = rr, y = Predictor, xmin = rrlow, xmax = rrhigh)) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_pointrange(shape = 22, fill = "black") +
geom_vline(xintercept = 1, linetype = 3, colour = "red") +
xlab("Hazard Ratio") +
ylab("Hazard Ratio with 95% Confidence Interval") +
theme_classic() +
scale_colour_identity() +
scale_y_discrete(limits = forestdf$Predictor) +
scale_x_log10(limits = c(0.25, 4),
breaks = c(0.25, 0.5, 1, 2, 4),
labels = c("0.25", "0.5", "1", "2", "4"), expand = c(0,0)) +
theme(axis.text.y = element_blank(), axis.title.y = element_blank())
#forestdf$Predictor <- factor(forestdf$Predictor, rev(levels(forestdf$Predictor)))
data_table <- ggplot(data = forestdf, aes(y = rev(factor(Predictor)))) +
geom_hline(aes(yintercept = Predictor, colour = colour), size = 7) +
geom_text(aes(x = 0, label = Predictor), show.legend=TRUE, hjust = 0) +
geom_text(aes(x = 3, label = N)) +
geom_text(aes(x = 5.5, label = arr), hjust = 1) +
geom_text(aes(x = 7, label = PVALUE), hjust = 1) +
geom_text(aes(x = 0, y = 20, label = "Predictor"), hjust = 0) +
geom_text(aes(x = 3, y= 20, label = "N")) +
geom_text(aes(x = 5, y= 20, label = "95% CI"), hjust = 1) +
geom_text(aes(x = 7, y= 20, label = "P Value"), hjust = 1) +
scale_colour_identity() +
theme_void() +
theme(plot.margin = margin(5, 0, 35, 0))
grid.arrange(data_table,p, ncol = 2)
Thanks!