2

I am trying to plot the median time, denoted ee$rfs, per value of ee$Ki67. Ki67 is a marker of how many cells proliferate in a tumor sample, i.e. it is also a continuous covariate.

I have attached my data ee below. I am looking for a solution using either dplyr or ggplot. I have searched for help, such as here, but without luck.

My current plot:

enter image description here

With the following

# Scatter plot of rfs against Ki67: points coloured by WHO, one panel per EOR value.
ggplot(data = ee, mapping = aes(x = Ki67, y = rfs)) +
  geom_point(
    mapping = aes(colour = as.factor(WHO)),
    size = 6,
    shape = 20,
    alpha = 0.5
  ) +
  facet_wrap(. ~ EOR)

I have tried variations of mutate, group_by, filter and geom_line. I tried geom_smooth but I am concerned that this draws the best fit (?) and not the median.

# Example data (dput output): a data.frame with 193 rows and four columns:
#   rfs  - numeric, the time variable whose median is wanted
#   Ki67 - numeric, the continuous covariate plotted on the x axis
#   EOR  - integer, takes the values 0 and 1 (used for faceting)
#   WHO  - integer, takes the values 1, 2 and 3 (used for colouring)
ee <- structure(list(rfs = c(26.4, 84, 42, 13.2, 18, 33.6, 39.6, 9.6, 
16.8, 19.2, 10.8, 7.2, 10.8, 76.8, 58.8, 31.2, 18, 182.4, 20.4, 
13.2, 8.4, 2.4, 123.6, 60, 100.8, 82.8, 12, 60, 18, 29.8, 68.3, 
27.2, 18.7, 64.9, 6.5, 50.3, 46.4, 29.9, 31.4, 42.7, 31.1, 98.1, 
80.9, 24.1, 49.2, 12.2, 20.5, 62.8, 9, 69, 30, 91.79, 8.57, 60.88, 
11.5, 56.87, 49.05, 16.95, 4.5, 8.74, 60.06, 37.85, 90.12, 123.76, 
47.41, 55.92, 3.09, 27.34, 4.99, 28.06, 26.71, 23.03, 6.34, 79.34, 
2.5, 19.32, 9.23, 2.6, 4.34, 45.9, 29.34, 8.58, 29.41, 30.72, 
15.97, 37.06, 17.05, 14.29, 5.95, 3.42, 60.58, 19.81, 72.91, 
16.99, 7.29, 74.32, 3.35, 39.95, 4.4, 15.44, 2.5, 28.32, 40.15, 
57.69, 27.86, 21.59, 10.09, 8.18, 21.59, 3.19, 3.12, 8.25, 14, 
14, 2, 23, 15, 9, 9, 28, 14, 23, 21, 26, 24, 63, 25, 34, 26.83333333, 
32.4, 28.76666667, 32.93333333, 32.16666667, 10.06666667, 46.66666667, 
58.06666667, 29.06666667, 30.33333333, 26.56666667, 24.23333333, 
36.5, 31.73333333, 5.733333333, 44.16666667, 46.93333333, 48.5, 
64.7, 37.16666667, 21.56666667, 14.8, 53.83333333, 59.06666667, 
8.7, 13.43333333, 12.56666667, 65.73333333, 54.83333333, 30.63333333, 
5, 65, 7, 12, 14, 6, 15, 36, 99, 16, 87, 6, 33, 3, 3, 11, 24, 
24, 15, 10, 28, 18, 14, 29, 20, 12, 42, 31, 14, 18, 29, 39, 62, 
62, 46), Ki67 = c(25, 15, 8, 15, 18, 5, 2, 18, 6, 12, 12, 13, 
13, 15, 20, 3, 30, 10, 18, 20, 7, 17, 5, 3, 20, 5, 20, 10, 2, 
5, 4, 7, 8, 12, 40, 17, 3, 5, 20, 5, 22, 6, 6, 18, 15, 12, 15, 
5, 15, 15, 3, 4, 10, 5, 2, 4, 3, 5, 7, 7, 4, 2, 4, 3, 20, 15, 
25, 20, 10, 15, 15, 8, 15, 8, 8, 10, 22, 18, 50, 30, 30, 45, 
50, 30, 8, 25, 25, 10, 25, 20, 15, 10, 8, 55, 10, 10, 10, 20, 
30, 5, 20, 8, 30, 10, 15, 25, 30, 38, 15, 30, 25, 15, 5, 8, 35, 
9, 14, 2, 1, 1, 20, 30, 2, 8, 2, 16, 20, 23, 4.5, 2.2, 9.43, 
8.95, 6.47, 1.81, 7.27, 12.4, 7.97, 21.99, 8.98, 17.3, 8, 15, 
15, 20, 6, 5, 12.5, 3, 20, 20, 11.5, 2.66, 14.7, 9.13, 5, 5, 
12, 11, 2, 8, 20, 50, 10, 15, 30, 8, 10, 20, 10, 10, 30, 10, 
10, 13, 10, 15, 10, 10, 40, 10, 5, 15, 15, 15, 25, 15, 30, 30, 
8, 30, 15, 20, 13), EOR = c(1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 
1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 
0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 
0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 
0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 
0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 
0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L), WHO = c(2L, 2L, 2L, 3L, 3L, 
2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 2L, 3L, 3L, 3L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 3L, 1L, 2L, 
1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 
2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA, 
-193L))
user213544
  • 2,046
  • 3
  • 22
  • 52
cmirian
  • 2,572
  • 3
  • 19
  • 59
  • Actually, I'm not really sure what median you want to plot exactly. Are you trying to get one median `rfs` value for each unique value of `Ki67`? What should the resulting plot look like exactly? – MrFlick Feb 20 '20 at 21:21
  • Are you simply trying to indicate the median value of rfs by WHO, Ki67, and EOR in the plot you provided? Or are you trying to plot only the median value? Or are you after something else entirely? – Giovanni Colitti Feb 20 '20 at 21:28

2 Answers2

2

If you just want to plot the median rfs value by WHO, Ki67, and EOR, then you can use group_by and summarise, and feed the summarised data to ggplot.

library(tidyverse)

# Collapse the data to one median rfs per WHO / Ki67 / EOR combination and plot
# those medians: Ki67 on x, median rfs on y, coloured by WHO, one panel per EOR.
# `.groups = "drop"` makes summarise() return an ungrouped result; without it
# the output stays grouped by WHO and Ki67 and dplyr prints a regrouping message.
ee %>%
  group_by(WHO, Ki67, EOR) %>%
  summarise(rfs = median(rfs), .groups = "drop") %>%
  ggplot(aes(x = Ki67, y = rfs, color = factor(WHO))) +
  geom_point() +
  facet_wrap(~ EOR)

Giovanni Colitti
  • 1,982
  • 11
  • 24
2

If you need to know how the median of rfs varies with Ki67 (instead of the mean in linear regression), you need to use a quantile regression.

Luckily, this is implemented in ggplot2 as geom_quantile. I first make a plot without the facets to show that the regression line is very similar for the two groups that are later shown as facets:

library(quantreg)
library(ggplot2)

# Single-panel view: points coloured by WHO, plus one 0.5-quantile (median)
# regression line per EOR level, distinguished by line type.
ggplot(data = ee, mapping = aes(x = Ki67, y = rfs)) +
  geom_point(mapping = aes(colour = as.factor(WHO)), alpha = 0.5) +
  geom_quantile(
    mapping = aes(linetype = factor(EOR)),
    quantiles = 0.5,
    colour = "black"
  )

enter image description here

And having the facet (you don't need to specify group):

# Faceted view: one panel per EOR value, each with its own 0.5-quantile
# (median) regression line drawn in black over the WHO-coloured points.
ggplot(data = ee, mapping = aes(x = Ki67, y = rfs)) +
  geom_point(mapping = aes(colour = as.factor(WHO)), alpha = 0.5) +
  geom_quantile(quantiles = 0.5, linetype = 8, colour = "black") +
  facet_wrap(. ~ EOR)

enter image description here

Or you can derive the predictions per group, and plot:

# Fit a quantile regression of rfs on Ki67 to `df` with rq() (tau left at its
# default, which quantreg documents as 0.5, i.e. the median) and evaluate the
# fit at the Ki67 values in `X`.
#
# df: data frame containing columns `rfs` and `Ki67`.
# X:  vector of Ki67 values at which to predict.
# id: accepted but not used by the body.
#
# Returns a data frame with columns `Ki67` (= X) and `rfs` (the predictions).
func <- function(df, X, id) {
  median_fit <- rq(rfs ~ Ki67, data = df)
  fitted_rfs <- predict(median_fit, newdata = data.frame(Ki67 = X))
  data.frame(Ki67 = X, rfs = fitted_rfs)
}

# dplyr supplies %>%, group_by(), group_modify() and ungroup() used below.
library(dplyr)

# Integer Ki67 grid, from 1 up to the largest observed value, on which each
# per-group fit is evaluated.
Xrange <- 1:max(ee$Ki67)

# Fit and predict separately for each EOR level. group_modify() attaches each
# group's own EOR value to the rows func() returns, so the predictions cannot
# be mislabelled. Labelling by hand with rep(unique(ee$EOR), ...) swapped the
# two groups: group_by() processes groups in sorted key order (0, 1), whereas
# unique() returns values in order of first appearance (1, 0 in this data).
pred <- ee %>%
  group_by(EOR) %>%
  group_modify(~ func(.x, Xrange)) %>%
  ungroup()

# Raw points coloured by WHO, with each facet's predicted median line on top.
# geom_line() inherits x = Ki67 and y = rfs and is split into panels by pred$EOR.
ggplot(ee, aes(x = Ki67, y = rfs)) +
  geom_point(aes(color = as.factor(WHO)), alpha = 0.5) +
  geom_line(data = pred) +
  facet_wrap(. ~ EOR)
StupidWolf
  • 45,075
  • 17
  • 40
  • 72