0

Question: How can I force geom_density to start at (0,0)?

I have produced this plot:

enter image description here

Using this code (suggestions on how to improve the script are welcome):

# Patients with a recurrence, split by Ki-67 group code, each with the time
# to recurrence converted from months to years (new column
# `time.recur.months1`).

# Group code 0
ki60low <- subset(p, ki67in == 0 & recurrence == 1)
ki60low[["time.recur.months1"]] <- ki60low[["time.recur.months"]] / 12

# Group code 1
ki60in <- subset(p, ki67in == 1 & recurrence == 1)
ki60in[["time.recur.months1"]] <- ki60in[["time.recur.months"]] / 12

# Group code 2
ki60high <- subset(p, ki67in == 2 & recurrence == 1)
ki60high[["time.recur.months1"]] <- ki60high[["time.recur.months"]] / 12

# Custom theme object: black axis lines, very light grey major and minor
# grid lines, and no panel border or panel background.
theme <- theme(
  axis.line = element_line(colour = "black"),
  panel.grid.major = element_line(colour = "gray98"),
  panel.grid.minor = element_line(colour = "gray98"),
  panel.border = element_blank(),
  panel.background = element_blank()
)

# Builds the plot layer by layer: for each of the three Ki-67 subsets a bar
# layer and a count-scaled density layer, with three coloured annotation
# labels standing in for a legend.
# `+ theme` adds the theme object assigned earlier in the script; the later
# theme(...) calls invoke the function of the same name.
ggplot()  + theme +
  # x axis in years with one tick per year. The limits run from -1 to 11, so
  # the panel also covers the region to the left of 0.
  scale_x_continuous(name="Years to recurrence", breaks=c(0,1,2,3,4,5,6,7,8,9,10,11), labels=c("0","1","2","3","4","5","6","7","8","9","10","11"), limits=c(-1,11)) +
  # NOTE(review): seq(0,6,by=1) is passed unnamed and therefore matched
  # positionally -- presumably to `breaks`; confirm against the
  # scale_y_continuous() signature.
  scale_y_continuous(name="Number of recurrences", limits=c(0, 6), seq(0,6,by=1)) +

  # One bar layer per subset (blue, red, black); x is taken directly from
  # each subset's time.recur.months1 column.
  geom_bar(aes(x=ki60low$time.recur.months1), colour="#1C73C2", fill="#1C73C2", alpha=0.2)  + 
  geom_bar(aes(x=ki60in$time.recur.months1), colour="red", fill="red", alpha=0.2) +
  geom_bar(aes(x=ki60high$time.recur.months1), colour="black", fill="black", alpha=0.7) +

  # One density layer per subset, with y mapped to ..count.. rather than the
  # default density so the curves share the y axis with the bars.
  geom_density(aes(x=ki60low$time.recur.months1, y=..count..), colour="#1C73C2", fill="#1C73C2", alpha=0.1)  + 
  geom_density(aes(x=ki60in$time.recur.months1, y=..count..), colour="red", fill="red", alpha=0.1) +
  geom_density(aes(x=ki60high$time.recur.months1, y=..count..), colour="black", fill="black", alpha=0.18) +

  # Hand-placed labels acting as a legend; colours match the layers above.
  annotate("label", x = 8.28, y = 5.5, label = "Ki-67 proliferative index: 0 - 4%", label.size = 0.5, cex=8, colour="#1C73C2") +
  annotate("label", x = 8.28, y = 4.5, label = "Ki-67 proliferative index: 5 - 9%", label.size = 0.5, cex=8, colour="red") +
  annotate("label", x = 8.28, y = 3.5, label = "Ki-67 proliferative index: \u226510% ", label.size = 0.5, cex=8, colour="black") +

  # Axis text/title sizes and margins, followed by legend text sizes.
  theme(axis.text.x = element_text(color = "grey20", size = 12), axis.title.x = element_text(color = "grey20", size = 14, face="bold", margin=margin(t=12))) +
  theme(axis.text.y = element_text(color = "grey20", size = 11), axis.title.y = element_text(color = "grey20", size = 14, face="bold", margin=margin(r=12))) +
  theme(legend.text=element_text(size=12)) + theme(legend.title=element_text(size=14)) 

I would like the geom_density curves to start at (0,0). How can this be done? I found this, which did not help.

My data p

# Example data in dput() form: a data.frame with integer columns `ki67in`
# (values 0, 1, 2) and `recurrence` (values 0, 1), a numeric column
# `time.recur.months`, and non-consecutive integer row names.
p <- structure(list(ki67in = c(0L, 2L, 0L, 0L, 1L, 0L, 2L, 2L, 1L, 
0L, 1L, 2L, 0L, 2L, 0L, 1L, 1L, 1L, 0L, 2L, 2L, 0L, 1L, 1L, 0L, 
0L, 0L, 1L, 0L, 1L, 2L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 2L, 
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 0L, 
0L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 
0L, 0L, 1L, 1L, 2L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 
1L, 0L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L), time.recur.months = c(0.75, 0.6, 4.665297741, 
0.1, 0.75, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 6, 7, 8, 8, 
8, 9, 11, 12, 13, 13, 15, 15, 15, 16, 17, 17, 18, 27, 28, 29, 
30, 33, 34, 35, 37, 37, 38, 39, 40, 41, 45, 49, 49, 50, 52, 53, 
54, 56, 56, 56, 56, 57, 58, 58, 60, 60, 60, 60, 61, 62, 63, 64, 
66, 67, 67, 72, 72, 74, 78, 80, 80, 80, 81, 82, 83, 83, 84, 84, 
85, 85, 86, 86, 88, 88, 88, 88, 89, 89, 89, 90, 90, 91, 91, 92, 
92, 92, 92, 93, 93, 93, 93, 93, 93, 94, 97, 98, 98, 99, 99, 99, 
100, 101, 101, 101, 103, 103, 103, 103, 104, 104, 106, 106, 109, 
110, 111, 111, 112, 114, 114, 115, 116, 117, 118, 118, 118, 119, 
120, 120, 120, 120, 120, 120, 121, 121, 123, 124, 124, 125, 125, 
125, 125), recurrence = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 
0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 
0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 1L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(1L, 
2L, 3L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 18L, 19L, 20L, 
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 44L, 45L, 46L, 47L, 48L, 
49L, 50L, 51L, 52L, 53L, 54L, 55L, 57L, 59L, 60L, 61L, 62L, 63L, 
64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 
77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 87L, 89L, 90L, 91L, 
92L, 93L, 94L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 103L, 104L, 
105L, 106L, 107L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 
117L, 118L, 119L, 120L, 121L, 123L, 124L, 125L, 126L, 127L, 128L, 
130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 
141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L, 
152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 
163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 
174L, 175L))
cmirian
  • 2,572
  • 3
  • 19
  • 59
  • 1
    I am a bit confused: you want to start your plot at (0,0), but your x limit is -1. So what does the graph you want look like? Do you want the data moved so it starts at (0,0), or do you want a plot that is exactly like this but just not showing everything before 0? Also, what is it about the expand that doesn't work? Is it simply not giving the result you want (removing the space between the axis and the plot), or is it not doing anything at all? – Annet Feb 23 '20 at 19:59
  • Hi @Annet. Thank you for replying. My apologies if my question was a bit unclear. To clarify, I want the plot not to show anything before 0. – cmirian Feb 23 '20 at 20:09
  • 1
    So why set your x limit to -1 rather than 0? Why didn't changing it to 0 provide the desired result? To be specific, I mean scale_x_continuous(limits = c(0,11)) – Annet Feb 23 '20 at 20:16

1 Answer

4

A few changes will allow you to generate your plot with much less code:

1) Use a single data frame with a category column that will be mapped to the colour and fill aesthetics, rather than separate data frames. Then you need to call each geom only once.

2) Generate the legend directly from the data, rather than with annotation.

3) Tweak an existing plot theme (theme_classic() in this case).

4) When setting breaks with scale_x/y_continuous(), you can often take advantage of various short cuts. For example: 0:11 instead of c(0,1,2,3,4,5,6,7,8,9,10,11). Also, if the labels are the same as the break values, then there's no need to add a labels argument.

In addition: I've switched from geom_bar to geom_rug, but you can of course go with a bar plot if you wish. For the rug plot, I've jittered the markers a bit so that points with the same x value will all be shown, rather than plotted on top of each other. Also, with ggplot, use bare column names rather than restating the data frame name. For example, in your original code, aes(time.recur.months1, ...) rather than aes(ki60low$time.recur.months1, ...).

For the axis limits, I'm not sure how you want them to look. You've set the x-axis limits at c(-1,11). To start at zero, there are two choices:

  • scale_x_continuous(limits=c(0,11)) will exclude data outside that range when calculating the density estimate.

  • coord_cartesian(xlim=c(0,11)) will include all data in the density estimate, even if it's outside the xlim range.

In either case, ggplot by default adds some padding before and after the axis limits. If you want less or no padding, use the expand argument in scale_x/y_continuous.

library(tidyverse)
library(ggstance)

# Plot data: recurrence cases only, sorted by Ki-67 group code, with the time
# to recurrence in years and the group code turned into a labelled factor.
p.for.plot <- p %>%
  filter(recurrence == 1) %>%
  arrange(ki67in) %>%
  mutate(time.recur.years = time.recur.months / 12) %>%
  mutate(
    ki67in = recode(
      ki67in,
      "0" = "Ki-67 proliferative index: 0 - 4%",
      "1" = "Ki-67 proliferative index: 5 - 9%",
      "2" = "Ki-67 proliferative index: \u226510%"
    )
  ) %>%
  # The rows are already sorted by group code, so the order of first
  # appearance gives the factor levels in ascending group order.
  mutate(ki67in = factor(ki67in, levels = unique(ki67in)))

# One colour per Ki-67 group, in the order of the factor levels of
# p.for.plot$ki67in.
cols <- c("#1C73C2", "red", "black")

# Count-scaled density curve per Ki-67 group, plus a jittered rug marking the
# individual recurrence times. The legend is generated from the colour/fill
# mapping instead of manual annotations.
ggplot(p.for.plot, aes(time.recur.years, colour = ki67in, fill = ki67in)) +
  # after_stat(count) replaces the ..count.. notation, which is deprecated
  # since ggplot2 3.4.0 (after_stat() requires ggplot2 >= 3.3.0).
  geom_density(aes(y = after_stat(count)), alpha = 0.2) +
  #geom_bar(alpha=0.7) +
  # Horizontal jitter keeps markers with identical x values from being drawn
  # on top of each other.
  geom_rug(aes(y = 0), position = position_jitter(width = 0.05, height = 0),
           length = unit(0.05, "npc"), show.legend = FALSE) +
  # Restrict the visible x range to 0-11 without excluding data from the
  # density estimate (unlike limits in scale_x_continuous()).
  coord_cartesian(xlim = c(0, 11)) +
  # expand = c(0, 0) removes the default padding around the axis limits.
  scale_x_continuous(name = "Years to recurrence", breaks = 0:11,
                     expand = c(0, 0)) +
  scale_y_continuous(name = "Number of recurrences", limits = c(0, 6),
                     breaks = 0:6, expand = c(0, 0)) +
  scale_colour_manual(values = cols) +
  scale_fill_manual(values = cols) +
  # Empty titles so the legend shows only the group labels.
  labs(colour = "", fill = "") +
  theme_classic() +
  theme(panel.grid.major = element_line(colour = "gray98"),
        panel.grid.minor = element_line(colour = "gray98"),
        legend.position = c(0.7, 0.8))

enter image description here

eipi10
  • 91,525
  • 24
  • 209
  • 285
  • Thanks man. That sure did the thing. Perhaps one thing: it seems that the order of the Ki-67 groups is wrong. Can you change the order so that it goes (0-4%, 5-9%, >10%)? – cmirian Feb 23 '20 at 21:01
  • 1
    Sorry, I missed that. See updated code. I now set the order when generating the `p.for.plot` data frame. – eipi10 Feb 23 '20 at 21:06