4

I want to reproduce the following drc::plot.drc graphs with ggplot2.

enter image description here

df1 <-
      structure(list(TempV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L, 13L, 13L, 13L, 
    13L, 13L, 13L, 13L, 13L, 13L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
    11L, 11L, 11L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 14L, 14L, 14L, 14L, 14L, 14L, 
    14L, 14L, 14L, 14L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
    12L), .Label = c("22.46FH-142", "27.59FH-142", "26.41FH-142", 
    "29.71FH-142", "31.66FH-142", "34.11FH-142", "33.22FH-142", "22.46FH-942", 
    "27.59FH-942", "26.41FH-942", "29.71FH-942", "31.66FH-942", "34.11FH-942", 
    "33.22FH-942"), class = "factor"), Start = c(0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 
    192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 
    0L, 24L, 48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 
    48L, 72L, 96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 
    96L, 120L, 144L, 168L, 192L, 216L, 0L, 24L, 48L, 72L, 96L, 120L, 
    144L, 168L, 192L, 216L), End = c(24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 
    24, 48, 72, 96, 120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 
    120, 144, 168, 192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 
    192, 216, Inf, 24, 48, 72, 96, 120, 144, 168, 192, 216, Inf), 
        Germinated = c(0L, 0L, 0L, 0L, 3L, 67L, 46L, 12L, 101L, 221L, 
        0L, 0L, 0L, 0L, 57L, 50L, 44L, 31L, 32L, 236L, 0L, 0L, 0L, 
        31L, 68L, 50L, 31L, 34L, 29L, 207L, 0L, 0L, 8L, 30L, 31L, 
        55L, 27L, 22L, 4L, 273L, 0L, 0L, 46L, 64L, 16L, 8L, 15L, 
        15L, 20L, 266L, 0L, 0L, 0L, 0L, 4L, 13L, 63L, 51L, 147L, 
        172L, 0L, 0L, 4L, 26L, 92L, 31L, 91L, 14L, 7L, 185L, 0L, 
        0L, 0L, 0L, 0L, 32L, 59L, 36L, 50L, 273L, 0L, 0L, 0L, 4L, 
        13L, 32L, 42L, 52L, 42L, 265L, 0L, 0L, 0L, 6L, 22L, 40L, 
        57L, 44L, 73L, 208L, 0L, 1L, 2L, 24L, 55L, 41L, 68L, 24L, 
        33L, 202L, 0L, 0L, 18L, 31L, 26L, 30L, 61L, 25L, 58L, 201L, 
        0L, 0L, 36L, 54L, 33L, 55L, 12L, 27L, 55L, 178L, 0L, 0L, 
        6L, 28L, 26L, 31L, 53L, 48L, 33L, 225L)), .Names = c("TempV", 
    "Start", "End", "Germinated"), row.names = c(NA, -140L), class = "data.frame")

library(data.table)

dt1 <- data.table(df1)

library(drc)

dt1fm1 <- 
  drm(
        formula   = Germinated ~ Start + End
      , curveid   = TempV
  #   , pmodels   = 
  #   , weights   = 
      , data      = dt1
  #   , subset    = 
      , fct       = LL.2()
      , type      = "event"
      , bcVal     = NULL
      , bcAdd     = 0
  #   , start     =
      , na.action = na.fail
      , robust    = "mean"
      , logDose   = NULL
      , control   = drmc(
                            constr      = FALSE
                            , errorm      = TRUE
                            , maxIt       = 1500
                            , method      = "BFGS"
                            , noMessage   = FALSE
                            , relTol      = 1e-07
                            , rmNA        = FALSE
                            , useD        = FALSE
                            , trace       = FALSE
                            , otrace      = FALSE
                            , warnVal     = -1
                            , dscaleThres = 1e-15
                            , rscaleThres = 1e-15
                            )
      , lowerl    = NULL
      , upperl    = NULL
      , separate  = FALSE
      , pshifts   = NULL 
      )



## ----dt1fm1Plot1----
plot(
        x      = dt1fm1
    , xlab     = "Time (Hours)"
    , ylab     = "Proportion Germinated (\\%)"    
  # , ylab     = "Proportion Germinated (%)"    
    , add      = FALSE
    , level    = NULL
    , type     = "average" # c("average", "all", "bars", "none", "obs", "confidence")
    , broken   = FALSE
  # , bp
    , bcontrol = NULL
    , conName  = NULL
    , axes     = TRUE
    , gridsize = 100
    , log      = ""
  # , xtsty
    , xttrim   = TRUE
    , xt       = NULL
    , xtField    = NULL
    , xField     = "Time (Hours)"
    , xlim     = c(0, 200)
    , yt       = NULL
    , ytField    = NULL
    , yField     = "Proportion Germinated"
    , ylim     = c(0, 1.05)
    , lwd      = 1
    , cex      = 1.2
    , cex.axis = 1
    , col      = TRUE
  # , lty
  # , pch
    , legend     = TRUE
  # , legendText  
    , legendPos  = c(40, 1.1)
    , cex.legend = 0.6
    , normal     = FALSE
    , normRef    = 1
    , confidence.level = 0.95
    )


## ----dt1fm1Plot2----
dt1fm1Means1 <- dt1[, .(Germinated=mean(Germinated)/450), by=.(TempV, Start, End)]
dt1fm1Means2 <- dt1fm1Means1[, .(Start=Start, End=End, Cum_Germinated=cumsum(Germinated)), by=.(TempV)]
dt1fm1Means  <- data.table(dt1fm1Means2[End!=Inf], Pred=predict(object=dt1fm1))

dt1fm1Plot2 <- 
       ggplot(data= dt1fm1Means, mapping=aes(x=End, y=Cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
        geom_point() +
        geom_line(aes(y = Pred)) +
        scale_shape_manual(values=seq(0, 13)) +
        labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
        theme_bw() +
        scale_x_continuous(expand = c(0, 0), breaks = c(0, unique(dt1fm1Means$End))) +
        scale_y_continuous(expand = c(0, 0), labels = function(x) paste0(100*x,"\\%")) +
      # scale_y_continuous(expand = c(0, 0), labels = percent) +
        expand_limits(x = c(0, max(dt1fm1Means$End)+20), y = c(0, max(dt1fm1Means$Pred)+0.1)) +
        theme(axis.title.x = element_text(size = 12, hjust = 0.54, vjust = 0),
              axis.title.y = element_text(size = 12, angle = 90,  vjust = 0.25))
print(dt1fm1Plot2)

enter image description here

Question

There are few discrepancies in ggplot2 output. These discrepancies occur because the predict function gives output in different pattern than the given levels in the data.

Edited

Actually drm function changed the order of levels of TempV and this is clear from summary(dt1fm1) output and the graph of drc::plot.drc output.

halfer
  • 19,824
  • 17
  • 99
  • 186
MYaseen208
  • 22,666
  • 37
  • 165
  • 309
  • I've had a similar question before: http://stackoverflow.com/questions/36780357/plotting-dose-response-curves-with-ggplot2-and-drc/37043751#37043751. In brief, I was able to use the stat_smooth function to plot a drm model directly but for some reason this behaviour does not repeat on all machines I tested. The other option is to add predicted values to your data set and plot these. – biomiha Jul 10 '16 at 15:36
  • biomiha: With the help of @dww, I figured out how to plot `drc` model with `ggplot2` for one curve (See [here](http://stackoverflow.com/a/38268934/707145)). However, I am struggling when `curveid = TempV` is being used. Any thoughts, please. – MYaseen208 Jul 10 '16 at 15:56
  • MYaseen208: What exactly is the issue with curveid? Is it just the order of the legend items? You can change that by transforming your TempV values as factors and defining the levels (http://stackoverflow.com/questions/6919025/how-to-assign-colors-to-categorical-variables-in-ggplot2-that-have-stable-mappin) – biomiha Jul 11 '16 at 10:11

1 Answers1

3

As noted in the question, there is an issue related to drm shuffling the order of factor levels. Un-shuffling this mess proved more tricky than I expected.

In the end I approached this by calling the drm function once per factor level to build up a table of results one factor level at a time.

Doing it this long-winded way uncovered the fact that your 1st plot from plot.drc and the ggplot version are both incorrect.

Let's start by wrapping your function call to drm() inside another wrapper function, to facilitate calling it repeatedly for each trace:

drcmod <- function(dt1){
  drm(formula   = Germinated ~ Start + End
    , curveid   = TempV
    , data      = dt1
    , fct       = LL.2()
    , type      = "event"
    , bcVal     = NULL
    , bcAdd     = 0
    , na.action = na.fail
    , robust    = "mean"
    , logDose   = NULL
    , control   = drmc(
      constr      = FALSE
      , errorm      = TRUE
      , maxIt       = 1500
      , method      = "BFGS"
      , noMessage   = FALSE
      , relTol      = 1e-07
      , rmNA        = FALSE
      , useD        = FALSE
      , trace       = FALSE
      , otrace      = FALSE
      , warnVal     = -1
      , dscaleThres = 1e-15
      , rscaleThres = 1e-15
    )
    , lowerl    = NULL
    , upperl    = NULL
    , separate  = FALSE
    , pshifts   = NULL 
  )
}

Now we can use this wrapper to fit the drc model to each factor level in turn:

dt2 <- data.table()
for (i in 1:nlevels(dt1$TempV)) {
  dt <- dt1[TempV==levels(TempV)[i]]
  dt[, TempV:=as.character(TempV)]
  dt[, Germ_frac := mean(Germinated)/450, by=.(Start)]
  dt[, cum_Germinated := cumsum(Germ_frac)]
  dt[, Pred := c(predict(object=drcmod(dt)), NA)] 
  dt2 <- rbind(dt2, dt)
}

and plot:

ggplot(dt2[End != Inf], aes(x=End, y=cum_Germinated, group=TempV, color=TempV, shape=TempV)) + 
  geom_point() +
  geom_line(aes(y = Pred)) +
  scale_shape_manual(values=seq(0, 13)) +
  labs(x = "Time (Hours)", y = "Proportion Germinated", shape="Temp", color="Temp") +
  theme_bw()

enter image description here

Edit

If we run the original code in the question using a subset of the data with fewer factor levels, for example using

dt1 <- dt1[TempV %in% levels(TempV)[1:5],]
dt1 <- droplevels(dt1)

all the plots (the 2 versions in OP, and the version in this answer) give the same result. The discrepancies only seem to arise when a large number of factor levels are used. The fact that both the ggplot and the plot.drc in OP give incorrect matching of traces to factor levels indicates that the problem is most likely to be in the drm() function, rather than in plot.drc.

dww
  • 30,425
  • 5
  • 68
  • 111
  • Thanks @dww for your time and effort. What I understood from your answer that `plot.drc` bug, if that is true please report it. Thanks – MYaseen208 Sep 01 '16 at 09:52
  • @myaseen208 See my edit to this answer. After looking again, it appears that the problem seems to be in the drm function rather than in plot.drc. and it only seems to arise when a large number of factor levels are used. My recommendation is to run the drm model one trace at a time, as i do in my answer, until this can be fixed by the package maintainers. I suggest also that you raise this with the package maintainers, and point them towards this SO page. in the mean time, i think my work-around should work for you. – dww Sep 01 '16 at 16:00
  • Thanks @dww for your efforts. I have written an email to the maintainer of drc package and looking forward to his notice and reply. Thanks – MYaseen208 Sep 01 '16 at 16:20