1

I've been trying to create a line plot where the x-axis consists of dates and y is a continuous variable. Right now, the x-axis labels just show the years, but I want labels specifically for July 2017, July 2018, July 2019, ..., July 2022, ... I tried to use the following code to specify ticks but the output does not give me what I want. Is there any way I can achieve my desired output?

# Asker's attempt: plot observed cumulative enrollment (blue) and the projected
# enrollment (dashed red) against the month column of each data frame.
p <- plot_ly(mode = 'lines', hoverinfo = "x+y")
p <- p %>%
  add_trace(name = "Cumulative enrollment", type = "scatter", x = test2$month, y = test2$cumsum, color = I("blue")) %>%
  add_lines(x = pred$month, y = pred$cumsum, color = I("red"), linetype = I("dash"), name = 'Projected enrollment \n (19 subjects/month)') %>%
  # x axis: tick positions are requested as Date values for 1 July of 2017-2022.
  # Per the question text, this is the part that does not produce the desired
  # "July <year>" labels.
  layout(xaxis = list(autotick = F, tickmode = "array", tickvals = c(as.Date('2017-07-01'), as.Date('2018-07-01'), as.Date('2019-07-01'), as.Date('2020-07-01'), as.Date('2021-07-01'), as.Date('2022-07-01'))),
         # y axis: explicit tick positions, including one at 1109.
         yaxis = list(title = "Number of Subjects (Cumulative)", tickmode = "array", tickvals = c(0, 200, 400, 600, 800, 1000, 1109, 1200)), 
         # NOTE(review): hline() is not defined in this snippet; it must already
         # exist in the session for this call to work.
         shapes = list(hline(1109)), 
         legend = list(orientation = 'h', xanchor = "center", x = 0.5),
         # Text label placed at y = 1150 in data coordinates (yref = 'y');
         # no x position is given alongside xref = 'paper'.
         annotations = list(list(
           y = 1150,
           xref = 'paper',
           yref = 'y',
           text = 'Required number of subjects',
           showarrow = FALSE
         )))
# Display the plot.
p

Data:

> dput(test2)
structure(list(month = structure(c(17348, 17379, 17410, 17440, 
17471, 17501, 17532, 17563, 17591, 17622, 17652, 17683, 17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 18170, 18201, 18231, 18262, 
18293, 18322, 18353), class = "Date"), count = c(1L, 2L, 13L, 
10L, 22L, 11L, 18L, 20L, 24L, 16L, 28L, 18L, 16L, 30L, 18L, 21L, 
21L, 21L, 22L, 17L, 24L, 30L, 24L, 17L, 38L, 24L, 23L, 21L, 20L, 
10L, 24L, 16L, 17L, 1L), cumsum = c(1L, 3L, 16L, 26L, 48L, 59L, 
77L, 97L, 121L, 137L, 165L, 183L, 199L, 229L, 247L, 268L, 289L, 
310L, 332L, 349L, 373L, 403L, 427L, 444L, 482L, 506L, 529L, 550L, 
570L, 580L, 604L, 620L, 637L, 638L)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -34L))

> dput(pred)
structure(list(month = structure(c(18383, 18414, 18444, 18475, 
18506, 18536, 18567, 18597, 18628, 18659, 18687, 18718, 18748, 
18779, 18809, 18840, 18871, 18901, 18932, 18962, 18993, 19024, 
19052, 19083, 19113, 19144, 19174, 19205, 19236, 19266, 19297, 
19327), class = "Date"), count = c(19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19), cumsum = c(657, 676, 695, 
714, 733, 752, 771, 790, 809, 828, 847, 866, 885, 904, 923, 942, 
961, 980, 999, 1018, 1037, 1056, 1075, 1094, 1113, 1132, 1151, 
1170, 1189, 1208, 1227, 1246)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -32L))

Output: enter image description here

user122514
  • 397
  • 5
  • 13

2 Answers

1

It's a little tricky. You'll need to create a dummy index and assign a ticktext to it. Usually I'd prefer row-binding both datasets before passing them to plot_ly, but I didn't want to change your approach too much.

library(plotly)

# Observed enrollment, one row per month (2017-07-01 through 2020-04-01),
# rebuilt from the dput() output in the question.
test2 <- local({
  # Month starts, stored as days since 1970-01-01.
  month_days <- c(
    17348, 17379, 17410, 17440, 17471, 17501, 17532, 17563, 17591, 17622,
    17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 17928,
    17956, 17987, 18017, 18048, 18078, 18109, 18140, 18170, 18201, 18231,
    18262, 18293, 18322, 18353
  )
  # Subjects enrolled in each month.
  enrolled <- c(
    1L, 2L, 13L, 10L, 22L, 11L, 18L, 20L, 24L, 16L,
    28L, 18L, 16L, 30L, 18L, 21L, 21L, 21L, 22L, 17L,
    24L, 30L, 24L, 17L, 38L, 24L, 23L, 21L, 20L, 10L,
    24L, 16L, 17L, 1L
  )
  # Running total of `enrolled`.
  running_total <- c(
    1L, 3L, 16L, 26L, 48L, 59L, 77L, 97L, 121L, 137L,
    165L, 183L, 199L, 229L, 247L, 268L, 289L, 310L, 332L, 349L,
    373L, 403L, 427L, 444L, 482L, 506L, 529L, 550L, 570L, 580L,
    604L, 620L, 637L, 638L
  )
  structure(
    list(
      month = structure(month_days, class = "Date"),
      count = enrolled,
      cumsum = running_total
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -34L)
  )
})
# Projected enrollment, one row per month (2020-05-01 through 2022-12-01),
# rebuilt from the dput() output in the question.
pred <- local({
  # Month starts, stored as days since 1970-01-01.
  month_days <- c(
    18383, 18414, 18444, 18475, 18506, 18536, 18567, 18597, 18628, 18659,
    18687, 18718, 18748, 18779, 18809, 18840, 18871, 18901, 18932, 18962,
    18993, 19024, 19052, 19083, 19113, 19144, 19174, 19205, 19236, 19266,
    19297, 19327
  )
  # Constant projected rate of 19 subjects in each of the 32 months.
  projected <- rep(19, 32)
  # Projected running total.
  running_total <- c(
    657, 676, 695, 714, 733, 752, 771, 790, 809, 828,
    847, 866, 885, 904, 923, 942, 961, 980, 999, 1018,
    1037, 1056, 1075, 1094, 1113, 1132, 1151, 1170, 1189, 1208,
    1227, 1246
  )
  structure(
    list(
      month = structure(month_days, class = "Date"),
      count = projected,
      cumsum = running_total
    ),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -32L)
  )
})



# Dates that should receive an axis label: 1 July of each year 2017-2022.
ticks <- as.Date(sprintf("%d-07-01", 2017:2022))

# Dummy x index (1..N) for the observed months followed by the projected
# months, paired with the real date each index stands for.
tickDF <- data.frame(
  index = seq_len(length(test2$month) + length(pred$month)),
  timestamp = c(test2$month, pred$month)
)

# Keep only the index/date pairs that fall on one of the wanted tick dates.
filteredTicks <- tickDF[tickDF$timestamp %in% ticks, ]

#' Describe a horizontal line as a plotly layout shape
#'
#' @param y Vertical position of the line; used for both `y0` and `y1`.
#' @param color Line colour, stored as `line$color`.
#' @return A list with elements `type`, `x0`, `x1`, `xref`, `y0`, `y1` and
#'   `line`, where `x0 = 0`, `x1 = 1` and `xref = "paper"`.
hline <- function(y = 0, color = "blue") {
  # Horizontal extent: 0 to 1 with the x reference set to "paper".
  span <- list(type = "line", x0 = 0, x1 = 1, xref = "paper")
  # Both endpoints share the same y, which makes the line horizontal.
  level <- list(y0 = y, y1 = y, line = list(color = color))
  c(span, level)
}

# Hover text showing the real month/year and value for each point, so the
# tooltip does not expose the dummy x index used for plotting.
hover_label <- function(dates, values) {
  paste("</br> X: ", as.character(format(dates, "%B %Y")),
        "</br> Y: ", values)
}

# Both series are drawn against a dummy integer index; the x axis is then
# labelled only at the indices collected in `filteredTicks`.
p <- plot_ly(mode = "lines") %>%
  add_trace(
    name = "Cumulative enrollment",
    type = "scatter",
    x = seq_along(test2$month),
    y = test2$cumsum,
    color = I("blue"),
    hoverinfo = "text",
    text = hover_label(test2$month, test2$cumsum)
  ) %>%
  add_lines(
    # Continue the index where the observed series ends.
    x = seq_along(pred$month) + length(test2$month),
    y = pred$cumsum,
    color = I("red"),
    linetype = I("dash"),
    name = "Projected enrollment \n (19 subjects/month)",
    hoverinfo = "text",
    text = hover_label(pred$month, pred$cumsum)
  ) %>%
  layout(
    xaxis = list(
      # FALSE rather than F: F is an ordinary variable that can be reassigned.
      autotick = FALSE,
      tickmode = "array",
      tickvals = filteredTicks$index,
      ticktext = as.character(format(filteredTicks$timestamp, "%B %Y"))
    ),
    yaxis = list(
      title = "Number of Subjects (Cumulative)",
      tickmode = "array",
      tickvals = c(0, 200, 400, 600, 800, 1000, 1109, 1200)
    ),
    # Horizontal line at y = 1109.
    shapes = list(hline(1109)),
    legend = list(orientation = "h", xanchor = "center", x = 0.5),
    # Caption at y = 1150 in data coordinates, just above the line.
    annotations = list(list(
      y = 1150,
      xref = "paper",
      yref = "y",
      text = "Required number of subjects",
      showarrow = FALSE
    ))
  )
p

The hline function was taken from here. Regarding custom hover info, please check this article.

ismirsehregal
  • 30,045
  • 5
  • 31
  • 78
  • Hi! Thanks so much for your reply. I ran the code you provided, and I think it's "too much." Right now, your code labels every single month, starting from July 2017 to December 2022. However, I only want to label (have a tick) for July 2017, July 2018, July 2019, July 2020, July 2021, July 2022, so 6 marks. Is that possible? Perhaps I should've been clearer - sorry about that! – user122514 Apr 14 '20 at 21:20
  • This is perfect - exactly what I wanted. Thanks so much! – user122514 Apr 14 '20 at 21:29
  • Hi, sorry to ask again, but is it possible to make sure that the hover info displays the actual month and year? Right now, I think it's displaying the dummy values. Thank you! – user122514 Apr 14 '20 at 21:34
  • sure, please check my answer [here](https://stackoverflow.com/questions/60066748/how-can-i-plotly-a-ggplot-treemap/60146733#60146733) for custom hoverinfos. – ismirsehregal Apr 14 '20 at 21:37
  • Hello, I added the following based on your response: `p <- plot_ly(mode = 'lines', labels = ~month, parents = NA, values = ~cumsum, hovertemplate = "Date: %{label}
    Count: %{value}")`. However, I get the following warning message: 1: 'scatter' objects don't have these attributes: 'labels', 'parents', 'values'. I'm not sure what the equivalent attribute is for scatter plots. Thanks so much.
    – user122514 Apr 14 '20 at 21:43
  • Using `p <- plot_ly(mode = 'lines', x = ~month, y = ~cumsum, hovertemplate = "Date: %{x}
    Count: %{y}")` got me to a better place, but it's still showing the dummy values.
    – user122514 Apr 14 '20 at 21:47
  • No problem - will try to play around a bit, though I don't think my efforts will be fruitful. Thanks so much. – user122514 Apr 14 '20 at 22:03
  • This worked perfectly. I can't thank you enough for all the help! – user122514 Apr 15 '20 at 13:53
0

You can set up custom values, as described here:

   # Plot `df` and restrict the date x axis to 2019-01-01 .. 2020-08-31.
   # as.numeric() on POSIXct gives seconds; * 1000 turns that into milliseconds.
   plot_ly(df, x = x, y = y) %>%
     layout(
       xaxis = list(
         range = 1000 * as.numeric(
           as.POSIXct(c("2019-01-01", "2020-08-31"), format = "%Y-%m-%d")
         ),
         type = "date"
       )
     )
Transformer
  • 6,963
  • 2
  • 26
  • 52
  • Hi there! I edited what you provided: `xaxis = list(range = c(as.numeric(as.POSIXct("2017-07-01", format="%Y-%m-%d"))*1000, as.numeric(as.POSIXct("2022-07-01", format="%Y-%m-%d"))*1000), type = "date")` and for some reason, it's not making a difference :( – user122514 Apr 14 '20 at 20:58
  • remove this `xaxis = list(autotick = F, tickmode = "array", tickvals = c(as.Date('2017-07-01'), as.Date('2018-07-01'), as.Date('2019-07-01'), as.Date('2020-07-01'), as.Date('2021-07-01'), as.Date('2022-07-01'))` and break it down to something more simple. Try just one date or a simple range first. Make sure you map the dataframe correctly, here is a [link](https://stackoverflow.com/a/39870409/6085193) to help – Transformer Apr 14 '20 at 22:16