Solution Below
Original Question:
I'm attempting to create a drilldown plot in highcharter that uses boxplots.
The goal here is to start with boxplots on a monthly timescale. The process is as follows:
- Initial view - Boxplot with months as x-axis
- Clicking on a specific month drills down into a new series of boxplots where the x-axis is the weeks that make up the month selected
- Lastly, clicking on a week within the month selected then drills down into a new series of boxplots where the x-axis is the days that are specific to that week.
A couple things to note, I've modified the data_to_boxplot
function in two ways. First by adding the ability to show no. of observations in get_box_values
. Second, I've added a drilldown field in data_to_boxplot
that corresponds to the name of the series. Which is used in hc_drilldown
The Code:
library(purrr)
library(dplyr)
library(tidyr)
library(lubridate)
library(highcharter)
library(data.table)
# Helper functions
group_by_timescale = function(x,unit="day") {
if (unit=="month") {
lubridate::rollback(x, roll_to_first = TRUE)
} else if (unit=="week") {
floor_date(x, "week", week_start = 1)+6
} else if (unit=="day") {
x
}
}
get_box_values <- function(x) {
boxplot.stats(x)$stats %>%
t() %>%
cbind(boxplot.stats(x)$n) %>%
as.data.frame() %>%
setNames(c("low", "q1", "median", "q3", "high", "obs"))
}
get_outliers_values <- function(x) {
boxplot.stats(x)$out
}
# Modified highcharter function
data_to_boxplot_2 = function (data, variable, group_var = NULL, group_var2 = NULL, add_outliers = FALSE, ...) {
stopifnot(is.data.frame(data), !missing(variable))
# browser()
dx <- data %>%
transmute(`:=`(x, {
{
variable
}
}))
if (!missing(group_var)) {
dg <- data %>% select({
{
group_var
}
})
}
else {
dg <- data.frame(rep(0, nrow(dx)))
}
if (!missing(group_var2)) {
dg2 <- data %>% select({
{
group_var2
}
})
}
else {
dg2 <- data.frame(rep(NA, nrow(dx)))
}
dg <- dg %>% setNames("name")
dg2 <- dg2 %>% setNames("series")
dat <- bind_cols(dx, dg, dg2)
dat1 <- dat %>%
group_by(series, name) %>%
summarise(data = list(get_box_values(x)),.groups = "drop") %>%
unnest(cols = data) %>%
mutate(drilldown = name) %>% # add drilldown name to series
group_nest(series) %>%
mutate(data = map(data, list_parse)) %>%
rename(name = series) %>%
mutate(id = name) %>%
mutate(type = "boxplot", ...)
if (add_outliers) {
dat2 <- dat %>%
mutate(name = as.numeric(factor(name)) - 1) %>%
group_by(series, name) %>%
summarise(y = list(get_outliers_values(x)),.groups = "drop") %>%
unnest(cols = y) %>%
rename(x = name) %>%
group_nest(series) %>%
mutate(data = map(data, list_parse)) %>%
rename(linkedTo = series) %>%
mutate(type = "scatter", showInLegend = FALSE, ...)
dout <- bind_rows(dat1, dat2)
}
else {
dout <- dat1
}
dout
}
# Sample data
dates = sort(rep(seq.Date(from = as_date("2021-01-01"), to = as_date("2021-12-31"), by = "day"),15))
data = data.table(
day = dates,
values = floor(runif(length(dates), 0, 1000))
)
data[, `:=` (
weeks = group_by_timescale(day, "week"),
months = group_by_timescale(day, "month")
)]
# Create Boxplot series
month_dt = data_to_boxplot_2(data, variable = values, group_var = months, name = "month")
week_dt = data_to_boxplot_2(data, variable = values, group_var = weeks, group_var2 = months, name = "week")
day_dt = data_to_boxplot_2(data, variable = values, group_var = day, group_var2 = weeks, name = "day")
# Drilldown HC plot
hc <- highchart() %>%
hc_title(text = "Basic drilldown") %>%
hc_xAxis(type = "category") %>%
hc_legend(enabled = FALSE) %>%
hc_plotOptions(series = list(borderWidth = 0,dataLabels = list(enabled = TRUE))) %>%
hc_add_series_list(month_dt) %>%
hc_drilldown(allowPointDrilldown = TRUE,
series = list(week_dt, day_dt))
Below is an image of the plot output. The x-axis values, when clicked, should result in a new boxplot but nothing happens. I'm suspect that it has to do with how my data is grouped but not sure. Any help on this would be great! Thanks
EDIT: Solution
I'm still a bit new to R so my explanation may unintentionally omit details. After tinkering around, I've discovered that hc_drilldown(series)
expects an array of series configurations that are in the highcharts series option. When using the data_to_boxplot
function the output is a tibble with a nested list. So in order to add the drill down series to hc_drilldown. The tibble needs to be parsed to a list. Using highcharter::list_parse2
removes all names and I need to keep the name values in the series like "name", "id" etc.
I created a function to output a hc drilldown box plot for n number of drilldown series. In my example I ended up using rlist::list.parse(series) %>% setNames(NULL)
since that only removes the top level names in the list, but pretty sure highcharter::list_parse
will work as well now that I think about it (thanks @Kat). Then, all I needed to do was append the lists using c()
in hc_drilldown. One thing to note, the values in the drilldown columns used need to be unique i.e. have unique "ids" otherwise it's possible that clicking on the first series drilldown can bypass the middle levels and go directly to the most granular series. In my example below, clicking on Monthly "2021-08-01" will bypass the weeks in August and go to the day drilldown series.
New Working Code
library(purrr)
library(dplyr)
library(tidyr)
library(lubridate)
library(highcharter)
library(data.table)
# Helper functions
group_by_timescale = function(x,unit="day") {
if (unit=="month") {
lubridate::rollback(x, roll_to_first = TRUE)
} else if (unit=="week") {
floor_date(x, "week", week_start = 1)+6
} else if (unit=="day") {
x
}
}
get_box_values <- function(x) {
boxplot.stats(x)$stats %>%
t() %>%
cbind(boxplot.stats(x)$n) %>%
as.data.frame() %>%
setNames(c("low", "q1", "median", "q3", "high", "obs"))
}
get_outliers_values <- function(x) {
boxplot.stats(x)$out
}
# Modified HC function
data_to_boxplot_2 = function (data, variable, group_var = NULL, group_var2 = NULL,
drilldown = FALSE, add_outliers = FALSE, ...) {
stopifnot(is.data.frame(data), !missing(variable))
# browser()
dx <- data %>%
transmute(`:=`(x, {
{
variable
}
}))
if (!missing(group_var)) {
dg <- data %>% select({
{
group_var
}
})
}
else {
dg <- data.frame(rep(0, nrow(dx)))
}
if (!missing(group_var2)) {
dg2 <- data %>% select({
{
group_var2
}
})
}
else {
dg2 <- data.frame(rep(NA, nrow(dx)))
}
dg <- dg %>% setNames("name")
dg2 <- dg2 %>% setNames("series")
dat <- bind_cols(dx, dg, dg2)
dat1 <- dat %>%
group_by(series, name) %>%
summarise(data = list(get_box_values(x)),.groups = "drop") %>%
unnest(cols = data)
if(drilldown) {
dat1 <- dat1 %>%
mutate(drilldown = name)
}
dat1 <- dat1 %>%
group_nest(series) %>%
mutate(data = map(data, list_parse)) %>%
rename(name = series) %>%
mutate(id = name) %>%
mutate(type = "boxplot", ...)
if (add_outliers) {
dat2 <- dat %>%
mutate(name = as.numeric(factor(name)) - 1) %>%
group_by(series, name) %>%
summarise(y = list(get_outliers_values(x)),.groups = "drop") %>%
unnest(cols = y) %>%
rename(x = name) %>%
group_nest(series) %>%
mutate(data = map(data, list_parse)) %>%
rename(linkedTo = series) %>%
mutate(type = "scatter", showInLegend = FALSE, ...)
dout <- bind_rows(dat1, dat2)
}
else {
dout <- dat1
}
dout
}
# Sample data
dates = sort(rep(seq.Date(from = as_date("2021-01-01"), to = as_date("2021-12-31"), by = "day"),15))
data = data.table(
day = dates,
values = floor(runif(length(dates), 0, 1000))
)
data[, `:=` (
weeks = group_by_timescale(day, "week"),
months = group_by_timescale(day, "month")
)]
# vector indicating the relationship between each drilldown series
# the first position is the top level
groups = c("months", "weeks", "day")
# create hc drilldown boxplot
drilldown_boxplot = function(dt, var, dd_groups, parent_name = "Monthly") {
dd_size = length(dd_groups)
all_dd = list()
# create boxplot series lists
for (idx in 1:dd_size) {
if (idx == 1) {
all_dd[[idx]] = data_to_boxplot_2(dt, variable = get(var), group_var = dd_groups[idx],
drilldown = TRUE, name = parent_name)
} else if (idx == length(dd_groups)) {
all_dd[[idx]] = data_to_boxplot_2(dt, variable = get(var), group_var = dd_groups[idx],
group_var2 = dd_groups[idx-1])
} else {
all_dd[[idx]] = data_to_boxplot_2(dt, variable = get(var), group_var = dd_groups[idx],
group_var2 = dd_groups[idx-1], drilldown = TRUE)
}
}
parent_series = all_dd[[1]]
child_series = tail(all_dd, dd_size-1)
child_series_exp = c()
# parse lists to be readable in hc_drilldown
for (i in 1:length(child_series)) {
s = rlist::list.parse(child_series[[i]]) %>% setNames(NULL)
child_series_exp = c(child_series_exp, s)
}
# create hc drilldown boxplot
hc = highchart() %>%
hc_xAxis(type = "category") %>%
hc_legend(enabled = FALSE) %>%
hc_plotOptions(series = list(borderWidth = 0, dataLabels = list(enabled = TRUE))) %>%
hc_add_series_list(parent_series) %>%
hc_drilldown(allowPointDrilldown = TRUE,
series = child_series_exp)
return(hc)
}
drilldown_boxplot(data, "values", groups)