I have a dataset that I want to visualize overall and disaggregated by a few different variables. I created a flexdashboard with a toy shiny app to select the type of disaggregation, and working code to plot the correct subset.
My approach is repetitive, which is a hint to me that I'm missing out on a better way to do this. The piece that's tripping me up is the need to count by date and expand the matrix. I'm not sure how get group counts by week in one pipe. I do it in several steps and combine.
Thoughts?
(ps. I asked this question on RStudio Community, but I think it's probably more of a "SO question". I don't have permissions to delete it from RSC, so apologies for the cross-post.)
---
title: "test"
output:
flexdashboard::flex_dashboard:
theme: bootstrap
runtime: shiny
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
```
```{r global, include=FALSE}
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- sample_n(dat, 80)
```
Sidebar {.sidebar}
=====================================
```{r}
radioButtons("diss", label = "Disaggregation",
choices = list("All" = 1, "By Sex" = 2, "By Language" = 3),
selected = 1)
```
Page 1
=====================================
```{r}
# all
all <- reactive(
dat %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total = 0))
)
# males only
males <- reactive(
dat %>%
filter(sex=="male") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_m = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_m = 0))
)
# females only
females <- reactive(
dat %>%
filter(sex=="female") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_f = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_f = 0))
)
# english only
english <- reactive(
dat %>%
filter(lang=="english") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_e = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_e = 0))
)
# spanish only
spanish <- reactive(
dat %>%
filter(lang=="spanish") %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
as_tbl_time(index = date) %>%
select(date, new) %>%
collapse_by('1 week', side="start", clean=TRUE) %>%
group_by(date) %>%
mutate(total_s = sum(new, na.rm=TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(date = seq(date[1],
date[length(date)],
by = "1 week"),
fill = list(total_s = 0))
)
# combine
totals <- reactive({
all <- all()
females <- females()
males <- males()
english <- english()
spanish <- spanish()
all %>%
select(date, total) %>%
full_join(select(females, date, total_f), by = "date") %>%
full_join(select(males, date, total_m), by = "date") %>%
full_join(select(english, date, total_e), by = "date") %>%
full_join(select(spanish, date, total_s), by = "date")
})
# convert to xts
totals_ <- reactive({
totals <- totals()
xts(totals, order.by = totals$date)
})
# plot
renderDygraph({
totals_ <- totals_()
if (input$diss == 1) {
dygraph(totals_[, "total"],
main= "All") %>%
dySeries("total", label = "All") %>%
dyRangeSelector() %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else if (input$diss == 2) {
dygraph(totals_[, c("total_f", "total_m")],
main = "By sex") %>%
dyRangeSelector() %>%
dySeries("total_f", label = "Female") %>%
dySeries("total_m", label = "Male") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else {
dygraph(totals_[, c("total_e", "total_s")],
main = "By language") %>%
dyRangeSelector() %>%
dySeries("total_e", label = "English") %>%
dySeries("total_s", label = "Spanish") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
}
})
```
Update:
@Jon Spring suggested writing a function to reduce some repetition (applied below), which is a nice improvement. The basic approach is the same, however. Segment, calculate, combine, plot. Is there a way to do this without breaking apart and putting back together?
---
title: "test"
output:
flexdashboard::flex_dashboard:
theme: bootstrap
runtime: shiny
---
```{r setup, include=FALSE}
library(flexdashboard)
library(tidyverse)
library(tibbletime)
library(dygraphs)
library(magrittr)
library(xts)
```
```{r global, include=FALSE}
# generate data
set.seed(1)
dat <- data.frame(date = seq(as.Date("2018-01-01"),
as.Date("2018-06-30"),
"days"),
sex = sample(c("male", "female"), 181, replace=TRUE),
lang = sample(c("english", "spanish"), 181, replace=TRUE),
age = sample(20:35, 181, replace=TRUE))
dat <- sample_n(dat, 80)
# Jon Spring's function
prep_dat <- function(filtered_dat, col_name = "total") {
filtered_dat %>%
mutate(new = 1) %>%
arrange(date) %>%
# time series analysis
tibbletime::as_tbl_time(index = date) %>% # convert to tibble time object
select(date, new) %>%
tibbletime::collapse_by("1 week", side = "start", clean = TRUE) %>%
group_by(date) %>%
mutate(total = sum(new, na.rm = TRUE)) %>%
distinct(date, .keep_all = TRUE) %>%
ungroup() %>%
# expand matrix to include weeks without data
complete(
date = seq(date[1], date[length(date)], by = "1 week"),
fill = list(total = 0)
)
}
```
Sidebar {.sidebar}
=====================================
```{r}
radioButtons("diss", label = "Disaggregation",
choices = list("All" = 1, "By Sex" = 2, "By Language" = 3),
selected = 1)
```
Page 1
=====================================
```{r}
# all
all <- reactive(
prep_dat(dat)
)
# males only
males <- reactive(
prep_dat(
dat %>%
filter(sex == "male")
) %>%
rename("total_m" = "total")
)
# females only
females <- reactive(
prep_dat(
dat %>%
filter(sex == "female")
) %>%
rename("total_f" = "total")
)
# english only
english <- reactive(
prep_dat(
dat %>%
filter(lang == "english")
) %>%
rename("total_e" = "total")
)
# spanish only
spanish <- reactive(
prep_dat(
dat %>%
filter(lang == "spanish")
) %>%
rename("total_s" = "total")
)
# combine
totals <- reactive({
all <- all()
females <- females()
males <- males()
english <- english()
spanish <- spanish()
all %>%
select(date, total) %>%
full_join(select(females, date, total_f), by = "date") %>%
full_join(select(males, date, total_m), by = "date") %>%
full_join(select(english, date, total_e), by = "date") %>%
full_join(select(spanish, date, total_s), by = "date")
})
# convert to xts
totals_ <- reactive({
totals <- totals()
xts(totals, order.by = totals$date)
})
# plot
renderDygraph({
totals_ <- totals_()
if (input$diss == 1) {
dygraph(totals_[, "total"],
main= "All") %>%
dySeries("total", label = "All") %>%
dyRangeSelector() %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else if (input$diss == 2) {
dygraph(totals_[, c("total_f", "total_m")],
main = "By sex") %>%
dyRangeSelector() %>%
dySeries("total_f", label = "Female") %>%
dySeries("total_m", label = "Male") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
} else {
dygraph(totals_[, c("total_e", "total_s")],
main = "By language") %>%
dyRangeSelector() %>%
dySeries("total_e", label = "English") %>%
dySeries("total_s", label = "Spanish") %>%
dyOptions(useDataTimezone = FALSE,
stepPlot = TRUE,
drawGrid = FALSE,
fillGraph = TRUE)
}
})
```