1

Given two monthly time series data sample from this link.

I will need to create one plot containing 3 subplots: plot1 for the original values, plot2 for month over month changes, and plot3 for year over year changes.

I'm able to draw the plot with code below, but the code is too redundant. So my question is how could achieve that in a concise way? Thanks.

library(xlsx)
library(ggplot2)
library(reshape)
library(dplyr)
library(tidyverse)
library(lubridate)
library(cowplot)
library(patchwork)

df <- read.xlsx('./sample_data.xlsx', 'Sheet1')
colnames(df)
# df

cols <- c('food_index', 'energy_index')
df <- df %>% mutate(date=as.Date(date)) %>% 
  mutate(across(-contains('date'), as.numeric)) %>% 
  mutate(date= floor_date(date, 'month')) %>%
  group_by(date) %>%
  summarise_at(vars(cols), funs(mean(., na.rm=TRUE))) %>%
  mutate(across(cols, list(yoy = ~(. - lag(., 12))/lag(., 12)))*100) %>%
  mutate(across(cols, list(mom = ~(. - lag(., 1))/lag(., 1)))*100) %>% 
  filter(date >= '2018-01-01' & date <= '2021-12-31') %>%
  as.data.frame()

df1 <- df %>%
  select(!grep('mom|yoy', names(df))) 

df1_long <- melt(df1, id.vars = 'date')
plot1 <- ggplot(df1_long[!is.na(df1_long$value), ],
       aes(x = date,
           y = value,
           col = variable)) +
  geom_line(size=0.6, alpha=0.5) +
  geom_point(size=1, alpha=0.8) +
  labs(
    x='',
    y='Unit: $'
  ) 

# MoM changes
df2 <- df %>%
  select(grep('date|mom', names(df)))

df2_long <- melt(df2, id.vars = 'date')
plot2 <- ggplot(df2_long[!is.na(df2_long$value), ],
       aes(x = date,
           y = value,
           col = variable)) +
  geom_line(size=0.6, alpha=0.5) +
  geom_point(size=1, alpha=0.8) +
  labs(
    x='',
    y='Unit: %'
  ) 

# YoY changes
df3 <- df %>%
  select(grep('date|yoy', names(df))) 

df3_long <- melt(df3, id.vars = 'date')
plot3 <- ggplot(df3_long[!is.na(df3_long$value), ],
       aes(x = date,
           y = value,
           col = variable)) +
  geom_line(size=0.6, alpha=0.5) +
  geom_point(size=1, alpha=0.8) +
  labs(
    x='',
    y='Unit: %'
  )
plot <- plot1 + plot2 + plot3 + plot_layout(ncol=1)
# plot <- plot_grid(plot1, plot2, plot3, labels = c('Value', 'MoM', 'YoY'), label_size = 12)
plot

Out:

enter image description here

The expected result will be similar to the plot below (the upper plot will display the original data, the middle plot will display the mom changes data, and the lower plot will display the yoy changes data):

enter image description here

References:

https://waterdata.usgs.gov/blog/beyond-basic-plotting/

http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/

Side-by-side plots with ggplot2

ah bon
  • 9,293
  • 12
  • 65
  • 148

2 Answers2

2

Maybe this is what you are looking for? By reshaping your data to the right shape, using a plot function and e.g. purrr::map2 you could achieve your desired result without duplicating your code like so.

Using some fake random example data to mimic your true data:

library(tidyr)
library(dplyr)
library(ggplot2)

df_long <- df |> 
  rename(food_index_raw = food_index, energy_index_raw = energy_index) |> 
  pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")

plot_fun <- function(x, y, ylab) {
  x <- x |> 
    select(date, variable, value = .data[[y]]) |> 
    filter(!is.na(value))
  
  ggplot(
    x,
    aes(
      x = date,
      y = value,
      col = variable
    )
  ) +
    geom_line(size = 0.6, alpha = 0.5) +
    geom_point(size = 1, alpha = 0.8) +
    labs(
      x = "",
      y = ylab
    )  
}

yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))

plots <- purrr::map2(yvars, ylabs, plot_fun, x = df_long)

library(patchwork)

wrap_plots(plots) + plot_layout(ncol = 1)

enter image description here

DATA

set.seed(123)

date <- seq.POSIXt(as.POSIXct("2017-01-31"), as.POSIXct("2022-12-31"), by = "month")
food_index <- runif(length(date))
energy_index <- runif(length(date))

df <- data.frame(date, food_index, energy_index)

EDIT Adding subtitles to each plot when using patchwork is (as of the moment) a bit tricky. What I would do in this case would be to use a faceting "hack". To this end I slightly adjusted the function to take a subtitle argument and switched to purrr::pmap:

library(tidyr)
library(dplyr)
library(ggplot2)

df_long <- df |> 
  rename(food_index_raw = food_index, energy_index_raw = energy_index) |> 
  pivot_longer(-date, names_to = c("variable", ".value"), names_pattern = "^(.*?_index)_(.*)$")

plot_fun <- function(x, y, ylab, subtitle) {
  x <- x |> 
    select(date, variable, value = .data[[y]]) |> 
    filter(!is.na(value))
  
  ggplot(
    x,
    aes(
      x = date,
      y = value,
      col = variable
    )
  ) +
    geom_line(size = 0.6, alpha = 0.5) +
    geom_point(size = 1, alpha = 0.8) +
    facet_wrap(~.env$subtitle) +
    labs(
      x = "",
      y = ylab
    ) +
    theme(strip.background = element_blank(), strip.text.x = element_text(hjust = 0))
}

yvars <- c("raw", "mom", "yoy")
ylabs <- paste0("Unit: ", c("$", "%", "%"))
subtitle <- c("Original", "Month-to-Month", "Year-to-Year")

plots <- purrr::pmap(list(y = yvars, ylab = ylabs, subtitle = subtitle), plot_fun, x = df_long)

library(patchwork)

wrap_plots(plots) + plot_layout(ncol = 1)

enter image description here

stefan
  • 90,330
  • 6
  • 25
  • 51
  • Thanks for sharing this wonderful answer, I believe it quite helpful to others as well. – ah bon May 18 '22 at 07:45
  • One more question, is there a way to add titles for each subplots or y-axis labels to indicate `raw`, `mom` and `yoy`? – ah bon May 18 '22 at 07:47
  • 1
    Hi @ahbon. I just made an edit showing one approach to add subtitles to the plots. – stefan May 18 '22 at 08:19
  • Maybe easier to use facets Stefan? – Allan Cameron May 18 '22 at 09:02
  • @AllanCameron. Hm. Yes and no. IMHO it depends on the use case. I always switch between both approaches in my work. But you are right. In the present case faceting is probably the easier way to achieve the desired result. – stefan May 18 '22 at 09:16
  • 2
    @stefan I'm not a huge fan of facets in general - I just don't like the way strips look, unless their boxes are removed. I also prefer plots that tell a single story as simply as possible, so unless the facets are making a clear point about the difference or similarity of two data series, I would present the plots on separate pages altogether. As you say, it depends on the use case. I think in this case I would present the plots individually, but then again it's a matter of taste. Nice to have both options! – Allan Cameron May 18 '22 at 09:47
2

The target output is done with facets rather than stitching plots together. You could do this too if you like, but it requires reshaping your data in a different way. Which approach you take is really a matter of taste.

library(ggplot2)
library(dplyr)

yoy <- function(x) 100 * (x - lag(x, 13)) / lag(x, 12)
mom <- function(x) 100 * (x - lag(x)) / lag(x)

df %>%
  mutate(date = as.Date(date, origin = "1899-12-30"),
         `Actual value (Dollars).Food Index` = food_index,
         `Month-on-month change (%).Food Index` = mom(food_index),
         `Year-on-year change (%).Food Index` = yoy(food_index),
         `Actual value (Dollars).Energy Index` = energy_index,
         `Month-on-month change (%).Energy Index` = mom(energy_index),
         `Year-on-year change (%).Energy Index` = yoy(energy_index)) %>%
  select(-food_index, -energy_index) %>%
  tidyr::pivot_longer(-1) %>%
  filter(date > as.Date("2018-01-01")) %>%
  tidyr::separate(name, into = c("series", "index"), sep = "\\.") %>%
  ggplot(aes(date, value, color = index)) +
  geom_point(na.rm = TRUE) +
  geom_line() +
  facet_grid(series~., scales = "free_y") +
  theme_bw(base_size = 16)

enter image description here


Reproducible data taken from link in question

df <- structure(list(date = c(42766, 42794, 42825, 42855, 42886, 42916, 
42947, 42978, 43008, 43039, 43069, 43100, 43131, 43159, 43190, 
43220, 43251, 43281, 43312, 43343, 43373, 43404, 43434, 43465, 
43496, 43524, 43555, 43585, 43616, 43646, 43677, 43708, 43738, 
43769, 43799, 43830, 43861, 43890, 43921, 43951, 43982, 44012, 
44043, 44074, 44104, 44135, 44165, 44196, 44227, 44255, 44286, 
44316, 44347, 44377, 44408, 44439, 44469, 44500, 44530, 44561
), food_index = c(58.53, 61.23, 55.32, 55.34, 61.73, 56.91, 54.27, 
59.08, 60.11, 66.01, 60.11, 63.41, 69.8, 72.45, 81.11, 89.64, 
88.64, 88.62, 98.27, 111.11, 129.39, 140.14, 143.44, 169.21, 
177.39, 163.88, 135.07, 151.28, 172.81, 143.82, 162.13, 172.22, 
176.67, 179.3, 157.27, 169.12, 192.51, 194.2, 179.4, 169.1, 193.17, 
174.92, 181.92, 188.41, 192.14, 203.41, 194.19, 174.3, 174.86, 
182.33, 182.82, 185.36, 192.41, 195.59, 202.6, 201.51, 225.01, 
243.78, 270.67, 304.57), energy_index = c(127.36, 119.87, 120.96, 
112.09, 112.19, 109.24, 109.56, 106.89, 109.35, 108.35, 112.39, 
117.77, 119.52, 122.24, 120.91, 125.41, 129.72, 135.25, 139.33, 
148.6, 169.62, 184.23, 204.38, 198.55, 189.29, 202.47, 220.23, 
240.67, 263.12, 249.74, 240.84, 243.42, 261.2, 256.76, 258.69, 
277.98, 289.63, 293.46, 310.81, 318.68, 310.04, 302.17, 298.62, 
260.92, 269.29, 258.84, 241.68, 224.18, 216.36, 226.57, 235.98, 
253.86, 267.37, 261.99, 273.37, 280.91, 291.84, 297.88, 292.78, 
289.79)), row.names = c(NA, 60L), class = "data.frame")
Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
  • Thanks for sharing your solution, the output plot seems quite awesome. I would like to ask two questions, 1. why `yoy`s are calculated by `function(x) 100 * (x - lag(x, 13)) / lag(x, 12)`, not `function(x) 100 * (x - lag(x, 12)) / lag(x, 12)`, 2. could you pls explain more regading `pivot_longer(-1)`? Thanks. – ah bon May 18 '22 at 10:55