7

I'm plotting data which have both a category and a sub-category (see the example data below), and I'd like to display the two levels as nested axis labels (this example was created in Excel):

Nested categories from Excel
The best I've come up with in R is to create a new column with the desired names, like this:

# Example data: one row per sub-category, each belonging to a main category.
df <- data.frame(
  main.cat   = c("A", "A", "B", "B", "B", "C"),
  second.cat = c("a1", "a2", "b1", "b2", "b3", "c1"),
  value      = c(2, 3, 4, 2.5, 1.5, 2.3)
)

# Two-line tick label: sub-category on the first line, main category below it.
df[["x.labels"]] <- paste(df[["second.cat"]], df[["main.cat"]], sep = "\n")

# Plot the values against the combined labels.
ggplot(df, aes(x = x.labels, y = value)) +
  geom_point()

This at least retains both levels of categories, but duplicates all of the main category labels: enter image description here

Does anyone know of anything better, which would look more like Excel's output?

Mike Lynch
  • 451
  • 6
  • 13

2 Answers

5

I think the facet approach is fine:

library(ggplot2)
library(gtable)
library(grid)

# Sample data: sub-categories (second.cat) nested within main categories.
df <- data.frame(
  main.cat   = c("A", "A", "B", "B", "B", "C"),
  second.cat = c("a1", "a2", "b1", "b2", "b3", "c1"),
  value      = c(2, 3, 4, 2.5, 1.5, 2.3)
)

# One facet column per main category, with free x scales and free x spacing,
# and the strip background left unfilled.
p <- ggplot(df, aes(x = second.cat, y = value)) +
  geom_point() +
  facet_grid(
    . ~ main.cat,
    scales = "free_x",
    space = "free_x"
  ) +
  theme(strip.background = element_rect(fill = NA))

But if you want something closer to the Excel look, one approach is to use gtable functions to extract the strip and insert it below the tick mark labels, and then insert some boundary lines between the major categories. Note that the code below is specific to your sample data.

# Move the facet strip below the x-axis tick labels and draw boundary lines
# between the main categories.
#
# NOTE: every numeric row/column index below is hard-coded for the layout
# produced by the sample data (three facets); inspect the output of
# gtable_show_layout(g) and adjust the indices for other data.

# Remove the spacing between the facet panels.
p <- p + theme(panel.spacing = unit(0, "lines"))
g <- ggplotGrob(p)
gtable_show_layout(g)  # to see the layout

# Add a row below the x-axis tick mark labels,
# the same height as the strip (row 7 of the layout).
# `heights` is written out in full rather than relying on `$` partial matching.
g <- gtable_add_rows(g, g$heights[7], 9)

# Get the strip grob
stripGrob <- gtable_filter(g, "strip")

# Insert the strip grob into the new row, spanning all panel columns
g <- gtable_add_grob(g, stripGrob, t = 10, l = 5, b = 10, r = 9)

# Remove the old strip row
g <- g[-7, ]

# Insert line grobs as boundary lines between major categories.
# The grob gets its own name so it does not shadow grid::linesGrob().
boundary_line <- linesGrob(gp = gpar(col = "grey75"))
for (i in c(6, 8)) {
  g <- gtable_add_grob(g, boundary_line, t = 8, l = i, b = 9, r = i)
}

# Insert new columns of zero width to take the line grobs for the first and
# last boundary lines
for (i in c(4, 10)) {
  g <- gtable_add_cols(g, unit(0, "lines"), i)
  g <- gtable_add_grob(g, boundary_line, t = 8, l = i + 1, b = 9, r = i + 1)
}

# Draw it
grid.newpage()
grid.draw(g)

enter image description here

Edit: A crude attempt at generalising

library(ggplot2)
library(gtable)
library(grid)

# Sample data: sub-categories (second.cat) nested within main categories.
df <- data.frame(
  main.cat   = c("A", "A", "B", "B", "C", "D"),
  second.cat = c("a1", "a2", "b1", "b2", "c1", "d1"),
  value      = c(2, 3, 4, 2.5, 1.5, 2.3)
)

# One facet column per main category, with free x scales and free x spacing.
p <- ggplot(data = df, aes(x = second.cat, y = value)) +
  geom_point() +
  facet_grid(. ~ main.cat, space = "free_x", scales = "free_x") +
  theme(strip.background = element_rect(fill = NA))

# Remove the spacing between the facet panels.
p <- p + theme(panel.spacing = unit(0, "lines"))
g <- ggplotGrob(p)
gtable_show_layout(g)  # to see the layout

# Get the layout indices of the panels
# (t = top, l = left, b = bottom, r = right).
# `select` is spelled out in full instead of the partially matched `se`.
panels <- c(subset(g$layout, grepl("panel", g$layout$name), select = t:r))

# Row occupied by the original strips, looked up by name (the same pattern
# gtable_filter() is given below) instead of being hard-coded. It is recorded
# before any rows are added; the new row goes below the panels, so this index
# stays valid.
strip_row <- unique(g$layout$t[grepl("strip", g$layout$name)])

# Get the strip grob
stripGrob <- gtable_filter(g, "strip")

# Its height is
# (`heights` written out in full rather than relying on `$` partial matching)
height <- stripGrob$heights

# Add a row below the x-axis tick mark labels,
# the same height as the strip.
g <- gtable_add_rows(g, height, unique(panels$b + 1))

# Insert the strip grob into the new row, spanning all panel columns
g <- gtable_add_grob(
  g, stripGrob,
  t = unique(panels$b + 2),
  l = min(panels$l),
  r = max(panels$r)
)

# Insert line grobs as boundary lines between major categories.
# The grob gets its own name so it does not shadow grid::linesGrob().
boundary_line <- linesGrob(gp = gpar(col = "grey75"))
# Columns immediately to the right of every panel except the last one.
panelsR <- panels$r[-length(panels$r)]
for (i in panelsR + 1) {
  g <- gtable_add_grob(
    g, boundary_line,
    t = unique(panels$b + 1),
    l = i,
    b = unique(panels$b + 2)
  )
}

# Insert new columns of zero width to take the line grobs for the first and
# last boundary lines. The first position is derived from the leftmost panel
# rather than hard-coded. The second position equals the last panel's column
# once the first insertion has shifted the panels one column to the right.
panelBound <- c(min(panels$l) - 1, max(panels$r) + 1)
for (i in panelBound) {
  g <- gtable_add_cols(g, unit(0, "lines"), i)
  g <- gtable_add_grob(
    g, boundary_line,
    t = unique(panels$b + 1),
    l = i + 1,
    b = unique(panels$b + 2)
  )
}

# Remove the old strip row
g <- g[-strip_row, ]

# Draw it
grid.newpage()
grid.draw(g)
Sandy Muspratt
  • 31,719
  • 12
  • 116
  • 122
  • The second version is exactly what I wanted, thank you. I'm not familiar with the gtable package so I evidently have some reading up to do to work out how to generalise this to handle different sets of categories! – Mike Lynch Mar 05 '15 at 09:31
  • Baptiste has prepared some notes on `gtable` functions, available at [https://github.com/baptiste/gtable/wiki/Description](https://github.com/baptiste/gtable/wiki/Description) – Sandy Muspratt Mar 06 '15 at 07:42
3

Untested, but try:

# Facet by main category with free scales.
ggplot(df, aes(x = second.cat, y = value)) +
  geom_point() +
  facet_grid(~ main.cat, scales = "free")

Note, however, that every main.cat panel would have the same width, and the main.cat labels can only be positioned at the top.

MrGumble
  • 5,631
  • 1
  • 18
  • 33
  • The faceting sort of works, but you end up with all of the sub-divisions appearing for each facet (i.e. for the A section, you still have b1, b2, b3, c1; for the B section, you still have a1, a2, c1; etc). Is there any way to remove the unneeded categories? – Mike Lynch Mar 05 '15 at 09:09
  • 1
    With the argument ``scales = 'free'`` to ``facet_grid`` (edited answer). – MrGumble Mar 05 '15 at 10:06