14

Consider the following data.frame and chart:

library(ggplot2)
library(scales)
df <- data.frame(L=rep(LETTERS[1:2],each=4),
                 l=rep(letters[1:4],2),
                 val=c(96.5,1,2,0.5,48,0.7,0.3,51))
#   L l  val
# 1 A a 96.5
# 2 A b  1.0
# 3 A c  2.0
# 4 A d  0.5
# 5 B a 48.0
# 6 B b  0.7
# 7 B c  0.3
# 8 B d 51.0

ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(label=percent(val/100)),position=position_stack(vjust =0.5))

plot1 Some labels are hard to read due to small values. I'd like to jitter those vertically. I'm aware of position_jitter but it doesn't seem compatible with a stacked bar chart.

moodymudskipper
  • 46,417
  • 11
  • 121
  • 167
  • Related: [*Alternate geom_text position with hjust*](https://stackoverflow.com/q/24626769/2204410) – Jaap Apr 27 '18 at 10:41

2 Answers2

18

We can create a new Position, position_jitter_stack().

 position_jitter_stack <- function(vjust = 1, reverse = FALSE, 
                                  jitter.width = 1, jitter.height = 1,
                                  jitter.seed = NULL, offset = NULL) {
  ggproto(NULL, PositionJitterStack, vjust = vjust, reverse = reverse, 
          jitter.width = jitter.width, jitter.height = jitter.height,
          jitter.seed = jitter.seed, offset = offset)
}

PositionJitterStack <- ggproto("PositionJitterStack", PositionStack,
  type = NULL,
  vjust = 1,
  fill = FALSE,
  reverse = FALSE,
  jitter.height = 1,
  jitter.width = 1,
  jitter.seed = NULL,
  offset = 1,

  setup_params = function(self, data) {
    list(
      var = self$var %||% ggplot2:::stack_var(data),
      fill = self$fill,
      vjust = self$vjust,
      reverse = self$reverse,
      jitter.height = self$jitter.height,
      jitter.width = self$jitter.width,
      jitter.seed = self$jitter.seed,
      offset = self$offset
    )
  },

  setup_data = function(self, data, params) {
    data <- PositionStack$setup_data(data, params)
    if (!is.null(params$offset)) {
      data$to_jitter <- sapply(seq(nrow(data)), function(i) {
        any(abs(data$y[-i] - data$y[i]) <= params$offset)
      })
    } else {
      data$to_jitter <- TRUE
      }
    data
  },

  compute_panel = function(data, params, scales) {
    data <- PositionStack$compute_panel(data, params, scales)

    jitter_df <- data.frame(width = params$jitter.width,
                            height = params$jitter.height)

    if (!is.null(params$jitter.seed)) jitter_df$seed = params$jitter.seed
    jitter_positions <- PositionJitter$compute_layer(
      data[data$to_jitter, c("x", "y")],
      jitter_df
    )

    data$x[data$to_jitter] <- jitter_positions$x
    data$y[data$to_jitter] <- jitter_positions$y

    data
  }
)

And plot it ...

ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(label=percent(val/100)),
            position = position_jitter_stack(vjust =0.5,
             jitter.height = 0.1,
             jitter.width =  0.3, offset = 1))

enter image description here

Alternatively, we could write a very simple repel function.

library(rlang)

position_stack_repel <- function(vjust = 1, reverse = FALSE, 
                                 offset = 1) {
  ggproto(NULL, PositionStackRepel, vjust = vjust, reverse = reverse,
          offset = offset)
}

PositionStackRepel <- ggproto("PositionStackRepel", PositionStack,
  type = NULL,
  vjust = 1,
  fill = FALSE,
  reverse = FALSE,
  offset = 1,

  setup_params = function(self, data) {
    list(
      var = self$var %||% ggplot2:::stack_var(data),
      fill = self$fill,
      vjust = self$vjust,
      reverse = self$reverse,
      offset = self$offset
    )
  },

  setup_data = function(self, data, params) {
    data <- PositionStack$setup_data(data, params)
    data <- data[order(data$x), ]
    data$to_repel <- unlist(by(data, data$x, function(x) {
      sapply(seq(nrow(x)), function(i) {
        (x$y[i]) / sum(x$y) < 0.1 & (
          (if (i != 1) (x$y[i-1] / sum(x$y)) < 0.1 else FALSE) | (
            if (i != nrow(x)) (x$y[i+1] / sum(x$y)) < 0.1 else FALSE))
      })
    }))
    data
  },

  compute_panel = function(data, params, scales) {
    data <- PositionStack$compute_panel(data, params, scales)
    data[data$to_repel, "x"] <- unlist(
      by(data[data$to_repel, ], data[data$to_repel, ]$x, 
         function(x) seq(x$x[1] - 0.3, x$x[1] + 0.3, length.out = nrow(x))))
    data
  }
)

Plot it:

ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(label=percent(val/100)),
            position = position_stack_repel(vjust =0.5))

enter image description here

erocoar
  • 5,723
  • 3
  • 23
  • 45
  • That's a great solution, I believe it's the strict but neater equivalent of my `position_jitter` solution (if we set `jitter.width = 0`). It suffers from the same flaws unfortunately. Isn't it possible to move the computation of `inds` in your function code ? It spoils a bit the beauty of the solution. – moodymudskipper Apr 27 '18 at 10:56
  • It's actually not a strict equivalent of my `position_jitter` solution, your solution has the nice feature of not jittering isolated labels. On your capture the `99%` is offset on the left but this is not something I reproduce when running your code. – moodymudskipper Apr 27 '18 at 11:13
  • Yeah that is what is supposed to happen (since you calculate the indices to be jittered) - not even sure what happened in my picture. You can certainly put the calculation in the `position`, see my edit – erocoar Apr 27 '18 at 11:31
  • thanks this is very useful, your second proposal is the perfect answer for Jaap's linked question https://stackoverflow.com/a/24627761/2270475 . I was thinking vertical jittering would look better but I'll go with this one in the end. – moodymudskipper Apr 27 '18 at 11:46
  • your solution "fails" (wrong alignment) with `df <- data.frame(L=rep(LETTERS[1:2],each=2), l=rep(letters[1:2],2), val=c(99,1,1,99))` and `df <- data.frame(L=LETTERS[1:2], l=letters[1:2], val=c(100,100))` – moodymudskipper Apr 27 '18 at 13:04
  • Good point, I've edited the `PositionStackRepel` to calculate for each bar separately! – erocoar Apr 27 '18 at 13:13
  • almost there :)! but it fails with `val=c(0.99,0.01,0.01,0.99)` or `val=c(3,4,5,6)`. And I think you removed the definition of `position_stack_repel` from your answer by accident – moodymudskipper Apr 27 '18 at 13:38
  • 1
    Also `%||%` comes from `rlang` so you should add a library call. – moodymudskipper Apr 27 '18 at 13:47
  • I've changed it to a percentage-based approach now, that should work on all the cases (given the right offset, I chose `offset = 0.1` for testing). I didn't know `%||%` comes from `rlang` - `ggplot > utilities` for me :) But will add – erocoar Apr 27 '18 at 14:03
  • It seems that it works as long as `df` is sorted by `L`. e.g. `df <- data.frame(L=rep(LETTERS[1:2],3), l=rep(letters[1:3],each=2), val=c(96,96,3,3,1,1))` will permute some 'columns'. – moodymudskipper Apr 27 '18 at 15:17
  • Just had to order by `data$x` first, now it does not do that! – erocoar Apr 27 '18 at 15:28
  • And the green tick mark is back :). Thanks so much for your efforts I'll be using this a lot, probably as a full replacement of `position_stack` in fact. This also inspires me to become a `ggproto` ninja too. – moodymudskipper Apr 27 '18 at 15:36
  • 1
    Glad it helped! I'll look into improving the repel (i.e. adding vertical one / better detection) over the weekend and add to my `ggpol` library - will update here too :) – erocoar Apr 27 '18 at 15:41
  • So, I think `geom_text` might be better to build on after all because you can directly access the widths and heights of the labels. Check [this](https://github.com/erocoar/ggpol/blob/master/R/geom_bartext.R) out! You can set `dir=v` or `dir=h` depending on where you want to unstack. It still needs some improvements but in the cases you provided it already does work fine :) – erocoar May 03 '18 at 12:32
  • great, it works very nicely for my example, which is my main usecase. Thanks for including the dir="v" option, which works smoothly as well. for this new solution the positions of the label depends on the device's dimension, which is probably a good thing most of the time, it wasn't the case before though so now for consistency one should first create a device with given dimensions. I would suggest your rename dir/v/h to direction/x/y as this way it's consistent to ggrepel, unless you're being consistent with something else I don't know of. – moodymudskipper May 03 '18 at 13:54
  • Unfortunately it fails for `position_stack()` with no `vjust` – moodymudskipper May 03 '18 at 13:55
  • A small detail that you might disagree with but I think it'd be nice to take y as the default value for the label so syntax can be shortened for simplest cases. – moodymudskipper May 03 '18 at 14:02
  • And finally, using my example data the `spacing` argument didn't do anything. I was trying to set a minimum vertical space between labels as your function was considering that the left side didn't require repelling though it overlapped slightly. Option `check_overlap` made a label disappear. – moodymudskipper May 03 '18 at 14:07
  • Good points! I fixed the overlap checking to take into account the additional spacing, and it should also work without `vjust` now. Also, `dir="y"` is default now. `x` and `y` actually come from the `facet_wrap()` syntax, although I'll look into how `ggrepel` does it – erocoar May 03 '18 at 14:50
  • Thanks, I'll check it asap, better be consistent with ggplot then, I'm not sure why `ggrepel` used a different parameter, maybe with some axis switching operation x becomes vertical and they weren't comfortable with the h/v, so changed the parameter name as well. – moodymudskipper May 03 '18 at 15:24
8

I found 2 solutions that involve computing the base position of labels beforehand, one using position_jitter and one using ggrepel (suggested by user @gfgm in deleted answer)

create positions:

Note that I need to put NAs first here so I used: How to have NA's displayed first using arrange()

library(dplyr)
df <- df %>%
  group_by(L) %>%
  arrange(!is.na(l), desc(l)) %>% 
  mutate(pos = cumsum(val) - val/2)) # the -val/2 is to center the text

position_jitter solution

set.seed(2)
ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text(aes(y=pos,label=percent(val/100)),position = position_jitter(width = 0,height=4))

plot1 ggrepel solution

library(ggrepel)
ggplot(df,aes(x=L,y=val,fill=l)) +
  geom_bar(stat="identity") +
  geom_text_repel(aes(y=pos,label=percent(val/100)),direction="y",box.padding=0)

plot2 comparison of both

ggrepel solution doesn't require manual calibration, the output isn't perfect but it's consistent, it also has great flexibility though and would be the solution of choice for most variants of my issue. Note that geom_text_repel has a seed parameter, but in my case it doesn't affect the results.

position_jitter doesn't give consistent result, positions are randomized, and for most cases it's a less good solution as text overlays (I think it's jittering as if we were dealing with points). For a given chart though it can give a better solution than ggrepel using set.seed beforehand, so maybe better for some reporting, worse the rest of the time.

If geom_text_repel supported position_stack I wouldn't have to go through the pain of the first step, but it doesn't unfortunately.

Both solutions have the slightly annoying effect of jittering isolated labels that shouldn't be jittered at all (this issue is handled by @erocoar's solution).

moodymudskipper
  • 46,417
  • 11
  • 121
  • 167
  • Please post screenshots of the two approaches? – smci Apr 27 '18 at 10:23
  • 1
    This is nice. I deleted my answer precisely because of ggrepel lacking `position_stack`. Was looking at it now actually to see if there was a hack around -- would be great if could be added. – gfgm Apr 27 '18 at 10:23