3

I have the following data

# Example data frame (dput() output). It is bound to `data` because the ggplot
# calls that use it reference it by that name; without the assignment `data`
# would resolve to the utils::data() function instead of this data frame.
data <- structure(list(id = 1:7, date = c(2019L, 2019L, 2019L, 2019L, 
2019L, 2019L, 2019L), station = structure(1:7, .Label = c("41B004", 
"41B011", "41MEU1", "41N043", "41R001", "41R012", "41WOL1"), class = "factor"), 
    days = c(6L, 21L, 5L, 9L, 13L, 14L, 3L), mean3y = c(8.33, 
    21.3, NA, 10, 11.3, 16.3, 3.67), environ = structure(c(3L, 
    4L, 2L, 1L, 3L, 4L, 3L), .Label = c("Industriel avec influence modérée du trafic", 
    "Urbain avec faible influence du trafic", "Urbain avec influence modérée du trafic", 
    "Urbain avec très faible influence du trafic"), class = "factor")), class = "data.frame", row.names = c(NA, 
-7L))

which is plotted with the following ggplot code

# Bar chart of `days` per station: stations are sorted by decreasing `days`
# and the bars are filled by `environ`.
ggplot(
  data = data,
  mapping = aes(x = reorder(station, -days), y = days, fill = environ)
) +
  # Layers, in drawing order: bars, value labels, dashed line at 25, line at 0.
  geom_col(colour = "black", width = 0.5, size = 0.5) +
  geom_text(
    mapping = aes(label = days),
    colour = "black", size = 3.5, vjust = -0.3
  ) +
  geom_hline(
    mapping = aes(yintercept = 25),
    colour = "red", linetype = "dashed", size = 1
  ) +
  geom_hline(yintercept = 0) +
  # Axis titles and a two-column legend.
  labs(
    x = "",
    y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")
  ) +
  guides(fill = guide_legend(ncol = 2)) +
  # theme() comes after theme_minimal() so the tweaks are not overwritten.
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.margin = margin(l = -2, unit = "line"),
    legend.text = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 12),
    axis.title.y = element_text(size = 11),
    panel.grid.major.x = element_blank()
  )

generating this figure.

I would like to also add the variable mean3y to this figure, next to days for each x value, using another geom_col, such as

# Attempt at showing mean3y next to days by adding a second geom_col layer.
p <- ggplot(data, aes(x = reorder(station, -days), 
                      y = days, fill = environ)) + 
  # First bar layer: uses the plot-level mapping (y = days).
  geom_col(width = 0.5, colour = "black", size = 0.5) + 
  guides(fill = guide_legend(ncol = 2)) +
  # Text labels showing days; vjust = -0.3 shifts them vertically off y = days.
  geom_text(aes(label = days), 
            vjust=-0.3, color="black", size = 3.5) +
  # Second bar layer: y = mean3y, with the same x mapping and the same bar
  # width as the first layer. The mapping is spelled out in full because
  # inherit.aes = FALSE drops the plot-level aesthetics for this layer.
  geom_col(aes(x = reorder(station, -days), 
               y = mean3y, fill = environ), 
           inherit.aes = FALSE,
           width = 0.5, colour = "black", size = 0.5) +
  # Dashed red horizontal line at y = 25.
  geom_hline(aes(yintercept = 25), 
             linetype = 'dashed', colour = 'red', size = 1) +
  labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
  # theme() follows theme_minimal() so these settings are applied on top of it.
  theme_minimal() + 
  theme(legend.position="bottom", 
        legend.title = element_blank(), 
        legend.margin=margin(l = -2, unit='line'),
        legend.text = element_text(size = 11),
        axis.text.y = element_text(size = 12), 
        axis.title.y = element_text(size = 11), 
        axis.text.x = element_text(size = 11),
        panel.grid.major.x = element_blank()) + 
  # Horizontal line at y = 0, added last so it is drawn over the other layers.
  geom_hline(yintercept = 0)

However, I was not able to achieve the desired result, despite the use of position = "dodge", as illustrated by this figure where both variables are overlapping.

Is there a way to achieve this, please ? Many thanks.

stefan
  • 90,330
  • 6
  • 25
  • 51
Alessandro
  • 129
  • 1
  • 8

3 Answers

2

Position dodges only work in a single layer and not between multiple layers. You could either solve the problem by manually nudging them or by formatting the data in such a way that it can be dodged. Examples of both in code below.

Your data was hard to copy into my R session and your code was more elaborate than necessary to demonstrate the problem, so I've kept both to a minimum.

library(ggplot2)

# Two-row toy data set: `x` holds the category, `y` and `z` the two series.
df <- data.frame(x = c("A", "B"), y = c(10, 15), z = c(12, 9))

# Nudging example: each series gets its own layer, shifted left or right.
# The bar width and the nudge distances are hand-picked; tune them per data set.
ggplot(data = df, mapping = aes(x = x, y = y)) +
  geom_col(
    mapping = aes(fill = "first col"),
    position = position_nudge(x = -0.225),
    width = 0.45
  ) +
  geom_col(
    mapping = aes(y = z, fill = "second_col"),
    position = position_nudge(x = 0.225),
    width = 0.45
  )


library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.6.3
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Dodging example: stack two copies of `df` (the second with `z` moved into
# `y`), tagged by `a`, so one layer holds both series and can dodge them.
ggplot(mapping = aes(x = x, y = y)) +
  geom_col(
    mapping = aes(fill = a),
    data = rbind(
      df %>% mutate(a = "first_col"),
      df %>% mutate(y = z, a = "second_col")
    ),
    position = "dodge"
  )

Created on 2020-04-16 by the reprex package (v0.3.0)

teunbrand
  • 33,645
  • 4
  • 37
  • 63
1

One way to achieve this is to convert the data to long format via e.g. tidyr::pivot_longer, so that the variables we want to plot are categories of one variable. To get the order of the stations right I reorder station according to days before converting to long. To get the bars side-by-side I use position_dodge2 both in geom_col and geom_text. To show which bar corresponds to which var I put the names of the vars in the labels above the bars.

library(ggplot2)
library(dplyr)
library(tidyr)

# Fix the station order (decreasing `days`) while `days` is still its own
# column, then reshape `days` and `mean3y` into `var`/`value` pairs.
data1 <- pivot_longer(
  mutate(data, station = forcats::fct_reorder(station, -days)),
  cols = c(days, mean3y),
  names_to = "var",
  values_to = "value"
)

# Strip every occurrence of "days" or "mean3y", together with the single
# character that follows it, from the labels in `x`. The `.` in the pattern is
# unescaped, so it matches any character and not only a literal dot.
# Returns a character vector of the same length as `x`.
my_labels <- function(x) {
  gsub(pattern = "(days.|mean3y.)", replacement = "", x = x)
}

    # Side-by-side bars of `value` for every station/variable pair; each bar is
    # labelled with the variable name and its value.
    p <- ggplot(
      data = data1,
      mapping = aes(x = station, y = value, fill = environ)
    ) +
      # Layers, in drawing order: bars, labels, dashed line at 25, line at 0.
      geom_col(
        colour = "black",
        position = position_dodge2(preserve = "single")
      ) +
      geom_text(
        mapping = aes(label = paste(var, "\n", value)),
        position = position_dodge2(width = 0.9, preserve = "single"),
        colour = "black", size = 3.5, vjust = -0.3
      ) +
      geom_hline(
        mapping = aes(yintercept = 25),
        colour = "red", linetype = "dashed", size = 1
      ) +
      geom_hline(yintercept = 0) +
      # Scales, titles and legend layout.
      scale_x_discrete(labels = my_labels) +
      labs(
        x = "",
        y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")
      ) +
      guides(fill = guide_legend(ncol = 2)) +
      # theme() comes after theme_minimal() so the tweaks are not overwritten.
      theme_minimal() +
      theme(
        legend.position = "bottom",
        legend.title = element_blank(),
        legend.margin = margin(l = -2, unit = "line"),
        legend.text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 12),
        axis.title.y = element_text(size = 11),
        panel.grid.major.x = element_blank()
      )

enter image description here

stefan
  • 90,330
  • 6
  • 25
  • 51
1

Consider this possible solution for your dataset - although you may want to play around with the aesthetics. I attempted to keep the aesthetics as similar as possible and set the bars to be the same color (based on df$environ), but make the difference between "days" and "mean3y" clear with text labels.

Data Preparation

First, we need to take the information from two columns and combine them: "days" and "mean3y". In your original data frame, these two columns can (and should) be combined to show type of value and the value itself. What we want to do is convert this type of data:

  day.type.1 day.type.2
1          4          1
2          5          3
3          6          4
4          7          5

To this type of data:

    day.type day.value
1 day.type.1         4
2 day.type.1         5
3 day.type.1         6
4 day.type.1         7
5 day.type.2         1
6 day.type.2         3
7 day.type.2         4
8 day.type.2         5

In the above example, you can use the gather() function from tidyr:

# Collapse all columns of `t` into key/value pairs.
gather(t, key = "day.type", value = "day.value")

If we apply that to your data frame, we have to specify to do that to the data frame, but ignore the other columns:

# Gather only the two measurement columns. Naming them explicitly (instead of
# excluding date, station and environ) keeps every other column, such as `id`,
# out of the reshaped `variable`/`value` pair.
df1 <- df %>% gather('variable', 'value', days, mean3y)

This converts your "days" and "mean3y" columns into two new columns called "variable" (which is either "days" or "mean3y") and "value" (which is the actual number)

I also had to convert the new column "value" into numeric... but that could have been due to how I had to import your data, which was... difficult. Please note, it is recommended that you include your dataset in future questions via the output of dput(your.data.frame)... believe me it makes all the difference. ;)

Plotting the new Dataset

Here the idea is to keep your same x axis, but we are now setting "value" as the y aesthetic. In addition, you want to make sure to include a group= aesthetic of "variable" so that dodging works appropriately for text and columns. If you are not familiar, "dodging" is the term for when a geom is kind of "split" across an axis aesthetic: like "subsetting" of discrete axis values.

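The geom_col call is set to position='dodge'... not much else changes there. You need this because the default position for geom_col is "stack", which would put the "days" and "mean3y" columns of each station on top of one another instead of side by side. (In your attempt the columns overlapped for a different reason: they were drawn in two separate layers, and dodging only works within a single layer.)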

The geom_text call has a few things going on:

  • The dodge is set here with position=position_dodge(), which allows you to specify how far apart the "dodge" will be. It allowed me to "push apart" the labels to be a bit wider so that the text looks okay and doesn't run into the adjacent column. A larger width= argument in position_dodge() results in "pushing" the labels further apart. A value of 0 would be putting the labels in the center of the x axis aesthetic... 0.5 is default.

  • The label aesthetic is actually using both "variable" and "value" columns as a way to differentiate your columns from one another. I used paste0 and stuck a '\n' in-between so that you had two lines and could fit them. Had to adjust the size a bit too.

  • By default, the labels would be positioned right at y (value), which would mean they would overlap with your columns. You need to "nudge" them up, but cannot use nudge_y to push them up because you cannot combine nudge_y with position. What to do? Well, we can just overwrite the default y aesthetic by setting it equal to y + "a number" to nudge them up. Much better to do it this way.

Here's the final code:

# Dodged bar chart of the reshaped data: one bar per station and variable,
# filled by `environ` and labelled with the variable name and its value.
ggplot(df1, aes(
  # Order the stations by decreasing `days` only. Rows of any other variable
  # contribute 0 to the per-station sum, so neither the `mean3y` values nor a
  # missing `mean3y` can change the order (reorder() sorts NA scores last,
  # which would otherwise push a station with a missing value to the end).
  x = reorder(station, ifelse(variable == "days", -value, 0), FUN = sum),
  y = value, fill = environ,
  # Grouping by `variable` lets position_dodge() separate the two bars and
  # their labels within each station.
  group = variable
)) +
    geom_col(width = 0.5, colour = "black", size = 0.5, position = 'dodge') +
    guides(fill = guide_legend(ncol = 2)) +
    # The labels are lifted by overriding y (value + 1.5) rather than with
    # nudge_y, which cannot be combined with a position argument.
    geom_text(aes(label = paste0(variable, '\n', value), y = value + 1.5),
              color = "black", size = 3,
              position = position_dodge(0.7)) +
    # Dashed red horizontal line at y = 25.
    geom_hline(aes(yintercept = 25),
               linetype = 'dashed', colour = 'red', size = 1) +
    labs(x = '', y = bquote("Nombre de jours de dépassement de NET60" ~ O[3] ~ "en 2019")) +
    # theme() comes after theme_minimal() so the tweaks are not overwritten.
    theme_minimal() +
    theme(legend.position = "bottom", legend.title = element_blank(),
          legend.margin = margin(l = -2, unit = 'line'),
          legend.text = element_text(size = 11),
          axis.text.y = element_text(size = 12),
          axis.title.y = element_text(size = 11),
          axis.text.x = element_text(size = 11),
          panel.grid.major.x = element_blank()) +
    # Horizontal line at y = 0, added last so it is drawn over the bars.
    geom_hline(yintercept = 0)

enter image description here

chemdork123
  • 12,369
  • 2
  • 16
  • 32