0

I would like to reorder stacked barplot datapoints so that in each bar they are sorted from largest to smallest COMPETITOR by its total VALUE and not alphabetically.

I prepared the data so that I can use fct_reorder (the commented-out line): with it, the bar segments get sorted, but the labels do not follow the changed order. How can I make the labels on the plot follow suit and sit in the right positions, in the middle of their bar segments?

Here is my working reproducible example with the fct_reorder line commented out. If you uncomment it, the datapoints will get sorted but labels will remain in wrong positions.

library(tidyverse)
library(scales)


# Example input with CUSTOMER / COMPETITOR / VALUE columns; some
# CUSTOMER-COMPETITOR pairs occur more than once (e.g. AAA / YYY).
data <- tibble::tibble(
  CUSTOMER = c(
    "AAA", "AAA", "AAA", "AAA",
    "BBB", "BBB", "BBB", "BBB",
    "CCC", "CCC", "DDD", "CCC"
  ),
  COMPETITOR = c(
    "XXX", "YYY", "ZZZ", "YYY",
    "XXX", "YYY", "ZZZ", "YYY",
    "YYY", "ZZZ", "YYY", "VVV"
  ),
  VALUE = c(
    23400, 10000, 80000, 60000,
    10000, 20000, 10000, 80000,
    30000, 20000,  7000, 10000
  )
)


# Label formatter: values are multiplied by scale = 1e-6 (i.e. expressed in
# millions) and suffixed with the unit "mln", separated by a single space.
unit_mln <- scales::unit_format(
  scale = 1e-6, unit = "mln", sep = " ",
  digits = 2, justify = "right"
)

# Manual fill scale: one fixed colour per COMPETITOR, scale title "legend".
col_competitors <- scale_fill_manual(
  name = "legend",
  values = c(
    XXX = "navyblue",
    YYY = "red",
    ZZZ = "lightyellow",
    VVV = "green"
  )
)



# Plot data: one row per (CUSTOMER, COMPETITOR) with
#   CUST_VALUE - total VALUE of the customer,
#   COMP_VALUE - total VALUE of the competitor,
#   VALUE      - total VALUE of the pair,
# sorted by CUST_VALUE, largest first.
df_cust <- data %>%
  mutate(COMPETITOR = as.factor(COMPETITOR)) %>%
  # Each group_by() replaces the previous grouping, so no ungroup() is needed
  # between the two helper totals.
  group_by(CUSTOMER) %>%
  mutate(CUST_VALUE = sum(VALUE)) %>%
  group_by(COMPETITOR) %>%
  mutate(COMP_VALUE = sum(VALUE)) %>%
  group_by(CUSTOMER, COMPETITOR) %>%
  summarise(
    CUST_VALUE = max(CUST_VALUE),
    COMP_VALUE = max(COMP_VALUE),
    VALUE = sum(VALUE)
  ) %>%
  # summarise() removes only the last grouping level, so the result would
  # still be grouped by CUSTOMER. Drop the grouping so that later mutate()
  # calls (e.g. reordering COMPETITOR) work on the whole table at once.
  ungroup() %>%
  arrange(desc(CUST_VALUE))

# df_cust<-df_cust %>% mutate(COMPETITOR= fct_reorder(COMPETITOR, -COMP_VALUE))



# Total VALUE per COMPETITOR.
df_comp <- summarise(group_by(data, COMPETITOR), VALUE = sum(VALUE))

# Wrap CUSTOMER names at a width of 30.
df_cust$CUSTOMER <- str_wrap(df_cust$CUSTOMER, width = 30)



# Main plot: one stacked bar per CUSTOMER (bars ordered by CUST_VALUE,
# largest first), segments filled by COMPETITOR, each segment labelled with
# its VALUE rounded to the nearest 10 000 and formatted by unit_mln().
plt_main <- df_cust %>%
  ggplot(aes(x = fct_reorder(CUSTOMER, -CUST_VALUE), y = VALUE)) +
  geom_col(
    aes(fill = COMPETITOR),
    alpha = 0.5,
    position = position_stack(reverse = TRUE),
    colour = "darkgray",
    show.legend = FALSE
  ) +
  # NOTE: unlike the bar layer, this layer sets no fill/group aesthetic and
  # does not use reverse = TRUE.
  geom_text(
    aes(label = unit_mln(round(VALUE, -4))),
    size = 3,
    position = position_stack(vjust = 0.5)
  ) +
  xlab(" ") +
  ylab("Market share (GROSS PLN)") +
  ggtitle(paste("Top competitors in top customers: ", "Poland")) +
  theme_bw(base_size = 11) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = c(0.94, 0.75)
  ) +
  col_competitors +
  scale_y_continuous(
    labels = unit_mln,
    # Secondary axis: share of the grand total. The total must come from
    # df_cust; no object named `df` is defined in this script.
    sec.axis = sec_axis(~ . / sum(df_cust$VALUE), labels = scales::percent)
  )
Jacek Kotowski
  • 620
  • 16
  • 49
  • 1
    Have you tried using the `group` aesthetic as shown [in this answer](https://stackoverflow.com/a/50845682/2461552)? It might involve making a new "order" variable based on your sorting variable. – aosmith Jun 18 '18 at 14:15
  • I think I have succeeded by trial and error by reordering factors. I will paste the answer hoping it will be helpful or maybe someone will elaborate on it. – Jacek Kotowski Jun 19 '18 at 06:30
  • Glad you figured something out. Just as an FYI, I think you might have gotten more attention if you'd given a *minimal* reproducible example. Since your plotting dataset is pretty small, you could have `dput` the summary datasets you were plotting (skipping all data manipulation) and then given basic plotting code (just geoms; no themes, no scales, no labels) so folks could easily see and help with the problem. – aosmith Jun 19 '18 at 13:18

2 Answers

1

I think the following should do it:

# df_cust is the summarised tibble built in the question; no object named
# `df` is defined there.
ggplot(df_cust, aes(x = reorder(CUSTOMER, -COMP_VALUE), y = VALUE))

This orders the CUSTOMER column by COMP_VALUE.

M.Punt
  • 101
  • 4
  • Thanks for the help; unfortunately, it does not work. The bars are already ordered descending by total value; the issue is the order within the bars. If that order is changed, the labels do not follow the new order. – Jacek Kotowski Jun 18 '18 at 09:35
0

I marked with `#***description***` comments the rows that made it possible to order both the datapoints and the labels within the stacked bars. Now they are ordered by the total sales of each COMPETITOR and not alphabetically. I admit I achieved it by trial and error, and it may not be the optimal answer.

library(tidyverse)
library(scales)

# Example data

# Columns: CUSTOMER / COMPETITOR / VALUE; some CUSTOMER-COMPETITOR pairs occur
# more than once (e.g. AAA / YYY).
data <- tibble::tibble(
  CUSTOMER = c(
    "AAA", "AAA", "AAA", "AAA",
    "BBB", "BBB", "BBB", "BBB",
    "CCC", "CCC", "DDD", "CCC"
  ),
  COMPETITOR = c(
    "XXX", "YYY", "ZZZ", "YYY",
    "XXX", "YYY", "ZZZ", "YYY",
    "YYY", "ZZZ", "YYY", "VVV"
  ),
  VALUE = c(
    123400, 10000, 80000, 60000,
    110000, 20000, 10000, 80000,
     30000, 12000,  7000, 10000
  )
)

# Format labels with scales package

# Values are multiplied by scale = 1e-6 (i.e. expressed in millions) and
# suffixed with the unit "mln", separated by a single space.
unit_mln <- unit_format(
  scale = 1e-6, unit = "mln", sep = " ",
  digits = 2, justify = "right"
)

# Set your own colors for competitors

# Manual fill scale: one fixed colour per COMPETITOR, scale title "legend".
col_competitors <- scale_fill_manual(
  name = "legend",
  values = c(
    XXX = "navyblue",
    YYY = "red",
    ZZZ = "lightyellow",
    VVV = "green"
  )
)


# Generate helper data for ordering: totals for CUSTOMER and COMPETITOR.

# Result: one row per (CUSTOMER, COMPETITOR) with
#   CUST_VALUE - total VALUE of the customer,
#   COMP_VALUE - total VALUE of the competitor,
#   VALUE      - total VALUE of the pair,
# sorted by CUST_VALUE, largest first.
df_cust <- data %>%
  mutate(COMPETITOR = as.factor(COMPETITOR)) %>%
  # Each group_by() replaces the previous grouping, so no ungroup() is needed
  # between the two helper totals.
  group_by(CUSTOMER) %>%
  mutate(CUST_VALUE = sum(VALUE)) %>%
  group_by(COMPETITOR) %>%
  mutate(COMP_VALUE = sum(VALUE)) %>%
  group_by(CUSTOMER, COMPETITOR) %>%
  summarise(
    CUST_VALUE = max(CUST_VALUE),
    COMP_VALUE = max(COMP_VALUE),
    VALUE = sum(VALUE)
  ) %>%
  # summarise() removes only the last grouping level, so the result would
  # still be grouped by CUSTOMER and the reorder() below would be evaluated
  # separately per customer. Drop the grouping so the factor levels are
  # ordered once, over the whole table.
  ungroup() %>%
  arrange(desc(CUST_VALUE)) %>%
  # Reorder COMPETITOR by total VALUE descending   #***this is needed to reorder labels***
  mutate(COMPETITOR = reorder(COMPETITOR, -COMP_VALUE))


# Data for a small "legend" plot: total VALUE per COMPETITOR
df_comp <- summarise(group_by(data, COMPETITOR), VALUE = sum(VALUE))

# Long CUSTOMER names are wrapped at a width of 30
df_cust$CUSTOMER <- str_wrap(df_cust$CUSTOMER, width = 30)

# Main plot
# One stacked bar per CUSTOMER; segments are filled by COMPETITOR and labelled
# with VALUE rounded to the nearest 10 000, formatted by unit_mln().
# The surrounding parentheses print the plot as well as assigning it.
(
plt_main <- df_cust %>%
  ggplot(aes(x = fct_reorder(CUSTOMER, -CUST_VALUE), y = VALUE)) +  #***this fct_ reorders bars***
  geom_col(
    aes(fill = COMPETITOR),
    alpha = 0.5,
    position = position_stack(reverse = TRUE),
    colour = "darkgray",
    show.legend = FALSE
  ) +
  # The labels are grouped by the same COMPETITOR factor and stacked in the
  # same direction (reverse = TRUE) as the bars. Using one ordering for both
  # layers keeps labels on their segments even when two competitors have
  # equal totals; a separately computed, mirrored factor order would break
  # such ties in the opposite direction.
  geom_text(
    aes(
      label = unit_mln(round(VALUE, -4)),
      group = COMPETITOR                                            #***this group reorders labels***
    ),
    size = 3,
    position = position_stack(vjust = 0.5, reverse = TRUE)
  ) +
  xlab(" ") +
  ylab("Market share (GROSS PLN)") +
  ggtitle(paste("Top competitors in top customers: ", "Poland")) +
  theme_bw(base_size = 11) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = c(0.94, 0.75)
  ) +
  col_competitors +
  scale_y_continuous(
    labels = unit_mln,
    # Secondary axis: share of the grand total of VALUE.
    sec.axis = sec_axis(~ . / sum(df_cust$VALUE), labels = scales::percent)
  )
)
Jacek Kotowski
  • 620
  • 16
  • 49