0

I acknowledge that there are many similar questions and I checked them, but still cannot resolve my problem.

I am trying to create a stacked bar chart in ggplot2, but I cannot get the bars to order properly.

enter image description here

My data looks like this

# dput() of the aggregated data: one row per Aimag/Capital and water Type,
# with the summed amount in sumAmount. Each Type string is kept on a single
# line so that no value carries an embedded newline; there are exactly two
# distinct Type values (ground water and surface water).
structure(list(`Aimag/Capital` = c("Arkhangai", "Arkhangai",
"Bayan-Ulgii", "Bayankhongor", "Bayankhongor", "Bulgan", "Darkhan-Uul",
"Darkhan-Uul", "Dornod", "Dornod", "Dornogobi", "Dornogobi",
"Dundgobi", "Gobi-Altai", "Gobi-Altai", "Gobisumber", "Khentii",
"Khentii", "Khovd", "Khovd", "Khuvsgul", "Orkhon", "Selenge",
"Selenge", "Sukhbaatar", "Tuv", "Tuv", "Ulaanbaatar", "Ulaanbaatar",
"Umnugobi", "Umnugobi", "Uvs", "Uvs", "Uvurkhangai", "Uvurkhangai",
"Zavkhan"), Type = c(
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/",
"Actual usage of water m3 /Surface water/",
"Actual usage of water m3 /Ground water/"), sumAmount = c(101278.1,
272246.7, 7528, 50421.54, 12550.4, 882605, 669312.5, 137418.5,
3587701.32, 244252.09, 266421.5, 1856, 116723, 1453, 277, 3515190,
744539.2, 9749.4, 59142, 9520, 1000, 21324857, 167905, 1077338.19,
772753.76, 572085.5, 5035012.18, 5645963.45, 68067, 22230322,
68351, 2564, 2238, 15950, 180928, 299912)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -36L), spec =
structure(list(
cols = list(`Company's registration number` = structure(list(), class =
c("collector_double",
"collector")), `Company name` = structure(list(), class =
c("collector_character",
"collector")), `Aimag/Capital` = structure(list(), class =
c("collector_character",
"collector")), `Soum/ District` = structure(list(), class =
c("collector_character",
"collector")), Type = structure(list(), class = c("collector_character",
"collector")), Amount = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"), groups = structure(list(
`Aimag/Capital` = c("Arkhangai", "Bayan-Ulgii", "Bayankhongor",
"Bulgan", "Darkhan-Uul", "Dornod", "Dornogobi", "Dundgobi",
"Gobi-Altai", "Gobisumber", "Khentii", "Khovd", "Khuvsgul",
"Orkhon", "Selenge", "Sukhbaatar", "Tuv", "Ulaanbaatar",
"Umnugobi", "Uvs", "Uvurkhangai", "Zavkhan"), .rows = list(
    1:2, 3L, 4:5, 6L, 7:8, 9:10, 11:12, 13L, 14:15, 16L,
    17:18, 19:20, 21L, 22L, 23:24, 25L, 26:27, 28:29, 30:31,
    32:33, 34:35, 36L)), row.names = c(NA, -22L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))

My code looks like this

# Horizontal stacked bar chart of sumAmount per Aimag/Capital, filled by Type.
# The category axis is ordered with reorder(); called without FUN it ranks
# each level by the mean of that level's sumAmount rows.
ggplot(
  eiti_water_stacked_aimag,
  aes(
    x = reorder(`Aimag/Capital`, sumAmount),
    y = sumAmount,
    fill = Type
  )
) +
  geom_col() +
  coord_flip() +
  ylim(0, 3e7) +
  theme_tufte() +
  # Applied after theme_tufte() so these settings override the base theme.
  theme(
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  scale_fill_manual(
    values = c("#A6CEE3", "#1F78B4"),
    labels = c("Ground water          ", "Surface water")
  )

I usually use the `reorder` function, but this time the resulting order is pretty strange. Any ideas how to fix this? I tried forcats, but it did not work either.

Anakin Skywalker
  • 2,400
  • 5
  • 35
  • 63
  • 1
    Tried `forcats` how? – camille Apr 29 '19 at 18:43
  • @camille, yes, `fct_infreq()` did not work for me – Anakin Skywalker Apr 29 '19 at 18:50
  • 2
    It's easiest to help when your example code posted here includes the code that *didn't* work, so we're not just suggesting stuff you've tried. But also, have you looked at the docs for `fct_infreq`? It orders factor levels based on how many times they appear. Guessing by your use of `stat = "identity"`, you already *have* a number you want to order by—you probably actually want `fct_reorder` – camille Apr 29 '19 at 18:53
  • @camille, the code above does not work, it gives me the chart, which I posted above – Anakin Skywalker Apr 29 '19 at 18:59
  • 1
    The problem I'm having is that the posted data is the result of aggregation/summarise. And since `Aimag/Capital` is a grouping variable it cannot be changed. Try changing the factor levels *before* grouping. – Rui Barradas Apr 29 '19 at 19:01
  • @RuiBarradas, true, I used this `filter(Type == "Actual usage of water m3 /Ground water/" | Type == "Actual usage of water m3 /Surface water/") %>% group_by(`Aimag/Capital`, Type) %>% summarize(sumAmount = sum(Amount))` – Anakin Skywalker Apr 29 '19 at 19:05
  • Please put code in your question, not comments, so it's easy to read. [See here](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) on making reproducible R examples – camille Apr 29 '19 at 19:45

1 Answer

2

I think the reason you are not getting the order you expected from reorder(`Aimag/Capital`, sumAmount) is that there is more than one value of sumAmount for most values of `Aimag/Capital` (one for Ground and one for Surface). reorder() has to collapse those values into a single score per level, and by default it uses the mean, so a place with two rows is ranked by half of its total while a place with a single row is ranked by its full value. I imagine that you would like to order by the total water usage (Ground + Surface). The way I did it was to compute the total water usage for each location and sort the locations by that total. Then I convert the column `Aimag/Capital` to a factor whose levels follow this ranking and feed it into ggplot.


library(ggplot2)
library(dplyr)

# Example data rebuilt from the dput() output in the question: 36 rows with
# columns `Aimag/Capital` (place), Type (ground vs. surface water usage) and
# sumAmount. The object is a grouped_df grouped by `Aimag/Capital`; the `spec`
# and `groups` attributes are carried over unchanged from the dput().
eiti_water_stacked_aimag <- structure(list(`Aimag/Capital` = c("Arkhangai", "Arkhangai", 
                                        "Bayan-Ulgii", "Bayankhongor", "Bayankhongor", "Bulgan", "Darkhan-Uul", 
                                        "Darkhan-Uul", "Dornod", "Dornod", "Dornogobi", "Dornogobi", 
                                        "Dundgobi", "Gobi-Altai", "Gobi-Altai", "Gobisumber", "Khentii", 
                                        "Khentii", "Khovd", "Khovd", "Khuvsgul", "Orkhon", "Selenge", 
                                        "Selenge", "Sukhbaatar", "Tuv", "Tuv", "Ulaanbaatar", "Ulaanbaatar", 
                                        "Umnugobi", "Umnugobi", "Uvs", "Uvs", "Uvurkhangai", "Uvurkhangai", 
                                        "Zavkhan"), 
                    Type = c("Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Surface water/", "Actual usage of water m3 /Ground water/", 
                             "Actual usage of water m3 /Ground water/", "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/", "Actual usage of water m3 /Surface water/", 
                             "Actual usage of water m3 /Ground water/"), 
                    sumAmount = c(101278.1, 272246.7, 7528, 50421.54, 12550.4, 882605, 669312.5, 137418.5, 3587701.32, 
                                  244252.09, 266421.5, 1856, 116723, 1453, 277, 3515190, 744539.2, 9749.4, 59142, 9520, 
                                  1000, 21324857, 167905, 1077338.19, 772753.76, 572085.5, 5035012.18, 5645963.45, 68067, 
                                  22230322, 68351, 2564, 2238, 15950, 180928, 299912)), 
               class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -36L), 
               spec = structure(list(cols = list(`Company's registration number` = structure(list(), class = c("collector_double", "collector")), 
                                                 `Company name` = structure(list(), class = c("collector_character", "collector")), 
                                                 `Aimag/Capital` = structure(list(), class = c("collector_character", "collector")), 
                                                 `Soum/ District` = structure(list(), class = c("collector_character", "collector")), 
                                                 Type = structure(list(), class = c("collector_character", "collector")), 
                                                 Amount = structure(list(), class = c("collector_double", "collector"))), 
                                     default = structure(list(), class = c("collector_guess", "collector")), skip = 1), class = "col_spec"), 
               groups = structure(list(`Aimag/Capital` = c("Arkhangai", "Bayan-Ulgii", "Bayankhongor", "Bulgan", "Darkhan-Uul", "Dornod", "Dornogobi", "Dundgobi", 
                                                           "Gobi-Altai", "Gobisumber", "Khentii", "Khovd", "Khuvsgul", 
                                                           "Orkhon", "Selenge", "Sukhbaatar", "Tuv", "Ulaanbaatar", 
                                                           "Umnugobi", "Uvs", "Uvurkhangai", "Zavkhan"), 
                                       .rows = list(1:2, 3L, 4:5, 6L, 7:8, 9:10, 11:12, 13L, 14:15, 16L, 
                                                    17:18, 19:20, 21L, 22L, 23:24, 25L, 26:27, 28:29, 30:31, 
                                                    32:33, 34:35, 36L)), row.names = c(NA, -22L), 
                                  class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))

# Rank the places by total water usage (ground + surface), smallest first.
# summarise() collapses each place to a single row, so no mutate()/distinct()
# round-trip is needed, and pull() returns a plain, unnamed character vector.
order.places <- eiti_water_stacked_aimag %>%
  group_by(`Aimag/Capital`) %>%
  summarise(Total = sum(sumAmount)) %>%
  arrange(Total) %>%
  pull(`Aimag/Capital`)

# Turn the place column into a factor whose levels follow that ranking, so
# the plot's discrete axis is drawn in level order. ungroup() comes first
# because the input is a grouped_df keyed on the very column being rewritten.
x <- eiti_water_stacked_aimag %>%
  ungroup() %>%
  mutate(`Aimag/Capital` = factor(`Aimag/Capital`, levels = order.places))


# Draw the horizontal stacked bars. The category order comes from the factor
# levels of `Aimag/Capital` in x, so no reorder() call is needed here.
ggplot(x, aes(x = `Aimag/Capital`, y = sumAmount, fill = Type)) +
  geom_col() +
  coord_flip() +
  ylim(0, 3e7) +
  # theme_tufte() +
  theme(
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank()
  ) +
  scale_fill_manual(
    values = c("#A6CEE3", "#1F78B4"),
    labels = c("Ground water          ", "Surface water")
  )

enter image description here

kikoralston
  • 1,176
  • 5
  • 6