0

So I am having trouble making a stacked bar chart showing proportion of cases vs deaths. This is the data:

# Example data from the question: ten countries, all rows sharing one Date
# value. Recovered is 0 in every row, and Active equals Confirmed - Deaths
# for each country. Row names are the original integer row indices.
df <- structure(list(Date = structure(c(19108, 19108, 19108, 19108, 
19108, 19108, 19108, 19108, 19108, 19108), class = "Date"), Country = c("US", 
"India", "Brazil", "France", "Germany", "United Kingdom", "Russia", 
"Korea, South", "Italy", "Turkey"), Confirmed = c(81100599L, 
43065496L, 30378061L, 28605614L, 24337394L, 22168390L, 17887152L, 
17086626L, 16191323L, 15023662L), Recovered = c(0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L), Deaths = c(991940L, 523654L, 663108L, 
146464L, 134489L, 174778L, 367692L, 22466L, 162927L, 98720L), 
    Active = c(80108659L, 42541842L, 29714953L, 28459150L, 24202905L, 
    21993612L, 17519460L, 17064160L, 16028396L, 14924942L)), row.names = c(163539L, 
163431L, 163375L, 163414L, 163418L, 163537L, 163496L, 163444L, 
163437L, 163533L), class = "data.frame")

I want to generate something that looks like this, except showing the proportion of deaths vs. cases.

enter image description here

TarJae
  • 72,363
  • 6
  • 19
  • 66

2 Answers

1

This is a modification of @Allan Cameron's answer that adds percent labels and takes a few different approaches:


library(tidyverse)
library(scales)

# Stacked 100% bar chart of Covid outcomes per country, with every segment
# labelled by its share of that country's confirmed cases.
df %>%
  # Strip a literal leading "top10." prefix when present; data that already
  # uses plain column names passes through unchanged.
  rename_with(~ str_remove(.x, "^top10\\.")) %>%
  # Keep only the plotted columns. Any extra column (e.g. Date, Recovered,
  # Active) would otherwise be pivoted into additional Status levels, for
  # which no fill colour is defined.
  select(Country, Survived = Confirmed, Deaths) %>%
  # Deaths are already counted in the confirmed cases, so subtract them to
  # get two disjoint outcomes. The shares below then use the confirmed count
  # as the denominator instead of Confirmed + Deaths.
  mutate(Survived = Survived - Deaths) %>%
  pivot_longer(
    cols = -Country,
    names_to = "Status",
    values_to = "value"
  ) %>%
  # Explicit level order: the first level is stacked on top, so Deaths sits
  # above Survived and receives the first fill colour.
  mutate(Status = factor(Status, levels = c("Deaths", "Survived"))) %>%
  group_by(Country) %>%
  mutate(pct = prop.table(value) * 100) %>%
  ungroup() %>%
  ggplot(aes(x = Country, y = pct, fill = Status)) +
  geom_col(position = position_fill()) +
  scale_fill_manual(values = c("#ff34b3", "#4976ff")) +
  # position_fill() rescales every bar to the 0-1 range, which is the range
  # the percent labeller expects.
  scale_y_continuous(labels = scales::percent) +
  geom_text(
    aes(label = paste0(sprintf("%1.1f", pct), "%")),
    position = position_fill(vjust = 0.1)
  ) +
  labs(title = "Your Title", y = "Percentage")

enter image description here

TarJae
  • 72,363
  • 6
  • 19
  • 66
0

I had to use OCR to convert the image of your data into actual data I could use. It's far better to include your data as text for this reason.

The plot is not particularly informative because the percentages are low, and difficult to read, but in any case, you can do it like this:

library(tidyverse)

# 100% stacked bar chart of outcomes (survived vs. died) for each country.
p <- df %>%
  rename(
    Country = top10.Country,
    Survived = top10.Confirmed,
    Died = top10.Deaths
  ) %>%
  mutate(
    # Levels follow the row order, so the bars keep the order of the data
    # instead of being sorted alphabetically.
    Country = factor(Country, levels = Country),
    # The confirmed count includes the deaths; the remainder survived.
    Survived = Survived - Died
  ) %>%
  pivot_longer(
    cols = -Country,
    names_to = "Outcome",
    values_to = "Count"
  ) %>%
  mutate(Outcome = factor(Outcome, levels = c("Survived", "Died"))) %>%
  ggplot(aes(x = Country, y = Count, fill = Outcome)) +
  geom_col(position = position_fill()) +
  scale_fill_manual(values = c("#4976ff", "#ff34b3")) +
  scale_y_continuous(labels = scales::percent) +
  ggtitle("Covid outcomes by country") +
  ylab("Percent")

p

enter image description here

To make it easier to read, you could zoom into the bottom:

# Restrict the visible y range to the bottom 5% of each bar so the small
# "Died" share becomes readable.
p + coord_cartesian(ylim = c(0, 0.05))

enter image description here


Data in reproducible format

# Data for the plots above: ten countries with their confirmed-case and
# death counts, stored in columns prefixed with "top10.".
df <- structure(list(top10.Country = c("US", "India", "Brazil", "France", 
"Germany", "United Kingdom", "Russia", "Korea, South", "Italy", 
"Turkey"), top10.Confirmed = c(81100599L, 43065496L, 30378061L, 
28605614L, 24337394L, 22168390L, 17887152L, 17086626L, 16191323L, 
15023662L), top10.Deaths = c(991940L, 523654L, 663108L, 146464L, 
134489L, 174778L, 367692L, 22466L, 162927L, 98720L)), class = "data.frame", 
row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10"))

# Print the data frame to show its contents.
df
#>     top10.Country top10.Confirmed top10.Deaths
#> 1              US        81100599       991940
#> 2           India        43065496       523654
#> 3          Brazil        30378061       663108
#> 4          France        28605614       146464
#> 5         Germany        24337394       134489
#> 6  United Kingdom        22168390       174778
#> 7          Russia        17887152       367692
#> 8    Korea, South        17086626        22466
#> 9           Italy        16191323       162927
#> 10         Turkey        15023662        98720

Created on 2022-05-01 by the reprex package (v2.0.1)

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
  • Thanks again Allan, you are a lifesaver! I appreciate the work that you do. Next time I shall improve the template for the questions. – simple114141 May 01 '22 at 11:32
  • @Allan Cameron I find the approach with `p + coord_cartesian(ylim = c(0, 0.05))` very informative! In my answer I tried to flip the factors and plot the deaths on top. What do you think? – TarJae May 01 '22 at 14:56