I'm looking for a way to arrange the bar plot produced with ggplot2 by descending order of "utsugds2gp_22(happy condition)" that equal to 1 (category=not happy) within each scode1_22 group (area). The original bar plot I generated is below:
My codes are as following :
# Create the scode1_22 vector
scode1_22 <- c(
"12217001", "12217001", "12217001", "12217001", "12217001",
"12217002", "12217002", "12217002", "12217002", "12217002",
"12217002", "12217002", "12217002", "12217002", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217004", "12217004", "12217004",
"12217004", "12217004", "NA", "12217004", "12217004",
"12217004", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217004", "12217004",
"12217004", "12217004", "12217004", "12217004", "12217004",
"12217004", "NA", "12217004", "12217004", "12217004",
"12217004", "12217004", "12217004", "12217005", "12217005",
"12217005", "12217005", "12217005", "12217005", "12217005",
"12217005", "12217005"
)
# Create the utsugds2gp_22 vector
utsugds2gp_22 <- c(
0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
0, 0, 0, NA, 0, 1, 0, 0, 1, 1,
1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, NA, 0,
0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
0, 0, NA, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 1, 0, 0,1
)
# Create the income_3gp vector
income_3gp <- c(
1, 2, 1, 3, 1, 1, 1, 2, 1, 2,
2, 1, 1, 2, 3, 1, 1, 2, 1, 1,
1, 3, 3, 1, 3, 1, 1, 2, 2, 2,
3, 3, 3, 3, 1, 2, 2, 2, 3, 3,
3, 2, 2, 1, 1, 2, 1, 3, 3, 3,
3, 3, 2, 1, 3, 1, 3, 2, 2, 2,
3, 3, 1, 2, 2, 1, 2, 1, 2, 2,
1, 2, 2, 1, 2, 3, 1
)
dataset <- data.frame(scode1_22, utsugds2gp_22)
print(dataset)
fill <- c("white", "#E1B378")
dataset %>%
filter(!is.na(scode1_22), !is.na(utsugds2gp_22)) %>%
mutate(
scode1_22 = factor(scode1_22, levels = c("12217001", "12217002", "12217003", "12217004", "12217005"), labels = c("area1", "area2", "area3", "area4", "area5")),
utsugds2gp_22 = factor(utsugds2gp_22, levels = c(0:1), labels = c(" happy", "not happy"))
) %>%
group_by(scode1_22) %>%
mutate(scode1_22 = factor(scode1_22, levels = unique(scode1_22))) %>%
arrange(scode1_22, desc(utsugds2gp_22 == "not happy")) %>%
ggplot(aes(x = scode1_22, fill = utsugds2gp_22)) +
geom_bar(position = "fill", col = "black") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(name = "happy condition(%)", values = fill) +
labs(x = "area", y = "happy condition(%)")
Ideally, I would like my result to resemble the one shown in this image as a reference:How to re-order bar plot with ggplot2 by distribution proportions of variables
I have explored various methods, but unfortunately, none of them have been successful. Any guidance or suggestions would be greatly appreciated. Thanks!
2023/06/08 New question
I have a query regarding the previous result. My next step involves using the 'facet_wrap' function to create separate panels (STEP4) of bars and add the average value(STEP5) .
My codes are as following :
# Create the scode1_22 vector
scode1_22 <- c(
"12217001", "12217001", "12217001", "12217001", "12217001",
"12217002", "12217002", "12217002", "12217002", "12217002",
"12217002", "12217002", "12217002", "12217002", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217004", "12217004", "12217004",
"12217004", "12217004", "NA", "12217004", "12217004",
"12217004", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217003", "12217003",
"12217003", "12217003", "12217003", "12217004", "12217004",
"12217004", "12217004", "12217004", "12217004", "12217004",
"12217004", "NA", "12217004", "12217004", "12217004",
"12217004", "12217004", "12217004", "12217005", "12217005",
"12217005", "12217005", "12217005", "12217005", "12217005",
"12217005", "12217005"
)
# Create the utsugds2gp_22 vector
utsugds2gp_22 <- c(
0, 0, 1, 0, 1, 0, 1, 0, 1, 0,
0, 0, 0, NA, 0, 1, 0, 0, 1, 1,
1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, NA, 0,
0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
0, 0, NA, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
1, 0, 0, 1, 0, 0,1
)
# Create the income_3gp vector
income_3gp <- c(
1, 2, 1, 3, 1, 1, 1, 2, 1, 2,
2, 1, 1, 2, 3, 1, 1, 2, 1, 1,
1, 3, 3, 1, 3, 1, 1, 2, 2, 2,
3, 3, 3, 3, 1, 2, 2, 2, 3, 3,
3, 2, 2, 1, 1, 2, 1, 3, 3, 3,
3, 3, 2, 1, 3, 1, 3, 2, 2, 2,
3, 3, 1, 2, 2, 1, 2, 1, 2, 2,
1, 2, 2, 1, 2, 3, 1
)
dataset <- data.frame(scode1_22, utsugds2gp_22)
fill <- c("white", "#E1B378")
library(dplyr)
dataset %>%
# STEP1: scode1_22 has values of "NA" text, different from NA missing value indivator
filter(scode1_22 != "NA", !is.na(utsugds2gp_22)) %>%
mutate(
scode1_22 = factor(scode1_22, levels = c("12217001", "12217002", "12217003", "12217004", "12217005"), labels = c("area1", "area2", "area3", "area4", "area5")),
utsugds2gp_22 = factor(utsugds2gp_22, levels = c(0:1), labels = c(" happy", "not happy"))
) %>%
# STEP2: order
count(scode1_22, utsugds2gp_22) %>%
mutate(share = n / sum(n), .by = scode1_22) %>%
arrange(utsugds2gp_22, -share) %>% # sorts lowest happy share first
mutate(scode1_22 = forcats::fct_inorder(scode1_22)) %>% # factor in that order
ggplot(aes(x = scode1_22, y = share, fill = utsugds2gp_22)) +
geom_col(col = "black") +
# STEP3: FROM ORIG
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(name = "happy condition(%)", values = fill) +
labs(x = "area", y = "happy condition(%)")+
# STEP4: grouped by income_3gp
facet_wrap(~income_3gp, ncol=1, strip.position = "left")
# STEP5: add the average value on each graph
geom_hline(aes(yintercept = mean(utsugds2gp_22)), color = "blue", size = 1, by=income_3gp)
My ideal outcome is