1

This is what I want to achieve: Target Plot

The values within the stacked bars are the count. They should be centered. The count shouldn't be displayed if it is 8 or less.

I've come this far: Current Plot

Data

str(exampledata$V43)
 Factor w/ 5 levels "5 Sehr unzufrieden",..: 3 5 4 4 4 4 4 5 5 4 ...

str(exampledata$A_REF)
 Factor w/ 18 levels "Zertifikat eines Aufbau- oder Ergänzungsstudiums",..: 18 18 18 18 18 17 18 18 18 18 ...

str(exampledata$V101)
 Factor w/ 2 levels "Weiblich","Männlich": 2 NA 2 2 2 2 1 1 1 2 ...

dput(df[1:100,])
structure(list(exampledata.V101 = structure(c(2L, NA, 2L, 2L, 
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, NA, 
NA, NA, 1L, 1L, 2L, NA, 2L, 2L, 2L, NA, 2L, 2L, NA, NA, 1L, NA, 
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, NA, 1L, NA, 1L, NA, 
1L, 2L, NA, NA, 2L, NA, 1L, 2L, 2L, NA, 2L, NA, 2L, 2L, 1L, 2L, 
1L, 2L, 1L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, NA, 2L, 1L, 2L, 2L
), .Label = c("Weiblich", "Männlich"), class = "factor"), exampledata.A_REF = structure(c(18L, 
18L, 18L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 18L, 16L, 18L, 
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 16L, 18L, 
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 18L, 
16L, 18L, 16L, 18L, 18L, 16L, 16L, 18L, 18L, 18L, 18L, 18L, 18L, 
18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 
16L, 16L, 18L, 18L, 18L, 17L, 16L, 18L), .Label = c("Zertifikat eines Aufbau- oder Ergänzungsstudiums", 
"LA Berufliche Schulen", "LA Sonderschule", "LA Gymnasium", "LA Haupt- und Realschule", 
"LA Grundschule", "Künstlerischer/musischer Abschluss", "Kirchlicher Abschluss", 
"Staatsexamen (ohne Lehramt)", "Diplom Fachhochschule, Diplom I an Gesamthochschulen", 
"Diplom Universität, Diplom II an Gesamthochschulen", "Sonstiges", 
"Promotion", "Staatsexamen", "Magister", "Diplom", "Master", 
"Bachelor"), class = "factor"), exampledata.V43 = structure(c(3L, 
5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 3L, 2L, NA, 4L, 5L, 5L, 
4L, 4L, 4L, 4L, NA, 2L, 4L, 3L, 5L, 4L, 4L, 4L, NA, 4L, 4L, NA, 
NA, 3L, 5L, 2L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, NA, NA, 4L, NA, 3L, 
4L, 5L, 5L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, NA, 4L, 
NA, 4L, NA, 4L, 5L, 4L, NA, 5L, NA, 4L, 4L, 4L, NA, 4L, NA, 5L, 
4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, NA, 4L, 
5L, 5L, 4L), .Label = c("5 Sehr unzufrieden", "4", "3", "2", 
"1 Sehr zufrieden"), class = "factor")), .Names = c("exampledata.V101", 
"exampledata.A_REF", "exampledata.V43"), row.names = c(NA, 100L
), class = "data.frame")

Plot

# Current attempt: one 100%-stacked horizontal bar per A_REF level, filled by
# V43 and drawn in one panel per V101 level. Rows where V101 or V43 is missing
# are dropped before plotting.
ggplot(
  data = subset(exampledata, !is.na(V101) & !is.na(V43)),
  mapping = aes(x = A_REF, fill = factor(V43))
) +
  geom_bar(position = "fill") +
  coord_flip() +
  facet_grid(~V101) +
  scale_y_continuous(
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
    labels = scales::percent
  ) +
  labs(x = NULL, y = NULL, fill = NULL) +
  # The title is element 77 of the data set's "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic()

Edit: I know that there are similar questions, but I wasn't able to adapt their solutions to my problem. Most of the time the code simply did not work.

Edit2: The solution from Erdem Akkas causes the following two errors:

Error in `[.data.frame`(exampledata, !is.na(V101) & !is.na(V43), .(length(.I)),  : 
  unused argument (by = .(A_REF, V101, V43))

Error in combine_vars(data, params$plot_env, cols, drop = params$drop) : 
  At least one layer must contain all variables used for facetting

Solution

# Solution: the 100%-stacked bar plot with the count printed in the centre of
# every bar segment. Segments with a count of 8 or less get an empty label, as
# required in the problem statement. Rows where V101 or V43 is missing are
# dropped before plotting.
ggplot(
  data = subset(exampledata, !is.na(V101) & !is.na(V43)),
  mapping = aes(x = A_REF, fill = factor(V43))
) +
  geom_bar(position = "fill") +
  facet_grid(~V101) +
  labs(y = NULL, x = NULL, fill = NULL) +
  scale_y_continuous(
    labels = scales::percent,
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)
  ) +
  # The title is element 77 of the data set's "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic() +
  # stat = "count" makes the text layer compute the same per-segment counts as
  # geom_bar(); `..count..` refers to that computed value.
  # position_fill(vjust = 0.5) puts each label in the middle of its segment.
  # NOTE(review): newer ggplot2 releases spell `..count..` as
  # after_stat(count) - confirm against the installed version.
  geom_text(
    stat = "count",
    aes(label = ifelse(..count.. > 8, ..count.., "")),
    position = position_fill(vjust = 0.5)
  ) +
  coord_flip()

Solution

Community
  • 1
  • 1
Marc Brinkmann
  • 142
  • 2
  • 16
  • 1
    Possible duplicate of [R stacked percentage bar plot with percentage of binary factor and labels (with ggplot)](http://stackoverflow.com/questions/12386005/r-stacked-percentage-bar-plot-with-percentage-of-binary-factor-and-labels-with) – JanLauGe Apr 04 '17 at 10:33
  • 1
    Give us a sample of your data so we can run the ggplot code (str is interesting, but not really helpful) – talat Apr 04 '17 at 10:33
  • I tried to adapt from solutions like this, but I can't get it to work, @JanLauGe. I've imported the data from SPSS. How can I give you a sample easily? – Marc Brinkmann Apr 04 '17 at 10:35
  • 1
    If the dataset is smaller than say 100 rows use `dput(data)` and copy-paste the output here. If the dataset is larger than that, create a toy example that illustrates the problem. See also this post: http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – JanLauGe Apr 04 '17 at 10:38

1 Answer

2

I would first summarize data with data.table:

library(data.table)

# Count the rows for every A_REF / V101 / V43 combination, skipping rows where
# V101 or V43 is missing. exampledata is a plain data.frame, so it is converted
# first: using the data.table `[i, j, by]` form directly on a data.frame fails
# with "unused argument (by = ...)". The count column is named V1.
# The result is deliberately not called `summary`, so that it cannot be
# confused with base::summary() if the assignment ever fails.
count_summary <- as.data.table(exampledata)[
  !is.na(V101) & !is.na(V43),
  .(V1 = .N),
  by = .(A_REF, V101, V43)
]

# Plot the pre-computed counts as 100%-stacked horizontal bars, one panel per
# V101 level.
ggplot(data = count_summary, aes(x = A_REF, y = V1, fill = factor(V43))) +
  # stat = "identity": use V1 as the bar height instead of counting rows again.
  geom_bar(position = "fill", stat = "identity") +
  facet_grid(~V101) +
  labs(y = NULL, x = NULL, fill = NULL) +
  scale_y_continuous(
    labels = scales::percent,
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)
  ) +
  # The title is element 77 of the data set's "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic() +
  # Show the count only when it is greater than 8, centred in its segment.
  geom_text(
    aes(label = ifelse(V1 > 8, V1, "")),
    position = position_fill(vjust = 0.5)
  ) +
  coord_flip()

`

Erdem Akkas
  • 2,062
  • 10
  • 15
  • Thank you for your answer! I adjusted my code to match your solutions, but I'm receiving two errors. I've added them above. – Marc Brinkmann Apr 04 '17 at 11:37
  • Since I am kinda new to R I've got a question: Why did you add stat="identity" to geom_bar()? – Marc Brinkmann Apr 04 '17 at 11:41
  • Because we do not want geom_bar() to compute its own summary (i.e. the count), but to use the value we supply as y. – Erdem Akkas Apr 04 '17 at 11:44
  • 1
    Without summarizing you can do below but it does not hide the ones which are less than 8. `ggplot(data=subset(subset(exampledata, !is.na(V101)), !is.na(V43)), aes(x=A_REF, fill=factor(V43))) + geom_bar(position="fill") + facet_grid(~V101) + labs(y=NULL, x=NULL, fill=NULL) + scale_y_continuous(labels = scales::percent, breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) + ggtitle(paste(attr(exampledata, "variable.labels")[77])) + theme_classic() + geom_text(stat="count",aes(label=..count..),hjust=1,position = position_fill())+coord_flip()` – Erdem Akkas Apr 04 '17 at 11:45
  • Thank you so much! The rest, like centering the numbers, I can do on my own. This was actually pretty simple. Can you explain to me why '..count..' is addressed like that? Or recommend a site to me? – Marc Brinkmann Apr 04 '17 at 11:51
  • 1
    I would recommend internal help for different options. `?geom_histogram` – Erdem Akkas Apr 04 '17 at 11:57