I am currently making an euler diagram. I managed to make an euler diagram containing 6 variables using these codes, although I believe these codes are not efficient:
dataset <- data.frame(
A = rep(c(1, 2, NA), length.out = 100),
B = rep(c(2, NA, 1), length.out = 100),
C = rep(c(NA, 1, 2), length.out = 100),
D = rep(c(NA, 2, 1), length.out = 100),
E = rep(c(1, NA, 2), length.out = 100),
F = rep(c(1, 2, NA), length.out = 100))
euler_primary <- c("A" = sum(dataset$A == 1, na.rm = TRUE),
"B" = sum(dataset$B == 1, na.rm = TRUE),
"C" = sum(dataset$C == 1, na.rm = TRUE),
"D" = sum(dataset$D == 1, na.rm = TRUE),
"E" = sum(dataset$E == 1, na.rm = TRUE),
"F" = sum(dataset$F == 1, na.rm = TRUE),
"A&B" = sum(dataset$B == 1 & dataset$A == 1, na.rm=TRUE),
"A&C" = sum(dataset$C == 1 & dataset$A == 1, na.rm=TRUE),
"A&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm = TRUE),
"A&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm = TRUE),
"A&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm = TRUE),
"B&C" = sum(dataset$B == 1 & dataset$C == 1, na.rm=TRUE),
"B&D" = sum(dataset$B == 1 & dataset$D == 1, na.rm=TRUE),
"B&E" = sum(dataset$B == 1 & dataset$E == 1, na.rm=TRUE),
"B&F" = sum(dataset$B == 1 & dataset$F == 1, na.rm=TRUE),
"C&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"C&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"C&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"D&E" = sum(dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"D&F" = sum(dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"E&F" = sum(dataset$F == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1, na.rm=TRUE),
"A&B&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&D&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&E&F" = sum(dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"B&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"B&C&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"B&D&E" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"B&D&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"B&E&F" = sum(dataset$B == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"C&D&E" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"C&D&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"C&E&F" = sum(dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"D&E&F" = sum(dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&C&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&D&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&E&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E, na.rm=TRUE),
"B&C&D&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F, na.rm=TRUE),
"B&C&E&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"B&D&E&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"C&D&E&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"A&B&C&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E&F" = sum(dataset$C == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E&F" = sum(dataset$C == 1 & dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE)
)
venn_primary <- euler(euler_primary)
plot(venn_primary6,
quantities = list(cex = .75),
fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
lty = 1,
cex = 0.5,
labels = NULL,
legend = list(labels = letters[1:6]))
The above codes result in the following plot:
However, now I need to make an euler diagram with 11 variables. It seems impossible to make such a diagram with 11 variables as the combinations of variables will reach hundreds, if not thousands. I think creating a function to assign the letters and create the list may be the solution. However, as I am a novice in data cleaning and conditionals in R, I am not able to come up with such a function. Can anyone help me to create a function where I can just enter the dataset name and the columns which will be included in the diagram, and the function will do the rest of the cleaning?
#p.s.: I noticed that the eulerr
package requires us to add the &
between variables to denote overlapping cases. For example, if we want to see the intersection between variable A
and B
, we will need to create an exact variable of A&B
.
Thank you very much in advance