1

I have a dataset in R consisting of 7 columns with values of 0 and 1. A final column assigns each row to a group based on those values. I want to colour the bars according to my group. I have 17 different combinations, but I don't want the colouring to be based on the combinations: I want to colour by my group column while still showing the combinations in the UpSet plot.

# Example input: 47 rows of seven binary (0/1) indicator columns, x1 to x7.
data <- data.frame(
  x1 = c(0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0),
  x2 = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  x3 = c(0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  x4 = c(0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
  x5 = c(0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
  x6 = c(0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
  x7 = c(0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0)
)

    # Add `numeric` (row total of x1 to x7) and `group` (label A to F) to `data`.
    library(dplyr) # provides %>%, mutate() and case_when()

    data1 <- data %>%
        mutate(numeric = rowSums(.[c("x1", "x2", "x3", "x4", "x5", "x6", "x7")])) %>%
        mutate(group = case_when(
            # Conditions are tried top to bottom; the first TRUE one labels the row.
            x1 == 1 & x2 == 1 & x3 == 1 ~ "A",
            x1 == 1 & x2 == 1 ~ "B",
            # Fixed: this read `x1=1`, which R parses as an argument named `x1`
            # rather than a comparison, so x1 was never actually tested here.
            x1 == 1 & x3 == 1 ~ "C",
            numeric == 1 ~ "D",
            numeric >= 2 ~ "E",
            TRUE ~ "F"
        ))
    # Named colour lookup: one colour per group label (A to F).
    # Fixed: the last entry was written `"F"=="gray"`, a comparison that evaluates
    # to FALSE and left the vector with an unnamed "FALSE" instead of F = "gray".
    color <- c(
        A = "red", B = "orange", C = "#228822",
        D = "#00BFFF", E = "yellow", F = "gray"
    )
    # Draw the UpSet plot of the seven indicator sets, grouped by `group`.
    # The call is namespaced so it resolves to UpSetR's upset() regardless of
    # which other packages are attached.
    # NOTE(review): `color` has one entry per group label, not one per bar;
    # confirm how `main.bar.color` handles a vector of that length.
    UpSetR::upset(data1,
        sets = c("x1", "x2", "x3", "x4", "x5", "x6", "x7"),
        group.by = "group",
        main.bar.color = color
    )


Reproducible data

# Reproducible sample: ten rows of twelve 0/1 indicator columns (X1 to X12)
# plus a character `group` column, carrying tibble classes.
data <- structure(
  list(
    X1 = c(0, 1, 0, 1, 0, 0, 0, 0, 0, 0),
    X2 = rep(1, 10),
    X3 = c(0, 1, 0, 1, 0, 0, 0, 0, 0, 0),
    X4 = c(0, 1, 0, 1, 0, 0, 0, 0, 0, 0),
    X5 = c(0, 1, 1, 1, 0, 0, 0, 0, 0, 1),
    X6 = c(0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
    X7 = rep(0, 10),
    X8 = rep(0, 10),
    X9 = c(1, 1, 0, 0, 0, 1, 1, 1, 1, 1),
    X10 = rep(0, 10),
    X11 = rep(0, 10),
    X12 = rep(0, 10),
    group = c("C", "D", "C", "A", "B", "C", "C", "C", "C", "C")
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
user142632
  • 41
  • 4
  • 1
    Note that your last `color` value has the test for equality `"F"=="gray"`, not the assign function `"F"="gray"` so you will get `"FALSE"` as the last value in the vector, not `"gray"` like you intended. This likely doesn't solve your problem, but may help in your effort to debug. Good luck! – jpsmith Aug 01 '23 at 13:25

1 Answer

2

The ordering of the columns is a little obscure, but tracing through the code I have discovered that you can recreate it like this:

# Colour assigned to each group label.
color <- c(
  A = "red", B = "orange", C = "#228822",
  D = "#00BFFF", E = "yellow", F = "gray"
)

# Tally every distinct (set membership, group) row of `data1`. The seven set
# columns are taken in order of decreasing column total, followed by column 9,
# which supplies `group`.
# NOTE(review): the first row of the tally is dropped; presumably it is a
# combination the plot does not draw. Confirm against your own data.
set_order <- order(-colSums(data1[1:7]))
d <- as.data.frame(plyr::count(data1[, c(set_order, 9)]))[-1, ]

# One colour per remaining row of `d`, looked up by that row's group label.
correct_cols <- color[d$group]

Allowing:

# Plot the seven sets grouped by `group`, colouring the main bars with
# `correct_cols`. The call is namespaced so it resolves to UpSetR's upset()
# regardless of which other packages are attached.
UpSetR::upset(
  data1,
  sets = paste0("x", 1:7),
  group.by = "group",
  main.bar.color = correct_cols
)

enter image description here


Edit

With the OP's actual data we can do:

library(dplyr) # provides %>%, mutate() and across()

# Insert `numeric` (row total of X1 to X12) just before `group`, then convert
# to a plain data frame. rowSums(across(...)) computes every total in one
# vectorised call instead of summing row by row.
data1 <- data %>%
  mutate(numeric = rowSums(across(X1:X12)), .before = group) %>%
  as.data.frame()

# Colour assigned to each group label.
color <- c(A = "red", B = "orange", C = "#228822", D = "#00BFFF")

# Tally distinct (set membership, group) rows. Set columns are ordered by
# decreasing column total; column 14 is `group` (after X1 to X12 and `numeric`).
d <- as.data.frame(plyr::count(data1[, c(order(-colSums(data1[1:12])), 14)]))
correct_cols <- color[d$group]

UpSetR::upset(
  data1,
  sets = paste0("X", 1:12),
  group.by = "group",
  main.bar.color = correct_cols
)

enter image description here

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87
  • Thank you very much. In my original data I have 12 columns and one group column. I tried your code, but I get an error saying that the replacement has 28 rows but my data has 29. I have assigned all my data to 4 groups in the last column of the dataset. I am sure that I am making a mistake, but I cannot figure it out as I am a beginner in R. – user142632 Aug 01 '23 at 22:19
  • @user142632 the code is specific to the data set. If you can't apply the above method to your own data, perhaps you can edit your question to include your actual data? `dput(data)` (where `data` is whatever your actual data is called) makes this very easy – Allan Cameron Aug 01 '23 at 22:25
  • @user142632 see my update – Allan Cameron Aug 01 '23 at 23:22