Please find below an attempt that counts the number of unique `d` values for each combination of `a` values.
It is not elegant but feel free to improve it.
library(dplyr)
# Enumerate every ordered pair (cond1, cond2) of the distinct values in df$a.
# The pairs are kept as plain character columns rather than factors.
conds <- expand.grid(
  list(cond1 = unique(df$a), cond2 = unique(df$a)),
  stringsAsFactors = FALSE
)
conds
# For every row of conds, keep the rows of df whose `a` matches either
# condition and count how many distinct `d` values remain.
# apply() hands each row over as a character vector and returns a list of
# one-cell data frames here.
pair_counts <- apply(conds, 1, function(pair) {
  df %>%
    filter(a %in% c(pair[1], pair[2])) %>%
    summarise(length(unique(d)))
})
# Human-readable label for each pair, e.g. "7W1 & 3W1"
pair_labels <- apply(conds, 1, function(pair) {
  paste(pair[1], pair[2], sep = " & ")
})
# Bind the one-cell results side by side (one column per pair) and label them
test <- setNames(object = as.data.frame(pair_counts), pair_labels)
# Turn the one-row wide table into long format: one row per pair of
# conditions, with the pair label in `conditions` and its count in
# `count_of_unique_d`.
res <- reshape(
  test,
  direction = "long",
  varying = names(test),
  v.names = "count_of_unique_d",
  timevar = "conditions",
  times = names(test),
  new.row.names = seq_len(ncol(test))
)
# Keep only the label and the count; this drops the id column that
# reshape() adds.
res <- res[, c("conditions", "count_of_unique_d")]
res
What is happening?
apply(conds, 1, function(x) df %>% filter(a %in% c(x[1], x[2])) %>% summarise(length(unique(d))))
subsets `df` according to each row of `conds`, which holds your conditions on `a`. The result is a list, which is turned into a data frame using `as.data.frame()`. `setNames()` gives a name to each column so you know which conditions were applied.
Output:
> head(res, 5)
conditions count_of_unique_d
1 3W1 & 3W1 3
2 7W1 & 3W1 3
3 5W1 & 3W1 3
4 14W & 3W1 3
5 SP2 & 3W1 4
Data:
# Example data: nine rows with a condition label (a), two date-like
# character columns (b, c) and an integer value (d).
df <- data.frame(
  a = c("3W1", "3W1", "7W1", "5W1", "14W", "3W1", "SP2", "3W1", "3W1"),
  b = c(
    "5/11/2020", "5/11/2020", "5/11/2020", "6/1/2020", "5/11/2020",
    "5/11/2020", "6/15/2020", "5/11/2020", "5/11/2020"
  ),
  c = c(
    "5/31/2020", "5/31/2020", "6/28/2020", "7/5/2020", "8/16/2020",
    "5/31/2020", "8/16/2020", "5/31/2020", "5/31/2020"
  ),
  d = c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 4L, 4L),
  stringsAsFactors = FALSE
)