I have data such as this
data <- data.table(
"School" = c(1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0),
"Grade" = c(0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0),
"CAT" = c(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1),
"FOX" = c(1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0),
"DOG" = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0,
0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1)
)
and wish to achieve a new data table such as this:
dataWANT <- data.frame(
"VARIABLE" = c('CAT', 'CAT', 'CAT', 'FOX', 'FOX', 'FOX', 'DOG', 'DOG', 'DOG'),
"SCHOOL" = c(1, 1, 0, 1, 1, 0, 1, 1, 0),
"GRADE" = c(0, 1, 1, 0, 1, 1, 0, 1, 1),
"MEAN" = c(NA)
)
dataWANT
takes the mean for CAT
and FOX
and DOG
by SCHOOL
, GRADE
, and SCHOOL
X GRADE
when they are equal to 1.
I know how to do this one at a time but that is not good for doing this with a big data.
data[, CAT1 := mean(CAT), by = list(SCHOOL)]
data[, FOX1 := mean(FOX), by = list(GRADE)]
data[, DOG1 := mean(DOG), by = list(SCHOOL, GRADE)]
data$CAT2 = unique(data[SCHOOL == 1, CAT1])
data$FOX2 = unique(data[GRADE == 1, FOX1])
data$DOG2 = unique(data[SCHOOL == 1 & GRADE == 1, DOG1])
Please only use this:
data <- data.table(
"SCHOOL" = c(1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0),
"GRADE" = c(0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0),
"CAT" = c(1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1),
"FOX" = c(1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0),
"DOG" = c(0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0,
0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1)
)
data[, CAT1 := mean(CAT), by = list(SCHOOL)]
data[, CAT2 := mean(CAT), by = list(GRADE)]
data[, CAT3 := mean(CAT), by = list(SCHOOL, GRADE)]
data[, FOX1 := mean(FOX), by = list(SCHOOL)]
data[, FOX2 := mean(FOX), by = list(GRADE)]
data[, FOX3 := mean(FOX), by = list(SCHOOL, GRADE)]
data[, DOG1 := mean(DOG), by = list(SCHOOL)]
data[, DOG2 := mean(DOG), by = list(GRADE)]
data[, DOG3 := mean(DOG), by = list(SCHOOL, GRADE)]
dataWANT <- data.frame(
"VARIABLE" = c('CAT', 'CAT', 'CAT', 'FOX', 'FOX', 'FOX', 'DOG', 'DOG', 'DOG'),
"TYPE" = c(1, 2, 3, 1, 2, 3, 1, 2, 3),
"MEAN" = c(0.48, 0.44, 0.428, 0.6, 0.611, 0.6428, 0.52, 0.61, 0.6428)
)
where:
TYPE
equals to 1 when MEAN
in estimated by SCHOOL
,
TYPE
equals to 2 when MEAN
is estimated by GRADE
,
TYPE
equals to 3 when MEAN
is estimated by SCHOOL
and GRADE