Person <- c(1,2,3)
Age <- c(10,22,30)
Height <- c(140,185,160)
Weight <- c(65, 80, 75)
People <- data.frame(Person, Age, Height, Weight)
Age_cats_type1 [5-15], [20-30], [35-45]
Age_cats_type2 [8-13], [14-16], [18-40]
Height_cat_Type1 [100-120], [121-140], [141-186]
Height_cat_type2 [110-125], [126-145], [146-190]
Weight_cat_Type1 [50-60], [61-78], [79-85]
Weight_cat_Type2 [55-75], [76-90], [91-100]
For People[1,2] (age=10), this fits within Age_cats_type1==1
and Age_cats_type2==1
.
For People[1,3] (height=140), this fits within Height_cat_Type1==2
and Height_cat_Type2==2
Now I want to create a table for each unique possible outcome of the intervals for (Age_cats_type1==1)|(Age_cats_type1==2)
, (Height_cats_type1==1)|(Height_cats_type1==2)
, (Weight_cats_type1==1)|(Weight_cats_type1==2)
.
The desired output should look like the yellow image below. The table above is a summary of the possibilities for each invterval
This is closely linked to the following question, but when you follow the codes used here as outlined by BrodieG errors arise on the third iteration.
In this example we use foverlaps
in the data.table
I have used the following code
library(intervals)
# create our limits
AGE_cats_type1 <- Intervals(
matrix(c(5, 15, 20, 30, 35, 40), ncol = 2, byrow = TRUE ),
closed = c( TRUE, T ),
type = "Z"
)
AGE_cats_type2 <- Intervals(
matrix(c(8, 13, 14, 16, 18, 40), ncol = 2, byrow = TRUE ),
closed = c( TRUE, T ),
type = "Z"
)
Height_cats_type1 <- Intervals(
matrix(c(100, 120, 121, 140, 141, 186), ncol = 2, byrow = TRUE ),
closed = c( TRUE, T ),
type = "Z"
)
Height_cats_type2 <- Intervals(
matrix(c(110, 125, 126, 145, 146, 190), ncol = 2, byrow = TRUE ),
closed = c( TRUE, T ),
type = "Z"
)
Weight_cats_type1 <- Intervals(
matrix(c(50, 60, 61, 78, 79, 85), ncol = 2, byrow = TRUE ),
closed = c( TRUE, T ),
type = "Z"
)
Weight_cats_type2 <- Intervals(
matrix(c(55, 75, 76, 90, 91, 100), ncol = 2, byrow = TRUE ),
closed = c( TRUE, T ),
type = "Z"
)
#now format data
# first for age
library(data.table)
PEOPLE1 <- data.table(People)
PEOPLE1[, A1:=Age]
I_age_1 <- data.table(cbind(data.frame(AGE_cats_type1), idX=1:3, idY=0))
I_age_2 <- data.table(cbind(data.frame(AGE_cats_type2), idX=0, idY=1:3))
setkey(I_age_1, X1, X2)
setkey(I_age_2, X1, X2)
PEOPLE2 <- data.frame(rbind(
foverlaps(PEOPLE1, I_age_1, by.x=c("Age", "A1"), nomatch=0),
foverlaps(PEOPLE1, I_age_2, by.x=c("Age", "A1"), nomatch=0)))
####################################################
# second iteration for height
PEOPLE3 <- data.table(PEOPLE2)
PEOPLE3[, B1:=Height]
I_height_1 <- data.table(cbind(data.frame(Height_cats_type1), idXa=1:3, idYa=0))
I_height_2 <- data.table(cbind(data.frame(Height_cats_type2), idXa=0, idYa=1:3))
setkey(I_height_1, X1, X2)
setkey(I_height_2, X1, X2)
PEOPLE4 <- data.frame(rbind(
foverlaps(PEOPLE3, I_height_1, by.x=c("Height", "B1"), nomatch=0),
foverlaps(PEOPLE3, I_height_1, by.x=c("Height", "B1"), nomatch=0)))
################################################
# third iteration
PEOPLE5 <- data.table(PEOPLE4)
PEOPLE5[, C1:=Weight]
I_weight_1 <- data.table(cbind(data.frame(Weight_cats_type1), idXb=1:3, idYb=0))
I_weight_2 <- data.table(cbind(data.frame(Weight_cats_type2), idXb=0, idYb=1:3))
setkey(I_weight_1, X1, X2)
setkey(I_weight_2, X1, X2)
PEOPLE6 <- data.frame(rbind(
foverlaps(PEOPLE5, I_weight_1, by.x=c("Height", "B1"), nomatch=0),
foverlaps(PEOPLE5, I_weight_2, by.x=c("Height", "B1"), nomatch=0)))
But get an error in PEOPLE6.
Error in setcolorder(ans, c(xcols1, ycols, xcols2)) :
neworder is length 16 but x has 18 columns.
When I look at PEOPLE4 we see idX idY idxA and idyA are the Age_cats_type1, Age_cats_type2, Height_cat_Type1 and Height_cat_Type2 values