-2
# Sample data: one entry per person; positions line up across the vectors.
Person <- c(1, 2, 3)
Age <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)

# One row per person, with the column names spelled out explicitly.
People <- data.frame(
  Person = Person,
  Age = Age,
  Height = Height,
  Weight = Weight
)

Age_cats_type1 [5-15], [20-30], [35-45]
Age_cats_type2 [8-13], [14-16], [18-40]

Height_cat_Type1 [100-120], [121-140], [141-186]
Height_cat_type2 [110-125], [126-145], [146-190]

Weight_cat_Type1 [50-60], [61-78], [79-85]
Weight_cat_Type2 [55-75], [76-90], [91-100]

For People[1,2] (age=10), this fits within Age_cats_type1==1 and Age_cats_type2==1. For People[1,3] (height=140), this fits within Height_cat_Type1==2 and Height_cat_Type2==2

Now I want to create a table for each unique possible outcome of the intervals for (Age_cats_type1==1)|(Age_cats_type1==2), (Height_cats_type1==1)|(Height_cats_type1==2), (Weight_cats_type1==1)|(Weight_cats_type1==2).

The desired output should look like the yellow image below. The table above is a summary of the possibilities for each interval.

This is closely linked to the following question, but when I follow the code outlined there by BrodieG, errors arise on the third iteration. In this example we use foverlaps from the data.table package.

I have used the following code

library(intervals)
# create our limits

# Build a set of closed integer intervals from a flat vector of bounds.
#
# bounds: numeric vector laid out as lower1, upper1, lower2, upper2, ...
# Returns the object produced by Intervals() with both endpoints closed
# and type "Z", one row per (lower, upper) pair.
make_categories <- function(bounds) {
  stopifnot(is.numeric(bounds), length(bounds) %% 2 == 0)
  Intervals(
    matrix(bounds, ncol = 2, byrow = TRUE),
    closed = c(TRUE, TRUE),
    type = "Z"
  )
}

# Age categories. The third type-1 interval is [35-45], matching the
# category definition given in the question text (it was typed as 40).
AGE_cats_type1 <- make_categories(c(5, 15, 20, 30, 35, 45))
AGE_cats_type2 <- make_categories(c(8, 13, 14, 16, 18, 40))

# Height categories.
Height_cats_type1 <- make_categories(c(100, 120, 121, 140, 141, 186))
Height_cats_type2 <- make_categories(c(110, 125, 126, 145, 146, 190))

# Weight categories.
Weight_cats_type1 <- make_categories(c(50, 60, 61, 78, 79, 85))
Weight_cats_type2 <- make_categories(c(55, 75, 76, 90, 91, 100))

#now format data
# first for age
library(data.table)
# Work on a data.table copy of the people. A1 duplicates Age so that
# foverlaps() can be given a (start, end) column pair for each person.
PEOPLE1 <- data.table(People)
set(PEOPLE1, j = "A1", value = PEOPLE1$Age)

# Lookup tables for the two age schemes. idX carries the type-1 category
# number and idY the type-2 category number; the other one is set to 0.
I_age_1 <- data.table(
  cbind(data.frame(AGE_cats_type1), idX = seq_len(3), idY = 0)
)
I_age_2 <- data.table(
  cbind(data.frame(AGE_cats_type2), idX = 0, idY = seq_len(3))
)

# Both lookup tables are keyed on their X1/X2 columns for the overlap join.
setkeyv(I_age_1, c("X1", "X2"))
setkeyv(I_age_2, c("X1", "X2"))

# Join the people against each scheme in turn and stack the matches.
PEOPLE2 <- data.frame(do.call(
  rbind,
  lapply(
    list(I_age_1, I_age_2),
    function(cats) {
      foverlaps(PEOPLE1, cats, by.x = c("Age", "A1"), nomatch = 0)
    }
  )
))

####################################################
# second iteration for  height

# Carry the age-matched rows forward. B1 duplicates Height so that
# foverlaps() can be given a (start, end) column pair for each row.
PEOPLE3 <- data.table(PEOPLE2)
PEOPLE3[, B1 := Height]

# Lookup tables for the two height schemes. idXa carries the type-1
# category number and idYa the type-2 category number; the other is 0.
I_height_1 <- data.table(
  cbind(data.frame(Height_cats_type1), idXa = 1:3, idYa = 0)
)
I_height_2 <- data.table(
  cbind(data.frame(Height_cats_type2), idXa = 0, idYa = 1:3)
)

setkey(I_height_1, X1, X2)
setkey(I_height_2, X1, X2)

# Join against BOTH height schemes. The second join previously reused
# I_height_1, so the type-2 height categories never reached PEOPLE4.
PEOPLE4 <- data.frame(rbind(
  foverlaps(PEOPLE3, I_height_1, by.x = c("Height", "B1"), nomatch = 0),
  foverlaps(PEOPLE3, I_height_2, by.x = c("Height", "B1"), nomatch = 0)))

################################################
# third iteration

PEOPLE5 <- data.table(PEOPLE4)

# Drop the interval-boundary columns left behind by the earlier joins
# (X1/X2 and any de-duplicated variants such as X1.1 or i.X1). They are
# not needed any more, and they clash with the X1/X2 key columns of the
# weight lookup tables below; that name clash is the likely cause of the
# "neworder is length 16 but x has 18 columns" error from foverlaps().
stale_cols <- grep(
  "^(i\\.)?X[12](\\.[0-9]+)?$",
  names(PEOPLE5),
  value = TRUE
)
if (length(stale_cols) > 0) {
  PEOPLE5[, (stale_cols) := NULL]
}

# C1 duplicates Weight so that foverlaps() can be given a (start, end)
# column pair for each row.
PEOPLE5[, C1 := Weight]

# Lookup tables for the two weight schemes. idXb carries the type-1
# category number and idYb the type-2 category number; the other is 0.
I_weight_1 <- data.table(
  cbind(data.frame(Weight_cats_type1), idXb = 1:3, idYb = 0)
)
I_weight_2 <- data.table(
  cbind(data.frame(Weight_cats_type2), idXb = 0, idYb = 1:3)
)

setkey(I_weight_1, X1, X2)
setkey(I_weight_2, X1, X2)

# Join on the weight columns. This step previously joined on Height/B1,
# which compared heights against the weight intervals.
PEOPLE6 <- data.frame(rbind(
  foverlaps(PEOPLE5, I_weight_1, by.x = c("Weight", "C1"), nomatch = 0),
  foverlaps(PEOPLE5, I_weight_2, by.x = c("Weight", "C1"), nomatch = 0)))

But I get an error when creating PEOPLE6.

Error in setcolorder(ans, c(xcols1, ycols, xcols2)) : 
  neworder is length 16 but x has 18 columns.

When I look at PEOPLE4, I see that idX, idY, idXa and idYa hold the Age_cats_type1, Age_cats_type2, Height_cat_Type1 and Height_cat_Type2 values.

Community
  • 1
  • 1
lukeg
  • 1,327
  • 3
  • 10
  • 27
  • 1
    What code have you tried so far? – r2evans May 18 '15 at 19:57
  • 1
    You need to provide a somewhat more [reproducible problem](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example). Please start by listing actual code you have tried on this problem. Since you cite errors, it does not help us to read "errors arise on the third iteration"; *what errors?* – r2evans May 18 '15 at 20:13

1 Answers1

0

You have a few problems with the formulation of your question. Let's try to reconstruct what you meant.

# Let's first construct the data
# Four parallel vectors: element i of each one describes person i.
Person <- c(1, 2, 3)
Age <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)

# Bind them into one table, naming each column after its vector.
People <- data.frame(
  Person = Person,
  Age = Age,
  Height = Height,
  Weight = Weight
)

# Results in something like this:
#   Person Age Height Weight
# 1      1  10    140     65
# 2      2  22    185     80
# 3      3  30    160     75

# Now we want to represent ranges. One way to do it would be:

# Each scheme is a list of c(lower, upper) pairs, built by pairing the
# vector of lower bounds with the vector of matching upper bounds.
Age_cats_type1 <- Map(c, c(5, 20, 35), c(15, 30, 45))
Age_cats_type2 <- Map(c, c(8, 14, 18), c(13, 16, 40))

Height_cat_Type1 <- Map(c, c(100, 121, 141), c(120, 140, 186))
Height_cat_Type2 <- Map(c, c(110, 126, 146), c(125, 145, 190))

Weight_cat_Type1 <- Map(c, c(50, 61, 79), c(60, 78, 85))
Weight_cat_Type2 <- Map(c, c(55, 76, 91), c(75, 90, 100))

# Then you mentioned something like People[1,1] meant age == 10.
# I believe you made a mistake here. If you type People[1, 1] in the console,
# you'll find you that it returns Person == 1. Therefore, I think
# that you meant to say the People data frame was constructed without the Person vector.

# Rebuild the table from the three measurement vectors only.
People <- data.frame(Age = Age, Height = Height, Weight = Weight)

# Now People[1, 1] returns age == 10.

# Then you went on to say that you wanted some function that returned Age_cats_type == 1
# Well, it seems that you want the first element of the list of ranges that contains the specified value.
# Then let's build it

# Test whether a single value lies inside a closed range.
#
# range: numeric vector whose first two elements are the lower and upper
#        bounds (both inclusive).
# value: the single number to test.
# Returns TRUE when lower <= value <= upper, FALSE otherwise.
contains_value <- function(range, value) {
    lower <- range[1]
    upper <- range[2]
    lower <= value && value <= upper
}

# Find the position of the first range in a list that contains a value.
#
# ranges_list: list of c(lower, upper) ranges, checked in order.
# value:       the single number to look up.
# Returns the 1-based index of the first matching range, or NA when no
# range contains the value (including when ranges_list is empty).
range_index <- function(ranges_list, value) {
    # vapply() always yields a logical vector, even for an empty list;
    # sapply() would return list() there and make which() fail.
    hits <- vapply(ranges_list, contains_value, logical(1), value = value)
    which(hits)[1]
}

# Look up the first person's age and height categories under both schemes,
# selecting the columns by name rather than by position.
range_index(Age_cats_type1, People[1, "Age"]) # 1
range_index(Age_cats_type2, People[1, "Age"]) # 1
range_index(Height_cat_Type1, People[1, "Height"]) # 2
range_index(Height_cat_Type2, People[1, "Height"]) # 2

# Now I didn't understand what the table you were trying to construct was, but maybe these functions will help you build it.
thalesmello
  • 3,301
  • 3
  • 20
  • 20