0

I still get tripped up using ifelse and if...else when I want to create a vector or a new data.frame variable. The title of the following question seems closely related, but it does not address my issue: "Why can't R's ifelse statements return vectors?"

The code below shows my attempts to create the variables my.data2$v1b and my.data2$v2b. I failed with ifelse and if...else, then succeeded with a for-loop and with apply.

Is there a way to create my.data2$v1b and my.data2$v2b with ifelse or if...else? I assume not based on my attempts and other Stack Overflow questions. So, what is the canonical way of creating these variables in R? Using apply works, but seems rather complex. Using a for-loop works but I get the impression for-loops are to be avoided.

There are many questions about ifelse, but I did not locate one that addressed this specific question: given that ifelse and if...else do not seem to work, what is the best solution? Sorry if this is a duplicate.

Here is my data set:

# Example data, parsed from inline text: 14 rows, two per refno.
my.data2 <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = '
    refno  v1  v2 state1 state2 xday first last
      111  41  47      1      2   42     1    2
      111  41  47      1      2   42     2    1
      222  45  49      1      4   47     1    2
      222  45  49      1      4   47     2    1
      333  59  65      1      2   65     1    2
      333  59  65      1      2   65     2    1
      444  45  49      1      2   48     1    2
      444  45  49      1      2   48     2    1
      555  66  80      1      2   75     1    2
      555  66  80      1      2   75     2    1
      666 103 109      1      2  108     1    2
      666 103 109      1      2  108     2    1
      777  43  46      1      2   45     1    2
      777  43  46      1      2   45     2    1
'
)

Here are the desired vectors:

# Expected results, one value per row of my.data2; each line below holds the
# two rows that share a refno.
desired.data.v1b <- c(
  41, 42,
  45, 47,
  59, 65,
  45, 48,
  66, 75,
  103, 108,
  43, 45
)
desired.data.v2b <- c(
  42, 47,
  47, 49,
  65, 65,
  48, 49,
  75, 80,
  108, 109,
  45, 46
)

Here is where I start attempting to create these vectors:

# Working copies of the v1 and v2 columns; the attempts below overwrite
# individual elements of these vectors.
v1b <- my.data2[["v1"]]
v2b <- my.data2[["v2"]]

# Vectorised ifelse(): for each row take xday when the row qualifies,
# otherwise keep v1 (resp. v2).
# The original attempt "did not work" only because it was written with `<`
# (a less-than comparison, so nothing was assigned) where the assignment
# arrow `<-` was intended. The ifelse() calls themselves are unchanged.
my.data2$v1b <- ifelse(
  my.data2$state1 == 1 & my.data2$state2 %in% c(2, 4) & my.data2$last == 1,
  my.data2$xday,
  my.data2$v1
)
my.data2$v2b <- ifelse(
  my.data2$state1 == 1 & my.data2$state2 %in% c(2, 4) & my.data2$first == 1,
  my.data2$xday,
  my.data2$v2
)

# this if...else does not work
# Why: if() expects a single TRUE/FALSE, but each condition below is built
# from whole columns of my.data2 with the element-wise `&`, so it has one
# element per row. if...else therefore cannot choose row by row; ifelse() is
# the vectorised counterpart.
# NOTE(review): recent R versions are expected to raise an error for a
# condition of length > 1, while older ones used only the first element with a
# warning -- confirm for the R version in use.
if(my.data2$state1 == 1 & my.data2$state2 %in% c(2,4) & my.data2$last  == 1) {v1b = my.data2$xday} else {v1b = my.data2$v1}
if(my.data2$state1 == 1 & my.data2$state2 %in% c(2,4) & my.data2$first == 1) {v2b = my.data2$xday} else {v2b = my.data2$v2}

# this for-loop works
# Row-by-row version. A row with state1 == 1 and state2 in {2, 4} gets xday
# when last (resp. first) equals 1, and v1 (resp. v2) otherwise. Rows that do
# not meet the state condition keep whatever v1b / v2b already hold.
# seq_len() gives zero iterations for an empty data frame, whereas 1:nrow()
# would iterate over c(1, 0).
for (i in seq_len(nrow(my.data2))) {
  # Each test here is on a single element, so the scalar, short-circuiting
  # `&&` is used instead of the element-wise `&`.
  if (my.data2$state1[i] == 1 && my.data2$state2[i] %in% c(2, 4)) {
    v1b[i] <- if (my.data2$last[i] == 1) my.data2$xday[i] else my.data2$v1[i]
    v2b[i] <- if (my.data2$first[i] == 1) my.data2$xday[i] else my.data2$v2[i]
  }
}

# Compare the loop results with the expected vectors; all.equal() gives TRUE
# on a match and a description of the differences otherwise.
all.equal(target = desired.data.v1b, current = v1b)
all.equal(target = desired.data.v2b, current = v2b)

# Store the results as columns of my.data2.
my.data2[["v1b"]] <- v1b
my.data2[["v2b"]] <- v2b

# this apply works
# apply() passes each row of my.data2 to the function as a named vector; the
# function returns xday for a qualifying row and v1 (resp. v2) otherwise.
# Any names on the result are dropped before the comparison.
my.v1 <- unname(apply(my.data2, 1, function(row) {
  use.xday <- row[["state1"]] == 1 && row[["state2"]] %in% c(2, 4) &&
    row[["last"]] == 1
  if (use.xday) row[["xday"]] else row[["v1"]]
}))
my.v2 <- unname(apply(my.data2, 1, function(row) {
  use.xday <- row[["state1"]] == 1 && row[["state2"]] %in% c(2, 4) &&
    row[["first"]] == 1
  if (use.xday) row[["xday"]] else row[["v2"]]
}))

all.equal(target = desired.data.v1b, current = my.v1)
all.equal(target = desired.data.v2b, current = my.v2)

EDIT

Maybe this is the canonical solution?

# Logical-index version: start from a -99 sentinel, then fill the rows that
# meet the state condition. Rows that do not meet it keep -99.
my.data2$v1b <- rep(-99, nrow(my.data2))
my.data2$v2b <- rep(-99, nrow(my.data2))

# Build each row mask once instead of repeating the full condition on both
# sides of every assignment.
in.scope   <- my.data2$state1 == 1 & my.data2$state2 %in% c(2, 4)
last.is.1  <- my.data2$last == 1
first.is.1 <- my.data2$first == 1

# v1b: xday where last == 1, v1 otherwise (in-scope rows only).
my.data2$v1b[in.scope &  last.is.1] <- my.data2$xday[in.scope &  last.is.1]
my.data2$v1b[in.scope & !last.is.1] <- my.data2$v1[in.scope & !last.is.1]

# v2b: xday where first == 1, v2 otherwise (in-scope rows only).
my.data2$v2b[in.scope &  first.is.1] <- my.data2$xday[in.scope &  first.is.1]
my.data2$v2b[in.scope & !first.is.1] <- my.data2$v2[in.scope & !first.is.1]

all.equal(desired.data.v1b, my.data2$v1b)
all.equal(desired.data.v2b, my.data2$v2b)
Community
  • 1
  • 1
Mark Miller
  • 12,483
  • 23
  • 78
  • 132
  • 3
    You won't believe it: your first ifelse statement is perfect, but somehow the `<-` sign got messed up as `<`. My personal advice is to avoid `<-` altogether and use only `=`... – Jealie Oct 23 '15 at 17:01
  • 1
    Thank you. I think this means it is time to step away from the computer for a while. – Mark Miller Oct 23 '15 at 17:06
  • 1
    I think the apply solution is the most legible of your proposed options. Are you open to using `data.table`? That could give you another, rather more elegant option. Condensed in one statement: `as.data.table(my.data2)[, v1b := ifelse(state1 == 1 & state2 %in% c(2,4) & last == 1, xday, v1)]$v1b`. – cocquemas Oct 23 '15 at 17:28

0 Answers0