0

Is there a way to estimate how long a for loop will take to run? I have a loop with about 500,000 iterations that does some basic calculations, and it has been running for a while now. I suspect it might be a never-ending loop.

Here is the code:

# Read a whitespace-separated order-book dump and compute, for every row, a
# signed column position describing where the row's order event sits in the
# book.
#
# Arguments:
#   file   Path of the input file. It has no header and 67 fields per line:
#          DateTime, Seq, then BP/BQ/BO/AP/AQ/AO for levels 1..10, then
#          C, Price, Qty, OldPrice, OldQty.
#   level  Currently unused; kept so existing calls keep working.
#          NOTE(review): the hard-coded limit 32 equals 2 + 6 * 5, so it
#          presumably was meant to follow `level` -- confirm before wiring up.
#
# Returns a numeric vector with one element per row kept after dropping rows
# whose DateTime is 0:
#   * element 1 is always NA (the loop starts at row 2);
#   * 0 when AP1 (column 6) or C (column 63) is NA;
#   * for C == "N": the first column whose value equals Price, or 0 when that
#     column is beyond 32;
#   * for C == "C": the same, negated;
#   * for C == "M": with `a` the first column of the current row equal to
#     Price and `b` the first column of the previous row equal to OldPrice:
#     0 if both are beyond 32, `a` if only `b` is beyond 32, `b - a` if both
#     are below 32;
#   * NA in every other situation (unknown C value, a match exactly at column
#     32, `a` beyond 32 with `b` below 32, or no match at all).
mod <- function(file, level = 5) {
  book_fields <- c("BP", "BQ", "BO", "AP", "AQ", "AO")
  df <- read.csv(
    file = file, header = FALSE, sep = "",
    col.names = c(
      "DateTime", "Seq",
      # BP1, BQ1, BO1, AP1, AQ1, AO1, BP2, ... AO10
      as.vector(outer(book_fields, 1:10, paste0)),
      "C", "Price", "Qty", "OldPrice", "OldQty"
    )
  )
  df <- df[which(df$DateTime != 0), ]
  # timestamp conversion: the raw value is divided by 10^9 to get seconds
  df$DateTime <- as.POSIXct(df$DateTime / (10^9), origin = "1970-01-01")

  n_rows <- nrow(df)
  # Pre-allocate instead of growing the vector inside the loop; NA marks
  # "nothing assigned", which is what the first row always ends up with.
  change <- rep(NA_real_, n_rows)

  # Pull the scalar-per-row columns out once; indexing a plain vector is far
  # cheaper than df[i, j] on every iteration.
  ap1 <- df[[6]]
  event <- as.character(df[[63]])
  price <- df[[64]]
  old_price <- df[[66]]

  # seq_len(n)[-1] is empty for n < 2, whereas 2:n would count downwards.
  for (i in seq_len(n_rows)[-1]) {
    if (is.na(ap1[i]) || is.na(event[i])) {
      change[i] <- 0
      next
    }
    if (!(event[i] %in% c("N", "C", "M"))) {
      next
    }

    # First column of the current row holding the event price. Column 64
    # (Price itself) always matches, so this is NA only when Price is NA.
    a <- which(df[i, ] == price[i])[1]
    if (is.na(a)) {
      next
    }

    if (event[i] == "N") {
      if (a > 32) {
        change[i] <- 0
      } else if (a < 32) {
        change[i] <- a
      }
    } else if (event[i] == "C") {
      if (a > 32) {
        change[i] <- 0
      } else if (a < 32) {
        change[i] <- -a
      }
    } else {
      # "M": locate the old price in the previous row.
      b <- which(df[i - 1, ] == old_price[i])[1]
      if (is.na(b)) {
        # No match: leave NA rather than aborting on `if (NA)`.
        next
      }
      if (a > 32 && b > 32) {
        change[i] <- 0
      } else if (a < 32 && b > 32) {
        change[i] <- a
      } else if (a < 32 && b < 32) {
        change[i] <- b - a
      }
    }
  }
  change
}

What I am trying to do is first check what column 63 ("C") says. If it is "N" or "C", look up the value of column 64 ("Price") elsewhere in that row (i.e. apart from column 64 itself) and assign the matching column number to change[i], making it negative if column 63 was "C" and positive if it was "N".

If column 63 ("C") says "M", first look up the value of column 66 ("OldPrice") in the previous row, i-1. Then look up the value of column 64 ("Price") in the current row, and assign the difference between the two column numbers to change[i].

So the output should be a vector of negative or positive integers.

> dput(df[1:20,])
structure(list(DateTime = c(1.448855100369e+18, 1.448855100369e+18, 
1.448855100375e+18, 1.448855100376e+18, 1.448855100378e+18, 1.448855100379e+18, 
1.44885510038e+18, 1.44885510038e+18, 1.44885510038e+18, 1.448855100383e+18, 
1.448855100384e+18, 1.448855100385e+18, 1.448855100385e+18, 1.448855100385e+18, 
1.448855100386e+18, 1.448855100386e+18, 1.448855100386e+18, 1.448855100387e+18, 
1.448855100389e+18, 1.448855100389e+18), Seq = c(92L, 108L, 406L, 
479L, 643L, 722L, 811L, 822L, 828L, 1046L, 1103L, 1171L, 1186L, 
1196L, 1238L, 1249L, 1254L, 1273L, 1333L, 1343L), BP1 = c(80830L, 
80830L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 
81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81100L, 81200L, 
81200L, 81200L, 81200L), BQ1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BO1 = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), AP1 = c(0L, 83435L, 83435L, 82165L, 82165L, 82165L, 
82165L, 82165L, 82345L, 82345L, 82165L, 82345L, 82345L, 82165L, 
82340L, 82340L, 82340L, 82340L, 82165L, 82340L), AQ1 = c(0L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), AO1 = c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP2 = c(0L, 0L, 80830L, 
80830L, 80830L, 80830L, 80830L, 80835L, 80835L, 80835L, 80835L, 
80835L, 80835L, 80835L, 80835L, 80835L, 81100L, 81100L, 81100L, 
81100L), BQ2 = c(0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BO2 = c(0L, 0L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), AP2 = c(0L, 0L, 0L, 83435L, 83200L, 82650L, 82650L, 82650L, 
82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 82650L, 
82650L, 82650L, 82650L, 82650L), AQ2 = c(0L, 0L, 0L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
    AO2 = c(0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L), BP3 = c(0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 80830L, 80830L, 80830L, 80830L, 80830L, 80830L, 80830L, 
    80830L, 80830L, 80835L, 80835L, 80835L, 80835L), BQ3 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), BO3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AP3 = c(0L, 
    0L, 0L, 0L, 83435L, 83200L, 83200L, 83200L, 83200L, 83200L, 
    83200L, 83200L, 82900L, 82900L, 82900L, 82900L, 82900L, 82900L, 
    82900L, 82900L), AQ3 = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AO3 = c(0L, 
    0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), BP4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 80830L, 80830L, 80830L, 80830L
    ), BQ4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), BO4 = c(0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
    1L), AP4 = c(0L, 0L, 0L, 0L, 0L, 83435L, 83430L, 83430L, 
    83430L, 83430L, 83430L, 83430L, 83200L, 83200L, 83200L, 83200L, 
    83200L, 83200L, 83200L, 83200L), AQ4 = c(0L, 0L, 0L, 0L, 
    0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L), AO4 = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP5 = c(0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 80035L, 
    80035L, 80035L), BQ5 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L), BO5 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 1L, 1L, 1L), AP5 = c(0L, 0L, 0L, 0L, 0L, 0L, 83435L, 
    83435L, 83435L, 83435L, 83435L, 83435L, 83430L, 83430L, 83430L, 
    83430L, 83430L, 83430L, 83430L, 83430L), AQ5 = c(0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), AO5 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP6 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BQ6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO6 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AP6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 83500L, 83500L, 83500L, 83435L, 83435L, 83435L, 83435L, 
    83435L, 83435L, 83435L, 83435L), AQ6 = c(0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), AO6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), BP7 = c(0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L), BQ7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BO7 = c(0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L), AP7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 83500L, 83500L, 83500L, 83500L, 83500L, 83500L, 
    83500L, 83500L), AQ7 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), AO7 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), BP8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BQ8 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BO8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AP8 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AQ8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AO8 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BP9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BQ9 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BO9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AP9 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AQ9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AO9 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BP10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), BQ10 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), BO10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AP10 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), AQ10 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), AO10 = c(0L, 
    0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
    0L, 0L, 0L, 0L), C = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 3L), .Label = c("", 
    "C", "M", "N"), class = "factor"), Price = c(80830L, 83435L, 
    81100L, 82165L, 83200L, 82650L, 83430L, 80835L, 82345L, 83500L, 
    82165L, 82345L, 82900L, 82165L, 82340L, 83200L, 81200L, 80035L, 
    82165L, 82340L), Qty = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), OldPrice = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, 82165L, NA, 82345L, 82165L, NA, 
    82345L, 82165L, NA, NA, NA, 82340L, 82165L), OldQty = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, 1L, NA, 1L, 1L, NA, 
    NA, NA, 1L, 1L)), .Names = c("DateTime", "Seq", "BP1", "BQ1", 
"BO1", "AP1", "AQ1", "AO1", "BP2", "BQ2", "BO2", "AP2", "AQ2", 
"AO2", "BP3", "BQ3", "BO3", "AP3", "AQ3", "AO3", "BP4", "BQ4", 
"BO4", "AP4", "AQ4", "AO4", "BP5", "BQ5", "BO5", "AP5", "AQ5", 
"AO5", "BP6", "BQ6", "BO6", "AP6", "AQ6", "AO6", "BP7", "BQ7", 
"BO7", "AP7", "AQ7", "AO7", "BP8", "BQ8", "BO8", "AP8", "AQ8", 
"AO8", "BP9", "BQ9", "BO9", "AP9", "AQ9", "AO9", "BP10", "BQ10", 
"BO10", "AP10", "AQ10", "AO10", "C", "Price", "Qty", "OldPrice", 
"OldQty"), row.names = c(NA, 20L), class = "data.frame")
UtdMan
  • 151
  • 2
  • 12
  • 2
    Anything that estimates and adds a time bar that I have seen also adds a fairly significant calculation cost making the whole thing take even longer. You could seriously speed that code up by vectorising, it is terribly inefficient. – JeremyS Mar 14 '16 at 06:36
  • Any suggestions on how? – UtdMan Mar 14 '16 at 06:38
  • Your question at the moment seems to be more on estimating the time, in which case I suggest checking out `microbenchmark()`. Vectorising may indeed speed things up, but may be an answer to a different question. – Ricky Mar 14 '16 at 06:48
  • 1
    You should state the logic of how you build your desired output and an example of a typical input. Also notice that some of the lines that just contain `change` won't cause the function to exit and to return `change` (if that was your intention). You should declare `return(change)`. Remember that the result of an expression is returned only if it is the *last* evaluated expression. This seem not to be the case in some instances. – nicola Mar 14 '16 at 06:49
  • 1
    give an example of the input data and expected output. – JeremyS Mar 14 '16 at 06:54
  • here is the data: https://drive.google.com/file/d/0BwwnLqZcphbET25BTlpJbVA5U00/view?usp=sharing – UtdMan Mar 14 '16 at 07:06
  • The output Im looking for as of now is a vector of positive and negative integers, which is what change is calculating. Later Im gonna need a data.frame with a DateTime column and its corresponding change values. – UtdMan Mar 14 '16 at 07:08
  • 234Mb, no thanks. Just give the first 10 rows or something - `dput(df[1:10,])`, edit it into the question. – JeremyS Mar 14 '16 at 07:34
  • Haha. okay. Added it, also made a tiny change to the code, which I realised was a mistake – UtdMan Mar 14 '16 at 07:45
  • Of course your code should be vectorized (but you don't explain what it is supposed to do, so I won't bother looking into this). However, you can already speed this up a lot by avoiding the cardinal mistake of growing an object in a loop. Pre-allocate `change`, e.g., `change <- integer(nrow(df))`. – Roland Mar 14 '16 at 08:22
  • @Roland and @JeremyS Correct me if Im wrong, but I don't think I can vectorize because of the last part of the code. After. `if(df[i,63] == "M")` when Im evaluating `b = which(df[i-1,] == df[i,66])` I need to look back at the previous row in order to do my calculations. – UtdMan Mar 14 '16 at 08:36
  • @UtdMan You have been told what is required if you want someone looking into this. I won't mentally parse your longish code. Read this: http://stackoverflow.com/a/5963610/1412059 – Roland Mar 14 '16 at 08:39
  • Oh right, either I missed that `i-1` or that was the error you corrected. It is a big file, maybe just switching `read.csv` with data.table's `fread` will help a lot – JeremyS Mar 14 '16 at 08:50
  • @JeremyS That was in fact the change I made. So am I right then that vectorization is not possible? I did try to use fread and posted a question about it here too. It isn't able to handle the missing values the way I want it to, or the way read.csv does. http://stackoverflow.com/questions/35908249/error-in-freaddata-table-becasue-its-not-reading-nas-correctly-as-i-want-it-to – UtdMan Mar 14 '16 at 09:05
  • 1. Your bottleneck is not data import. It is that you grow an object in a loop. Substitute the line `change = c()` with `change <- integer(nrow(df))` and you should see a tremendous speed-up. 2. From your description it should be possible to at least partly vectorize this. But you don't provide data in a way which allows easy reproduction for us. – Roland Mar 14 '16 at 09:14
  • How do you suggest I provide the data? I also provided the link for the entire file above – UtdMan Mar 14 '16 at 09:18
  • I have provided a link to the relevant FAQ. – Roland Mar 14 '16 at 09:32
  • Added. My code is now giving me errors after I made changes. Hold off on evaluating my code if thats what you're trying to do. I am figuring out whats wrong, I will post the edits once I do. – UtdMan Mar 14 '16 at 09:43
  • if `which(df[(i-1),] == df[i,66])` has not match the return is `integer(0)` – JeremyS Mar 14 '16 at 09:48
  • It has to match in (i-1), the error Im getting is there too. – UtdMan Mar 14 '16 at 10:05
  • when I need to use the previous or next row in a function I add a new column of the value. e.g. `df$next <- c(df[2:20,66],NA)` then `which(df[(i-1),] == df[i,66])` becomes `which(df[i,] == df[i,68])` – JeremyS Mar 15 '16 at 03:11

1 Answers1

1

Here is how I would do this. The only loop needed is to apply which, which should be fast:

# Data-frame column number of the first cell in each row of `rows` (a slice
# of columns 3:62) that equals the corresponding element of `values`.
# Returns NA_integer_ for rows without a match, so the result is always an
# integer vector -- apply(..., 1, which) would silently become a list as soon
# as one row has zero or several matches.
first_match <- function(rows, values) {
  hits <- as.matrix(rows) == values
  pos <- vapply(
    seq_len(nrow(hits)),
    function(i) which(hits[i, ])[1L],
    integer(1)
  )
  pos + 2L  # the lookup skips the first two columns
}

# Row indices rather than logical masks, so NA entries in column 63 cannot
# break the subscripted assignments below.
cancel_rows <- which(DF[, 63] == "C")
modify_rows <- which(DF[, 63] == "M")

#find column matches for price
DF$change <- first_match(DF[, 3:62, drop = FALSE], DF[, 64])
#negative for C
DF$change[cancel_rows] <- -DF$change[cancel_rows]
#column matches for old price in preceding row if M
pos2 <- rep(NA_integer_, length(modify_rows))
has_prev <- modify_rows > 1L  # an "M" in the first row has no preceding row
pos2[has_prev] <- first_match(
  DF[modify_rows[has_prev] - 1L, 3:62, drop = FALSE],
  DF[modify_rows[has_prev], 66]
)
#assign the difference
DF$change[modify_rows] <- pos2 - DF$change[modify_rows]
DF$change
#[1]  3  6  3  6 12 12 24  9  0 36  0  0 18  0  0 24  3 27  0  0

This assumes that there is always a matching column. If that's not the case wrap which in a function that returns NA if which returns integer(0).

Roland
  • 127,288
  • 10
  • 191
  • 288
  • DF$change[DF[,63] == "C"] <- DF$change[DF[,63] == "C"] * (-1) this part gives me. `Error in df$change[df[, 63] == "C"] * (-1) : non-numeric argument to binary operator` I tried producing rows that have "C" first and then multiplying by -1 but that didnt work either – UtdMan Mar 16 '16 at 10:27
  • Note the different capitalization of `DF`. (I don't like calling data.frames `df` because that's the name of a function.) – Roland Mar 16 '16 at 10:32
  • Cmon!! I realise that, I undertand Im a newb but not that bad. I ran it with my own names. Just copy pasted it from above in a hurry. Plus I wouldnt have gotten the error I got if that was the mistake – UtdMan Mar 16 '16 at 10:37
  • `*` is complaining that `DF$change[DF[,63] == "C"]` is not numeric. That should not be the case since `which` returns integers. Investigate why `DF$change` is not numeric. – Roland Mar 16 '16 at 11:13
  • It definitely is numeric. Could it be because it doesn't match sometimes, so it gives `df$change = integer(0)` – UtdMan Mar 16 '16 at 11:21
  • @UtdMan Then it wouldn't be a numeric vector, but a list. The last sentence of my answer would apply in that case. – Roland Mar 16 '16 at 11:22
  • Oh and for "C" it also needs to look back at the previous row, not the same. Sorry, that's a mistake I debugged today. – UtdMan Mar 16 '16 at 11:23
  • What's the reason for putting a +2L at the end of the expression? Wouldn't you get the same answer if you just did `apply(DF[, 1:62] == DF[,64], 1, which)` ? – UtdMan Mar 16 '16 at 12:10
  • I was actually unsure if that is needed. I added it to get the actual column number since the first two columns were excluded from the lookup (and looking at your data they should be excluded). – Roland Mar 16 '16 at 12:11
  • Okay. So I have the vectorization set, and have the values I need for the moment in `df$change`. But what do I do about the conditions I had, like different calculations for odd/even, or if ts above 32 `change = 0` etc. Simply write if statements? – UtdMan Mar 16 '16 at 12:24
  • No. Forget `if`. It's not vectorized. Use subsetting. Something like `DF$change[seq_len(nrow(DF)) %% 2 == 1L] <- ...`. (Or even easier and more efficient with vector recycling: `DF$change[c(TRUE, FALSE)] <- ...`) – Roland Mar 16 '16 at 12:31
  • Okay, but it also differs according to "C" "N" "M" – UtdMan Mar 16 '16 at 12:58