0

I need to use a function, which I have implemented in R. This is my code:

IS_IV <- function(name,SR){  ## SR is in Hz
data <- get(name)
SR <- SR
n <- nrow(data)
p <- 60*60*24*SR  ## no. of data points per day
l <- 60*60*SR   ## no. of data points per hour
mean_all <- mean(data[1:n,])
## -----------------------------
## IS numerator calculation
for (h in 1:p){
x <- ((mean(data[h:(l+h-1),]))-mean_all)^2
if (h == 1){
  result_ISnum <- x
} else {
  result_ISnum <- rbind (result_ISnum, x)
}
}
ISnum <- sum(result_ISnum)
ISnumerator <- n*ISnum
## -----------------------------
## IS denominator calculation
  for (i in 1:n){
    y <- ((data[i,]-mean_all)^2)
    if (i == 1){
    result_ISdenom <- y
  } else {
    result_ISdenom <- rbind (result_ISdenom, y)
  }
}
ISdenom <- sum(result_ISdenom)  
## -----------------------------
ISdenominator <- p*ISdenom
## -----------------------------
## IS calculation
IS <- ISnumerator/ISdenominator
## -----------------------------

## -----------------------------
## IV numerator calculation
  for (k in 2:n){
    x <- ((data[i,]-data[(i-1),])^2)
    if (k == 1){
      result_IVnum <- x
    } else {
      result_IVnum <- rbind (result_IVnum, x)
    }
  }
  IVnum <- sum(result_IVnum)
  IVnumerator <- n*IVnum
## -----------------------------
## IV denominator calculation
IVdenominator <- (n-1)*ISdenom ## uses ISdenom as only the multiplier is different
## -----------------------------
## IV calculation
IV <- IVnumerator/IVdenominator

result <- c(IS, IV)
colnames(result) <- c("Interday Stability (IS)", "Intraday Variability (IV)")
return(result)
}

Obviously, what makes the process so slow is the calculation of the ISdenominator, because it has to loop through every single data point (may be up to 800,000 or even more).

The question now is whether I will just have to live with this and look for the fastest computer around or whether you see an opportunity, to speed the whole thing up.

Thanks a lot!

Christine Blume
  • 507
  • 3
  • 7
  • 17

1 Answers1

0

Your function has some issues i corrected them.

IS_IV <- function(name,SR){  ## SR is in Hz
  data <- name
  SR <- SR
  n <- nrow(data)
  p <- 60*60*24*SR  ## no. of data points per day
  l <- 60*60*SR   ## no. of data points per hour
  mean_all <- mean(data[1:n,])
  ## -----------------------------
  ## IS numerator calculation
  for (h in 1:p){
    x <- ((mean(data[h:(l+h-1),]))-mean_all)^2
    if (h == 1){
      result_ISnum <- x
    } else {
      result_ISnum <- rbind (result_ISnum, x)
    }
  }

  ISnum <- sum(result_ISnum)
  ISnumerator <- n*ISnum
  ## -----------------------------
  ## IS denominator calculation
  for (i in 1:n){
    y <- ((data[i,]-mean_all)^2)
    if (i == 1){
      result_ISdenom <- y
    } else {
      result_ISdenom <- rbind (result_ISdenom, y)
    }
  }

  ISdenom <- sum(result_ISdenom)  
  ## -----------------------------
  ISdenominator <- p*ISdenom
  ## -----------------------------
  ## IS calculation
  IS <- ISnumerator/ISdenominator
  ## -----------------------------

  ## -----------------------------
  ## IV numerator calculation
  for (k in 2:n){
    x <- ((data[k,]-data[(k-1),])^2)
    if (k == 2){
      result_IVnum <- x
    } else {
      result_IVnum <- rbind (result_IVnum, x)
    }
  }

  IVnum <- sum(result_IVnum)
  IVnumerator <- n*IVnum
  ## -----------------------------
  ## IV denominator calculation
  IVdenominator <- (n-1)*ISdenom ## uses ISdenom as only the multiplier is different
  ## -----------------------------
  ## IV calculation
  IV <- IVnumerator/IVdenominator

  result <- c(IS, IV)
  names(result) <- c("Interday Stability (IS)", "Intraday Variability (IV)")
  return(result)
}

I corrected above function.

IS_IV_vck<-function(name,SR){
  data <- name
  SR <- SR
  n <- nrow(data)
  p <- 60*60*24*SR  ## no. of data points per day
  l <- 60*60*SR   ## no. of data points per hour
  mean_all <- mean(data[1:n,])
  ## -----------------------------
  ## IS numerator calculation
  result_ISnum<- matrix(sapply(1:p,FUN = function(x) ((mean(data[x:(l+x-1),]))-mean_all)^2),ncol = 1)
  ISnum <- sum(result_ISnum)
  ISnumerator <- n*ISnum
  ## -----------------------------
  ## IS denominator calculation
  result_ISdenom <- (data-mean_all)^2
  ISdenom <- sum(result_ISdenom)  
  ## -----------------------------
  ISdenominator <- p*ISdenom
  ## -----------------------------
  ## IS calculation
  IS <- ISnumerator/ISdenominator
  ## -----------------------------

  ## -----------------------------
  ## IV numerator calculation
  result_IVnum <- diff(data)^2
  IVnum <- sum(result_IVnum)
  IVnumerator <- n*IVnum
  ## -----------------------------
  ## IV denominator calculation
  IVdenominator <- (n-1)*ISdenom ## uses ISdenom as only the multiplier is different
  ## -----------------------------
  ## IV calculation
  IV <- IVnumerator/IVdenominator

  result <- c(IS, IV)
  names(result) <- c("Interday Stability (IS)", "Intraday Variability (IV)")
  return(result)
}

set.seed(123)
data<-matrix(rnorm(50000,5,2),ncol = 2)

> microbenchmark(IS_IV(data,.1),IS_IV_vck(data,.1),times = 3)
Unit: milliseconds
                 expr        min         lq       mean     median         uq          max neval
 IS_IV(data, 0.1)     20182.1790 20191.8787 20246.1370 20201.5784 20278.1160 20354.6535     3
 IS_IV_vck(data, 0.1)   167.3961   169.2664   170.8591   171.1366   172.5905   174.0444     3

res1<-IS_IV(data,.1)
res2<-IS_IV_vck(data,.1)
> identical(res1,res2)
[1] TRUE
vck
  • 827
  • 5
  • 10