3

My dataset contains 2 variables y and t [05s]. y was measured every 05 seconds.

I am trying to calculate the average slope within a moving 20-second-window, i.e. after calculating the first 20-second slope value the window moves forward one time unit (05 seconds) and calculates the next 20-second-window, producing successive 20-second slope values at 05-second increments.

I thought that calculating a rolling regression with rollapply (zoo package) would do the trick, but I get the same intercept and slope values for each window over and over again. What can I do?

My data:

dput(DataExample)
structure(list(t = c(0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 
0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 
1, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.55, 
1.6, 1.65, 1.7, 1.75, 1.8, 1.85, 1.9, 1.95, 2, 2.05, 2.1, 2.15, 
2.2, 2.25, 2.3, 2.35, 2.4, 2.45, 2.5, 2.55, 2.6, 2.65, 2.7, 2.75, 
2.8, 2.85, 2.9, 2.95, 3, 3.05, 3.1, 3.15, 3.2, 3.25, 3.3, 3.35, 
3.4, 3.45, 3.5, 3.55, 3.6, 3.65, 3.7, 3.75, 3.8, 3.85, 3.9, 3.95, 
4, 4.05, 4.1, 4.15, 4.2, 4.25, 4.3, 4.35, 4.4, 4.45, 4.5, 4.55, 
4.6, 4.65, 4.7, 4.75, 4.8, 4.85, 4.9, 4.95, 5, 5.05, 5.1, 5.15, 
5.2, 5.25, 5.3, 5.35, 5.4, 5.45, 5.5, 5.55, 5.6, 5.65, 5.7, 5.75, 
5.8, 5.85, 5.9, 5.95, 6, 6.05, 6.1, 6.15, 6.2, 6.25, 6.3, 6.35, 
6.4, 6.45, 6.5, 6.55, 6.6, 6.65, 6.7, 6.75, 6.8, 6.85, 6.9, 6.95, 
7, 7.05, 7.1, 7.15, 7.2, 7.25, 7.3, 7.35, 7.4, 7.45, 7.5, 7.55, 
7.6, 7.65, 7.7, 7.75, 7.8, 7.85, 7.9, 7.95, 8, 8.05, 8.1, 8.15, 
8.2, 8.25, 8.3, 8.35, 8.4, 8.45, 8.5, 8.55, 8.6, 8.65, 8.7, 8.75, 
8.8, 8.85, 8.9, 8.95, 9, 9.05, 9.1, 9.15, 9.2, 9.25, 9.3, 9.35, 
9.4, 9.45, 9.5, 9.55, 9.6, 9.65, 9.7, 9.75, 9.8, 9.85, 9.9, 9.95, 
10, 10.05, 10.1, 10.15, 10.2, 10.25, 10.3), y = c(3.05, 3.04, 
3.02, 3.05, 3.01, 3.02, 3.02, 3.05, 3.02, 3.01, 3.04, 3.04, 3.03, 
3.03, 3.03, 3.02, 3.02, 3.03, 3.03, 3.03, 3.04, 3.03, 3.03, 3.03, 
3.03, 3.02, 3.02, 3.02, 3.01, 3.03, 3.03, 3.03, 3.03, 3.03, 3.02, 
3.01, 3.02, 3.02, 3.01, 3.02, 3.02, 3.02, 3.03, 3.02, 3.02, 3.01, 
3.01, 3.02, 3.01, 3.02, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01, 
3.02, 3, 3.01, 3.02, 3.02, 3.02, 3.01, 3.01, 3.01, 3.01, 3.02, 
3, 3.01, 3.01, 3.01, 3.01, 3.01, 3.01, 3, 3, 3.01, 3, 3, 3.01, 
3.01, 3.01, 3.01, 3, 3, 3, 3.01, 3, 3, 3.01, 3.01, 3.01, 3.01, 
3.01, 3.01, 3, 3.02, 3, 3.01, 3.02, 3.04, 3.05, 3.08, 3.04, 3.06, 
3.08, 3.06, 3.08, 3.09, 3.04, 3.05, 3.07, 3.08, 3.06, 3.08, 3.08, 
3.07, 3.08, 3.08, 3.05, 3.06, 3.07, 3.07, 3.06, 3.08, 3.08, 3.08, 
3.08, 3.08, 3.05, 3.06, 3.08, 3.08, 3.06, 3.09, 3.07, 3.08, 3.08, 
3.08, 3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.08, 3.08, 
3.06, 3.07, 3.07, 3.07, 3.06, 3.09, 3.07, 3.07, 3.07, 3.08, 3.07, 
3.07, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 
3.07, 3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.08, 3.06, 3.07, 3.06, 
3.07, 3.06, 3.08, 3.07, 3.07, 3.06, 3.07, 3.06, 3.07, 3.06, 3.07, 
3.06, 3.07, 3.06, 3.06, 3.06, 3.07, 3.04, 3.04, 3.04, 3.06, 3.06, 
3.04, 3.04)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-207L), .Names = c("t", "y"))

R-Code:

require(zoo)
library("zoo", lib.loc="~/R/win-library/3.3")
rollapply(zoo(DataExample),
          width=5,
          FUN = function(Z) 
          { 
            z = lm(formula=y~t, data = as.data.frame(DataExample)); 
            return(z$coef) 
          }, by=1,
          by.column=FALSE, align="right")
Mark Miller
  • 12,483
  • 23
  • 78
  • 132
Eva
  • 33
  • 1
  • 4
  • Please `dput(DataExample)` your data. – Christoph Dec 09 '16 at 13:04
  • Sorry for my naive question, but how can I do this on stackoverflow? – Eva Dec 09 '16 at 13:05
  • check [this](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example). Type dput(mydata) and paste that into your question. If there is too much data make a sample. – Henk Dec 09 '16 at 13:06
  • And [this](http://stackoverflow.com/questions/743812/calculating-moving-average-in-r) – Christoph Dec 09 '16 at 13:06
  • Okay, that makes sense...Do you have an idea about how I canchange it to fit "moving subsets"? I thought that rollapply would to the moving part... Or do I need a completely new approach? – Eva Dec 09 '16 at 13:43
  • Calculating the slope for 5 datapoints, then moving forward one time unit and calculatig the slope for the next 5 datapoints... I don't know if I make myself clear ;) – Eva Dec 09 '16 at 13:47
  • 2
    @Zheyuan Li - Sorry, i kind of overlooked the second part of your answer... I've tried it out... It does the trick, you're awesome!! Thank you so, so much!! ( : – Eva Dec 09 '16 at 14:51
  • @Zheyuan Li That is so nice of you, but I would rather vote your comment/answer up ;) I am new to stackoverflow and not sure how to do it. Maybe you can post the answer to the question and I can vote your answer up? – Eva Dec 09 '16 at 15:18

4 Answers4

4

The comment seems to have been deleted but it was pointed out that the function in rollapply in the code in the question was not using the argument passed to it. After fixing that and making some other minor improvements, this returns the intercept and the slope in columns 1 and 2 respectively.

library(zoo)

Coef <- function(Z) coef(lm(y ~ t, as.data.frame(Z)))    
rollapplyr(zoo(DataExample), 5, Coef, by.column = FALSE)
G. Grothendieck
  • 254,981
  • 17
  • 203
  • 341
  • As shared by Jot eN [here](https://stackoverflow.com/a/39820005/2444948), `lm` is pretty slow. Use `.lm.fit` instead. For example `coef(.lm.fit(cbind(1, t), y))` for your example. Or `coef(.lm.fit(cbind(1, t), y))[2]` to get the slope. – Dorian Grv Oct 10 '17 at 11:33
4

Here a complete code to illustrate what I was meaning with the speed of .lm.fit and lm. As well as a usage with data.table.

library(zoo)
library(data.table)
library(ggplot2)
theme_set(theme_bw())
library(microbenchmark)

# function for linear regression and find the slope coefficient
rollingSlope.lm <- function(vector) {

  a <- coef(lm(vector ~ seq(vector)))[2]
  return(a)

}

rollingSlope.lm.fit <- function(vector) {

  a <- coef(.lm.fit(cbind(1, seq(vector)), vector))[2]
  return(a)

}

# create data example
test <- data.table(x = seq(100), y = dnorm(seq(100), mean=75, sd=30))
ggplot(test, aes(x, y))+ geom_point()

enter image description here

# graphics about the slope calculated
test[, ':=' (Slope.lm.fit = rollapply(y, width=5, FUN=rollingSlope.lm.fit, fill=NA),
             Slope.lm = rollapply(y, width=5, FUN=rollingSlope.lm, fill=NA))]
# change the width size
test[, ':=' (Slope.lm.fit.50 = rollapply(y, width=50, FUN=rollingSlope.lm.fit, fill=NA),
             Slope.lm.50 = rollapply(y, width=50, FUN=rollingSlope.lm, fill=NA))]
# melt data for plotting
test2 <- melt.data.table(test, measure.vars=c("Slope.lm.fit", "Slope.lm", "Slope.lm.fit.50", "Slope.lm.50"))
ggplot(test2, aes(x, value))+ geom_point(aes(color=variable))

enter image description here

# efficiency of the 2 lm
mb <- microbenchmark(lm.fit = a <- rollapply(test$y, 5, rollingSlope.lm.fit, fill=NA),
                     lm = b <- rollapply(test$y, 5, rollingSlope.lm, fill=NA))
# check if they equal
all.equal(a, b, check.attributes=FALSE)
    # TRUE
# plot results
boxplot(mb, unit="ms", notch=TRUE)

enter image description here

Dorian Grv
  • 421
  • 5
  • 9
0

This is how I would go about doing it without the zoo library

## Modified version of your function that does not rely on accessing
## variables that is external to its environment.
slopes<-function(data) { 
            z = lm(formula=y~t, data=data ); 
            z$coef ## Implicit return of last variable
}

## The number of frames to take the windowed slope of
windowsize<-4

do.call(rbind,lapply(seq(dim(data)[1]-windowsize),
                     function(x) slopes(data[x:(x+windowsize),])))

It iterates over a list from 1 to length data - windowsize subsetting data into overlapping window sizes of 4. The subsetted data is then passed to your slopes function before being bound into a single array.

0

I've tried to plot slopes as geom_segment() but I failed. At least I've got the df with different values for slope:

slope <- function(dat){
        return(data.frame(t = sprintf("[%f,%f]", min(dat$t), max(dat$t)),
                          slope = lm(y~t-1, data = dat)$coef, 
                          row.names = NULL)
               )
}

mw <- function(dtf, wdth = 0.2, incr = 0.05){
        if(!nrow(dtf)){
                return(data.frame())
        }
        return(rbind(slope(dtf[dtf$t <= min(dtf$t) + wdth,]),
                mw(dtf[dtf$t >= min(dtf$t) + incr,])
                )
        )
}


slp <- mw(dtf)
head(slp)
tail(slp)

#                    t     slope
#  1 [0.000000,0.200000] 20.180000
#  2 [0.050000,0.250000] 16.498182
#  3 [0.100000,0.300000] 13.433333
#  4 [0.200000,0.400000]  9.554737
#  5 [0.250000,0.450000]  8.299608
#  6 [0.300000,0.500000]  7.340606
#    ...
#175 [9.900000,10.100000] 0.3049778
#176 [10.000000,10.200000] 0.3017733
#177 [10.050000,10.250000] 0.3002829
#178 [10.150000,10.300000] 0.2982748
#179 [10.250000,10.300000] 0.2958620
#180 [10.300000,10.300000] 0.2951456
utubun
  • 4,400
  • 1
  • 14
  • 17