0

I have a panel data set of country years. I would like to calculate time since event, as well as get a running total of events per country which I can decay over time. I am using the timeSinceEvent function in the doBy package, which returns a data frame which has the values that I want, but I am having trouble applying this to my main df.

structure(list(ccode.a = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 
41L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 42L, 
42L, 42L, 42L, 42L, 42L), year = c(1975, 1976, 1977, 1978, 1979, 
1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 
1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 
2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 1977, 1978, 
1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 
1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 
2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 1977, 
1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 
1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 
2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 1976, 
1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 
1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 
1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 1975, 
1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 
1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 
1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 
1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 
1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004), onset.a = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("ccode.a", "year", 
"onset.a"), row.names = c(NA, 200L), class = "data.frame")

I have tried using this:

# Compute the timeSinceEvent table for one country's rows and re-attach its ID.
# x: the rows of the panel for a single ccode.a; needs columns onset.a and year,
#    with the country ID as the first column.
# NOTE(review): per the answers below, timeSinceEvent returns NULL when the
# subset contains no onset, in which case only the ID column comes back.
last.step <- function(x) {
  temp <- timeSinceEvent(x$onset.a, x$year)
  cbind(x[,1],temp) # timeSinceEvent drops the country ID, so add it back in front
}
# Apply last.step to each country's rows and row-bind the per-country pieces.
result <- do.call(
  what = "rbind",
  args = by(data = data, INDICES = data$ccode.a, FUN = last.step)
)

As well as

# Run timeSinceEvent separately on each country's rows. The callback has to use
# its argument x (the per-country subset); referring to the full data frame
# here would recompute the same whole-panel result for every country.
test <- by(data, data$ccode.a, function(x) timeSinceEvent(x$onset.a, x$year))

To little avail. I stepped through the function, and it seems to be doing what I want, but I guess there is a problem in the way that I am calling it?

Zach
  • 996
  • 12
  • 25

3 Answers

1

It seems to me the problem is simply that there are no events for ccode.a==20 and so timeSinceEvent returns NULL when applied to that subset. This means that last.step returns data frames of different dimension for the two ccode.as and thus the rbind fails.

Not exactly a solution, but perhaps better understanding where the problem lies already helps.

RoyalTS
  • 9,545
  • 12
  • 60
  • 101
  • So it is the case that some of the cross sectional units don't have any onsets, but of course many do. I don't understand why the returned df would have different dimensions though. They should match number of years in the df I pass to `last.step` just have missing values for some of the variables returned by `timeSinceEvent`. Unless I misunderstand.. – Zach Jun 18 '12 at 22:30
  • It's not the number of rows that's different, it's the number of columns. For the first country in your example `last.step` returns a data frame with 9 columns (because `timeSinceEvent` inside the function returns a data frame with 8 columns) whereas for the second country `last.step` returns a data frame with only 1 column (because `timeSinceEvent` inside the function returns `NULL`). And because the number of columns does not match `rbind` will not work. – RoyalTS Jun 18 '12 at 22:47
  • Ah okay, that makes more sense. I was just testing the function on `ID` codes that happened to have at least 1 onset. Thanks. – Zach Jun 19 '12 at 02:47
1

Since there are empty columns, you should use rbind.fill() from the plyr package. It fills the columns that are empty with NA.

# Same per-country helper as in the question: run timeSinceEvent on one
# country's rows and put the country ID (first column of x) back in front.
# x: the rows of the panel for a single ccode.a; needs columns onset.a and year.
# NOTE(review): per the first answer, timeSinceEvent returns NULL for a country
# without any onset, so that piece then consists of the ID column only.
last.step <- function(x) {
  temp <- timeSinceEvent(x$onset.a, x$year)
  cbind(x[,1],temp) # timeSinceEvent drops the country ID, so add it back in front
}
# Apply last.step per country, then stack the pieces with plyr's rbind.fill
# instead of rbind so that pieces with differing columns can be combined.
result <- do.call(
  what = rbind.fill,
  args = by(data = data, INDICES = data$ccode.a, FUN = last.step)
)

However, this won't return the "empty" pieces, i.e. the ones that contain only x[,1]. It will only rbind the pieces that contain a data.frame. I don't know whether this is the behaviour you expect or want.

Luciano Selzer
  • 9,806
  • 3
  • 42
  • 40
  • This looks like what I am trying to do, but what is going on with the `if` statement? This is the desired behavior though. – Zach Jun 19 '12 at 02:42
  • I'm sorry I was going to add an if but then I thought it wasn't needed but forgot to delete it. Edited – Luciano Selzer Jun 19 '12 at 11:35
  • I got this error `Error in sss[ii, 2]:sss[ii, 3] : NA/NaN argument` – Zach Jun 19 '12 at 12:53
  • I also tried it with `if(temp != NULL)` in the line above the `cbind` and got the error `Error in matrix(unlist(value, recursive = FALSE, use.names = FALSE), nrow = nr, : length of 'dimnames' [2] not equal to array extent` – Zach Jun 19 '12 at 13:04
  • @Zach Using the same data you provided? – Luciano Selzer Jun 19 '12 at 17:14
  • Both the data I provided and the main DF. – Zach Jun 19 '12 at 18:45
0

Ended up having to modify timeSinceEvent in the doBy package a bit. Here is the final code that worked. Kudos to lselzer for pointing out rbind.fill in plyr and RoyalTS for pointing out that timeSinceEvent returns null when the yvar argument is all zeros.

# Time elapsed since the most recent event for one series (one panel unit).
#
# A modified version of timeSinceEvent from the doBy package that always
# returns a data frame, including when yvar contains no event at all or is
# empty.
#
# Args:
#   yvar: numeric or logical event indicator (intended as 0/1 or FALSE/TRUE).
#         NA entries are treated as 0, i.e. "no event".
#   tvar: time stamps, one per element of yvar; defaults to seq_along(yvar).
#
# Returns a data frame with one row per observation and the columns
#   yvar - the indicator with NA replaced by 0,
#   tvar - the time stamps as supplied,
#   run  - running total of yvar (NA before the first event),
#   tae  - tvar minus the tvar of the row where the current run value first
#          appeared, i.e. time after the latest event (NA before the first).
#
# Stops with an error if yvar is neither numeric nor logical, or if tvar and
# yvar differ in length.
panel.tse <- function(yvar, tvar = seq_along(yvar)) {
  if (!(is.numeric(yvar) || is.logical(yvar))) {
    stop("yvar must be either numeric or logical")
  }
  if (length(tvar) != length(yvar)) {
    stop("tvar must have the same length as yvar")
  }
  yvar[is.na(yvar)] <- 0
  run <- cumsum(yvar)
  # match(run, run) yields, for every row, the index of the first row with the
  # same running total; that row is where the current spell started.
  tae <- tvar - tvar[match(run, run)]
  # Rows before the first event have no "previous event" to measure from.
  before_first <- run == 0
  tae[before_first] <- NA
  run[before_first] <- NA
  data.frame(yvar = yvar, tvar = tvar, run = run, tae = tae)
}

# Build the panel.tse table for one country's rows and put the country ID
# (the first column of x) back in front, since panel.tse does not carry it.
# x: the rows of the panel for a single ccode.a; needs columns onset.a and year.
# NOTE: the ID is passed to cbind unnamed, so that column is labelled "x[, 1]".
last.step <- function(x) {
  cbind(x[, 1], panel.tse(yvar = x$onset.a, tvar = x$year))
}

# Apply last.step to each country's rows and stack the resulting data frames
# with rbind.fill from the plyr package.
result <- do.call(
  what = rbind.fill,
  args = by(data = data, INDICES = data$ccode.a, FUN = last.step)
)
Zach
  • 996
  • 12
  • 25