7

I wish to obtain the mean date by row, where each row contains two dates. Eventually I found a way, posted below. However, the approach I used seems rather cumbersome. Is there a better way?

my.data = read.table(text = "
     OBS  MONTH1  DAY1  YEAR1  MONTH2  DAY2  YEAR2   STATE
       1       3     6   2012       3    10   2012       1
       2       3    10   2012       3    20   2012       1
       3       3    16   2012       3    30   2012       1
       4       3    20   2012       4     8   2012       1
       5       3    20   2012       4     9   2012       1
       6       3    20   2012       4    10   2012       1
       7       3    20   2012       4    11   2012       1
       8       4     4   2012       4     5   2012       1
       9       4     6   2012       4     6   2012       1
      10       4     6   2012       4     7   2012       1
", header = TRUE, stringsAsFactors = FALSE)
my.data

my.data$MY.DATE1 <- do.call(paste, list(my.data$MONTH1, my.data$DAY1, my.data$YEAR1))
my.data$MY.DATE2 <- do.call(paste, list(my.data$MONTH2, my.data$DAY2, my.data$YEAR2))

my.data$MY.DATE1 <- as.Date(my.data$MY.DATE1, format=c("%m %d %Y"))
my.data$MY.DATE2 <- as.Date(my.data$MY.DATE2, format=c("%m %d %Y"))
my.data

desired.result = read.table(text = "
   OBS MONTH1 DAY1 YEAR1 MONTH2 DAY2 YEAR2 STATE   MY.DATE1   MY.DATE2    mean.date
    1      3     6  2012      3   10  2012     1 2012-03-06 2012-03-10   2012-03-08
    2      3    10  2012      3   20  2012     1 2012-03-10 2012-03-20   2012-03-15
    3      3    16  2012      3   30  2012     1 2012-03-16 2012-03-30   2012-03-23
    4      3    20  2012      4    8  2012     1 2012-03-20 2012-04-08   2012-03-29
    5      3    20  2012      4    9  2012     1 2012-03-20 2012-04-09   2012-03-30
    6      3    20  2012      4   10  2012     1 2012-03-20 2012-04-10   2012-03-30
    7      3    20  2012      4   11  2012     1 2012-03-20 2012-04-11   2012-03-31
    8      4     4  2012      4    5  2012     1 2012-04-04 2012-04-05   2012-04-04
    9      4     6  2012      4    6  2012     1 2012-04-06 2012-04-06   2012-04-06
   10      4     6  2012      4    7  2012     1 2012-04-06 2012-04-07   2012-04-06
", header = TRUE, stringsAsFactors = FALSE)

Here is the approach that worked for me:

my.data$mean.date <- (my.data$MY.DATE1 + ((my.data$MY.DATE2 - my.data$MY.DATE1) / 2))
my.data

These approaches did not work:

my.data$mean.date <- mean(my.data$MY.DATE1, my.data$MY.DATE2)
my.data$mean.date <- mean(my.data$MY.DATE1, my.data$MY.DATE2, trim = 0)
my.data$mean.date <- mean(my.data$MY.DATE1, my.data$MY.DATE2, trim = 1)
my.data$mean.date <- mean(my.data$MY.DATE1, my.data$MY.DATE2, trim = 0.5)
my.data$mean.data <- apply(my.data, 1, function(x) {(x[9] + x[10]) / 2})

I think I am supposed to use the Ops.Date command, but have not found an example.

Thank you for any suggestions.

Mark Miller
  • 12,483
  • 23
  • 78
  • 132

7 Answers7

7

Keep things simple and use mean.Date in base R.

mean.Date(as.Date(c("01-01-2014", "01-07-2014"), format=c("%m-%d-%Y"))) 
[1] "2014-01-04"
JasonAizkalns
  • 20,243
  • 8
  • 57
  • 116
2

Maybe something like that?

library(data.table)
setDT(my.data)[, `:=`(MY.DATE1 = as.Date(paste(DAY1 ,MONTH1, YEAR1), format = "%d %m %Y"),
                      MY.DATE2 = as.Date(paste(DAY2 ,MONTH2, YEAR2), format = "%d %m %Y"))][, 
                      mean.date := MY.DATE2 - ceiling((MY.DATE2 - MY.DATE1)/2)]

my.data
#     OBS MONTH1 DAY1 YEAR1 MONTH2 DAY2 YEAR2 STATE   MY.DATE1   MY.DATE2  mean.date
#  1:   1      3    6  2012      3   10  2012     1 2012-03-06 2012-03-10 2012-03-08
#  2:   2      3   10  2012      3   20  2012     1 2012-03-10 2012-03-20 2012-03-15
#  3:   3      3   16  2012      3   30  2012     1 2012-03-16 2012-03-30 2012-03-23
#  4:   4      3   20  2012      4    8  2012     1 2012-03-20 2012-04-08 2012-03-29
#  5:   5      3   20  2012      4    9  2012     1 2012-03-20 2012-04-09 2012-03-30
#  6:   6      3   20  2012      4   10  2012     1 2012-03-20 2012-04-10 2012-03-30
#  7:   7      3   20  2012      4   11  2012     1 2012-03-20 2012-04-11 2012-03-31
#  8:   8      4    4  2012      4    5  2012     1 2012-04-04 2012-04-05 2012-04-04
#  9:   9      4    6  2012      4    6  2012     1 2012-04-06 2012-04-06 2012-04-06
# 10:  10      4    6  2012      4    7  2012     1 2012-04-06 2012-04-07 2012-04-06

Or if you insist on using mean.date, here's alternative solution:

library(data.table)
setDT(my.data)[, `:=`(MY.DATE1 = as.Date(paste(DAY1 ,MONTH1, YEAR1), format = "%d %m %Y"),
                      MY.DATE2 = as.Date(paste(DAY2 ,MONTH2, YEAR2), format = "%d %m %Y"))][, 
                      mean.date := mean.Date(c(MY.DATE1, MY.DATE2)), by = OBS]
David Arenburg
  • 91,361
  • 17
  • 137
  • 196
  • @Spacedman, yeah you are right, I must have created previously. I've changed it to a `data.table` solution and got over with it though. Not sure if it worth the effort with the pipomania lately – David Arenburg Oct 27 '14 at 14:44
2

Using the good advice of @ jaysunice3401, I came up with this. If you want to keep the original data, you can add remove = FALSE in the two lines with unite

library(dplyr)
library(tidyr)

my.data %>%
    unite(whatever1, matches("1"), sep = "-") %>%
    unite(whatever2, matches("2"), sep = "-") %>%
    mutate_each(funs(as.Date(., "%m-%d-%Y")), contains("whatever")) %>%
    rowwise %>%
    mutate(mean.date = mean.Date(c(whatever1, whatever2)))

#   OBS  whatever1  whatever2 STATE  mean.date
#1    1 2012-03-06 2012-03-10     1 2012-03-08
#2    2 2012-03-10 2012-03-20     1 2012-03-15
#3    3 2012-03-16 2012-03-30     1 2012-03-23
#4    4 2012-03-20 2012-04-08     1 2012-03-29
#5    5 2012-03-20 2012-04-09     1 2012-03-30
#6    6 2012-03-20 2012-04-10     1 2012-03-30
#7    7 2012-03-20 2012-04-11     1 2012-03-31
#8    8 2012-04-04 2012-04-05     1 2012-04-04
#9    9 2012-04-06 2012-04-06     1 2012-04-06
#10  10 2012-04-06 2012-04-07     1 2012-04-06
jazzurro
  • 23,179
  • 35
  • 66
  • 76
1

One-liner (split for readability), uses lubridate and dplyr and (of course) pipes:

> require(lubridate)
> require(dplyr)
> my.data =  my.data %>% 
    mutate(
      MY.DATE1=as.Date(mdy(paste(MONTH1,DAY1,YEAR1))),
      MY.DATE2=as.Date(mdy(paste(MONTH2,DAY2,YEAR2)))) %>% 
    rowwise %>%
    mutate(mean.data=mean.Date(c(MY.DATE1,MY.DATE2))) %>% data.frame()
> head(my.data)
  OBS MONTH1 DAY1 YEAR1 MONTH2 DAY2 YEAR2 STATE   MY.DATE1   MY.DATE2
1   1      3    6  2012      3   10  2012     1 2012-03-06 2012-03-10
2   2      3   10  2012      3   20  2012     1 2012-03-10 2012-03-20
3   3      3   16  2012      3   30  2012     1 2012-03-16 2012-03-30
4   4      3   20  2012      4    8  2012     1 2012-03-20 2012-04-08
5   5      3   20  2012      4    9  2012     1 2012-03-20 2012-04-09
6   6      3   20  2012      4   10  2012     1 2012-03-20 2012-04-10
   mean.data
1 2012-03-08
2 2012-03-15
3 2012-03-23
4 2012-03-29
5 2012-03-30
6 2012-03-30

As an afterthought, if you like pipes, you can put a pipe in your pipe so you can pipe while you pipe - rewriting the first mutate step thus:

my.data %>% mutate(
  MY.DATE1 = paste(MONTH1,DAY1,YEAR1) %>% mdy %>% as.Date,
  MY.DATE2 = paste(MONTH2,DAY2,YEAR2) %>% mdy %>% as.Date)
Spacedman
  • 92,590
  • 12
  • 140
  • 224
1

1) Create Date class columns and then its easy. No external packages are used:

asDate <- function(x) as.Date(x, "1970-01-01")

my.data2 <- transform(my.data, 
   date1 = as.Date(ISOdate(YEAR1, MONTH1, DAY1)),
   date2 = as.Date(ISOdate(YEAR2, MONTH2, DAY2))
)
transform(my.data2, mean.date = asDate(rowMeans(cbind(date1, date2))))

If we did add a library(zoo) call then we could omit the asDate definition using as.Date in the last line instead of asDate since zoo adds a default origin to as.Date.

1a) A dplyr version would look like this (using asDate from above):

library(dplyr)

my.data %>%
  mutate(
     date1 = ISOdate(YEAR1, MONTH1, DAY1) %>% as.Date,
     date2 = ISOdate(YEAR2, MONTH2, DAY2) %>% as.Date,
     mean.date = cbind(date1, date2) %>% rowMeans %>% asDate)

2) Another way uses julian in the chron package. julian converts a month/day/year to the number of days since the Epoch. We can average the two julians and convert back to Date class:

library(zoo)
library(chron)

transform(my.data, 
  mean.date = as.Date( ( julian(MONTH1,DAY1,YEAR1) + julian(MONTH2,DAY2,YEAR2) )/2 ) 
)

We could omit library(zoo) if we used asDate from (1) in place of as.Date.

Update Discussed use of zoo to shorten the solutions and made further reductions in solution (1).

G. Grothendieck
  • 254,981
  • 17
  • 203
  • 341
  • 1
    Neat solution, but I have to downvote you for not using pipes. j/k – Spacedman Oct 27 '14 at 15:03
  • Sorry, I have added `library(zoo)` which is needed for them to work. Without that they can still work if you add `origin = "1970-10-01"` to the last `as.Date` call in each answer. – G. Grothendieck Oct 27 '14 at 15:18
0

what about :

apply(my.data[,c("MY.DATE1","MY.DATE2")],1,function(date){substr(strptime(mean(c(strptime(date[1],"%y%y-%m-%d"),strptime(date[2],"%y%y-%m-%d"))),format="%y%y-%m-%d"),1,10)})

? (I just had to use substr because of CET and CEST that put my output as a list...)

Cath
  • 23,906
  • 5
  • 52
  • 86
0

This is a vectorized version of the answer posted by jaysunice3401. It seems fairly straight-forward, except that I had to use trial-and-error to identify the correct origin. I do not know how general origin = "1970-01-01" is or whether a different origin would have to be specified with each data set.

According to this website: http://www.ats.ucla.edu/stat/r/faq/dates.htm

When R looks at dates as integers, its origin is January 1, 1970.

Which seems to suggest that origin = "1970-01-01" is fairly general. Although, if I had dates prior to "1970-01-01" in my data set I would definitely test the code before using it.

my.data = read.table(text = "
     OBS  MONTH1  DAY1  YEAR1  MONTH2  DAY2  YEAR2   STATE
       1       3     6   2012       3    10   2012       1
       2       3    10   2012       3    20   2012       1
       3       3    16   2012       3    30   2012       1
       4       3    20   2012       4     8   2012       1
       5       3    20   2012       4     9   2012       1
       6       3    20   2012       4    10   2012       1
       7       3    20   2012       4    11   2012       1
       8       4     4   2012       4     5   2012       1
       9       4     6   2012       4     6   2012       1
      10       4     6   2012       4     7   2012       1
", header = TRUE, stringsAsFactors = FALSE)

desired.result = read.table(text = "
   OBS MONTH1 DAY1 YEAR1 MONTH2 DAY2 YEAR2 STATE   MY.DATE1   MY.DATE2    mean.date
    1      3     6  2012      3   10  2012     1 2012-03-06 2012-03-10   2012-03-08
    2      3    10  2012      3   20  2012     1 2012-03-10 2012-03-20   2012-03-15
    3      3    16  2012      3   30  2012     1 2012-03-16 2012-03-30   2012-03-23
    4      3    20  2012      4    8  2012     1 2012-03-20 2012-04-08   2012-03-29
    5      3    20  2012      4    9  2012     1 2012-03-20 2012-04-09   2012-03-30
    6      3    20  2012      4   10  2012     1 2012-03-20 2012-04-10   2012-03-30
    7      3    20  2012      4   11  2012     1 2012-03-20 2012-04-11   2012-03-31
    8      4     4  2012      4    5  2012     1 2012-04-04 2012-04-05   2012-04-04
    9      4     6  2012      4    6  2012     1 2012-04-06 2012-04-06   2012-04-06
   10      4     6  2012      4    7  2012     1 2012-04-06 2012-04-07   2012-04-06
", header = TRUE, stringsAsFactors = FALSE)

my.data$MY.DATE1 <- do.call(paste, list(my.data$MONTH1,my.data$DAY1,my.data$YEAR1))
my.data$MY.DATE2 <- do.call(paste, list(my.data$MONTH2,my.data$DAY2,my.data$YEAR2))

my.data$MY.DATE1 <- as.Date(my.data$MY.DATE1, format=c("%m %d %Y"))
my.data$MY.DATE2 <- as.Date(my.data$MY.DATE2, format=c("%m %d %Y"))

my.data$mean.date2 <- as.Date( apply(my.data, 1, function(x) {

                      mean.Date(c(as.Date(x['MY.DATE1']), as.Date(x['MY.DATE2'])))

                      }) , origin = "1970-01-01")
my.data

desired.result
Mark Miller
  • 12,483
  • 23
  • 78
  • 132