2

Data:

 DB <- data.frame(orderID  = c(1,2,3,4,5,6,7,8,9,10),     
   orderDate = c("1.1.14","8.4.14","17.4.14","29.3.12","29.7.14", 
        "2.8.14","21.9.14","4.10.14","30.11.14","9.4.06"),  

Expected outcome [Hope I counted the number of days right]:

orderDuringPresentShoppingWeekseasternpast =c("No", "Yes", "Yes", "Yes", "No", "No", "No", "No", "No", "Yes") 

Hi guys,

I think I have now the most complex/difficult question I asked till now:but perhaps someone is smarter than me and can solve the prob in a min :)

I have different time spans with dates in it for the public holiday eastern. But not only for this year-also for the past 10. As everybody knows eastern is on a different date every year: so I can´t fix it on a specific date for every year.

1.I want to give out a "yes" if the order happend on the easterdays or in a span of 14days before easter sunday during the last years and a "no" if not. I already made some timespans for the past 10years:

spanEasternpast
 [1] 2015-03-22 UTC--2015-04-05 UTC 2014-04-06 UTC--2014-04-20 UTC 2013-03-17 UTC--2013-03-31 UTC 2012-03-25 UTC--2012-04-08 UTC
 [5] 2011-04-10 UTC--2011-04-24 UTC 2010-03-21 UTC--2010-04-04 UTC 2009-03-29 UTC--2009-04-12 UTC 2008-03-09 UTC--2008-03-23 UTC
 [9] 2007-03-25 UTC--2007-04-08 UTC 2006-04-02 UTC--2006-04-16 UTC 2005-03-13 UTC--2005-03-27 UTC

Tried it already like this, but it´s not working:

Easternpast <- Easter(currentYear:(currentYear -10))
spanEasternpast <- new_interval (ymd(Easternpast-ddays(14)), ymd(Easternpast)) 

spanEasternpast
 [1] 2015-03-22 UTC--2015-04-05 UTC 2014-04-06 UTC--2014-04-20 UTC 2013-03-17 UTC--2013-03-31 UTC 2012-03-25 UTC--2012-04-08 UTC
 [5] 2011-04-10 UTC--2011-04-24 UTC 2010-03-21 UTC--2010-04-04 UTC 2009-03-29 UTC--2009-04-12 UTC 2008-03-09 UTC--2008-03-23 UTC
 [9] 2007-03-25 UTC--2007-04-08 UTC 2006-04-02 UTC--2006-04-16 UTC 2005-03-13 UTC--2005-03-27 UTC (so this part with the right span is working)

DB$orderDuringPresentShoppingWeekseasternpast  <- ifelse(DB$orderDate%within%spanEasternpast == TRUE, "Yes", "No")

Hope your able to show me what´s wrong or show me another posibility to solve the prob....

Cheers and THX!

Henrik
  • 65,555
  • 14
  • 143
  • 159
AbsoluteBeginner
  • 485
  • 4
  • 13

2 Answers2

2

Here's another possibility using foverlaps in package data.table.

library(timeDate)
library(data.table)

# first, some dummy data
# create easter interval for 2000-2002
easter <- data.table(start = as.Date(Easter(2000:2002, -14)),
                     end = as.Date(Easter(2000:2002)))
#         start        end
# 1: 2000-04-09 2000-04-23
# 2: 2001-04-01 2001-04-15
# 3: 2002-03-17 2002-03-31

# set key for overlap join
setkey(easter)    

# create some order dates
# 3 dates before (>14 days) easter holiday
# 3 dates during holiday
set.seed(1)
order <- data.table(order_date = as.Date(Easter(2000:2002)) + sample(c(-17:-15, -2:0)))

# create an 'end date' for the order_date
order[, order_date2 := order_date]

# overlap join
# use nomatch = NA (default) to keep track of dates within and outside holiday period
# convert NA to "No" and non-NA to "Yes" using vector indexing
# remove columns (I deliberately kept start and end just to check the join)
order <- foverlaps(x = order, y = easter, by.x = names(order),
                   type = "within", mult = "all", nomatch = NA)[
                     , easter_order := c("Yes", "No")[as.integer(is.na(end)) + 1]][
                       , order_date2 := NULL]
order   
#         start        end order_date easter_order
# 1: 2000-04-09 2000-04-23 2000-04-23          Yes
# 2: 2001-04-01 2001-04-15 2001-04-13          Yes
# 3:       <NA>       <NA> 2002-03-16           No
# 4:       <NA>       <NA> 2000-04-06           No
# 5: 2001-04-01 2001-04-15 2001-04-14          Yes
# 6:       <NA>       <NA> 2002-03-15           No

Please refer to this nice answer by @Arun, where foverlaps is described more thoroughly.

Update following comment from OP
Match dates against both easter and christmas holidays

# create intervals for easter and christmas holidays 2000-2002
holiday <- data.table(start = c(as.Date(Easter(2000:2002, -14)),
                               as.Date(ChristmasDay(year = 2000:2002)) - 14),
                     end = c(as.Date(Easter(2000:2002)),
                             as.Date(ChristmasDay(year = 2000:2002))))
# holiday                     

# set key for overlap join
setkey(holiday)

# create some order dates
# 3 dates before (>14 days) and 3 during easter holiday
# 3 dates before (>14 days) and 3 during christmas holiday    
set.seed(1)
order <- data.table(order_date = c(as.Date(Easter(2000:2002)) + sample(c(-17:-15, -2:0)),
                    as.Date(ChristmasDay(2000:2002)) + sample(c(-17:-15, -2:0))))

# create a 'end' date for the order
order[, order_date2 := order_date]

# overlap join
# use nomatch = NA (default) to keep track of dates within and outside holiday period
# convert NA to "No" and non-NA to "Yes"
# remove columns (I deliberately kept start and end just to check the join)
order <- foverlaps(x = order, y = holiday, by.x = names(order),
                   type = "within", mult = "all", nomatch = NA)[
                     , holiday_order := c("Yes", "No")[as.integer(is.na(end)) + 1]][
                       , order_date2 := NULL]
order

#          start        end order_date holiday_order
# 1:        <NA>       <NA> 2000-04-07            No
# 2:  2001-04-01 2001-04-15 2001-04-15           Yes
# 3:        <NA>       <NA> 2002-03-16            No
# 4:  2000-04-09 2000-04-23 2000-04-21           Yes
# 5:        <NA>       <NA> 2001-03-29            No
# 6:  2002-03-17 2002-03-31 2002-03-30           Yes
# 7:  2000-12-11 2000-12-25 2000-12-25           Yes
# 8:  2001-12-11 2001-12-25 2001-12-23           Yes
# 9:        <NA>       <NA> 2002-12-10            No
# 10:       <NA>       <NA> 2000-12-08            No
# 11: 2001-12-11 2001-12-25 2001-12-24           Yes
# 12:       <NA>       <NA> 2002-12-09            No
Community
  • 1
  • 1
Henrik
  • 65,555
  • 14
  • 143
  • 159
  • Nice:but whats about christmas? I´m not able to get create a christmas intervall in the same way:can U help me? – AbsoluteBeginner Feb 24 '15 at 16:11
  • There is no `shift` argument in `Christmas`, so instead you may substract 14 after you have converted to `Date`. You can combine `Easter` and `Christmas` dates like this: `start = c(as.Date(Easter(2000:2002, -14)), as.Date(ChristmasDay(year = 2000:2002)) - 14)`; `end = c(as.Date(Easter(2000:2002)), as.Date(ChristmasDay(year = 2000:2002)))`. – Henrik Feb 24 '15 at 19:36
  • The creation of a second data table is working- but I have this error: – AbsoluteBeginner Feb 25 '15 at 16:34
  • Error in foverlaps(x = mydata$orderDate, y = presentBuyingDay, by.x = names(orderDate), : y and x must both be data.tables. Use `setDT()` to convert list/data.frames to data.tables by reference or as.data.table() to convert to data.tables by copying. > – AbsoluteBeginner Feb 25 '15 at 16:35
  • x=order here is order y=easter – AbsoluteBeginner Feb 25 '15 at 16:36
  • @AbsoluteBeginner Please see updated answer where some sample order dates are matched against both easter and christmas holidays. – Henrik Feb 25 '15 at 20:57
0
library(lubridate)
library(timeDate) # For function Easter() above

DB$orderDuringPresentShoppingWeekeasternpast <- apply(sapply(dmy(DB$orderDate), function(x) x %within% spanEasternpast), 2, any)

Why this works...consider two steps:

sapply(dmy(DB$orderDate), function(x) x %within% spanEasternpast)

#        [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]
#  [1,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#  [2,] FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#  [3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#  [4,] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
#  [5,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#  [6,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#  [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#  [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#  [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [10,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
# [11,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

Then check by column if anything is TRUE via apply(x, margin=2, ...)

JasonAizkalns
  • 20,243
  • 8
  • 57
  • 116