1

I have time-series data that starts and ends partway through the calendar year, and most fill functions (such as pad from the padr package) only fill gaps between the first and last dates in the data. However, I need a complete annual record. For example, if my data starts on 2016-01-03, the desired result is to extend the time series back to the beginning of that year; likewise, if the data ends before the end of the year, the series should be extended forward to the end of that year. NA would be used to fill the added dates.

A solution that works on data with multiple sites is appreciated, hence the example below.

library(dplyr)
library(padr)

# Example dataset: two sites with daily records in different years.
# Rows 2 through 6 of each site are dropped to leave a gap in the series.

# Build one site's daily record with uniform random values between lo and hi,
# then remove rows 2-6.
make_site <- function(name, from, to, lo, hi) {
  days <- seq(as.Date(from), as.Date(to), by = "day")
  values <- runif(length(days), min = lo, max = hi)
  full <- data.frame(site = name, date = days, x = values)
  full[-(2:6), ]
}

df <- rbind(
  make_site("site_1", "2016-01-03", "2016-12-09", 20, 40),
  make_site("site_2", "2012-06-01", "2012-10-25", 30, 40)
)

The attempt below results in the error "start value is larger than the end value for all groups". I think the issue is that it's not grouping.

# Attempt: pad every site in a single pad() call. The call is given one
# start_val and one end_val, both computed from the combined data frame:
#   start_val: df[1, 2] is the first row's date (site_1, 2016-01-03), floored
#              to the start of its year -> 2016-01-01.
#   end_val:   df[length(df$date), 2] is the last row's date (site_2,
#              2012-10-25), rounded to the nearest year boundary (2013-01-01)
#              and moved back one day -> 2012-12-31.
# start_val therefore falls after end_val, which matches the reported error.
# NOTE(review): floor_date() and round_date() are lubridate functions, and the
# library() calls above load only dplyr and padr -- confirm lubridate is attached.
dfpad<-df%>%   
pad(group ='site',start_val=floor_date(df[1,2],unit="year"),
 end_val=(round_date(df[length(df$date),2], unit="year")-1))

Desired outcome

# Desired result for the first ten days of 2016: every calendar day from
# 1 January is present, and days without an observation hold a missing value.
# NA is written unquoted so that x stays numeric; the quoted string "NA" would
# turn the whole column into character text.
dfgoal <- data.frame(
  date = seq(as.Date("2016-01-01"), as.Date("2016-01-10"), by = "day"),
  x = c(NA, NA, 21, NA, NA, NA, NA, NA, 20, 22)
)
head(dfgoal, 10)

DAY
  • 91
  • 6

1 Answers1

1

This solution uses a for loop to pad each site separately, so that each site gets its own start and end dates.

Original Data

library(dplyr)
library(padr)
library(lubridate) 

# Example dataset: two sites with daily records in different years.
# Rows 2 through 6 of each site are dropped to leave a gap in the series.

# Build one site's daily record with uniform random values between lo and hi,
# then remove rows 2-6.
make_site <- function(name, from, to, lo, hi) {
  days <- seq(as.Date(from), as.Date(to), by = "day")
  values <- runif(length(days), min = lo, max = hi)
  full <- data.frame(site = name, date = days, x = values)
  full[-(2:6), ]
}

df <- rbind(
  make_site("site_1", "2016-01-03", "2016-12-09", 20, 40),
  make_site("site_2", "2012-06-01", "2012-10-25", 30, 40)
)

Solution

# Pad each site's record out to complete calendar years, with NA in x for
# every date that has no observation.
# pad() is given one start_val and one end_val per call, so each site is
# padded on its own with bounds taken from that site's dates.
sites_a <- as.vector(unique(df$site))

# Collect one padded data frame per site and combine them once at the end,
# instead of growing a data frame inside the loop.
padded_sites <- vector("list", length(sites_a))

for (i in seq_along(sites_a)) {
  site1a <- subset(df, site == sites_a[i])

  # Take the earliest and latest dates explicitly so the bounds do not depend
  # on the rows being sorted.
  first_year <- year(min(site1a$date))
  last_year <- year(max(site1a$date))

  # Build 1 January and 31 December directly. Rounding the last date to the
  # nearest year and subtracting a day lands in the previous year whenever
  # the record ends in the first half of a year.
  siteresult <- site1a %>%
    pad(
      start_val = make_date(first_year, 1, 1),
      end_val = make_date(last_year, 12, 31)
    )

  # Rows added by pad() have no site label; every row in this subset belongs
  # to the same site, so label them all.
  siteresult$site <- sites_a[i]
  padded_sites[[i]] <- siteresult
}

contiga_df <- bind_rows(padded_sites)
DAY
  • 91
  • 6