I have a dataset as follows:
19/9/1997 22/9/1997 23/9/1997 24/9/1997 25/9/1997 26/9/1997 29/9/1997 30/9/1997
35440 35471 35499 35591 35621 35652 35683 35713
13/10/1997 14/10/1997 15/10/1997 16/10/1997 17/10/1997 20/10/1997 21/10/1997 22/10/1997 23/10/1997 24/10/1997 27/10/1997 28/10/1997 29/10/1997 30/10/1997 31/10/1997
35500 35531 35561 35592 35622 35714 35745 35775
13/11/1997 14/11/1997 17/11/1997 18/11/1997 19/11/1997 20/11/1997 21/11/1997 24/11/1997 ...
The Data that should be here are (for reproduction as requested) 19/9/1997 22/9/1997 23/9/1997 24/9/1997 25/9/1997 26/9/1997 29/9/1997 30/9/1997 10/01/1997 10/02/1997 10/03/1997 10/06/1997 10/07/1997 10/08/1997 10/09/1997 10/10/1997 13/10/1997 14/10/1997 15/10/1997 16/10/1997 17/10/1997 20/10/1997 21/10/1997 22/10/1997 23/10/1997 24/10/1997 27/10/1997 28/10/1997 29/10/1997 30/10/1997 31/10/1997 11/03/1997 11/04/1997 11/05/1997 11/06/1997 11/07/1997 11/10/1997 11/11/1997 11/12/1997 13/11/1997 14/11/1997 17/11/1997 18/11/1997 19/11/1997 20/11/1997 21/11/1997 24/11/1997
I have 5,149 rows of dates where there are numbers in places of dates. I tried fixing the missing dates with this: ATTEMPT 1 BEFORE REVISION:
rm (list = ls(all=TRUE))
graphics.off()
library(readxl)
Dates <- read_excel("F:/OneDrive - University of Tasmania/Mardi Meetings/Dataset/Dates.xlsx")
x<-Dates[,1]
library(date)
library(datetime)
ans <- Reduce(function(prev, curr) {
f1 <- as.Date(curr, "%d/%m/%Y")
f2 <- as.Date(curr, "%m/%d/%Y")
if (is.na(f1)) return(f2)
if (is.na(f2)) return(f1)
if (prev < f1 && prev < f2) return(min(f1, f2))
if (prev < f1) return(f1)
if (prev < f2) return(f2)
}, x[-1], init=as.Date(x[1], "%d/%m/%Y"), accumulate=TRUE)
as.Date(ans, origin="1970-01-01")
But I am getting the following error:
+ }, x[-1], init=as.Date(x[1], "%d/%m/%Y"), accumulate=TRUE)
Error in Reduce(function(prev, curr) { : object 'x' not found
>
> as.Date(ans, origin="1970-01-01")
Error in as.Date(ans, origin = "1970-01-01") : object 'ans' not found
Any suggestions will be highly appreciated.
OK AS PER ADVICE I REVISED THE CODE ATTEMPT 2 AFTER REVISION
> rm (list = ls(all=TRUE))
> graphics.off()
> library(readxl)
> Dates <- read_excel("F:/OneDrive - University of Tasmania/Mardi Meetings/Dataset/Dates.xlsx")
> dput(head(Dates))
structure(list(Date = c("33274", "33302", "33394", "33424", "33455",
"33486")), row.names = c(NA, -6L), class = c("tbl_df", "tbl",
"data.frame"))
> x<-Dates[[1]]
> library(date)
> library(datetime)
Attaching package: ‘datetime’
The following object is masked from ‘package:date’:
as.date
> dates <- as.Date(x, format="%d/%m/%Y")
> dput(head(dates))
structure(c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), class = "Date")
> head(dates,10)
[1] NA NA NA NA NA NA NA
[8] "1991-05-13" "1991-05-14" "1991-05-15"
As you can see I have lost the corrupted dates completely
Today on 28th I tried again
> rm (list = ls(all=TRUE))
> graphics.off()
> library(readxl)
> Dates <- read_excel("F:/OneDrive - University of Tasmania/Mardi Meetings/Dataset/Dates.xlsx")
> x<-Dates[[1]]
>
> library(date)
> library(datetime)
Attaching package: ‘datetime’
The following object is masked from ‘package:date’:
as.date
> formats <- c("%m/%d/%Y", "%d/%m/%Y", "%Y/%m/%d")
> dates <- as.Date(rep(NA, length(x)))
> for (fmt in formats) {
+ nas <- is.na(dates)
+ dates[nas] <- as.Date(as.integer(x[nas], format=fmt))
+ }
Error in as.Date.numeric(as.integer(x[nas], format = fmt)) :
'origin' must be supplied
In addition: Warning message:
In as.Date(as.integer(x[nas], format = fmt)) : NAs introduced by coercion
> dates <- as.Date(x, format="%d/%m/%Y")
> head(dates)
[1] NA NA NA NA NA NA
> head(dates, 10)
[1] NA NA NA NA NA NA NA
[8] "1991-05-13" "1991-05-14" "1991-05-15"