-1

I have files whose names contain a year. I listed all the files and extracted the year value into a variable. I want to create a column in the output file with all 365 days of that year. How can I use the variable holding the year value when building the date sequence?

The file names follow the format 'E1901.txt', 'E1902.txt', and so on. Here is the script:


# Combine the yearly files "E<year>.txt" into a single CSV whose first column
# holds the calendar date of every row.
setwd("location")
input_files <- list.files(pattern = "[.]txt$")

# Collect one data frame per file and bind them once after the loop; the
# original overwrote `rf` on every iteration and then rbind-ed the columns of
# the last data frame only.
yearly <- vector("list", length(input_files))

# seq_along() is safe when no file matches (1:length(x) would yield c(1, 0)).
for (i in seq_along(input_files)) {
  file_name <- input_files[i]
  rf <- read.csv(file_name)

  # The year comes from the file NAME (e.g. "E1901.txt" -> "1901"), not from
  # the data frame that was read from it.
  year <- sub("^E(.+)[.]txt$", "\\1", file_name)
  if (identical(year, file_name)) {
    stop("Cannot extract a year from file name: ", file_name, call. = FALSE)
  }

  # paste0() keeps the date string free of the blank that paste() inserts
  # between its arguments by default.
  date <- seq(
    as.Date(paste0(year, "-01-01")),
    as.Date(paste0(year, "-12-31")),
    by = "day"
  )

  # Leap years have 366 days; fail with a clear message if the file does not
  # contain exactly one row per day of its year.
  if (nrow(rf) != length(date)) {
    stop(
      file_name, " has ", nrow(rf), " rows but year ", year, " has ",
      length(date), " days",
      call. = FALSE
    )
  }

  # Put the date first without hard-coding the number of data columns.
  yearly[[i]] <- cbind(date = date, rf)
  cat("\n Finished processing data of ", file_name)
}

total <- do.call("rbind", yearly)
# write.csv() fixes `sep` and `col.names` itself; passing them only triggers
# warnings, so they are omitted.
write.csv(total, file = "1901-2016.csv", row.names = FALSE)

Regards,

Meloman
  • 3,558
  • 3
  • 41
  • 51
  • 1
    Welcome to StackOverflow. Please read [how to ask](https://stackoverflow.com/help/how-to-ask) and [how to make a great reproducible R example](https://stackoverflow.com/q/5963269/3250126) so that we can help you sufficiently. – loki Jun 29 '18 at 08:11

1 Answer

0

This is what I would do with my favourite tools:

library(data.table)
# All yearly text files in the working directory, and the 4-digit year
# embedded in each file name (same order as input_files).
input_files <- list.files(pattern = "[.]txt$")
years <- stringr::str_extract(input_files, "\\d{4}")

# Read every file, then stack them; file_id records the position of the
# source file in input_files because the list is unnamed.
tables <- lapply(input_files, fread)
total <- rbindlist(tables, idcol = "file_id")

# Per file: build the daily sequence for that file's year.
total[
  ,
  date := {
    first_day <- as.Date(paste0(years[file_id], "-01-01"))
    last_day <- as.Date(paste0(years[file_id], "-12-31"))
    seq(first_day, last_day, by = "day")
  },
  by = file_id
]

# The helper column is no longer needed; move the date to the front.
total[, file_id := NULL]
setcolorder(total, "date")
fwrite(total, "1901-2016.csv")

With my dummy data, the contents of `total` look like this:

             date         V1  V2 V3
    1: 1901-01-01 1901-01-01   1  G
    2: 1901-01-02 1901-01-02   2  J
    3: 1901-01-03 1901-01-03   3  O
    4: 1901-01-04 1901-01-04   4  X
    5: 1901-01-05 1901-01-05   5  F
   ---                             
42365: 2016-12-27 2016-12-27 362  F
42366: 2016-12-28 2016-12-28 363  P
42367: 2016-12-29 2016-12-29 364  P
42368: 2016-12-30 2016-12-30 365  X
42369: 2016-12-31 2016-12-31 366  N

Reproducible data

# Create dummy data files E1901.txt ... E2016.txt (in base R).
# Each file holds one row per day of its year: the date (V1), the day number
# within the year (V2) and a random capital letter (V3).
if (basename(getwd()) != "location") {
  # showWarnings = FALSE: re-running from the parent directory should not
  # warn just because "location" already exists.
  dir.create("location", showWarnings = FALSE)
  setwd("location")
}
set.seed(1L)
# lapply() is used only for its side effect (writing files); invisible()
# keeps the resulting list of 116 NULLs from being printed at top level.
invisible(lapply(1901:2016, function(year) {
  V1 <- seq(
    as.Date(paste0(year, "-01-01")),
    as.Date(paste0(year, "-12-31")),
    by = "day"
  )
  V2 <- seq_along(V1)
  V3 <- sample(LETTERS, length(V1), TRUE)
  write.csv(
    data.frame(V1, V2, V3, stringsAsFactors = FALSE),
    sprintf("E%4i.txt", year),
    row.names = FALSE
  )
}))
Community
  • 1
  • 1
Uwe
  • 41,420
  • 11
  • 90
  • 134