I'm trying to wrangle USR files (around 7,000) into a Long data format.
I've created the below, but it takes over 2 hours to run (hence the reason for adding the progress printer).
Does anyone have any idea how I can speed up this code? Are there specific lines that are slowing it down?
Thanks in advance!
for(i in D_flows){
flow <- read.table(i, header = F, fill = T, sep = "|")
for(j in flow){
Flow_name <- i
Timestamp <- ymd_hms(flow[flow$V1 == "ZHV",8])
Date <- ymd(flow[flow$V1 == "ZPD",2])
SR <- as.vector(flow[flow$V1 == "ZPD",3])
SP <- as.integer(as.vector(flow[flow$V1 == "SE1",2]))
EV <- as.numeric(as.character(flow[flow$V1 == "SE1" , 4]))
Flow_data <- tibble(Flow_name, Timestamp, Date, SR, SP, EV)
Flow_data <- Flow_data[complete.cases(Flow_data),]
Flow_data <- Flow_data %>%
group_by(SP) %>%
mutate(MEV = sum(EV)) %>%
select(Flow_name, Timestamp, Date, SR, SP, MEV) %>%
unique() %>%
ungroup()
}
#Append the flow data to the D Flow data file
D_flow_data <- bind_rows(D_flow_data, Flow_data)
#Shows the progress of the for loop
progress <- D_flow_data %>%
select(-Timestamp, -Date, -SR, -SP, -MEV) %>%
unique()
print(nrow(progress))
}