I need to perform a series of calculations from several files and create a single output table with the results from all files.
I have tried by listing all files within folder and looping, either with for
or with lapply
functions but there is something I am missing.
Here is a simplified vs of what I have with some 'fake' files:
# Create new folder -- "trials"
setwd("C:/Users/.../Desktop")
dir.create("trials")
# Create 'trial' files
setwd("C:/Users/.../Desktop/trials")
pathFiles <- "C:/Users/.../Desktop/trials"
df_1 <- data.frame(x=c(1,2,3,4,5,6,7,8,9,10))
df_1$y <- c(1,2,3,4,5,6,7,8,9,10)
df_1$z <- c(10,20,30,40,50,60,70,80,90,100)
write.table(df_1, "table1.csv", col.names = TRUE, row.names = FALSE, sep = ",")
df_2 <- data.frame(x=c(2,3,4,5,6,7,8,9,10,11))
df_2$y <- c(2,3,4,5,6,7,8,9,10,11)
df_2$z <- c(20,30,40,50,60,70,80,90,100,110)
write.table(df_2, "table2.csv", col.names = TRUE, row.names = FALSE, sep = ",")
df_3 <- data.frame(x=c(3,4,5,6,7,8,9,10,11,12))
df_3$y <- c(3,4,5,6,7,8,9,10,11,12)
df_3$z <- c(30,40,50,60,70,80,90,100,110,120)
write.table(df_3, "table3.csv", col.names = TRUE, row.names = FALSE, sep = ",")
For each of these files, I want to extract certain information and create an output table with all calculated fields.
I've tried with for
loop:
Final <- NULL
M <- NULL
slp <- NULL
eval <- NULL
dfs <- dir(pathFiles, "*.csv", full.names = TRUE, ignore.case = TRUE, all.files = TRUE)
for (df in dfs) {
t <- read.csv(df, header = TRUE, sep = ",")
x <- t$x
y <- t$y
z <- t$z
lim_y <- y >= 3 & y <=6
lim_x <- x[lim_y]
lim_z <- z[lim_y]
iFinal <- x[nrow(t)]
Final <- c(Final, iFinal) # add value to the string
iM <- mean(lim_z)
M <- c(M, iM) # add value to the string
p <- lm(lim_x ~ lim_z)
iSlp <- summary(p)$coefficients[2,1]
slp <- c(slp, iSlp) # add value to the string
ifelse ((Slp <= 0.05 & Slp >= -0.05), ieval <- "ok", ieval <- "false")
eval <- c(eval, ieval) # add value to the string
}
sum_df <- data.frame(df, M, Slp, eval, Final)
write.table(sum_df, "sum_df.csv", sep = ",", row.names = FALSE, col.names = TRUE)
I have used this for
loop in a similar way before and it worked fine, but not here.
With an lapply
function I do not get better results:
dfs <- list.files(pathFiles, "^.+\\.csv", full.names = TRUE, ignore.case = TRUE, all.files = TRUE)
Final <- NULL
M <- NULL
slp <- NULL
eval <- NULL
model <- function(x){
t <- read.csv(x, header = TRUE, sep = ",")
x <- t$x
y <- t$y
z <- t$z
lim_y <- y >= 3 & y <=6
lim_x <- x[lim_y]
lim_z <- z[lim_y]
iFinal <- x[nrow(t)]
Final <- c(Final, iFinal)
iM <- mean(lim_z)
M <- c(M, iM)
p <- lm(lim_x ~ lim_z)
iSlp <- summary(p)$coefficients[2,1]
slp <- c(slp, iSlp)
ifelse ((Slp <= 0.05 & Slp >= -0.05), ieval <- "ok", ieval <- "false")
eval <- c(eval, ieval)
}
lapply(dfs, model)
The functions and output table works fine with just one file, so I guess the error must be in how I am looping through the files. But I don't know where I'm going wrong.
I would appreciate any help.