This was my original code, very messy and untidy: Link to my previous question
library(dplyr); library(plyr)
library(magrittr); library(stringr)
library(ExclusionTable)
library(lubridate)
library(tidyverse); library(tidyr)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
i=1
num_fu = c(1,2,3,4,5,6,7,8,9)
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
data_dir <- 'C:/Users/thepr/Documents/data/as'
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- df
for (i in 2:length(num_fu)){
RID_common <- as1$RID %in% get(paste0("as", i))$RID
assign(paste0("flnames", i), list.files(path = paste0(data_dir, i), pattern = "\\.csv", full.names = TRUE))
assign(paste0("as", i, "_list"), lapply(get(paste0("flnames", i)),
function(x){base::as.data.frame(read.csv(x))}))
nm <- gsub(".csv", "", basename(eval(parse(text = paste0("flnames", i))))) %>% str_sub(., 1,6)
assign(paste0("as", i, "_list"), setNames(get(paste0("as", i, "_list")), nm))
df <- Reduce(full_join, get(paste0("as", i, "_list")))
assign(paste0("as",i), df[!duplicated(base::as.list(df))])
dfs <- merge(dfs, df, by = "RID", all.x = TRUE)
dfs <- dfs[!duplicated(base::as.list(dfs))]
if(paste0("AS", i, "_AREA") %in% colnames(get(paste0("as", i)))){
assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_AREA"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_AREA"))])
# FU rate
assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
}
else if(paste0("AS", i, "_DATA_CLASS") %in% colnames(get(paste0("as", i)))){
assign(paste0("fu_",i-1), get(paste0("as", i))[RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
assign(paste0("fu_loss_",i-1), get(paste0("as", i))[!RID_common, c("RID", paste0("AS", i, "_DATA_CLASS"))])
# FU rate
assign(paste0("fu_rate_", i-1), nrow(get(paste0("as", i)))/nrow(as1))
}
else{}
}
After helpful comments by @Gregor Thomas @joran, I read previous posts and finally started using lists and vectors. Here is what I have tried so far:
library(tidyverse) #Includes: dplyr, stringr, tidyr
library(magrittr)
library(lubridate)
library(ExclusionTable)
library(janitor)
library(survival)
library(ggsurvfit); library(gtsummary)
library(zoo)
library(tidycmprsk)
# AA cohort (2 of 3)
## as
i=1
data_dir = c("C:/Users/thepr/Documents/data/as")
num_fu = c(1,2,3,4,5,6,7,8,9)
dirs <- paste0(data_dir, num_fu) # character
as <- data.frame()
df <- data.frame()
dfs <- data.frame()
flnames <- list.files(path = dirs, pattern = "\\.csv", full.names = TRUE)
as_list[[num_fu]] <- lapply(flnames[[num_fu]],
function(x){base::as.data.frame(read.csv(x))})
names(as_list) <- gsub(".csv", "", basename(flnames[[num_fu]])) %>% str_sub(., 1,6)
df <- Reduce(full_join, as_list)
df <- df[!duplicated(base::as.list(df))]
somehow I keep getting error messages:
Error in flnames[[num_fu]] : attempt to select more than one element in vectorIndex
Based on How do I make a list of data frames?, I think I am headed in the right direction. Please give some insights and thougths. Will be appreciated, thanks.