You can try using `map()` and `unnest()` to do this. This will create a single data frame combining all of the data read in from the CSV files, with an additional grouping variable taken from the file name. It assumes the column names are the same across all files.
# Requires dplyr, purrr, readr and tidyr to be attached
# (e.g. via library(tidyverse)).

# !!! specify your file path here !!!
data_dir <- "."

# list the CSV files in that folder; bare file names are kept so they
# can double as the grouping label
d <- list.files(path = data_dir, pattern = "\\.csv$", full.names = FALSE)

# for each file: read it in with all cols as character, keep the file
# name as a label, then expand the nested rows (unnest)
dd <- data.frame(filename = d, stringsAsFactors = FALSE) %>%
  mutate(cont = map(
    filename,
    # build the path from the same folder that was listed above
    ~ read_csv(file.path(data_dir, .x), col_types = cols(.default = "c"))
  )) %>%
  unnest(cont)
UPDATE

I think the `map` and `unnest` functions will still work here, based on your example data and expected output. If you `gather` the combined data into long format, you can then `filter` out the missing values and find which variables are present in each CSV file. It's not pretty, but it might suit your purposes.
# Requires dplyr, purrr, readr and tidyr to be attached
# (e.g. via library(tidyverse)); reshape2 must be installed.

# create toy CSV files in wd
df1 <- data.frame(matrix(runif(100), 10)) # 10 obs, 10 vars named X1 to X10
df2 <- data.frame(matrix(runif(120), 12)) # as above but more obs (12)
df3 <- data.frame(matrix(runif(120), 8))  # 8 obs, 15 vars named X1 to X15
df4 <- data.frame(matrix(runif(100), 10)) # 10 obs, 10 vars named a to j
names(df4) <- letters[1:10]

# write every toy data frame to disk so that all four files exist
write_csv(df1, "df1.csv")
write_csv(df2, "df2.csv")
write_csv(df3, "df3.csv")
write_csv(df4, "df4.csv")

# full.names = TRUE already yields usable paths such as "./df1.csv"
d <- list.files(pattern = "\\.csv$", full.names = TRUE)

# read each file (all cols as character), stack them, reshape to long
# format, drop the NA cells introduced by columns a file does not have,
# and keep one row per (file, variable) pair
# NOTE: gather() is superseded; the modern equivalent is
# pivot_longer(-filename, names_to = "k", values_to = "v")
dd <- data.frame(filename = d, stringsAsFactors = FALSE) %>%
  mutate(cont = map(
    filename,
    ~ read_csv(.x, col_types = cols(.default = "c"))
  )) %>%
  unnest(cont) %>%
  gather(k, v, -filename) %>%
  filter(!is.na(v)) %>%
  distinct(filename, k)

# one row per variable, one column per file; a cell holds the variable
# name when the file contains it and NA otherwise
dd %>% reshape2::dcast(k ~ filename, value.var = "k")
k ./df1.csv ./df2.csv ./df3.csv ./df4.csv
1 a <NA> <NA> <NA> a
2 b <NA> <NA> <NA> b
3 c <NA> <NA> <NA> c
4 d <NA> <NA> <NA> d
5 e <NA> <NA> <NA> e
6 f <NA> <NA> <NA> f
7 g <NA> <NA> <NA> g
8 h <NA> <NA> <NA> h
9 i <NA> <NA> <NA> i
10 j <NA> <NA> <NA> j
11 X1 X1 X1 X1 <NA>
12 X10 X10 X10 X10 <NA>
13 X11 <NA> <NA> X11 <NA>
14 X12 <NA> <NA> X12 <NA>
15 X13 <NA> <NA> X13 <NA>
16 X14 <NA> <NA> X14 <NA>
17 X15 <NA> <NA> X15 <NA>
18 X2 X2 X2 X2 <NA>
19 X3 X3 X3 X3 <NA>
20 X4 X4 X4 X4 <NA>
21 X5 X5 X5 X5 <NA>
22 X6 X6 X6 X6 <NA>
23 X7 X7 X7 X7 <NA>
24 X8 X8 X8 X8 <NA>
25 X9 X9 X9 X9 <NA>