Here are some tidyverse
and custom function options that might work depending on your needs:
library(tidyverse)
# custom function to generate, filter, and mutate the data:
combine_dfs <- function(i){
data_frame(x = rnorm(5), y = runif(5)) %>%
filter(x < y) %>%
mutate(x_plus_y = x + y) %>%
mutate(i = i)
}
df <- 1:5 %>% map_df(~combine_dfs(.))
df <- map_df(1:5, ~combine_dfs(.)) # both give the same results
> df %>% head()
# A tibble: 6 x 4
x y x_plus_y i
<dbl> <dbl> <dbl> <int>
1 -0.973 0.673 -0.300 1
2 -0.553 0.0463 -0.507 1
3 0.250 0.716 0.967 2
4 -0.745 0.0640 -0.681 2
5 -0.736 0.228 -0.508 2
6 -0.365 0.496 0.131 3
You could do something similar if you had a directory of files that needed to be combined:
dir_path <- '/path/to/data/test_directory/'
list.files(dir_path)
combine_files <- function(path, file){
read_csv(paste0(path, file)) %>%
filter(a < b) %>%
mutate(a_plus_b = a + b) %>%
mutate(file_name = file)
}
df <- list.files(dir_path, '\\.csv$') %>%
map_df(~combine_files(dir_path, .))
# or if you have Excel files, using the readxl package:
combine_xl_files <- function(path, file){
readxl::read_xlsx(paste0(path, file)) %>%
filter(a < b) %>%
mutate(a_plus_b = a + b) %>%
mutate(file_name = file)
}
df <- list.files(dir_path, '\\.xlsx$') %>%
map_df(~combine_xl_files(dir_path, .))