1

Edit: added a snippet of my dataframe

I'm trying to split a dataframe into several smaller ones and then save each of them to its own csv file, named after a unique identifier taken from the main dataframe.

# Reshape `metrics` to wide format, split it by sample_name and write each
# piece to a csv file.
# NOTE(review): str_c() only concatenates its arguments, so the '%s' below is
# kept literally; it is a sprintf()-style placeholder and sprintf() is never
# called here. Every piece is therefore written to '/home/Projects/%s.csv'.
# NOTE(review): a_id is not among the columns kept by select(), so it is not
# part of the split dataframes; here it is only passed as an extra argument
# to iwalk() and is never used inside the lambda.
metrics %>%  #main dataframe
  spread(base, pct ) %>% 
  select(sample_name,Cycle = pos,R1.A,R1.C,R1.G,R1.T,R1.N,R2.A) %>% 
  group_split(sample_name) %>%  #splitting it into several smaller dataframes based on sample names
  map(select, -sample_name) %>%  #excluding first column 
  iwalk( ~ write_csv(.x, str_c('/home/Projects/', '%s.csv')),a_id) # a_id is a column name in metrics dataframe 

My files get saved as %s.csv in /home/Projects/ rather than being named after the unique values of a_id (for example 2910968.csv and 2908963.csv). Any suggestions would be useful.

Small snippet of my main dataframe dput(metrics)

structure(list(b_id = c(163173, 163173, 163173, 163173, 163173, 
163173, 163173, 163173, 163173, 163173, 164172, 164172, 164172, 
164172, 164172, 164172, 164172, 164172, 164172, 164172), sample_name = c("Sample_1", 
"Sample_1", "Sample_1", "Sample_1", "Sample_1", "Sample_1", "Sample_1", 
"Sample_1", "Sample_1", "Sample_1", "Sample_2", "Sample_2", "Sample_2", 
"Sample_2", "Sample_2", "Sample_2", "Sample_2", "Sample_2", "Sample_2", 
"Sample_2"), a_id = c(2910968, 2910968, 2910968, 2910968, 2910968, 
2910968, 2910968, 2910968, 2910968, 2910968, 2908963, 2908963, 
2908963, 2908963, 2908963, 2908963, 2908963, 2908963, 2908963, 
2908963), type = c("basecomp", "basecomp", "basecomp", "basecomp", 
"basecomp", "basecomp", "basecomp", "basecomp", "basecomp", "basecomp", 
"basecomp", "basecomp", "basecomp", "basecomp", "basecomp", "basecomp", 
"basecomp", "basecomp", "basecomp", "basecomp"), pos = c(1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), base = c("R1.A", 
"R1.C", "R1.G", "R1.N", "R1.T", "R2.A", "R2.C", "R2.G", "R2.N", 
"R2.T", "R1.A", "R1.C", "R1.G", "R1.N", "R1.T", "R2.A", "R2.C", 
"R2.G", "R2.N", "R2.T"), pct = c(30.35095242, 20.80328011, 16.59966437, 
6.83e-05, 32.24603478, 31.28154795, 20.39882211, 17.25755071, 
0.005099781, 31.05697946, 30.16529478, 20.67986859, 16.16195464, 
7.86e-05, 32.99280343, 30.47328103, 20.18747421, 17.29746286, 
0.005100642, 32.03668127)), spec = structure(list(cols = list(
    b_id = structure(list(), class = c("collector_double", "collector"
    )), sample_name = structure(list(), class = c("collector_character", 
    "collector")), a_id = structure(list(), class = c("collector_double", 
    "collector")), type = structure(list(), class = c("collector_character", 
    "collector")), pos = structure(list(), class = c("collector_double", 
    "collector")), base = structure(list(), class = c("collector_character", 
    "collector")), pct = structure(list(), class = c("collector_double", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
"collector")), delim = ","), class = "col_spec"), problems = <pointer: 0x7f935a8c5f90>, row.names = c(NA, 
-20L), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"
))

Thank you

nbn
  • 195
  • 1
  • 7

1 Answers1

2

You could try something like this:

library(tidyverse)

# Write one csv file per sample, named after that sample's `a_id`.
# `a_id` is kept through select() so it is still available when the file name
# is built; it is dropped, together with `sample_name`, just before writing.
metrics %>%  # main dataframe
  pivot_wider(names_from = base, values_from = pct) %>%
  select(sample_name, a_id, Cycle = pos, R1.A, R1.C, R1.G, R1.T, R1.N, R2.A) %>%
  group_split(sample_name) %>%  # one dataframe per sample name
  walk(function(sample_df) {
    a_id <- unique(sample_df$a_id)
    # A file path must be a single string, so fail early with a clear message
    # instead of handing a vector of paths to write_csv().
    if (length(a_id) != 1L) {
      stop(
        "Expected exactly one a_id per sample_name, found ", length(a_id),
        call. = FALSE
      )
    }
    # `a_id` is numeric: format it without scientific notation so that an id
    # such as 3000000 gives "3000000.csv" rather than "3e+06.csv".
    file_name <- str_c(
      format(a_id, scientific = FALSE, trim = TRUE, digits = 15),
      ".csv"
    )
    sample_df %>%
      select(-sample_name, -a_id) %>%
      write_csv(str_c("/home/Projects/", file_name))
  })

This should create several .csv files in the folder /home/Projects/, each named after the a_id of its sample.

Martin Gal
  • 16,640
  • 5
  • 21
  • 39