I am attempting to learn/use dplyr in R and I am having a few issues with how to create a custom table concatenation.
In short, I have a table that looks like so:
structure(list(bamfile = structure(c(2L, 1L, 4L, 3L, 6L), .Label = c("CZ_25bL001s.bam",
"CZ_25L001s.bam", "CZ_26bL001s.bam", "CZ_26L001s.bam", "CZ_27bL001s.bam",
"CZ_27L001s.bam", "CZ_28bL001s.bam", "CZ_28L001s.bam", "CZ_29bL001s.bam",
"CZ_29L001s.bam", "CZ_30bL001s.bam", "CZ_30L001s.bam", "CZ_31bL001s.bam",
"CZ_31L001s.bam", "CZ_32bL001s.bam", "CZ_32L001s.bam", "CZ_33bL001s.bam",
"CZ_33L001s.bam", "CZ_34bL001s.bam", "CZ_34L001s.bam", "CZ_35bL001s.bam",
"CZ_35L001s.bam", "CZ_36bL001s.bam", "CZ_36L001s.bam"), class = "factor"),
directory = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "TestDirectory/DataFolder", class = "factor"),
Short.name = structure(1:5, .Label = c("CZ_25", "CZ_25b",
"CZ_26", "CZ_26b", "CZ_27", "CZ_27b", "CZ_28", "CZ_28b",
"CZ_29", "CZ_29b", "CZ_30", "CZ_30b", "CZ_31", "CZ_31b",
"CZ_32", "CZ_32b", "CZ_33", "CZ_33b", "CZ_34", "CZ_34b",
"CZ_35", "CZ_35b", "CZ_36", "CZ_36b"), class = "factor"),
Targeting.type = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "single", class = "factor"),
sgRNA1 = structure(c(1L, 4L, 6L, 7L, 7L), .Label = c("guide_16",
"guide_2", "guide_21", "guide_22", "guide_6", "guide_76",
"guide_83"), class = "factor"), sgRNA2 = c(NA, NA, NA, NA,
NA), Group = structure(c(1L, 1L, 1L, 1L, 1L), .Label = "CZ", class = "factor")), .Names = c("bamfile",
"directory", "Short.name", "Targeting.type", "sgRNA1", "sgRNA2",
"Group"), row.names = c(NA, 5L), class = "data.frame")
What I would like to do is take a search function, iterate through the "sgRNA1" column, and then, for each guide, create a new column for each "bamfile" found in the rows that share that same "sgRNA1" value.
The final table would thus contain a guide_XX column, followed by an n() column giving how many entries were found for that guide, and then an individual column for each .bam file that was found. Each row would then correspond to the next guide_XX sample that was iterated.
This would be an example table of the output:
structure(list(sgRNA1 = structure(1:6, .Label = c("guide_16",
"guide_2", "guide_21", "guide_22", "guide_6", "guide_76", "guide_83"
), class = "factor"), Count = c(4L, 2L, 5L, 4L, 2L, 1L), bam1 = structure(c(2L,
7L, 5L, 1L, 6L, 4L), .Label = c("CZ_25bL001s.bam", "CZ_25L001s.bam",
"CZ_26bL001s.bam", "CZ_26L001s.bam", "CZ_29bL001s.bam", "CZ_30bL001s.bam",
"CZ_30L001s.bam"), class = "factor"), bam2 = structure(c(3L,
6L, 5L, 4L, 7L, 1L), .Label = c("", "CZ_27L001s.bam", "CZ_28bL001s.bam",
"CZ_29L001s.bam", "CZ_31L001s.bam", "CZ_33bL001s.bam", "CZ_34L001s.bam"
), class = "factor"), bam3 = structure(c(4L, 1L, 5L, 3L, 1L,
1L), .Label = c("", "CZ_27bL001s.bam", "CZ_32bL001s.bam", "CZ_32L001s.bam",
"CZ_33L001s.bam"), class = "factor"), bam4 = structure(c(4L,
1L, 3L, 5L, 1L, 1L), .Label = c("", "CZ_28L001s.bam", "CZ_34bL001s.bam",
"CZ_35bL001s.bam", "CZ_36L001s.bam"), class = "factor"), bam5 = structure(c(1L,
1L, 3L, 1L, 1L, 1L), .Label = c("", "CZ_31bL001s.bam", "CZ_36bL001s.bam"
), class = "factor"), bam6 = structure(c(1L, 1L, 1L, 1L, 1L,
1L), .Label = c("", "CZ_35L001s.bam"), class = "factor")), .Names = c("sgRNA1",
"Count", "bam1", "bam2", "bam3", "bam4", "bam5", "bam6"), row.names = c(NA,
6L), class = "data.frame")
Thank you in advance! I'm looking forward to getting to know dplyr a bit better.