iterative subtraction of column values of two dataframes

Question

I have two dataframes:

df.means.refs:

enter image description here

df.target:

enter image description here

I need to loop through df.target and perform the following subtraction:

df.target$Cq - df.means.refs$Cq

df.target is grouped (via group_by) by Tissue, Accession, Genotype, Gene, BReplicate

df.means.refs is grouped (via group_by) by Tissue, Accession, Genotype, BReplicate

For example I need to calculate

df.target$Cq - df.means.refs$Cq (row 1),
df.target$Cq - df.means.refs$Cq (row 2),
df.target$Cq - df.means.refs$Cq (row 3),

so far no problem. But now

df.target$Cq - df.means.refs$Cq (row 4 in df.target - row1 in df.means.refs)
df.target$Cq - df.means.refs$Cq (row 5 in df.target - row2 in df.means.refs)
df.target$Cq - df.means.refs$Cq (row 6 in df.target - row3 in df.means.refs)

and so on.

Starting from row 10 of df.target, the Genotype column switches to WT, and from there on I need to use the Cq values in df.means.refs that belong to Genotype WT.

(essentially Cq values corresponding to 3 biological replicates per genotype, two different genotypes; three different genes tested and listed in df.target)

How do I code this in R?

Thank you very much for any help, Kay

The dataframes (via dput()): df.means.refs:

# dput() representation of df.means.refs (the reference Cq values).
# 6 rows: one per Genotype ("sub-9", "WT") x BReplicate ("1", "2", "3").
# Columns: Tissue, Accession, Genotype, BReplicate (all character) and Cq (numeric).
# The object is a grouped_df whose `vars` attribute lists Tissue, Accession and
# Genotype only (BReplicate is not among the recorded grouping variables).
# NOTE(review): the `vars`/`drop` attributes look like the layout of an older
# dplyr grouped_df -- TODO confirm it loads cleanly with the installed dplyr.
structure(list(Tissue = c("Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling"), Accession = c("Col", "Col", 
"Col", "Col", "Col", "Col"), Genotype = c("sub-9", "sub-9", "sub-9", 
"WT", "WT", "WT"), BReplicate = c("1", "2", "3", "1", "2", "3"
), Cq = c(25.2540053029395, 24.6386988176262, 24.5407237397682, 
24.7295032752289, 24.4544553518053, 24.4604738608338)), .Names = c("Tissue", 
"Accession", "Genotype", "BReplicate", "Cq"), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), vars = c("Tissue", 
"Accession", "Genotype"), drop = TRUE)

df.target:

# dput() representation of df.target (the target-gene Cq values).
# 18 rows: Genotype ("sub-9", "WT") x Gene ("CESA1", "CESA3", "PRC1") x
# BReplicate ("1", "2", "3").
# Columns: Tissue, Accession, Genotype, Gene, BReplicate (all character) and
# Cq (numeric).
# The object is a grouped_df grouped by Tissue, Accession, Genotype, Gene and
# BReplicate; every group holds exactly one row (group_sizes are all 1L).
# NOTE(review): the `vars`/`indices`/`labels` attributes look like the layout of
# an older dplyr grouped_df -- TODO confirm it loads cleanly with the installed dplyr.
structure(list(Tissue = c("Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
"Seedling", "Seedling", "Seedling"), Accession = c("Col", "Col", 
"Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", 
"Col", "Col", "Col", "Col", "Col", "Col", "Col"), Genotype = c("sub-9", 
"sub-9", "sub-9", "sub-9", "sub-9", "sub-9", "sub-9", "sub-9", 
"sub-9", "WT", "WT", "WT", "WT", "WT", "WT", "WT", "WT", "WT"
), Gene = c("CESA1", "CESA1", "CESA1", "CESA3", "CESA3", "CESA3", 
"PRC1", "PRC1", "PRC1", "CESA1", "CESA1", "CESA1", "CESA3", "CESA3", 
"CESA3", "PRC1", "PRC1", "PRC1"), BReplicate = c("1", "2", "3", 
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", 
"2", "3"), Cq = c(23.496195267366, 22.7054342062343, 22.639685306532, 
22.8211127310626, 22.0820545565921, 22.1110378642623, 24.1462601520338, 
23.3972522049923, 23.3270831096319, 22.5998036632355, 22.7161277680243, 
22.6526346162252, 22.237990186265, 22.0715318793714, 22.0171712171306, 
23.4524362896598, 23.4121887867123, 23.3648625264175)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -18L), .Names = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate", "Cq"), vars = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate"), drop = TRUE, indices = list(
    0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
    14L, 15L, 16L, 17L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
    Tissue = c("Seedling", "Seedling", "Seedling", "Seedling", 
    "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
    "Seedling", "Seedling", "Seedling", "Seedling", "Seedling", 
    "Seedling", "Seedling", "Seedling", "Seedling"), Accession = c("Col", 
    "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col", 
    "Col", "Col", "Col", "Col", "Col", "Col", "Col", "Col"), 
    Genotype = c("sub-9", "sub-9", "sub-9", "sub-9", "sub-9", 
    "sub-9", "sub-9", "sub-9", "sub-9", "WT", "WT", "WT", "WT", 
    "WT", "WT", "WT", "WT", "WT"), Gene = c("CESA1", "CESA1", 
    "CESA1", "CESA3", "CESA3", "CESA3", "PRC1", "PRC1", "PRC1", 
    "CESA1", "CESA1", "CESA1", "CESA3", "CESA3", "CESA3", "PRC1", 
    "PRC1", "PRC1"), BReplicate = c("1", "2", "3", "1", "2", 
    "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", 
    "3")), class = "data.frame", row.names = c(NA, -18L), vars = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate"), drop = TRUE, .Names = c("Tissue", 
"Accession", "Genotype", "Gene", "BReplicate")))

Please include a reproducible example. https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example — Mark, Nov 21 '17 at 14:22

score 0 · Answer 1 · edited Jun 20 '20 at 09:12

I suppose you are looking for a join of both data sets on Tissue, Accession, Genotype, and BReplicate.

With `dplyr`

library(dplyr)

# Columns that identify one reference measurement. Gene is left out on purpose,
# so each reference Cq is matched to every gene of the same replicate/genotype.
key_cols <- c("Tissue", "Accession", "Genotype", "BReplicate")

# Both inputs carry a Cq column, so the join disambiguates them as
# Cq.x (from df.target) and Cq.y (from df.means.refs).
with_refs <- left_join(df.target, df.means.refs, by = key_cols)

# Row-wise difference target minus reference; the result is auto-printed.
mutate(with_refs, diff = Cq.x - Cq.y)

# A tibble: 18 x 8
# Groups:   Tissue, Accession, Genotype, Gene, BReplicate [18]
     Tissue Accession Genotype  Gene BReplicate     Cq.x     Cq.y      diff
      <chr>     <chr>    <chr> <chr>      <chr>    <dbl>    <dbl>     <dbl>
 1 Seedling       Col    sub-9 CESA1          1 23.49620 25.25401 -1.757810
 2 Seedling       Col    sub-9 CESA1          2 22.70543 24.63870 -1.933265
 3 Seedling       Col    sub-9 CESA1          3 22.63969 24.54072 -1.901038
 4 Seedling       Col    sub-9 CESA3          1 22.82111 25.25401 -2.432893
 5 Seedling       Col    sub-9 CESA3          2 22.08205 24.63870 -2.556644
 6 Seedling       Col    sub-9 CESA3          3 22.11104 24.54072 -2.429686
 7 Seedling       Col    sub-9  PRC1          1 24.14626 25.25401 -1.107745
 8 Seedling       Col    sub-9  PRC1          2 23.39725 24.63870 -1.241447
 9 Seedling       Col    sub-9  PRC1          3 23.32708 24.54072 -1.213641
10 Seedling       Col       WT CESA1          1 22.59980 24.72950 -2.129700
11 Seedling       Col       WT CESA1          2 22.71613 24.45446 -1.738328
12 Seedling       Col       WT CESA1          3 22.65263 24.46047 -1.807839
13 Seedling       Col       WT CESA3          1 22.23799 24.72950 -2.491513
14 Seedling       Col       WT CESA3          2 22.07153 24.45446 -2.382923
15 Seedling       Col       WT CESA3          3 22.01717 24.46047 -2.443303
16 Seedling       Col       WT  PRC1          1 23.45244 24.72950 -1.277067
17 Seedling       Col       WT  PRC1          2 23.41219 24.45446 -1.042267
18 Seedling       Col       WT  PRC1          3 23.36486 24.46047 -1.095611

With `data.table`

library(data.table)
options(datatable.print.class = TRUE)

# Convert both data frames to data.tables by reference (no copy is made).
setDT(df.target)
setDT(df.means.refs)

# Update join: for every row of df.target that matches a row of df.means.refs
# on the key columns, add column `diff` in place. Inside the join, `Cq` is the
# column of df.target and `i.Cq` is the column of df.means.refs.
df.target[
  df.means.refs,
  on = c("Tissue", "Accession", "Genotype", "BReplicate"),
  diff := Cq - i.Cq
]

# `:=` returns invisibly, so print the updated table explicitly.
df.target[]

      Tissue Accession Genotype   Gene BReplicate       Cq      diff
      <char>    <char>   <char> <char>     <char>    <num>     <num>
 1: Seedling       Col    sub-9  CESA1          1 23.49620 -1.757810
 2: Seedling       Col    sub-9  CESA1          2 22.70543 -1.933265
 3: Seedling       Col    sub-9  CESA1          3 22.63969 -1.901038
 4: Seedling       Col    sub-9  CESA3          1 22.82111 -2.432893
 5: Seedling       Col    sub-9  CESA3          2 22.08205 -2.556644
 6: Seedling       Col    sub-9  CESA3          3 22.11104 -2.429686
 7: Seedling       Col    sub-9   PRC1          1 24.14626 -1.107745
 8: Seedling       Col    sub-9   PRC1          2 23.39725 -1.241447
 9: Seedling       Col    sub-9   PRC1          3 23.32708 -1.213641
10: Seedling       Col       WT  CESA1          1 22.59980 -2.129700
11: Seedling       Col       WT  CESA1          2 22.71613 -1.738328
12: Seedling       Col       WT  CESA1          3 22.65263 -1.807839
13: Seedling       Col       WT  CESA3          1 22.23799 -2.491513
14: Seedling       Col       WT  CESA3          2 22.07153 -2.382923
15: Seedling       Col       WT  CESA3          3 22.01717 -2.443303
16: Seedling       Col       WT   PRC1          1 23.45244 -1.277067
17: Seedling       Col       WT   PRC1          2 23.41219 -1.042267
18: Seedling       Col       WT   PRC1          3 23.36486 -1.095611

iterative subtraction of column values of two dataframes

1 Answer

With dplyr

With data.table

With `dplyr`

With `data.table`