Normalizing to average of control group within a data frame

Question

I have a data frame:

    tissue_merge <-structure(list(Experiment = c(170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 170911L, 
170911L, 170911L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 170918L, 
170918L, 170918L, 170918L, 170918L, 170918L), Sample = structure(c(11L, 
11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 
15L, 16L, 16L, 16L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 
20L, 20L, 20L, 21L, 21L, 21L, 22L, 22L, 22L, 23L, 23L, 23L, 24L, 
24L, 24L, 25L, 25L, 25L, 26L, 26L, 26L, 27L, 27L, 27L, 28L, 28L, 
28L, 29L, 29L, 29L, 30L, 30L, 30L, 31L, 31L, 31L, 32L, 32L, 32L, 
33L, 33L, 33L, 34L, 34L, 34L, 35L, 35L, 35L, 36L, 36L, 36L, 37L, 
37L, 37L, 38L, 38L, 38L, 39L, 39L, 39L, 40L, 40L, 40L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L), .Label = c("1: FL_643", "10: cKO_657", 
"2: FL_645", "3: FL_647", "4: FL_656", "5: FL_658", "6: cKO_644", 
"7: cKO_646", "8: cKO_654", "9: cKO_655", "Spl_cKO_19", "Spl_cKO_21", 
"Spl_cKO_29", "Spl_cKO_37", "Spl_cKO_39", "Spl_FL_622", "Spl_FL_630", 
"Spl_FL_631", "Spl_FL_635", "Spl_FL_638", "iLN_cKO_19", "iLN_cKO_21", 
"iLN_cKO_29", "iLN_cKO_37", "iLN_cKO_39", "iLN_FL_622", "iLN_FL_630", 
"iLN_FL_631", "iLN_FL_635", "iLN_FL_638", "Thy_cKO_19", "Thy_cKO_21", 
"Thy_cKO_29", "Thy_cKO_37", "Thy_cKO_39", "Thy_FL_622", "Thy_FL_630", 
"Thy_FL_631", "Thy_FL_635", "Thy_FL_638"), class = "factor"), 
    Genotype = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L), .Label = c("miR-15/16 FL", "miR-15/16 cKO"
    ), class = "factor"), variable = structure(c(1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 
    1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("MFI CD127 Foxp3+ CD4+", 
    "MFI CD127 Foxp3- CD4+", "MFI CD127 CD8+"), class = "factor"), 
    value = c(3076, 4718, 4987, 3083, 5317, 5345, 3058, 5007, 
    4744, 3531, 5308, 5143, 3032, 4804, 4409, 1757, 4173, 3991, 
    2039, 3501, 3357, 1927, 4434, 3910, 1611, 3325, 3085, 1748, 
    3509, 3093, 1992, 4502, 4866, 2306, 5047, 5062, 2295, 5084, 
    4900, 2436, 5266, 5139, 2396, 4804, 4648, 1363, 3974, 3903, 
    1550, 3829, 3653, 1543, 4356, 4013, 1356, 3587, 3334, 1444, 
    3715, 3410, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 494, 518, 524, 917, 1786, 848, 912, 1092, 1191, 
    1343, 543, 882, 914, 1127, 1237, 649, 843, 926, 1084, 3714, 
    894, 1271, 1382, 1623, 1629, 570, 902, 1363, 1490, 1963, 
    528, 610, 715, 2079, NA, 857, 1139, 1147, 1278, 1325, 377, 
    1212, 1280, 1635, NA, 572, 613, 727, 1066, 2199, 976, 1025, 
    1089, 1304, 1311, 276, 1037, 1165, 1400, 1654, 524, 599, 
    624, 1059, 2345, 970, 1090, 1140, 1154, 1208, 470, 1139, 
    1267, 1359, 1583, 603, 614, 631, 939, 2360, 868, 1147, 1180, 
    1202, 1555, 868, 961, 1102, 1251, 1607, 772, 881, 925, 1269, 
    2408, 985, 1095, 1165, 1517, 1735, 402, 1019, 1445, 1583, 
    1720, 743, 779, 880, 1047, 2509, 916, 1179, 1190, 1406, 1441, 
    489, 904, 1374, 1483, 1817, 719, 722, 932, 974, 3129, 839, 
    1188, 1344, 1455, 1616, 524, 966, 1088, 1342, 2100, 764, 
    779, 876, 1048, 3263, 866, 1336, 1413, 1560, 1571, 570, 1038, 
    1446, 1499, 2051), Tissue = structure(c(2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 5L, 4L, 
    4L, 5L, 2L, 3L, 1L, 4L, 5L, 3L, 2L, 1L, 1L, 3L, 2L, 5L, 4L, 
    5L, 1L, 4L, 3L, 2L, 4L, 5L, 1L, 3L, 2L, 1L, 2L, 3L, 4L, 5L, 
    5L, 4L, 1L, 2L, 3L, 4L, 1L, 3L, 2L, 5L, 3L, 2L, 1L, 5L, 4L, 
    5L, 4L, 3L, 2L, 1L, 4L, 5L, 3L, 1L, 2L, 1L, 3L, 2L, 5L, 4L, 
    5L, 1L, 2L, 3L, 4L, 4L, 5L, 3L, 1L, 2L, 1L, 3L, 2L, 5L, 4L, 
    5L, 3L, 2L, 1L, 4L, 5L, 4L, 3L, 1L, 2L, 1L, 3L, 2L, 5L, 4L, 
    4L, 1L, 5L, 2L, 3L, 4L, 5L, 1L, 3L, 2L, 3L, 1L, 2L, 5L, 4L, 
    5L, 4L, 1L, 2L, 3L, 4L, 5L, 1L, 3L, 2L, 1L, 3L, 2L, 5L, 4L, 
    5L, 1L, 4L, 3L, 2L, 4L, 5L, 3L, 1L, 2L, 1L, 3L, 2L, 5L, 4L, 
    5L, 1L, 4L, 3L, 2L, 4L, 5L, 1L, 3L, 2L), .Label = c("Thymus", 
    "Spleen", "iLN", "Skin", "Colon"), class = "factor")), .Names = c("Experiment", 
"Sample", "Genotype", "variable", "value", "Tissue"), row.names = c(19L, 
23L, 30L, 71L, 75L, 82L, 123L, 127L, 134L, 175L, 179L, 186L, 
227L, 231L, 238L, 279L, 283L, 290L, 331L, 335L, 342L, 383L, 387L, 
394L, 435L, 439L, 446L, 487L, 491L, 498L, 539L, 543L, 550L, 591L, 
595L, 602L, 643L, 647L, 654L, 695L, 699L, 706L, 747L, 751L, 758L, 
799L, 803L, 810L, 851L, 855L, 862L, 903L, 907L, 914L, 955L, 959L, 
966L, 1007L, 1011L, 1018L, 1049L, 1053L, 1055L, 1098L, 1102L, 
1104L, 1147L, 1151L, 1153L, 1196L, 1200L, 1202L, 1245L, 1249L, 
1251L, 1294L, 1298L, 1300L, 1343L, 1347L, 1349L, 1392L, 1396L, 
1398L, 1441L, 1445L, 1447L, 1490L, 1494L, 1496L, 1589L, 1590L, 
1591L, 1592L, 1593L, 1609L, 1610L, 1611L, 1612L, 1613L, 1629L, 
1630L, 1631L, 1632L, 1633L, 1842L, 1843L, 1844L, 1845L, 1846L, 
1862L, 1863L, 1864L, 1865L, 1866L, 1882L, 1883L, 1884L, 1885L, 
1886L, 2095L, 2096L, 2097L, 2098L, 2099L, 2115L, 2116L, 2117L, 
2118L, 2119L, 2135L, 2136L, 2137L, 2138L, 2139L, 2348L, 2349L, 
2350L, 2351L, 2352L, 2368L, 2369L, 2370L, 2371L, 2372L, 2388L, 
2389L, 2390L, 2391L, 2392L, 2601L, 2602L, 2603L, 2604L, 2605L, 
2621L, 2622L, 2623L, 2624L, 2625L, 2641L, 2642L, 2643L, 2644L, 
2645L, 2854L, 2855L, 2856L, 2857L, 2858L, 2874L, 2875L, 2876L, 
2877L, 2878L, 2894L, 2895L, 2896L, 2897L, 2898L, 3107L, 3108L, 
3109L, 3110L, 3111L, 3127L, 3128L, 3129L, 3130L, 3131L, 3147L, 
3148L, 3149L, 3150L, 3151L, 3360L, 3361L, 3362L, 3363L, 3364L, 
3380L, 3381L, 3382L, 3383L, 3384L, 3400L, 3401L, 3402L, 3403L, 
3404L, 3613L, 3614L, 3615L, 3616L, 3617L, 3633L, 3634L, 3635L, 
3636L, 3637L, 3653L, 3654L, 3655L, 3656L, 3657L, 3866L, 3867L, 
3868L, 3869L, 3870L, 3886L, 3887L, 3888L, 3889L, 3890L, 3906L, 
3907L, 3908L, 3909L, 3910L), class = "data.frame")

What I would like to do is normalize each value to the average of its respective "miR-15/16 FL" control for each tissue and each experiment.

I have tried to do this using dplyr with an average function

# Attempt: group by Experiment / Tissue / variable and divide each value by
# its group mean, storing the result in a new value_norm column.
tissue_merge <- tissue_merge %>%
  # NOTE(review): na.rm is passed to group_by() here, not to mean(), so it
  # does not drop NA values when the mean is computed below.
  group_by(.dots = c("Experiment", "Tissue", "variable"), na.rm = T) %>%
  # mean(value) covers every row in the group; it is not restricted to the
  # "miR-15/16 FL" control rows.
  mutate(value_norm = value/mean(value))

However, this function just gives me "NA" as the value_norm for every number.

I realize that in my current code, I am not specifying that I want to only use the average of the "miR-15/16 FL" samples but I do not know how to incorporate that into this function (not to mention that the function doesn't even give values at all as is).

For clarification:

For a given tissue, say "Spleen", for a given variable, say "MFI CD127 Foxp3- CD4+", for a given experiment, say "170911" I would like to take each value and divide it by the average of the "miR-15/16 FL" samples. This should result in a batch normalization for each experiment so that when I pool data from multiple experiments, everything is relative to the "miR-15/16 FL" samples within each experiment.

For "Spleen" "MFI CD127 Foxp3- CD4+", the normalized data should look like this when combining the multiple experiments post normalization:

Thank you, I have updated the question with mean() but get the same result — John Gagnon, Dec 06 '17 at 19:18
`na.rm = T` should be inside of the `mean` function, not the `group_by` call — bouncyball, Dec 06 '17 at 19:21
that makes sense! That is now giving me values but it is using the average of both "miR-15/16 FL" and "miR-15/16 cKO" combined. I'm not sure how to modify my code to normalize only to the average of the "miR-15/16 FL" samples — John Gagnon, Dec 06 '17 at 19:23
I appreciate the `dput`ted reproducible example, but next time please make it minimal. 10-20 rows would have been plenty. — Gregor Thomas, Dec 06 '17 at 19:48

Gregor Thomas · Accepted Answer · 2017-12-06T21:55:45.910

You could do it all in one step, but it's clearer to add a column for the control mean and then take it out later.

# Normalize each value to the mean of the "miR-15/16 FL" control rows within
# its Experiment / Tissue / variable group.
# Verbs are namespace-qualified so that a later-loaded package masking
# mutate() (e.g. plyr) cannot silently drop the grouping.
tissue_merge %>%
    dplyr::group_by(Experiment, Tissue, variable) %>%
    dplyr::mutate(
        # Per-group control mean; NA control readings are ignored.
        control_mean = mean(value[Genotype == "miR-15/16 FL"], na.rm = TRUE),
        value_norm = value / control_mean
    ) %>%
    # Drop the grouping so later steps are not computed per group by accident.
    dplyr::ungroup() %>%
    # control_mean is only a helper for computing value_norm.
    dplyr::select(-control_mean)

Demo: Below you can see that, for Experiment 170911 and Tissue Spleen, for each variable (MFI CD127 Foxp3+ CD4+, MFI CD127 Foxp3- CD4+, and MFI CD127 CD8+), the control_mean is equal to the mean of the values for that variable when Genotype == "miR-15/16 FL", and the value_norm is the value divided by the control_mean.

# Demo: keep control_mean visible and print one Experiment / Tissue slice so
# the normalization can be checked by eye.
tissue_merge %>%
    dplyr::group_by(Experiment, Tissue, variable) %>%
    dplyr::mutate(
        # Per-group control mean; NA control readings are ignored.
        control_mean = mean(value[Genotype == "miR-15/16 FL"], na.rm = TRUE),
        value_norm = value / control_mean
    ) %>%
    dplyr::ungroup() %>%
    dplyr::filter(Experiment == 170911, Tissue == "Spleen") %>%
    dplyr::arrange(variable) %>%
    # Convert to a plain data frame so every row and column is printed in full.
    as.data.frame() %>%
    print()
#    Experiment     Sample      Genotype              variable value Tissue control_mean value_norm
# 1      170911 Spl_cKO_19 miR-15/16 cKO MFI CD127 Foxp3+ CD4+  3076 Spleen       1816.4  1.6934596
# 2      170911 Spl_cKO_21 miR-15/16 cKO MFI CD127 Foxp3+ CD4+  3083 Spleen       1816.4  1.6973134
# 3      170911 Spl_cKO_29 miR-15/16 cKO MFI CD127 Foxp3+ CD4+  3058 Spleen       1816.4  1.6835499
# 4      170911 Spl_cKO_37 miR-15/16 cKO MFI CD127 Foxp3+ CD4+  3531 Spleen       1816.4  1.9439551
# 5      170911 Spl_cKO_39 miR-15/16 cKO MFI CD127 Foxp3+ CD4+  3032 Spleen       1816.4  1.6692359
# 6      170911 Spl_FL_622  miR-15/16 FL MFI CD127 Foxp3+ CD4+  1757 Spleen       1816.4  0.9672980
# 7      170911 Spl_FL_630  miR-15/16 FL MFI CD127 Foxp3+ CD4+  2039 Spleen       1816.4  1.1225501
# 8      170911 Spl_FL_631  miR-15/16 FL MFI CD127 Foxp3+ CD4+  1927 Spleen       1816.4  1.0608897
# 9      170911 Spl_FL_635  miR-15/16 FL MFI CD127 Foxp3+ CD4+  1611 Spleen       1816.4  0.8869192
# 10     170911 Spl_FL_638  miR-15/16 FL MFI CD127 Foxp3+ CD4+  1748 Spleen       1816.4  0.9623431
# 11     170911 Spl_cKO_19 miR-15/16 cKO MFI CD127 Foxp3- CD4+  4718 Spleen       3788.4  1.2453806
# 12     170911 Spl_cKO_21 miR-15/16 cKO MFI CD127 Foxp3- CD4+  5317 Spleen       3788.4  1.4034949
# 13     170911 Spl_cKO_29 miR-15/16 cKO MFI CD127 Foxp3- CD4+  5007 Spleen       3788.4  1.3216661
# 14     170911 Spl_cKO_37 miR-15/16 cKO MFI CD127 Foxp3- CD4+  5308 Spleen       3788.4  1.4011192
# 15     170911 Spl_cKO_39 miR-15/16 cKO MFI CD127 Foxp3- CD4+  4804 Spleen       3788.4  1.2680815
# 16     170911 Spl_FL_622  miR-15/16 FL MFI CD127 Foxp3- CD4+  4173 Spleen       3788.4  1.1015204
# 17     170911 Spl_FL_630  miR-15/16 FL MFI CD127 Foxp3- CD4+  3501 Spleen       3788.4  0.9241368
# 18     170911 Spl_FL_631  miR-15/16 FL MFI CD127 Foxp3- CD4+  4434 Spleen       3788.4  1.1704150
# 19     170911 Spl_FL_635  miR-15/16 FL MFI CD127 Foxp3- CD4+  3325 Spleen       3788.4  0.8776792
# 20     170911 Spl_FL_638  miR-15/16 FL MFI CD127 Foxp3- CD4+  3509 Spleen       3788.4  0.9262485
# 21     170911 Spl_cKO_19 miR-15/16 cKO        MFI CD127 CD8+  4987 Spleen       3487.2  1.4300872
# 22     170911 Spl_cKO_21 miR-15/16 cKO        MFI CD127 CD8+  5345 Spleen       3487.2  1.5327483
# 23     170911 Spl_cKO_29 miR-15/16 cKO        MFI CD127 CD8+  4744 Spleen       3487.2  1.3604038
# 24     170911 Spl_cKO_37 miR-15/16 cKO        MFI CD127 CD8+  5143 Spleen       3487.2  1.4748222
# 25     170911 Spl_cKO_39 miR-15/16 cKO        MFI CD127 CD8+  4409 Spleen       3487.2  1.2643382
# 26     170911 Spl_FL_622  miR-15/16 FL        MFI CD127 CD8+  3991 Spleen       3487.2  1.1444712
# 27     170911 Spl_FL_630  miR-15/16 FL        MFI CD127 CD8+  3357 Spleen       3487.2  0.9626635
# 28     170911 Spl_FL_631  miR-15/16 FL        MFI CD127 CD8+  3910 Spleen       3487.2  1.1212434
# 29     170911 Spl_FL_635  miR-15/16 FL        MFI CD127 CD8+  3085 Spleen       3487.2  0.8846639
# 30     170911 Spl_FL_638  miR-15/16 FL        MFI CD127 CD8+  3093 Spleen       3487.2  0.8869580

For example, for the variable MFI CD127 Foxp3- CD4+, the miR-15/16 FL values are 4173, 3501, 4434, 3325, and 3509, and the control mean shown above matches their mean.

# Sanity check: mean of the five "miR-15/16 FL" control values.
fl_values <- c(4173, 3501, 4434, 3325, 3509)
mean(fl_values)
# [1] 3788.4

I'm not sure if it's a grouping problem or not but the result here is not what I'm looking for. For a given tissue, say "Spleen", for a given variable, say "MFI CD127 Foxp3+ CD4+", for a given experiment, say "170911" I would like to take each value and divide it by the average of the "miR-15/16 FL" samples. This should result in essentially a batch normalization for each experiment so that when I pool data from multiple experiments, everything is relative to the "miR-15/16 FL" samples within each experiment. Hope that makes sense — John Gagnon, Dec 06 '17 at 20:53
Please see above for modifications/clarification to the question, thanks! — John Gagnon, Dec 06 '17 at 21:03
That is what my code does. Delete the last line where I deselect the `control_mean` column and you can verify that within each Experiment/Tissue/Variable grouping, the control mean is indeed the mean of the `"miR-15/16 FL"` values. — Gregor Thomas, Dec 06 '17 at 21:45
If you are having issues where `group_by` does not seem to work for you, please make sure that you have not loaded `plyr` after `dplyr` - [see this FAQ for details](https://stackoverflow.com/q/26106146/903061). — Gregor Thomas, Dec 06 '17 at 21:52
@JohnGagnon You can test `"mutate" %in% conflicts()` to see if you are affected. `FALSE` is good, `TRUE` means that `plyr::mutate` might be masking `dplyr::mutate`, and `plyr::mutate` doesn't know how to use grouped data. — Gregor Thomas, Dec 06 '17 at 21:58
Thank you, this worked! I just had to load `dplyr` after loading `ggplot2` and `deseq2` (which apparently both mask the `dplyr::mutate` as well). — John Gagnon, Dec 07 '17 at 16:50

Normalizing to average of control group within a data frame

1 Answer