Calculate average of rows which fulfill certain condition

Question

I am working with a dataframe which looks similar to this

Ind Pos Sample  Ct      LogConc     RelConc
1   B1  wt1A    26.93   -2.0247878  0.009445223
2   B2  wt1A    27.14   -2.0960951  0.008015026
3   B3  wt1B    26.76   -1.9670628  0.010787907
4   B4  wt1B    26.94   -2.0281834  0.009371662
5   B5  wt1C    26.01   -1.7123939  0.019391264
6   B6  wt1C    26.08   -1.7361630  0.018358492
7   B7  wt1D    25.68   -1.6003396  0.025099232
8   B8  wt1D    25.75   -1.6241087  0.023762457
9   B9  wt1E    22.11   -0.3881154  0.409151879
10  B10 wt1E    22.21   -0.4220713  0.378380453
11  B11 dko1A   22.20   -0.4186757  0.381350463
12  B12 dko1A   22.10   -0.3847199  0.412363423

My goal is to calculate the sample wise average of the RelConc, which would result in a dataframe which would look something like this.

Ind Pos Sample  Ct      LogConc     RelConc     AverageRelConc
1   B1  wt1A    26.93   -2.0247878  0.009445223 0.008730124
2   B2  wt1A    27.14   -2.0960951  0.008015026 0.008730124
3   B3  wt1B    26.76   -1.9670628  0.010787907 0.010079785
4   B4  wt1B    26.94   -2.0281834  0.009371662 0.010079785
5   B5  wt1C    26.01   -1.7123939  0.019391264 0.018874878
6   B6  wt1C    26.08   -1.7361630  0.018358492 0.018874878
7   B7  wt1D    25.68   -1.6003396  0.025099232 0.024430845
8   B8  wt1D    25.75   -1.6241087  0.023762457 0.024430845
9   B9  wt1E    22.11   -0.3881154  0.409151879 0.393766166
10  B10 wt1E    22.21   -0.4220713  0.378380453 0.393766166
11  B11 dko1A   22.20   -0.4186757  0.381350463 0.396856943
12  B12 dko1A   22.10   -0.3847199  0.412363423 0.396856943

I am fairly new to R and have no idea how to accomplish such a seemingly simple task. In python, I'd probably loop through each row and check if I have encountered a new sample name and then calculate the average for all samples above. However this seems not very "R like". If somebody could point me to a solution, I'd be very happy!

Cheers!

akrun · Accepted Answer · 2020-09-14T19:10:10.680

In base R, we can use ave and it is very fast

df1$AverageRelConc <- with(df1, ave(RelConc, Sample))

-output

df1$AverageRelConc
#[1] 0.008730125 0.008730125 0.010079784 0.010079784 0.018874878 0.018874878 0.024430844 0.024430844 0.393766166 0.393766166
#[11] 0.396856943 0.396856943

Or using tidyverse, we group by 'Sample' and get the mean of 'RelConc'

library(dplyr)
df1 %>%
  group_by(Sample) %>%
  mutate(AverageRelConc = mean(RelConc, na.rm = TRUE))

-output

# A tibble: 12 x 7
# Groups:   Sample [6]
#     Ind Pos   Sample    Ct LogConc RelConc AverageRelConc
#   <int> <chr> <chr>  <dbl>   <dbl>   <dbl>          <dbl>
# 1     1 B1    wt1A    26.9  -2.02  0.00945        0.00873
# 2     2 B2    wt1A    27.1  -2.10  0.00802        0.00873
# 3     3 B3    wt1B    26.8  -1.97  0.0108         0.0101 
# 4     4 B4    wt1B    26.9  -2.03  0.00937        0.0101 
# 5     5 B5    wt1C    26.0  -1.71  0.0194         0.0189 
# 6     6 B6    wt1C    26.1  -1.74  0.0184         0.0189 
# 7     7 B7    wt1D    25.7  -1.60  0.0251         0.0244 
# 8     8 B8    wt1D    25.8  -1.62  0.0238         0.0244 
# 9     9 B9    wt1E    22.1  -0.388 0.409          0.394  
#10    10 B10   wt1E    22.2  -0.422 0.378          0.394  
#11    11 B11   dko1A   22.2  -0.419 0.381          0.397  
#12    12 B12   dko1A   22.1  -0.385 0.412          0.397

data

df1 <- structure(list(Ind = 1:12, Pos = c("B1", "B2", "B3", "B4", "B5", 
"B6", "B7", "B8", "B9", "B10", "B11", "B12"), Sample = c("wt1A", 
"wt1A", "wt1B", "wt1B", "wt1C", "wt1C", "wt1D", "wt1D", "wt1E", 
"wt1E", "dko1A", "dko1A"), Ct = c(26.93, 27.14, 26.76, 26.94, 
26.01, 26.08, 25.68, 25.75, 22.11, 22.21, 22.2, 22.1), LogConc = c(-2.0247878, 
-2.0960951, -1.9670628, -2.0281834, -1.7123939, -1.736163, -1.6003396, 
-1.6241087, -0.3881154, -0.4220713, -0.4186757, -0.3847199), 
    RelConc = c(0.009445223, 0.008015026, 0.010787907, 0.009371662, 
    0.019391264, 0.018358492, 0.025099232, 0.023762457, 0.409151879, 
    0.378380453, 0.381350463, 0.412363423)), class = "data.frame",
    row.names = c(NA, 
-12L))

Duck · Answer 2 · 2020-09-14T19:08:57.473

Try this tidyverse option:

library(tidyverse)
#Code
df %>% group_by(Sample) %>%
  mutate(AvgRelConc=mean(RelConc,na.rm=T))

Output:

# A tibble: 12 x 7
# Groups:   Sample [6]
     Ind Pos   Sample    Ct LogConc RelConc AvgRelConc
   <int> <chr> <chr>  <dbl>   <dbl>   <dbl>      <dbl>
 1     1 B1    wt1A    26.9  -2.02  0.00945    0.00873
 2     2 B2    wt1A    27.1  -2.10  0.00802    0.00873
 3     3 B3    wt1B    26.8  -1.97  0.0108     0.0101 
 4     4 B4    wt1B    26.9  -2.03  0.00937    0.0101 
 5     5 B5    wt1C    26.0  -1.71  0.0194     0.0189 
 6     6 B6    wt1C    26.1  -1.74  0.0184     0.0189 
 7     7 B7    wt1D    25.7  -1.60  0.0251     0.0244 
 8     8 B8    wt1D    25.8  -1.62  0.0238     0.0244 
 9     9 B9    wt1E    22.1  -0.388 0.409      0.394  
10    10 B10   wt1E    22.2  -0.422 0.378      0.394  
11    11 B11   dko1A   22.2  -0.419 0.381      0.397  
12    12 B12   dko1A   22.1  -0.385 0.412      0.397

Some data used:

#Data
df <- structure(list(Ind = 1:12, Pos = c("B1", "B2", "B3", "B4", "B5", 
"B6", "B7", "B8", "B9", "B10", "B11", "B12"), Sample = c("wt1A", 
"wt1A", "wt1B", "wt1B", "wt1C", "wt1C", "wt1D", "wt1D", "wt1E", 
"wt1E", "dko1A", "dko1A"), Ct = c(26.93, 27.14, 26.76, 26.94, 
26.01, 26.08, 25.68, 25.75, 22.11, 22.21, 22.2, 22.1), LogConc = c(-2.0247878, 
-2.0960951, -1.9670628, -2.0281834, -1.7123939, -1.736163, -1.6003396, 
-1.6241087, -0.3881154, -0.4220713, -0.4186757, -0.3847199), 
    RelConc = c(0.009445223, 0.008015026, 0.010787907, 0.009371662, 
    0.019391264, 0.018358492, 0.025099232, 0.023762457, 0.409151879, 
    0.378380453, 0.381350463, 0.412363423)), class = "data.frame", row.names = c(NA, 
-12L))

Or you could use aggregate() and save the results in a different dataframe and after that you can join with original df:

#Compute means
dfmeans <- aggregate(RelConc~Sample,df,mean,na.rm=T)
#Now match
df$AvgRelConc <- dfmeans[match(df$Sample,dfmeans$Sample),"RelConc"]

Output:

   Ind Pos Sample    Ct    LogConc     RelConc  AvgRelConc
1    1  B1   wt1A 26.93 -2.0247878 0.009445223 0.008730125
2    2  B2   wt1A 27.14 -2.0960951 0.008015026 0.008730125
3    3  B3   wt1B 26.76 -1.9670628 0.010787907 0.010079784
4    4  B4   wt1B 26.94 -2.0281834 0.009371662 0.010079784
5    5  B5   wt1C 26.01 -1.7123939 0.019391264 0.018874878
6    6  B6   wt1C 26.08 -1.7361630 0.018358492 0.018874878
7    7  B7   wt1D 25.68 -1.6003396 0.025099232 0.024430844
8    8  B8   wt1D 25.75 -1.6241087 0.023762457 0.024430844
9    9  B9   wt1E 22.11 -0.3881154 0.409151879 0.393766166
10  10 B10   wt1E 22.21 -0.4220713 0.378380453 0.393766166
11  11 B11  dko1A 22.20 -0.4186757 0.381350463 0.396856943
12  12 B12  dko1A 22.10 -0.3847199 0.412363423 0.396856943

Calculate average of rows which fulfill certain condition

2 Answers2

data