2

Apologies if this is a repeat of a question but I haven't been able to find an answer to my question.

I have a dataframe of data for every mm from 0 to 475 mm. I want to create a new csv file that averages the data every 1 cm so creates an average 0-10mm 10-20mm etc.

I have seen something similar that might work using a vector but not a dataframe Get the average every 10 steps in a vector in R

Does anyone know how I would achieve this?

I saw someome say that dplyr could be used using something like:

n1 <- 10
iris %>% group_by(mean = (row_number() -1) %/% n1) %>%
  mutate(mean = mean(Sepal.Length))

or something like this may work if I can group by every 10 rows

diamonds %>%
    group_by(cut) %>%
    dplyr::summarize(Mean = mean(price, na.rm=TRUE))

I would need an average for each separate column, the data look like this:

structure(list(Depth = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 
0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 
2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3, 3.1, 3.2, 3.3, 
3.4, 3.5, 3.6, 3.7, 3.8, 3.9), AIncCoh = c(6.049230907, 5.975282432, 
5.736199822, 5.658584418, 5.659008377, 5.597103404, 5.479694824, 
5.392676342, 5.766141707, 5.587877013, 5.923228066, 6.083677805, 
6.370731845, 6.299689957, 6.540310037, 6.76014416, 6.64052669, 
6.722735675, 6.538929311, 6.600849788, 6.537525808, 6.472332356, 
6.451542164, 6.314029783, 6.326980578, 6.427467369, 6.386539147, 
6.324748515, 6.174247704, 6.115741782, 5.940887624, 5.926674295, 
6.011768504, 6.259105537, 6.293339228, 6.507754408, 6.568542262, 
6.332767248, 6.323517, 6.45046208), BFeIncCoh = c(0.533952007, 
1.04123504, 1.575552754, 1.887597729, 2.467136771, 2.507664907, 
3.152474986, 3.59568144, 1.619560271, 1.446736541, 0.998453458, 
0.708287575, 0.75013817, 0.716823288, 0.446906747, 0.276885553, 
0.318712042, 0.412030825, 0.661614436, 1.139646108, 1.078190702, 
1.442860329, 1.18398141, 1.344180259, 1.410589788, 1.393485583, 
1.542221239, 1.708359187, 2.021297356, 2.181852255, 1.876645293, 
1.759113298, 1.785977674, 1.76244417, 1.824628328, 1.343339815, 
1.227320351, 2.396835375, 2.067314222, 1.449669028), CAlinccoh = c(0.000640664, 
0.000266313, 0.000720408, 0.000615829, 0.000874697, 0.000796354, 
0.001251353, 0.001013647, 0.000554639, 0.000591174, 0.00044422, 
0.000299878, 0.000260988, 0.000235121, 0.000192523, 0.00041571, 
0.000288884, 4.38245e-05, 0.000584711, 0.000595545, 0.000581137, 
0.000465238, 0.000485389, 0.000468044, 0.000594975, 0.00070524, 
0.000394065, 0.000951412, 0.000870142, 0.000492213, 0.000693941, 
0.000599945, 0.000782692, 0.000551049, 0.000553618, 0.000804453, 
0.000682427, 0.000835408, 0.000654415, 0.00066155), DPinccoh = c(0.000171267, 
7.06545e-05, 0.000397284, 0.000535504, 0.000458946, 0.000640312, 
0.000649436, 0.000834147, 0.000554639, 0.000360038, 0.000374296, 
0.000360664, 0.000184227, 0.00021616, 4.12548e-05, 0.000162387, 
0.00019591, 6.068e-05, 0.000147939, 0.000412862, 0.000447883, 
0.000592121, 0.000347798, 0.000471945, 0.000480557, 0.00034684, 
0.000365917, 0.000409521, 0.000439295, 0.000654778, 0.00034697, 
0.000495607, 0.000422737, 0.000472327, 0.00044047, 0.000238506, 
0.000421499, 0.000558317, 0.000526825, 0.000610359), ECaTi = c(4.071146245, 
3.27955665, 4.395989975, 3.677383592, 3.028670721, 4.523910733, 
3.623082055, 2.428717425, 1.143992933, 1.000783699, 1.167405765, 
1.489803485, 2.019990249, 2.510752688, 2.588560886, 3.398337113, 
4.051594747, 3.648189209, 3.354673496, 3.057881773, 3.153846154, 
3.076456311, 2.402086297, 2.205960854, 2.918837068, 3.052762259, 
2.853471843, 2.970514429, 3.306431274, 3.085731415, 2.150507849, 
1.643615075, 1.171841542, 1.394634777, 1.652872777, 1.751993923, 
1.946314444, 2.140757238, 2.838143036, 3.455563331), FBrCl = c(8.04090623, 
4.306048968, 3.417836951, 3.156895904, 2.787628518, 2.059316731, 
1.665443728, 1.979686894, 1.919284294, 2.106621773, 1.984810127, 
2.381081081, 2.495918818, 2.934549961, 3.223319734, 3.661795179, 
3.680280436, 3.326894659, 2.572694701, 2.510810003, 2.61608562, 
2.485975483, 2.623981753, 2.615200819, 2.753870857, 2.911768285, 
2.810209577, 2.989661774, 2.648906468, 2.478130062, 2.772837613, 
2.87838731, 2.794758117, 2.785974974, 2.860625462, 2.834103685, 
2.685483871, 2.72094811, 2.823782043, 2.672785909), GSrCa = c(1.476699029, 
0.697333834, 0.531927024, 0.581549593, 0.584337349, 0.471223867, 
0.397348555, 0.463917526, 0.620592021, 0.668493866, 0.621082621, 
0.520657043, 0.495293266, 0.588865096, 0.523639819, 0.544261566, 
0.559851818, 0.399513776, 0.396946565, 0.373942811, 0.429853659, 
0.382840237, 0.418278721, 0.496672716, 0.438887736, 0.44448963, 
0.445296034, 0.509820486, 0.39092296, 0.365067029, 0.414126234, 
0.485529831, 0.475788031, 0.527462341, 0.38278502, 0.422935183, 
0.469789842, 0.460466084, 0.484084881, 0.434993925), HSiinccoh = c(0.000450368, 
0.000244573, 0.00075219, 0.000583699, 0.00068032, 0.000925492, 
0.000855355, 0.001963942, 0.001483778, 0.001729073, 0.001468393, 
0.001138726, 0.001093848, 0.000576426, 0.000347228, 0.000334517, 
0.000398461, 0.000249462, 0.000715038, 0.000694193, 0.000544122, 
0.000849732, 0.00089816, 0.000924387, 0.000690323, 0.000971151, 
0.000607182, 0.000959685, 0.000996862, 0.001219242, 0.000896715, 
0.0010999, 0.0015319, 0.001466701, 0.001410312, 0.001443165, 
0.00088314, 0.001401997, 0.000773774, 0.000728493), IMnFe = c(0.012176723, 
0.010329834, 0.009460859, 0.004488071, 0.0033725, 0.003435313, 
0.002524022, 0.001789815, 0.002523102, 0.002427177, 0.004737443, 
0.006345047, 0.008130081, 0.00597814, 0.003815561, 0.005911677, 
0.004938375, 0.006038094, 0.00566993, 0.005562324, 0.005153047, 
0.004540827, 0.004835627, 0.007999907, 0.009290228, 0.009745845, 
0.007576883, 0.006542547, 0.007564866, 0.01107687, 0.00868016, 
0.004918037, 0.004108234, 0.004358456, 0.003138233, 0.00340349, 
0.002135809, 0.002106807, 0.002305464, 0.005688023), JFeTi = c(332.715415, 
235.9371921, 372.726817, 390.7871397, 396.986099, 495.2624867, 
398.3075384, 348.0204394, 100.6015901, 85.02638454, 67.28021064, 
64.80608083, 95.29351536, 112.916368, 79.94710947, 64.44066515, 
90.04033771, 90.33555063, 120.2516005, 192.0689655, 179.2516923, 
227.7075243, 146.8866761, 153.3047153, 224.0163537, 224.4506518, 
209.6965555, 259.0897114, 301.7200504, 289.6696643, 192.2746999, 
148.0545188, 114.2141328, 137.4854557, 154.4213406, 126.2081276, 
130.2680017, 258.1514477, 315.1097867, 257.6221134), KZrRb = c(1.305111821, 
4.6367713, 1.986547085, 1.775510204, 5.387755102, 2.099778271, 
6.847826087, 1.760157274, 0.982819606, 1.037322515, 1.015357613, 
1.377717391, 2.261603376, 2.883275261, 3.748267898, 3.688931298, 
8.160550459, 4.390957447, 3.878865979, 2.159613059, 5.284023669, 
2.704166667, 1.433843384, 1.265707797, 1.814081408, 2.007683864, 
1.929152149, 2.484520124, 2.402555911, 1.70468948, 1.53030303, 
1.596969697, 1.258212878, 1.092152628, 1.212525667, 3.411627907, 
2.554, 1.91858679, 3.013186813, 8.751879699), Age = c(NA, NA, 
NA, NA, NA, -68, NA, NA, NA, NA, NA, NA, NA, NA, NA, -64.5, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, -63, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, -59, NA, NA, NA, NA)), row.names = c(NA, 40L), class = "data.frame")
```
Limey
  • 10,234
  • 2
  • 12
  • 32
Sophie Williams
  • 338
  • 1
  • 3
  • 14

1 Answers1

3

Hi I do not know the meaning of your columns but in principle you can do like this

  • Create a group variable with values 1,2,.. cm

  • group by that variable and compute the mean of the target columns

In my code below I suppose Depth is the variable measured in cm (it goes from 0.0 to 3.9 with step 0.1). The groups based on Depth can be obtained just rounding the values of this column. I will use data.table in my attempt (the code below computes the mean for all the columns...you can specify a single one in dd[, lapply(.SD, mean), by = grp]).

library(data.table)
dd = data.table(data) # data is your data.frame
dd[, grp := floor(Depth)]
dd_avg = dd[, lapply(.SD, mean), by = grp]
print(dd_avg)

   grp Depth  AIncCoh BFeIncCoh    CAlinccoh     DPinccoh    ECaTi    FBrCl
1:   0  0.45 5.690180 1.9827592 0.0007325078 0.0004672228 3.117323 3.143967
2:   1  1.45 6.448082 0.6429498 0.0003361405 0.0002156380 2.728719 2.877215
3:   2  2.45 6.353116 1.5307018 0.0006007855 0.0004556655 2.902610 2.693379
4:   3  3.45 6.261482 1.7493288 0.0006819498 0.0004533617 2.014624 2.782969
       GSrCa    HSiinccoh       IMnFe     JFeTi    KZrRb Age
1: 0.6493423 0.0009668790 0.005252742 315.63711 2.781960  NA
2: 0.5024054 0.0007016292 0.005712667  97.73804 3.356514  NA
3: 0.4322129 0.0008660846 0.007432665 221.57936 2.303042  NA
4: 0.4557961 0.0011636097 0.004084271 183.38096 2.633945  NA

This is just an example because I do not know the precise details of your problem. I think you can easily adapt the 'logic' to your problem.

Gi_F.
  • 911
  • 6
  • 7
  • Hi thanks,the data columns (aside from depth) are all geochemical data down a sediment core. When I tried this on my data it does group them correctly but does not give me the average of the group, it just adds a column with the group number that they all belong to. Have I missed something? – Sophie Williams Mar 26 '21 at 15:54
  • 1
    The averages by group for each column are in the dd_avg data.frame (I edited a bit my previous code to assign a name to the result of the aggregation). I hope I understood your question – Gi_F. Mar 26 '21 at 16:12