0

I want to calculate daily means of all variables in my dataframe which includes NA values. All my databases have a value every 30min, so I´m very interested in using the timestamp with aggregate function to obtain daily, weekly, monthly... aggregated data. My dataframe is 37795 rows x 54 variables. I´ve tried two ways to do that, first option does not give me daily means cause I obtained too high values (not logical). Second option gives me almost all NA values. I do not what to do.

I write my dataframe head and code below.

head(data)
timestamp day month year.x hour minute   doy.x  rn_1_1_1 ppfd_1_1_1
1 2013-07-06 00:00:00   6     7   2013    0      0 187.000 -84.37381    0.754
2 2013-07-06 00:30:00   6     7   2013    0     30 187.020 -84.07990      0.808
3 2013-07-06 01:00:00   6     7   2013    1      0 187.041 -82.19991      0.808
4 2013-07-06 01:30:00   6     7   2013    1     30 187.062 -81.12341      0.831
5 2013-07-06 02:00:00   6     7   2013    2      0 187.083 -79.57474      0.708
6 2013-07-06 02:30:00   6     7   2013    2     30 187.104 -77.72460      0.639
ppfdr_1_1_1 p_rain_1_1_1  swc_1_1_1  swc_2_1_1   swc_3_1_1 air_pressure air_pressure.1
1       0.624            0 0.07230304 0.09577876 0.134602791  101212.4165    1012.124165
2       0.587            0 0.07233134 0.09569421 0.134479816  101181.8094    1011.818094
3       0.713            0 0.07242914 0.09566160 0.134203719  101166.0948    1011.660948
4        0.72            0 0.07252077 0.09563419 0.134149141  101144.6151    1011.446151
5       0.564            0 0.07261925 0.09560297 0.134095791  101144.8662    1011.448662
6       0.706            0 0.07271843 0.09557789 0.134037119  101144.5084    1011.445084
u_rot    v_rot     w_rot  wind_speed          u. h_scr_qc01_man
1 5.546047919 1.42E-14  4.76E-16 5.546047919 0.426515403   -28.07603618
2 5.122724997 6.94E-15 -8.00E-16 5.122724997 0.408213459   -34.39110979
3 5.248639421 4.56E-15  7.28E-17 5.248639421 0.393959075   -33.29033501
4 4.845257286 2.81E-14 -1.33E-17 4.845257286 0.365475898   -32.62427147
5 4.486426895 1.39E-14 -4.43E-16 4.486426895 0.335905384   -33.80219189
6 4.109603841 7.08E-15 -9.76E-16 4.109603841 0.312610588   -35.77289349
fco2_scr_qc01_man le_scr_qc01_man fco2_scr_qc0 fco2_scr_qc0_man      date year.y time
1      -0.306504951              NA           NA               NA 06-jul-13       2013 0:00
2      -0.206266524              NA -0.206266524     -0.206266524 06-jul-13   2013 0:30
3      -0.268508139              NA -0.268508139     -0.268508139 06-jul-13   2013 1:00
4      -0.203804516     0.426531598 -0.203804516     -0.203804516 06-jul-13   2013 1:30
5      -0.217438742    -0.358248118 -0.217438742     -0.217438742 06-jul-13   2013 2:00
6      -0.193778528     2.571063044 -0.193778528     -0.193778528 06-jul-13   2013 2:30
  doy_ent    doy.y  doy_cum doy_cum_ent mes nrecord   bat panel_temp    vwc_0.1
1     187 187.0000 187.0000         187   7      24 12.57      22.93 0.06284828
2     187 187.0208 187.0208         187   7      25 12.56      22.85 0.06267169
3     187 187.0417 187.0417         187   7      26 12.55      22.58 0.06261738
4     187 187.0625 187.0625         187   7      27 12.54       22.3 0.06247716
5     187 187.0833 187.0833         187   7      28 12.53      22.01 0.06249525
6     187 187.1042 187.1042         187   7      29 12.52      21.82 0.06236862
     vwc_0.5   vwc_1.5 temp_0.1 temp_0.5 temp_1.5 tempsd_0.1 tempsd_0.5 tempsd_1.5
1 0.07569027 0.1007845     30.9    28.96    25.14      0.372      0.961      0.767
2 0.07569027 0.1007743     30.8    28.85    24.99      0.181      1.361      1.087
3 0.07568554 0.1008558    30.53     28.8    25.03       0.98      1.476      0.351
4 0.07559577 0.1008507    30.52    29.09    25.11      0.186      0.229      0.556
5 0.07559577 0.1007743    30.11    29.09    24.87      1.331      0.191      0.954
6 0.07556271 0.1007285    30.15    29.33    25.04      1.447      1.078        0.2
      pair pair_avg     CO2_0.1     CO2_0.5     CO2_1.5     DCO2_0.1    DCO2_0.5
1 101.2124 101.2118 1161.592832   3275.1134 4888.231603 -24.67422109 34.88538221
2 101.1818 101.2131 1168.144925  3338.24016 4941.418642   6.55209301 63.12675931
3 101.1661 101.2090 1201.049131 3435.235974 5012.525851  32.90420541  96.9958144
4 101.1446 101.2007 1268.613941 3556.723878  5092.96558  67.56481067 121.4879035
5 101.1449 101.1906 1364.315214 3680.188043 5164.795759   95.7012722  123.464165
6 101.1445 101.1805 1472.975286 3808.988677  5236.40855  108.6600723 128.8006346
     DCO2_1.5
1 31.30293041
2 53.18703947
3 71.10720845
4 80.43972916
5 71.83017884
6 71.61279156

    ## Daily avg - OPTION 1
    data$timestamp <- as.POSIXct(data$timestamp, format = "%d/%m/%Y %H:%M",tz ="GMT")
    > dates <- format(data$timestamp,"%Y/%m/%d",tz = "GMT")
    > datadates <- cbind(data,dates)
    > dailydata_avg <- aggregate(. ~ dates, datadates, FUN=mean, na.rm=TRUE, na.action = "na.pass")              

head(dailydata_avg)
dates  timestamp day month year.x hour minute   doy.x rn_1_1_1 ppfd_1_1_1
1 2013/07/06 1373111100   6     7   2013 11.5     15 187.489 159.7788   3580.562
2 2013/07/07 1373197500   7     7   2013 11.5     15 188.489 154.0925   3506.688
3 2013/07/08 1373283900   8     7   2013 11.5     15 189.489 152.5259   3460.667
4 2013/07/09 1373370300   9     7   2013 11.5     15 190.489 131.1619   2965.250
5 2013/07/10 1373456700  10     7   2013 11.5     15 191.489 136.7853   3171.958
6 2013/07/11 1373543100  11     7   2013 11.5     15 192.489 145.2757   3282.167
  ppfdr_1_1_1 p_rain_1_1_1  swc_1_1_1  swc_2_1_1 swc_3_1_1 air_pressure air_pressure.1
1    2552.396       1.0000 0.07095847 0.09606378  18341.81    25940.167      25940.167
2    2532.542       1.0000 0.06994341 0.09502167  18065.98    24891.000      24891.000
3    2523.562       1.0000 0.06860553 0.09379282  17777.02    23107.271      23107.271
4    2336.000       1.0000 0.06717054 0.09268716  17526.50    19309.500      19309.500
5    2607.229       1.0625 0.06620048 0.09166904  17275.56     8385.646       8385.646
6    2484.521       1.0000 0.06562964 0.09083684  17028.94     3535.438       3535.438
     u_rot    v_rot    w_rot wind_speed       u. h_scr_qc01_man fco2_scr_qc01_man
1 32167.83 2215.875 2041.354   32167.83 28531.44       18197.75          15365.65
2 30878.27 1911.312 1939.917   30878.27 26929.62       17605.52          14955.56
3 26052.96 2261.417 2116.458   26052.96 23305.83       19167.98          18399.33
4 17284.04 1987.438 2139.083   17284.04 17704.35       20349.92          18137.65
5 12028.06 2053.812 1960.417   12028.06 15670.00       21997.83          21120.19
6 15607.50 1997.417 1907.646   15607.50 15384.56       18000.94          18810.62
  le_scr_qc01_man fco2_scr_qc0 fco2_scr_qc0_man date year.y time doy_ent    doy.y
1        17409.67     13032.10         13027.90  137   2013 44.5     187 187.4896
2        15524.38     12077.17         12072.92  163   2013 44.5     188 188.4896
3        16407.71     14775.94         14770.56  189   2013 44.5     189 189.4896
4        16788.04     15024.79         15019.02  215   2013 44.5     190 190.4896
5        17955.58     17737.25         17730.75  241   2013 44.5     191 191.4896
6        14610.02     16605.48         16599.33  267   2013 44.5     192 192.4896
   doy_cum doy_cum_ent mes   nrecord      bat panel_temp  vwc_0.1  vwc_0.5   vwc_1.5
1 187.4896       187.5   7 28966.375 111.5208   1836.250 4638.833 4594.396  37.35417
2 188.4896       188.5   7 20801.417 111.7292   1900.812 4656.875 4392.979  26.68750
3 189.4896       189.5   7  4394.500 110.6042   1934.792 4675.604 4238.229  65.20833
4 190.4896       190.5   7  9467.708 104.0000   2090.896 4776.521 4178.729  54.12500
5 191.4896       191.5   7 14796.375 109.7500   2145.875 4907.292 4161.312 108.39583
6 192.4896       192.5   7 20127.958 109.3125   1934.375 4876.021 4123.458 143.10417
  temp_0.1 temp_0.5 temp_1.5 tempsd_0.1 tempsd_0.5 tempsd_1.5     pair pair_avg  CO2_0.1
1 2018.438 1565.812 797.8750   470.8125   474.3958   508.8333 101.1268 101.1323 10400.27
2 1998.438 1574.000 783.1875   478.3333   460.4583   566.0208 101.0764 101.0789 11292.75
3 1994.833 1568.104 780.2083   463.8125   453.1667   488.5625 100.9967 101.0036 13288.25
4 2042.625 1564.875 780.1667   465.0000   599.2708   437.6042 100.8520 100.8665 16156.60
5 2114.708 1576.729 780.5000   471.5833   406.5417   484.6875 100.4828 100.5169 18656.50
6 2124.604 1591.125 781.8125   516.7500   530.3333   510.7500 100.3025 100.2947 14586.60
   CO2_0.5  CO2_1.5 DCO2_0.1 DCO2_0.5 DCO2_1.5
1 26360.38 34371.31 19795.81 20637.94 27123.92
2 26939.60 34558.17 18838.38 20464.56 20452.58
3 27603.06 34608.31 17413.15 19998.02 22754.85
4 28572.69 34678.38 19294.62 21894.92 18379.62
5 28983.29 34644.15 20251.17 20409.58 22077.40
6 28236.12 34736.67 17031.02 18852.04 19684.69`

    ## Daily avg - OPTION 2
    data$timestamp <- as.POSIXct(data$timestamp, format = "%d/%m/%Y %H:%M",tz     ="GMT")
    datatime <- data$timestamp
    dailydata_avg <- aggregate( data, 
                          by = list('DATES'= format(datatime,'%Y%m%d' )), 
                          FUN = mean, na.rm=T)

I obtain this console message: 

1: In mean.default(X[[i]], ...) :
  argument is not numeric or logical: returning NA
2: In mean.default(X[[i]], ...) :
  argument is not numeric or logical: returning NA
3: In mean.default(X[[i]], ...) :
  argument is not numeric or logical: returning NA

head(dailydata_avg)
     DATES           timestamp day month year.x hour minute   doy.x rn_1_1_1 ppfd_1_1_1
1 20130706 2013-07-06 13:45:00   6     7   2013 11.5     15 187.489 159.7788         NA
2 20130707 2013-07-07 13:45:00   7     7   2013 11.5     15 188.489 154.0925         NA
3 20130708 2013-07-08 13:45:00   8     7   2013 11.5     15 189.489 152.5259         NA
4 20130709 2013-07-09 13:45:00   9     7   2013 11.5     15 190.489 131.1619         NA
5 20130710 2013-07-10 13:45:00  10     7   2013 11.5     15 191.489 136.7853         NA
6 20130711 2013-07-11 13:45:00  11     7   2013 11.5     15 192.489 145.2757         NA
  ppfdr_1_1_1 p_rain_1_1_1  swc_1_1_1  swc_2_1_1 swc_3_1_1 air_pressure air_pressure.1
1          NA           NA 0.07095847 0.09606378        NA           NA             NA
2          NA           NA 0.06994341 0.09502167        NA           NA             NA
3          NA           NA 0.06860553 0.09379282        NA           NA             NA
4          NA           NA 0.06717054 0.09268716        NA           NA             NA
5          NA           NA 0.06620048 0.09166904        NA           NA             NA
6          NA           NA 0.06562964 0.09083684        NA           NA             NA
  u_rot v_rot w_rot wind_speed u. h_scr_qc01_man fco2_scr_qc01_man le_scr_qc01_man
1    NA    NA    NA         NA NA             NA                NA              NA
2    NA    NA    NA         NA NA             NA                NA              NA
3    NA    NA    NA         NA NA             NA                NA              NA
4    NA    NA    NA         NA NA             NA                NA              NA
5    NA    NA    NA         NA NA             NA                NA              NA
6    NA    NA    NA         NA NA             NA                NA              NA
  fco2_scr_qc0 fco2_scr_qc0_man date year.y time doy_ent    doy.y  doy_cum doy_cum_ent
1           NA               NA   NA   2013   NA     187 187.4896 187.4896       187.5
2           NA               NA   NA   2013   NA     188 188.4896 188.4896       188.5
3           NA               NA   NA   2013   NA     189 189.4896 189.4896       189.5
4           NA               NA   NA   2013   NA     190 190.4896 190.4896       190.5
5           NA               NA   NA   2013   NA     191 191.4896 191.4896       191.5
6           NA               NA   NA   2013   NA     192 192.4896 192.4896       192.5
  mes nrecord bat panel_temp vwc_0.1 vwc_0.5 vwc_1.5 temp_0.1 temp_0.5 temp_1.5
1   7      NA  NA         NA      NA      NA      NA       NA       NA       NA
2   7      NA  NA         NA      NA      NA      NA       NA       NA       NA
3   7      NA  NA         NA      NA      NA      NA       NA       NA       NA
4   7      NA  NA         NA      NA      NA      NA       NA       NA       NA
5   7      NA  NA         NA      NA      NA      NA       NA       NA       NA
6   7      NA  NA         NA      NA      NA      NA       NA       NA       NA
  tempsd_0.1 tempsd_0.5 tempsd_1.5     pair pair_avg CO2_0.1 CO2_0.5 CO2_1.5 DCO2_0.1
1         NA         NA         NA 101.1268 101.1323      NA      NA      NA       NA
2         NA         NA         NA 101.0764 101.0789      NA      NA      NA       NA
3         NA         NA         NA 100.9967 101.0036      NA      NA      NA       NA
4         NA         NA         NA 100.8520 100.8665      NA      NA      NA       NA
5         NA         NA         NA 100.4828 100.5169      NA      NA      NA       NA
6         NA         NA         NA 100.3025 100.2947      NA      NA      NA       NA
  DCO2_0.5 DCO2_1.5
1       NA       NA
2       NA       NA
3       NA       NA
4       NA       NA
5       NA       NA
6       NA       NA

How could I do it?

Thanks!!

Jaap
  • 81,064
  • 34
  • 182
  • 193
Tecla
  • 1
  • 1
  • 1
    Welcome to SO and thank you for posting. Please format your code correctly.You should be using four spaces and not the backtick for your code blocks. Also, can you reduce your example more? (see [here](http://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example)) – Richard Erickson Dec 01 '15 at 17:34
  • Have you checked any of the daily averages manually? I think your first method appears to be working. The second method won't work because `aggregate()` is not expecting a dataframe as its first argument. – Sam Dickson Dec 01 '15 at 18:53

1 Answers1

0

I didn't use the aggregate function, I used the tapply one.

This is the code, that deals with NA's, I came up with:

# create a sequence of DateTime with half-hourly data
DateTime <- seq.POSIXt(from = as.POSIXct("2015-05-01 00:00:00", tz = "Etc/GMT+12"), 
                       to = as.POSIXct("2015-05-30 23:59:00", tz = "Etc/GMT+12"), by = 1800)

# create some dummy data of the same length as DateTime vector
aa <- runif(1440, 5.0, 7.5) 
bb <- NA
df <- data.frame(DateTime, aa, bb)

# replace a cell with NA in the "a" column
df[19,2] <- NA # dataframe = df, row = 19, column = 2

# create DateHour column to use later
df$DateHour <- paste(format(df$DateTime, "%Y/%m/%d"), format(df$DateTime, "%H"), sep = " ")
View(df)

# Hourly means
# Calculate hourly mean values
aa.HourlyMean <- tapply(df$aa, df$DateHour, mean, na.rm = TRUE)
# convert the vector to dataframe
aa.HourlyMean <- data.frame(aa.HourlyMean) 

# Extract the DateHour column from the "aa" dataframe
aa.HourlyMean$DateHour <- row.names(aa.HourlyMean); 
# Delete rownames of "aa" dataframe
row.names(aa.HourlyMean) <- NULL

# Create a tidy DateTime column
aa.HourlyMean$DateTime <- as.POSIXct(aa.HourlyMean$DateHour, "%Y/%m/%d %H", tz = "Etc/GMT+12")

# change to a tidy dataframe
aa.HourlyMean <- aa.HourlyMean[,c(3,2,1)]

# You can delete any column (for example, DateHour) by
# aa.HourlyMean$Date <- NULL

# You can rename a column with "plyr" package by
# rename(aa.HourlyMean)[3] <- "NewColumnName"

# View the hourly mean of the "aa" dataframe
View(aa.HourlyMean)

# You can do the same with the "bb" vector
bb.HourlyMean <- tapply(df$bb, df$DateHour, mean, na.rm = TRUE)
bb.HourlyMean <- data.frame(bb.HourlyMean)

# View the hourly mean of the "bb" vector
View(bb.HourlyMean) 

# /Hourly means

You then can combine in one dataframe the aa.HourlyMean and bb.HourlyMean vectors.

# Daily means
df$Date <- format(df$DateTime, "%Y/%m/%d")
aa.DailyMean <- tapply(df$aa, df$Days, mean, na.rm = TRUE)
aa.DailyMean <- data.frame(aa.DailyMean)
aa.DailyMean$Date <- row.names(aa.DailyMean); row.names(aa.DailyMean) <- NULL
aa.DailyMean <- aa.DailyMean[,c(2,1)]

View(aa.DailyMean)
# /Daily means

# Weekly means
df$YearWeek <- paste(format(df$DateTime, "%Y"), strftime(DateTime, format = "%W"), sep = " ")
aa.WeeklyMean <- tapply(df$aa, df$YearWeek, mean, na.rm = TRUE)
aa.WeeklyMean <- data.frame(aa.WeeklyMean)
aa.WeeklyMean$YearWeek <- row.names(aa.WeeklyMean); row.names(aa.WeeklyMean) <- NULL
aa.WeeklyMean <- aa.WeeklyMean[,c(2,1)]

View(aa.WeeklyMean)
# /Weekly means

I created the mean values for hourly, daily and weekly observations but you get the idea how to create the monthly, yearly, ... ones.

nick
  • 1,090
  • 1
  • 11
  • 24