0

I have a dataframe with varying time steps, which I want to convert into even time steps. Every 10 minutes a value should be written and if there is no new value, the previous one should be taken (see 2019-01-01 01:00:00 and 2019-01-01 02:30:00).

                date        ZUL_T
1   2019-01-01 00:04:00     23.3
2   2019-01-01 00:15:00     23.3
3   2019-01-01 00:26:00     19.9
4   2019-01-01 00:37:00     20.7
5   2019-01-01 00:48:00     21.9
6   2019-01-01 00:59:00     21.9
7   2019-01-01 01:10:00     18.8
8   2019-01-01 01:22:00     18.8
9   2019-01-01 01:33:00     20.7
10  2019-01-01 01:44:00     21.6
11  2019-01-01 01:55:00     19.2
12  2019-01-01 02:06:00     19.2
13  2019-01-01 02:17:00     19.6
14  2019-01-01 02:29:00     19.6
15  2019-01-01 02:40:00     20.5

This is my current code, but there are some time steps missing if there is no value in the DS.

library(lubridate)

lowtime <- min(DS$date)
hightime <- max(DS$date)

# Set the minute and second to the nearest 10 minute value
minute(lowtime) <- floor(minute(lowtime)/10) * 10
minute(hightime) <- ceiling(minute(hightime)/10) * 10
second(lowtime) <- 0
second(hightime) <- 0

# Set the breakpoints at 10 minute intervals
breakpoints <- seq.POSIXt(lowtime, hightime, by = 600)
ZUL_T <- aggregate(ZUL_T ~ cut(date, breaks = breakpoints), DS, mean)


> data
                        date                       ZUL_T
1                       2019-01-01 00:00:00        23.3
2                       2019-01-01 00:10:00        23.3
3                       2019-01-01 00:20:00        19.9
4                       2019-01-01 00:30:00        20.7
5                       2019-01-01 00:40:00        21.9
6                       2019-01-01 00:50:00        21.9
7                       2019-01-01 01:10:00        18.8
8                       2019-01-01 01:20:00        18.8
9                       2019-01-01 01:30:00        20.7
10                      2019-01-01 01:40:00        21.6
11                      2019-01-01 01:50:00        19.2
12                      2019-01-01 02:00:00        19.2
13                      2019-01-01 02:10:00        19.6
14                      2019-01-01 02:20:00        19.6
15                      2019-01-01 02:40:00        20.5
jay.sf
  • 60,139
  • 8
  • 53
  • 110
Annika
  • 35
  • 3

2 Answers2

0

You could merge with the breakpoints as data frame.

# first, you probably need 10 min later in time 
minute(hightime) <- ceiling((minute(max(DS$date)) + 10)/10) * 10
breakpoints <- seq.POSIXt(lowtime, hightime, by=600)

Use aggregate in classic list notation to get proper names.

ZUL_T <- aggregate(list(ZUL_T=DS$ZUL_T), list(date=cut(DS$date, breaks=breakpoints)), mean)

Now merge,

ZUL_T <- merge(transform(ZUL_T, date=as.character(date)), 
               data.frame(date=as.character(breakpoints[-length(breakpoints)]), 
       stringsAsFactors=F), 
               all=TRUE)

and replace NA values wit values - 1.

ZUL_T$ZUL_T[is.na(ZUL_T$ZUL_T)] <- ZUL_T$ZUL_T[which(is.na(ZUL_T$ZUL_T)) - 1]
ZUL_T
#                   date ZUL_T
# 1  2019-01-01 00:00:00  23.3
# 2  2019-01-01 00:10:00  23.3
# 3  2019-01-01 00:20:00  19.9
# 4  2019-01-01 00:30:00  20.7
# 5  2019-01-01 00:40:00  21.9
# 6  2019-01-01 00:50:00  21.9
# 7  2019-01-01 01:00:00  21.9
# 8  2019-01-01 01:10:00  18.8
# 9  2019-01-01 01:20:00  18.8
# 10 2019-01-01 01:30:00  20.7
# 11 2019-01-01 01:40:00  21.6
# 12 2019-01-01 01:50:00  19.2
# 13 2019-01-01 02:00:00  19.2
# 14 2019-01-01 02:10:00  19.6
# 15 2019-01-01 02:20:00  19.6
# 16 2019-01-01 02:30:00  19.6
# 17 2019-01-01 02:40:00  20.5
jay.sf
  • 60,139
  • 8
  • 53
  • 110
0

We can use floor_date from lubridate package to cut time every 10 mins and take a lower bound, group by it and sum ZUL_T values.

library(dplyr)
library(lubridate)
library(tidyr)

df %>%
  group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
  summarise(ZUL_T = sum(ZUL_T)) 


#   date                ZUL_T
#   <dttm>              <dbl>
# 1 2019-01-01 00:00:00  23.3
# 2 2019-01-01 00:10:00  23.3
# 3 2019-01-01 00:20:00  19.9
# 4 2019-01-01 00:30:00  20.7
# 5 2019-01-01 00:40:00  21.9
# 6 2019-01-01 00:50:00  21.9
# 7 2019-01-01 01:10:00  18.8
# 8 2019-01-01 01:20:00  18.8
# 9 2019-01-01 01:30:00  20.7
#10 2019-01-01 01:40:00  21.6
#11 2019-01-01 01:50:00  19.2
#12 2019-01-01 02:00:00  19.2
#13 2019-01-01 02:10:00  19.6
#14 2019-01-01 02:20:00  19.6
#15 2019-01-01 02:40:00  20.5

and then use complete and fill to complete the missing combinations and fill the NA values with previous values.

df %>%
 group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
 summarise(ZUL_T = sum(ZUL_T)) 
 complete(date = seq(min(date), max(date), "10 mins")) %>%
 fill(ZUL_T)


#   date                ZUL_T
#   <dttm>              <dbl>
# 1 2019-01-01 00:00:00  23.3
# 2 2019-01-01 00:10:00  23.3
# 3 2019-01-01 00:20:00  19.9
# 4 2019-01-01 00:30:00  20.7
# 5 2019-01-01 00:40:00  21.9
# 6 2019-01-01 00:50:00  21.9
# 7 2019-01-01 01:00:00  21.9
# 8 2019-01-01 01:10:00  18.8
# 9 2019-01-01 01:20:00  18.8
#10 2019-01-01 01:30:00  20.7
#11 2019-01-01 01:40:00  21.6
#12 2019-01-01 01:50:00  19.2
#13 2019-01-01 02:00:00  19.2
#14 2019-01-01 02:10:00  19.6
#15 2019-01-01 02:20:00  19.6
#16 2019-01-01 02:30:00  19.6
#17 2019-01-01 02:40:00  20.5

data

df <- structure(list(date = structure(1:15, .Label = c("2019-01-01 00:04:00", 
"2019-01-01 00:15:00", "2019-01-01 00:26:00", "2019-01-01 00:37:00", 
"2019-01-01 00:48:00", "2019-01-01 00:59:00", "2019-01-01 01:10:00", 
"2019-01-01 01:22:00", "2019-01-01 01:33:00", "2019-01-01 01:44:00", 
"2019-01-01 01:55:00", "2019-01-01 02:06:00", "2019-01-01 02:17:00", 
"2019-01-01 02:29:00", "2019-01-01 02:40:00"), class = "factor"), 
ZUL_T = c(23.3, 23.3, 19.9, 20.7, 21.9, 21.9, 18.8, 18.8, 
20.7, 21.6, 19.2, 19.2, 19.6, 19.6, 20.5)), 
class = "data.frame", row.names = c(NA,-15L))
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213