0

I want to divide my data into 5-minute intervals and compare them with the monthly averages and standard deviations to detect any anomalies. I cut the data into 5-minute intervals as follows:

# Cut the data into 5-minute intervals.
cut(x = month.data, breaks = "5 min")


dput(monthlydata)
structure(list(time = c("00:00:00", "00:05:00", "00:10:00", "00:15:00", "00:20:00", "00:25:00", "00:30:00", "00:35:00", "00:40:00", "00:45:00", "00:50:00", "00:55:00", "01:00:00", "01:05:00", "01:10:00", "01:15:00", "01:25:00", "01:30:00", "01:35:00", "01:40:00", "01:45:00", "01:55:00", "02:00:00", "02:20:00", "02:25:00", "02:40:00", "02:45:00", "02:50:00", "03:05:00", "03:15:00", "03:25:00", "03:30:00", "03:35:00", "03:40:00", "03:45:00", "03:50:00", "03:55:00", "04:05:00", "04:25:00", "04:30:00", "04:50:00", "04:55:00", "05:05:00", "05:10:00", "05:15:00", "05:20:00", "05:30:00", "05:40:00", "05:50:00", "05:55:00", "06:00:00", "06:05:00", "06:10:00", "06:15:00", "06:20:00", "06:30:00", "06:35:00", "06:40:00", "06:45:00", "06:55:00", "07:00:00", "07:05:00", "07:10:00", "07:15:00", "07:20:00", "07:25:00", "07:30:00", "07:35:00", "07:40:00", "07:45:00", "07:50:00", "07:55:00", "08:00:00", "08:05:00", "08:10:00", "08:15:00", "08:20:00", "08:25:00", "08:30:00", "08:35:00", "08:40:00", "08:45:00", "08:50:00", "08:55:00", "09:00:00", "09:05:00", "09:10:00", "09:15:00", "09:20:00", "09:25:00", "09:30:00", "09:35:00", "09:40:00", "09:45:00", "09:50:00", "09:55:00", "10:00:00", "10:05:00", "10:10:00", "10:15:00", "10:20:00", "10:25:00", "10:30:00", "10:35:00", "10:40:00", "10:45:00", "10:50:00"), avg = c(50.5510560619622, 43.0189344993435, 50.3126451810161, 51.7984139398903, 44.1216815428764, 54.6443574865021, 50.065771120662, 50.9085361977819, 59.7024337563552, 44.6504863019322, 51.3800349930107, 47.2281110758541, 39.1562855847908, 47.3556824171027, 54.4776585774653, 64.9968044851706, 47.878888210121, 44.7561800618865, 45.0695456307952, 56.1759044802863, 50.8227417957758, 52.6309915011542, 62.7342370217067, 54.4257866432874, 54.3226457929837, 40.1938479668371, 54.5403748037875, 51.8463279336394, 53.8173320832895, 49.9889812414321, 50.5827916556644, 53.5500871960216, 37.0209877205586, 55.2612198888207, 55.1863860227875, 58.2740366768661, 50.5885573635039, 48.1051996319848, 
47.7634397043728, 46.5863241465071, 54.1810631439095, 53.7906152156406, 52.6409719258956, 54.81269123308, 44.0537724370726, 47.1977242746078, 46.5010741707819, 45.8433966693518, 42.3623605036368, 43.0730189148746, 58.4167050044254, 49.7799961792657, 53.0755779045083, 52.0552481180891, 42.0602921415756, 52.5126950828788, 55.0870481980705, 44.4144434705709, 42.392966543036, 47.1807241560313, 44.6884956183158, 47.2896481418499, 57.8319708553495, 46.6397655826931, 52.4873442246903, 51.6324293101077, 55.0908694414676, 43.8596455462562, 48.8941181950083, 48.7514252330684, 49.310070422837,48.5234755805063, 58.5542276809981, 47.7720993402378,49.9405735614802, 52.8780543357139, 58.2557463154677,52.9242756783794, 55.4983029733778, 44.2299478555713,57.3379257421419, 50.1951095071188, 53.0553561602009, 50.3109504601222, 44.9316581536335, 47.7568227989573, 50.3093864093436, 50.0461546149579, 48.2373271954793, 50.2943295283144, 46.8054846556807, 48.7084493434669, 46.5421233124519, 50.1222951953386, 49.4207933535255, 46.1521667031027, 51.8644029994928, 53.4831581582472, 51.4972139096679, 51.5181187952616, 49.1998856564675, 50.3454476017966, 49.7587298896826, 45.6040364435812, 49.7466159629413, 51.0983492421099, 52.2935123336372), sd =c(9.32931925004817, 11.6047417906884, 9.80771691435559, 10.2308327194904, 9.67431773674866, 10.5323558825585, 9.1920111408028, 10.0745961985324, 9.21246056157269, 7.96228334027313, 9.89384474113651, 12.3284041772698, 13.5489518864705, 9.96803285037014, 8.85300006821126, 10.9832318078379, 13.0909163134817, 9.7261635496657, 10.3208509302825, 8.74704188744148, 9.44853223258545, 7.68266078719723, 8.78660429415339, 10.1981152232186,10.0617639380203, 9.70826609005244, 10.4405793131911, 10.0294741532956, 10.5843898470973, 10.0678114702352, 9.72005734098214, 8.71988580635692, 13.0339431549482, 8.85263899155544, 11.1140174478773, 10.6801908843647, 11.0442113270832, 9.6576794753704, 11.193361191756, 9.05129333547447, 11.1414887813967, 10.5985803957382, 
10.2422686622522, 11.2629702278102, 10.4774845098793, 9.82520095973172, 10.6775347630735, 8.97029695502126, 8.28221177072086, 11.0689605695813, 10.4298020842373, 9.57012379689429, 10.1587613403527, 11.2343452027682, 9.09417849538438, 10.3529463918792, 8.89434012398308, 7.86166740352018, 8.67472741747663, 10.7631616313607, 11.4634738459674, 10.3773119423003, 10.1261492697498, 9.51153382612954, 8.8445878796955, 9.99640290007654, 9.33226675473664, 10.2637002156788, 9.44339377944955, 10.5843418581127, 9.00425609052502, 10.2822169680166, 10.5184763916409, 10.3495044419935, 9.99843947958033, 8.67736455800308, 10.1508472078283, 9.3450979185795, 9.84066593055499, 9.95417999414617, 9.2622985333717, 11.4445218170255, 9.72729168049685, 9.36415135782777, 10.2272372991057, 10.4800042675175, 10.0067598340318, 10.6127856103593, 8.91026514253112, 10.3451668931764, 9.86818949901026, 10.736479643411, 8.6424538599602, 9.53874170759392, 10.5484021386586, 10.7130372286846, 10.2509775336419, 10.7454025452377, 9.63372860813097, 9.35982465121709, 9.47486350661184, 10.311507962123, 10.0338294237329, 9.50474631714252, 10.1746281369343, 9.75040171260143, 10.8675383447689)), class = c("tbl_df", "tbl", "data.frame"), .Names = c("time", "avg", "sd"), row.names = c(NA, -107L))

However, if my currday data starts later than minute 00, the time intervals are calculated from the earliest entry in the data. So if the first entry is at 01:00 min, the intervals are 01:00 to 06:00 and so on. However, I want the intervals to be 00:00 - 05:00 and so on, to be consistent with the monthlydata intervals. Is there a way to set the lower limit of the intervals in the cut statement? Or is there any other way to ensure that both series use the same 5-minute intervals?

The data for one day

dput(currday)
structure(c(1533769260, 1533769320, 1533769320, 1533769380, 1533769380,   1533769380, 1533769440, 1533769500, 1533769680, 1533769740, 1533769740,1533769740, 1533769800, 1533769920, 1533769920, 1533769920,1533769980,1533770100, 1533770100, 1533770220, 1533770280, 1533770340, 1533770400, 1533770460, 1533770460, 1533770460, 1533770520, 1533770520, 1533770580, 1533770580, 1533770640, 1533770880, 1533771060, 1533771060, 1533771120, 1533771300, 1533771300, 1533771300, 1533771360, 1533771360, 1533771540, 1533771660, 1533771660, 1533771960, 1533772260, 1533772260, 1533772260, 1533772440, 1533772440, 1533772560, 1533772620, 1533772620, 1533772800, 1533772980, 1533773160, 1533773400, 1533773400, 1533773460, 1533773760, 1533773880, 1533774480, 1533774540, 1533774900, 1533774900, 1533774960, 1533775080, 1533775140, 1533775320, 1533775380, 1533775500,1533775560, 1533775680, 1533776340, 1533776520, 1533776520,1533776580, 1533777660, 1533777900, 1533777900, 1533778200, 1533779100, 1533779100, 1533779220, 1533779460, 1533779640, 1533780360, 1533780960, 1533781740, 1533782040, 1533782340, 1533782700, 1533782880, 1533783120, 1533783600, 1533784020, 1533784140, 1533785280, 1533785280, 1533785400, 1533785460, 1533786660), class = c("POSIXct", "POSIXt"), tzone = "Europe/London")

I have looked at other questions and found create-a-24-hour-vector-with-5-minutes-time-interval-in-r. It is not quite what I am after. Any help is highly appreciated.

NKaz
  • 47
  • 8
  • 1
    [link](https://stackoverflow.com/questions/27594959/grouping-every-n-minutes-with-dplyr) and [link2](https://stackoverflow.com/questions/34913317/how-to-group-time-series-data-into-round-intervals-of-5-minutes-in-r) SO questions can help you? – AleBdC Aug 09 '18 at 12:10

1 Answer

0

As suggested by @AleBdC, I checked the link: Grouping every n minutes with dplyr and found the comment suggested by @MikeyHarper very useful.

So, I first floored the timestamps in currday to five-minute intervals, then separated the date and time into two columns, and finally converted the time column into a factor to use for grouping and summarizing.

library(lubridate)

# Floor every timestamp to the start of its 5-minute clock interval, so the
# bins no longer depend on the earliest entry in the data. currday is already
# POSIXct with its time zone set, so it can be floored directly.
by5 <- floor_date(currday, unit = "5 minutes")

# Split the floored timestamps into a date column and a time-of-day column.
# format() is used instead of splitting the printed value on whitespace: it
# needs no extra packages and always emits the time part, even at midnight.
x <- data.frame(
  date = format(by5, "%Y-%m-%d"),
  time = format(by5, "%H:%M:%S"),
  stringsAsFactors = FALSE
)

# Time of day as a factor: the key used for grouping and summarising.
x$time <- as.factor(x$time)

I also created five-minute intervals in my monthly data using the same approach, calculated the mean and sd, and compared them with the daily data.

NKaz
  • 47
  • 8