0

I have a dataframe with working times of every weekday. I have start and end times for each day. The problem arises when the employee worked twice on the same day. He started to work for example on Friday 10pm and ended on Saturday 8am but started again working at 10pm on Saturday evening. My dataset is now structured like this :

enter image description here

dput(droplevels(head(data1, 10)))
structure(list(Mon.Start1 = structure(c(2L, 1L, 1L, 4L, 1L, 3L, 
2L, 4L, 1L, 1L), .Label = c(" ", "14:30:00", "22:30:00", "7:00:00"
), class = "factor"), Mon.End1 = structure(c(4L, 1L, 1L, 2L, 
1L, 6L, 5L, 3L, 1L, 1L), .Label = c(" ", "16:00:00", "17:30:00", 
"23:15:00", "23:30:00", "24:00:00"), class = "factor"), Mon.Start2 = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = " ", class = "factor"), 
    Mon.End2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = " ", class = "factor"), Tue.Start1 = structure(c(4L, 
    1L, 3L, 5L, 1L, 2L, 1L, 5L, 5L, 1L), .Label = c(" ", "0:00:00", 
    "14:00:00", "14:30:00", "7:00:00"), class = "factor"), Tue.End1 = structure(c(3L, 
    1L, 4L, 2L, 1L, 5L, 1L, 2L, 2L, 1L), .Label = c(" ", "16:30:00", 
    "23:15:00", "24:00:00", "8:00:00"), class = "factor"), Tue.Start2 = structure(c(1L, 
    1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c(" ", "22:30:00"
    ), class = "factor"), Tue.End2 = structure(c(1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c(" ", "24:00:00"), class = "factor"), 
    Wed.Start1 = structure(c(1L, 3L, 1L, 3L, 1L, 2L, 1L, 3L, 
    3L, 1L), .Label = c(" ", "0:00:00", "7:00:00"), class = "factor"), 
    Wed.End1 = structure(c(1L, 3L, 1L, 3L, 1L, 5L, 1L, 4L, 2L, 
    1L), .Label = c(" ", "16:30:00", "17:00:00", "17:30:00", 
    "8:00:00"), class = "factor"), Wed.Start2 = structure(c(1L, 
    1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c(" ", "22:30:00"
    ), class = "factor"), Wed.End2 = structure(c(1L, 1L, 1L, 
    1L, 1L, 2L, 1L, 1L, 1L, 1L), .Label = c(" ", "24:00:00"), class = "factor"), 
    Thu.Start1 = structure(c(1L, 1L, 1L, 4L, 1L, 2L, 3L, 4L, 
    4L, 1L), .Label = c(" ", "0:00:00", "14:30:00", "7:00:00"
    ), class = "factor"), Thu.End1 = structure(c(1L, 1L, 1L, 
    2L, 1L, 4L, 3L, 2L, 2L, 1L), .Label = c(" ", "16:30:00", 
    "24:00:00", "8:00:00"), class = "factor"), Thu.Start2 = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = " ", class = "factor"), 
    Thu.End2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = " ", class = "factor"), Fri.Start1 = structure(c(2L, 
    1L, 1L, 4L, 3L, 1L, 1L, 4L, 4L, 1L), .Label = c(" ", "6:30:00", 
    "6:45:00", "7:00:00"), class = "factor"), Fri.End1 = structure(c(5L, 
    1L, 1L, 3L, 2L, 1L, 1L, 4L, 2L, 1L), .Label = c(" ", "16:30:00", 
    "16:45:00", "17:30:00", "18:30:00"), class = "factor"), Fri.Start2 = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = " ", class = "factor"), 
    Fri.End2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = " ", class = "factor"), Sa.Start1 = structure(c(4L, 
    5L, 1L, 1L, 3L, 1L, 1L, 5L, 1L, 2L), .Label = c(" ", "14:00:00", 
    "14:45:00", "6:30:00", "7:00:00"), class = "factor"), Sa.End1 = structure(c(4L, 
    3L, 1L, 1L, 5L, 1L, 1L, 2L, 1L, 6L), .Label = c(" ", "16:30:00", 
    "17:00:00", "18:30:00", "23:15:00", "24:00:00"), class = "factor"), 
    Sa.Start2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = " ", class = "factor"), Sa.End2 = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = " ", class = "factor"), 
    Sun.Start1 = structure(c(4L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 
    1L, 2L), .Label = c(" ", "0:00:00", "14:45:00", "6:30:00"
    ), class = "factor"), Sun.End1 = structure(c(3L, 1L, 1L, 
    1L, 4L, 1L, 1L, 1L, 1L, 2L), .Label = c(" ", "0:30:00", "16:45:00", 
    "23:15:00"), class = "factor"), Sun.Start2 = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c(" ", "14:00:00"
    ), class = "factor"), Sun.End2 = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c(" ", "24:00:00"), class = "factor")), row.names = c("21", 
"22", "23", "25", "26", "27", "28", "31", "34", "35"), class = "data.frame") 

I tried to set start and ending times with the package lubridate.I added also dates to all days (as.POSICxt) When the employee just works one time in a day it works but not when he worked twice. Since sometimes my End time is really the End time (as for Monday 24:00 is the end time) but sometimes it continues overnight(as Friday and Saturday) and so the 24:00 is not the end time . Is there a function to set that everytime that the start time is 00 the start time should be the start2 from the day before? Or how could I solve this problem? The goal would be to create breaks and label every shift, by the starting times as night, late shift etc and the difftime. Also for those who worked twice if I cut and label the beginning hours to define the shift, since the employees have two beginning hours I end with one more shift per week that in fact is not correct.

The desired output would be to have the day of the week in one column, start time, end time, shift length, shift type(off, morning, late, night) and then after having this for every day be able to make some calculations at the weekly level and further on the monthly level.

If someone could give me some advices I would be very thankful. Thanks

  • Welcome to Stack Overflow. Help us to help you, provide data in a way that people can work with. Start here https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example Show some code instead of describing what you were trying. Show the expected outcome. – Janhoo Mar 06 '20 at 13:29
  • @Sunshine_student could you try to paste the data again? It looks like it was cut off and appears incomplete. Perhaps you can do `dput(head(data1, 10))` for the first 10 rows of data? – Ben Mar 06 '20 at 16:09
  • @ Ben, like this?Here they are still all as factors, I inserted for each day a date and split them into two, one as date and the other as time. Thank U – Sunshine_student Mar 06 '20 at 16:52

1 Answers1

0

This is not easy, nor is this the best approach to consider. However, if the primary goal is to calculate the length in shift, perhaps this might be helpful.

First I would use pivot_longer to put into a form that would make sense to evaluate, based on day, event (start or end), and shift number. It also would help to have a column for day number, since dates are not available.

It appears there are blank spaces - these were made NA.

Times were converted to seconds with lubridate function hms.

Then this was put back into wide format, and additional columns were added to denote the time past midnight that was included in a day (if date's start was 0). In addition, another column was made to indicate if day went to midnight (in which case, any following past-midnight time would be added for adjustment later).

Then, for each day, it would summarize the total time worked during the day, and then calculate an adjusted time based on what you asked, which is allocating time past midnight to the previous day, if the previous day went until midnight.

Hope this is helpful.

library(tidyverse)
library(lubridate)

data1 %>%
  pivot_longer(everything(), names_to = c("day", "event", "num"), values_to = "time", names_pattern = "(\\w+).(Start|End)(\\d)") %>%
  mutate(daynum = cumsum(day != lag(day, default = first(day))) + 1) %>%
  na_if(" ") %>%
  mutate(time = as.numeric(hms(time, quiet = TRUE))) %>%
  pivot_wider(id_cols = c(daynum, num, day), names_from = event, values_from = time) %>%
  mutate(PastMN = if_else(Start == 0, End - Start, 0),
         UpToMN = if_else(End == 86400, TRUE, FALSE)) %>%
  group_by(daynum) %>%
  summarise(DayTotal = sum(End - Start, na.rm = TRUE),
            PastMNTotal = sum(PastMN, na.rm = TRUE),
            AnyUpToMN = any(UpToMN, na.rm = T)) %>%
  mutate(DayTotalAdjusted = if_else(lead(PastMNTotal, default = 0) > 0, DayTotal + lead(PastMNTotal), DayTotal),
         DayTotalAdjusted = if_else(lag(AnyUpToMN, default = FALSE) == TRUE, DayTotalAdjusted - PastMNTotal, DayTotalAdjusted)) %>%
  mutate(ShiftLength = DayTotalAdjusted / 3600) %>%
  dplyr::select(daynum, ShiftLength) 

If you also want to have the shift start and end times here as well, you can join (e.g., add left_join(data2) in pipe above) with data2 by daynum (for example). You can assign a data frame based on the initial few lines of code above, but I'll repeat here just in case.

data2 <- data1 %>%
  pivot_longer(everything(), names_to = c("day", "event", "num"), values_to = "time", names_pattern = "(\\w+).(Start|End)(\\d)") %>%
  mutate(daynum = cumsum(day != lag(day, default = first(day))) + 1) %>%
  na_if(" ") %>%
  pivot_wider(id_cols = c(daynum, day), names_from = c(event, num), values_from = time)

Output

Joining, by = "daynum"
# A tibble: 70 x 7
   daynum ShiftLength day   Start_1  End_1    Start_2  End_2   
    <dbl>       <dbl> <chr> <fct>    <fct>    <fct>    <fct>   
 1      1        8.75 Mon   14:30:00 23:15:00 NA       NA      
 2      2        8.75 Tue   14:30:00 23:15:00 NA       NA      
 3      3        0    Wed   NA       NA       NA       NA      
 4      4        0    Thu   NA       NA       NA       NA      
 5      5       12    Fri   6:30:00  18:30:00 NA       NA      
 6      6       12    Sa    6:30:00  18:30:00 NA       NA      
 7      7       10.2  Sun   6:30:00  16:45:00 NA       NA      
 8      8        0    Mon   NA       NA       NA       NA      
 9      9        0    Tue   NA       NA       NA       NA      
10     10       10    Wed   7:00:00  17:00:00 NA       NA      
11     11        0    Thu   NA       NA       NA       NA      
12     12        0    Fri   NA       NA       NA       NA      
13     13       10    Sa    7:00:00  17:00:00 NA       NA      
14     14        0    Sun   NA       NA       NA       NA      
15     15        0    Mon   NA       NA       NA       NA      
16     16       10    Tue   14:00:00 24:00:00 NA       NA      
17     17        0    Wed   NA       NA       NA       NA      
18     18        0    Thu   NA       NA       NA       NA      
19     19        0    Fri   NA       NA       NA       NA      
20     20        0    Sa    NA       NA       NA       NA      
21     21        0    Sun   NA       NA       NA       NA      
22     22        9    Mon   7:00:00  16:00:00 NA       NA      
23     23        9.5  Tue   7:00:00  16:30:00 NA       NA      
24     24       10    Wed   7:00:00  17:00:00 NA       NA      
25     25        9.5  Thu   7:00:00  16:30:00 NA       NA      
26     26        9.75 Fri   7:00:00  16:45:00 NA       NA      
27     27        0    Sa    NA       NA       NA       NA      
28     28        0    Sun   NA       NA       NA       NA      
29     29        0    Mon   NA       NA       NA       NA      
30     30        0    Tue   NA       NA       NA       NA      
31     31        0    Wed   NA       NA       NA       NA      
32     32        0    Thu   NA       NA       NA       NA      
33     33        9.75 Fri   6:45:00  16:30:00 NA       NA      
34     34        8.5  Sa    14:45:00 23:15:00 NA       NA      
35     35        8.5  Sun   14:45:00 23:15:00 NA       NA      
36     36        9.5  Mon   22:30:00 24:00:00 NA       NA      
37     37        9.5  Tue   0:00:00  8:00:00  22:30:00 24:00:00
38     38        9.5  Wed   0:00:00  8:00:00  22:30:00 24:00:00
39     39        0    Thu   0:00:00  8:00:00  NA       NA      
40     40        0    Fri   NA       NA       NA       NA      
41     41        0    Sa    NA       NA       NA       NA      
42     42        0    Sun   NA       NA       NA       NA      
43     43        9    Mon   14:30:00 23:30:00 NA       NA      
44     44        0    Tue   NA       NA       NA       NA      
45     45        0    Wed   NA       NA       NA       NA      
46     46        9.5  Thu   14:30:00 24:00:00 NA       NA      
47     47        0    Fri   NA       NA       NA       NA      
48     48        0    Sa    NA       NA       NA       NA      
49     49        0    Sun   NA       NA       NA       NA      
50     50       10.5  Mon   7:00:00  17:30:00 NA       NA      
51     51        9.5  Tue   7:00:00  16:30:00 NA       NA      
52     52       10.5  Wed   7:00:00  17:30:00 NA       NA      
53     53        9.5  Thu   7:00:00  16:30:00 NA       NA      
54     54       10.5  Fri   7:00:00  17:30:00 NA       NA      
55     55        9.5  Sa    7:00:00  16:30:00 NA       NA      
56     56        0    Sun   NA       NA       NA       NA      
57     57        0    Mon   NA       NA       NA       NA      
58     58        9.5  Tue   7:00:00  16:30:00 NA       NA      
59     59        9.5  Wed   7:00:00  16:30:00 NA       NA      
60     60        9.5  Thu   7:00:00  16:30:00 NA       NA      
61     61        9.5  Fri   7:00:00  16:30:00 NA       NA      
62     62        0    Sa    NA       NA       NA       NA      
63     63        0    Sun   NA       NA       NA       NA      
64     64        0    Mon   NA       NA       NA       NA      
65     65        0    Tue   NA       NA       NA       NA      
66     66        0    Wed   NA       NA       NA       NA      
67     67        0    Thu   NA       NA       NA       NA      
68     68        0    Fri   NA       NA       NA       NA      
69     69       10.5  Sa    14:00:00 24:00:00 NA       NA      
70     70       10    Sun   0:00:00  0:30:00  14:00:00 24:00:00
Ben
  • 28,684
  • 5
  • 23
  • 45
  • Hi Ben, thank you so much for your help. I tried to understand it but in fact I couldn't. I added the identification number of each employee and also a date for each day to see if it gets easier but I couldn't figure out a way to when the shifts starts at 00:00 to count the start 2 and end2 time of the previous day. – Sunshine_student Mar 16 '20 at 10:24