1

I have a df with timesheet data and am looking for an easy way to summarize it. My data looks like df1 and I want to summarize it as df2. I am having a hard time devising a way to create the increments and allocate hours across them. The tricky part is allocating the hours that span across dates, ID 1 and 3, for example.

df1

ID    Garage  Unit_Name START_DATE_TIME  END_DATE_TIME
<chr> <chr>   <chr>     <dttm>           <dttm>
1     A       Truck     1/26/2015 21:00  1/27/2015 7:00
2     B       Truck     5/13/2015 6:00   5/13/2015 16:00
3     C       Car       8/21/2015 21:00  8/22/2015 7:00
6     C       Car       8/21/2015 11:00  8/21/2015 21:00


structure(list(ID = c("<chr>", "1", "2", "3", "6", NA, NA, NA, 
NA, NA, NA), Garage = c("<chr>", "A", "B", "C", "C", NA, NA, 
NA, NA, NA, NA), Unit_Name = c("<chr>", "Truck", "Truck", "Car", 
"Car", NA, NA, NA, NA, NA, NA), START_DATE_TIME = c("<dttm>", 
"1/26/2015 21:00", "5/13/2015 6:00", "8/21/2015 21:00", "8/21/2015 11:00", 
NA, NA, NA, NA, NA, NA), END_DATE_TIME = c("<dttm>", "1/27/2015 7:00", 
"5/13/2015 16:00", "8/22/2015 7:00", "8/21/2015 21:00", NA, NA, 
NA, NA, NA, NA)), .Names = c("ID", "Garage", "Unit_Name", "START_DATE_TIME", 
"END_DATE_TIME"), row.names = c(NA, -11L), class = c("tbl_df", 
"tbl", "data.frame"), spec = structure(list(cols = structure(list(
    ID = structure(list(), class = c("collector_character", "collector"
    )), Garage = structure(list(), class = c("collector_character", 
    "collector")), Unit_Name = structure(list(), class = c("collector_character", 
    "collector")), START_DATE_TIME = structure(list(), class = c("collector_character", 
    "collector")), END_DATE_TIME = structure(list(), class = c("collector_character", 
    "collector"))), .Names = c("ID", "Garage", "Unit_Name", "START_DATE_TIME", 
"END_DATE_TIME")), default = structure(list(), class = c("collector_guess", 
"collector"))), .Names = c("cols", "default"), class = "col_spec"))

df2

Garage  Unit_Name   Date        Increment    Hours
<chr>   <chr>       <dttm>      <chr>        <dbl>
A       Truck       1/26/2015   18:01-00:00   3
A       Truck       1/27/2015   00:01-6:00    6
A       Truck       1/27/2015   6:01-12:00    1
B       Truck       5/13/2015   6:01-12:00    6
B       Truck       5/13/2015   12:01-18:00   4
C       Car         8/21/2015   6:01-12:00    1
C       Car         8/21/2015   12:01-18:00   6
C       Car         8/21/2015   18:01-00:00   6
C       Car         8/22/2015   00:01-6:00    6
C       Car         8/23/2015   6:01-12:00    1 
samuelt
  • 215
  • 1
  • 2
  • 10

1 Answers1

1
library(tidyverse)
library(lubridate)
times=c("00:00","06:00","12:00","18:00")
times1=c("00:01","06:01","12:01","18:01")
df1%>%
  group_by(Garage,Unit_Name)%>%
  mutate(size=n())%>%
  summarise(START_DATE_TIME=min(START_DATE_TIME),
            END_DATE_TIME=max(END_DATE_TIME))%>%
  mutate(S=mdy_hm(START_DATE_TIME),
         b=floor(hour(S)/24*4)+1,
         m=ymd_hm(paste(format(S,"%F"),get("times",.GlobalEnv)[b])),
         n=ymd_hm(paste(format(S,"%F"),get("times",.GlobalEnv)[(b+1)%%4%>%replace(.,.==0,4)]))%>%
           if_else(m>.,.+days(1),.),
         rem=as.numeric(mdy_hm(END_DATE_TIME)-n),
         HOURS=list(as.numeric(c(n-S,rep(6,rem%/%6),rem%%6))))%>%
  unnest()%>%
  mutate(Date=S+hours(cumsum(lag(HOURS,default = 0))),
         b=floor(hour(Date)/24*4)+1,
         increment=paste0(get("times1",.GlobalEnv)[b],"-",
                           get("times",.GlobalEnv)[replace(d<-(b+1)%%4,d==0,4)]),
         Date=as.Date(Date))%>%
  select(Garage,Date,HOURS,increment)

Groups:   Garage [3]
   Garage Date       HOURS increment 
   <chr>  <date>     <dbl> <chr>      
 1 A      2015-01-26    3. 18:01-00:00
 2 A      2015-01-27    6. 00:01-06:00
 3 A      2015-01-27    1. 06:01-12:00
 4 B      2015-05-13    6. 06:01-12:00
 5 B      2015-05-13    4. 12:01-18:00
 6 C      2015-08-21    1. 06:01-12:00
 7 C      2015-08-21    6. 12:01-18:00
 8 C      2015-08-21    6. 18:01-00:00
 9 C      2015-08-22    6. 00:01-06:00
10 C      2015-08-22    1. 06:01-12:00
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
Onyambu
  • 67,392
  • 3
  • 24
  • 53
  • thanks for the approach! when I run the code I get an error: "Error in mutate_impl(.data, dots) : Evaluation error: invalid 'times' argument." I think this is being thrown at the if_else statement. Any advice on how to handle? – samuelt Jun 13 '18 at 16:18
  • @samuelt can you try running the code upto m? I mean close the paranthesis at m and run the upper chunck of the code.. and see whether it runs.. because in my end it does run.. with no errors – Onyambu Jun 13 '18 at 16:24
  • the parenthesis should be before the `,` at the end of the m line – Onyambu Jun 13 '18 at 16:35
  • did as you directed and all pieces of the first mutate function run well. It's the if-else statement that throws the error – samuelt Jun 13 '18 at 16:38
  • now try running until the end of the mutate.. without the unnest – Onyambu Jun 13 '18 at 16:45
  • the error is being thrown before unnest at the if_else, so taking out unnest does not make a difference – samuelt Jun 13 '18 at 16:51
  • Let us [continue this discussion in chat](https://chat.stackoverflow.com/rooms/173091/discussion-between-samuelt-and-onyambu). – samuelt Jun 13 '18 at 16:54
  • @samuelt now run including the unnest – Onyambu Jun 13 '18 at 16:55
  • @samuelt great so everything runs? – Onyambu Jun 13 '18 at 17:31
  • no, the if_else is throwing the error. still not sure what the issue is. There are no instances where end date is before start, so that is not it – samuelt Jun 13 '18 at 17:41
  • @samuelt I thought you said that it did run.. also the ifelse is not checking the end dates... its cheking the interval.. since I am using time, and there is no class time in base r, I have to do that.. for example if m = "18:00" and n="00:00" then n must be a day ahead.. so the if_else has no problem... – Onyambu Jun 13 '18 at 17:50
  • @samuelt you said that the unnest does run... until unnest, do you get the correct result for hours? – Onyambu Jun 13 '18 at 17:52
  • it runs up to if_else. if i run through if_else, ending at that function I get: Error in if_else(m > ., . + days(1), .) : object 'm' not found – samuelt Jun 13 '18 at 18:00
  • @ that shows you do not have m, not a problem with the data... I can help you with that contact zoom id 7878006726 – Onyambu Jun 13 '18 at 18:04