1

I have two dataframes. One contains the tenure of managers who appointed the staff, and another contains the details of the staff's tenure. I want to create a new column in the staff table to obtain the manager's name who appointed her.

staff_df <- dput(structure(list(employee_name = c("alex", "timor", "tai", "lil", 
"mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61", 
"8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51", 
"24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame")))


manager_df <- dput(structure(list(manager_name = c("john", "doe", "mary", "jane"
), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68", 
"5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84", 
"23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl", 
"data.frame")))

I tried the following:

staff_df$appointed_by = NA

staff_df$appointed_by <- manager_df$manager_name[which(staff_df$joined > manager_df$date_became_manager &  staff_df$joined <= manager_df$manager_till)]'''


Effectively, I need the new column in the staff_df called appointed_by and obtain the name of the manager who appointed the personnel. ie., Alex was appointed by John, Timor was appointed by doe, Tai was appointed by Doe (...) Jae was appointed by Jane. 

EDIT: dput()
  • can you `dput()` the dataframes in your post? – Eric Jun 16 '21 at 11:46
  • also, did you look at [this post](https://stackoverflow.com/questions/23934361/merge-2-dataframes-if-value-within-range). I've found the `sqldf` based solution most useful. – Eric Jun 16 '21 at 11:51

2 Answers2

1
    library(lubridate)
    library(dplyr)
    
    staff_df  <- staff_df %>%
      mutate(
        joined = dmy(joined),
        retired = dmy(retired)
      )
    
    manager_df <- manager_df %>%
      mutate(
        date_became_manager = dmy(date_became_manager),
        manager_till = dmy(manager_till)
      )
    
    library(sqldf)
    res <- sqldf("SELECT * 
           FROM staff_df
           INNER JOIN manager_df ON 
           staff_df.joined>=manager_df.date_became_manager AND
           staff_df.joined<manager_till")

> res
  employee_name     joined    retired manager_name date_became_manager manager_till
1          alex 2050-01-26 2051-09-18         john          2050-01-26   2050-08-26
2         timor 2060-09-23 2063-01-24          doe          2050-08-27   2068-11-07
3           tai 2061-03-05 2066-06-08          doe          2050-08-27   2068-11-07
4           jae 1993-03-09 1998-07-26         jane          1984-01-05   2001-12-23
Eric
  • 1,381
  • 9
  • 24
1

I propose this tidyverse strategy. However, in the dput given, the dates are in character format, so I changed them in dates type first.

staff_df <- dput(structure(list(employee_name = c("alex", "timor", "tai", "lil", 
                                                  "mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61", 
                                                                            "8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51", 
                                                                                                                             "24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA, 
                                                                                                                                                                                                           -6L), class = c("tbl_df", "tbl", "data.frame")))
#> structure(list(employee_name = c("alex", "timor", "tai", "lil", 
#> "mae", "jae"), joined = c("26-Jan-50", "23-Sep-60", "5-Mar-61", 
#> "8-Sep-72", "8-Sep-82", "9-Mar-93"), retired = c("18-Sep-51", 
#> "24-Jan-63", "8-Jun-66", "5-Nov-74", "6-Jun-89", "26-Jul-98")), row.names = c(NA, 
#> -6L), class = c("tbl_df", "tbl", "data.frame"))


manager_df <- dput(structure(list(manager_name = c("john", "doe", "mary", "jane"
), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68", 
                           "5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84", 
                                                         "23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl", 
                                                                                                          "data.frame")))
#> structure(list(manager_name = c("john", "doe", "mary", "jane"
#> ), date_became_manager = c("26-Jan-50", "27-Aug-50", "8-Nov-68", 
#> "5-Jan-84"), manager_till = c("26-Aug-50", "7-Nov-68", "4-Jan-84", 
#> "23-Dec-01")), row.names = c(NA, -4L), class = c("tbl_df", "tbl", 
#> "data.frame"))

#change data_type
staff_df$joined <- as.Date(staff_df$joined, '%d-%b-%y')
staff_df$retired <- as.Date(staff_df$retired, '%d-%b-%y')
manager_df$date_became_manager <- as.Date(manager_df$date_became_manager, '%d-%b-%y')
manager_df$manager_till <- as.Date(manager_df$manager_till, '%d-%b-%y')

library(tidyverse)

staff_df %>%
  mutate(manager_app = map_chr(joined, ~{xx <- manager_df$manager_name[manager_df$date_became_manager <= .x & 
                                                                        manager_df$manager_till >= .x]; 
                                           if (length(xx) == 0) NA else xx})) %>% 
  filter(!is.na(manager_app)) %>%
  left_join(manager_df, by = c('manager_app' = 'manager_name'))

#> # A tibble: 4 x 6
#>   employee_name joined     retired    manager_app date_became_mana~ manager_till
#>   <chr>         <date>     <date>     <chr>       <date>            <date>      
#> 1 alex          2050-01-26 2051-09-18 john        2050-01-26        2050-08-26  
#> 2 timor         2060-09-23 2063-01-24 doe         2050-08-27        2068-11-07  
#> 3 tai           2061-03-05 2066-06-08 doe         2050-08-27        2068-11-07  
#> 4 jae           1993-03-09 1998-07-26 jane        1984-01-05        2001-12-23

Even the filter step is also optional

staff_df %>%
  mutate(manager_app = map_chr(joined, ~{xx <- manager_df$manager_name[manager_df$date_became_manager <= .x & 
                                                                        manager_df$manager_till >= .x]; 
                                           if (length(xx) == 0) NA else xx})) %>% 
  left_join(manager_df, by = c('manager_app' = 'manager_name'))

# A tibble: 6 x 6
  employee_name joined     retired    manager_app date_became_manager manager_till
  <chr>         <date>     <date>     <chr>       <date>              <date>      
1 alex          2050-01-26 2051-09-18 john        2050-01-26          2050-08-26  
2 timor         2060-09-23 2063-01-24 doe         2050-08-27          2068-11-07  
3 tai           2061-03-05 2066-06-08 doe         2050-08-27          2068-11-07  
4 lil           1972-09-08 1974-11-05 NA          NA                  NA          
5 mae           1982-09-08 1989-06-06 NA          NA                  NA          
6 jae           1993-03-09 1998-07-26 jane        1984-01-05          2001-12-23
AnilGoyal
  • 25,297
  • 4
  • 27
  • 45