1

The 1st dataframe contains drugs administered to patients and the start and stop dates of these. I need to take this dataframe and expand each drug entry per id so there is a row entry for each date administered, i.e. so it matches the format of the 2nd dataframe.

Dataframe 1

enter image description here

structure(list(id = c(1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002), drug = c("Acetaminophen", "Ampicillin", "Calcium Carbonate", 
"Cefalexin", "Cefazolin", "Cefotaxime", "Ceftazidime", "Dexamethasone", 
"Dextrose 5%/Sodium Chloride 0.9%/Potassium Chloride 20mmol/L", 
"Dimenhydrinate", "Dimenhydrinate", "Lactulose", "Magnesium Oxide", 
"Magnesium Oxide", "Metoclopramide", "Metoclopramide", "Morphine", 
"Morphine", "Morphine", "Mycophenolate Mofetil", "Nadolol", "Omeprazole", 
"Ondansetron", "Ondansetron", "Oxybutynin", "Oxycodone Immediate Release", 
"Prednisone", "Sirolimus", "Sirolimus", "Sirolimus", "Tacrolimus", 
"Tacrolimus", "Tacrolimus", "Vitamin D3"), start = structure(c(1247875200, 
1247702400, 1247702400, 1248652800, 1248912000, 1249948800, 1248739200, 
1247875200, 1247788800, 1247702400, 1247788800, 1248220800, 1247961600, 
1247702400, 1249862400, 1249430400, 1247788800, 1247961600, 1247961600, 
1247702400, 1247702400, 1247702400, 1247875200, 1249084800, 1247702400, 
1248134400, 1247788800, 1249603200, 1249862400, 1249430400, 1248652800, 
1247875200, 1247702400, 1247875200), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), stop = structure(c(1250035200, 1248825600, 
1249948800, 1249689600, 1248998400, 1250035200, 1248825600, 1248134400, 
1247875200, 1250121600, 1249084800, 1248307200, 1250121600, 1247961600, 
1250121600, 1249862400, 1247875200, 1248048000, 1248048000, 1250121600, 
1250121600, 1250121600, 1248998400, 1250121600, 1250121600, 1248998400, 
1250121600, 1249776000, 1250121600, 1249516800, 1249430400, 1249171200, 
1249171200, 1250121600), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
dose = c(1000, 1500, 200, 1000, 1000, 2000, 1000, 8, 150, 
50, 25, 10, 500, 210, 5, 10, 50, 4, 15, 500, 40, 20, 4, 8, 
7.5, 5, 10, 4, 3, 6, 3, 3.5, 4, 400), units = c("mg", "mg", 
"mg (ca++)", "mg", "mg", "mg", "mg", "mg", "ml/hr", "mg", 
"mg", "ml", "mg (mg++)", "mg (mg++)", "mg", "mg", "mg", "mg", 
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", 
"mg", "mg", "mg", "mg", "mg", "IU")), row.names = c(NA, -34L
), class = c("tbl_df", "tbl", "data.frame"))

Dataframe 2

enter image description here

structure(list(id = c(1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002
), drug = c("Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen"), start = structure(c(1247875200, 
1247961600, 1248048000, 1248134400, 1248220800, 1248307200, 1248393600, 
1248480000, 1248566400, 1248652800, 1248739200, 1248825600, 1248912000, 
1248998400, 1249084800, 1249171200, 1249257600, 1249344000, 1249430400, 
1249516800, 1249603200, 1249689600, 1249776000, 1249862400, 1249948800, 
1250035200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
dose = c(1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 
1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000), units = c("mg", 
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", 
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", 
"mg", "mg", "mg", "mg", "mg")), row.names = c(NA, -26L), class = c("tbl_df", 
"tbl", "data.frame"))
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
  • Possible duplicate of https://stackoverflow.com/questions/11494511/expand-ranges-defined-by-from-and-to-columns – Ronak Shah Sep 05 '19 at 03:28
  • 1
    That question is just looking at years. My question involves full dates where I need multiple rows for each date. –  Sep 05 '19 at 13:42

1 Answers1

0

We can use map2 from purrr to create a list column from each of the 'start' to 'stop' by '1 day' and then unnest the list

library(dplyr)
library(purrr)
library(tidyr)
df1 %>%
   mutate(start = map2(as.Date(start), as.Date(stop), ~ 
                    as.POSIXct(seq(.x, .y, by = '1 day')))) %>% 
   unnest(start) %>%
   select(-stop) 
akrun
  • 874,273
  • 37
  • 540
  • 662