8

I have a data.frame of surfaces touched over time. I would like to simply append a duplicate of the last row for each AcvitivityID:

head(movsdf.rbind)
  ActivityID CareType HCWType Orientation    Surface       Date     Time       Dev.Date.Time SurfaceCategories
1         01       IV    RN01  leftFacing AlcOutside 2019-08-03 11:08:01 2019-08-03 11:08:01       HygieneArea
2         01       IV    RN01  leftFacing         In 2019-08-03 11:08:12 2019-08-03 11:08:12                In
3         01       IV    RN01  leftFacing       Door 2019-08-03 11:08:12 2019-08-03 11:08:12        FarPatient
4         02       IV    RN01  leftFacing       Door 2019-08-03 11:08:18 2019-08-03 11:08:18        FarPatient
5         02       IV    RN01  leftFacing      Other 2019-08-03 11:08:22 2019-08-03 11:08:22        FarPatient
6         03       IV    RN01  leftFacing      Table 2019-08-03 11:10:26 2019-08-03 11:10:26       NearPatient

Example data:

movsdf.rbind<-data.frame(ActivityID=rep(1:4, each=10),Surface=rep(c("In","Table","Out"),each=10))

So I can get this to work from here :

repeatss <- aggregate(movsdf.rbind, by=list(movsdf.rbind$ActivityID), FUN = function(x) { last = tail(x,1) })

movsdf.rbind <-rbind(movsdf.rbind, repeatss)

This does the trick but it looks clunky and then the data is not in order (not that it really matters but I feel something more elegant might exist in dplyr or data.table). Any thoughts?

Jaap
  • 81,064
  • 34
  • 182
  • 193
HCAI
  • 2,213
  • 8
  • 33
  • 65

5 Answers5

7

Another alternative using slice:

library(dplyr)

DF %>% 
  group_by(ActivityID) %>% 
  slice(c(1:n(),n()))

which gives:

# A tibble: 9 x 9
# Groups:   ActivityID [3]
  ActivityID CareType HCWType Orientation Surface    Date      Time     Dev.Date.Time     SurfaceCategori~
       <int> <chr>    <chr>   <chr>       <chr>      <chr>     <chr>    <chr>             <chr>           
1          1 IV       RN01    leftFacing  AlcOutside 2019-08-~ 11:08:01 2019-08-03 11:08~ HygieneArea     
2          1 IV       RN01    leftFacing  In         2019-08-~ 11:08:12 2019-08-03 11:08~ In              
3          1 IV       RN01    leftFacing  Door       2019-08-~ 11:08:12 2019-08-03 11:08~ FarPatient      
4          1 IV       RN01    leftFacing  Door       2019-08-~ 11:08:12 2019-08-03 11:08~ FarPatient      
5          2 IV       RN01    leftFacing  Door       2019-08-~ 11:08:18 2019-08-03 11:08~ FarPatient      
6          2 IV       RN01    leftFacing  Other      2019-08-~ 11:08:22 2019-08-03 11:08~ FarPatient      
7          2 IV       RN01    leftFacing  Other      2019-08-~ 11:08:22 2019-08-03 11:08~ FarPatient      
8          3 IV       RN01    leftFacing  Table      2019-08-~ 11:10:26 2019-08-03 11:10~ NearPatient     
9          3 IV       RN01    leftFacing  Table      2019-08-~ 11:10:26 2019-08-03 11:10~ NearPatient

Two base R alternatives:

# one
lastrows <- cumsum(aggregate(CareType ~ ActivityID, DF, length)[[2]])
DF[sort(c(seq(nrow(DF)), lastrows)),]

# two
idx <- unlist(tapply(1:nrow(DF), DF$ActivityID, FUN = function(x) c(x, tail(x, 1))))
DF[idx,]

which both give the same result.


Two alternatives:

library(data.table)
setDT(DF)          # convert 'DF' to a data.table

# one
DF[DF[, .I[c(1:.N,.N)], by = ActivityID]$V1]

# two
DF[, .SD[c(1:.N,.N)], by = ActivityID]

Used data:

DF <- structure(list(ActivityID = c(1L, 1L, 1L, 2L, 2L, 3L),
                     CareType = c("IV", "IV", "IV", "IV", "IV", "IV"),
                     HCWType = c("RN01", "RN01", "RN01", "RN01", "RN01", "RN01"),
                     Orientation = c("leftFacing", "leftFacing", "leftFacing", "leftFacing", "leftFacing", "leftFacing"),
                     Surface = c("AlcOutside", "In", "Door", "Door", "Other", "Table"),
                     Date = c("2019-08-03", "2019-08-03", "2019-08-03", "2019-08-03", "2019-08-03", "2019-08-03"),
                     Time = c("11:08:01", "11:08:12", "11:08:12", "11:08:18", "11:08:22", "11:10:26"),
                     Dev.Date.Time = c("2019-08-03 11:08:01", "2019-08-03 11:08:12", "2019-08-03 11:08:12", "2019-08-03 11:08:18", "2019-08-03 11:08:22", "2019-08-03 11:10:26"),
                     SurfaceCategories = c("HygieneArea", "In", "FarPatient", "FarPatient", "FarPatient", "NearPatient")),
                class = "data.frame", row.names = c(NA, -6L))
Jaap
  • 81,064
  • 34
  • 182
  • 193
3

One dplyr and tidyr possibility could be (using sample data from @Jaap):

DF %>%
 group_by(ActivityID) %>%
 uncount((row_number() == max(row_number())) + 1)

  ActivityID CareType HCWType Orientation Surface   Date     Time   Dev.Date.Time   SurfaceCategori…
       <int> <chr>    <chr>   <chr>       <chr>     <chr>    <chr>  <chr>           <chr>           
1          1 IV       RN01    leftFacing  AlcOutsi… 2019-08… 11:08… 2019-08-03 11:… HygieneArea     
2          1 IV       RN01    leftFacing  In        2019-08… 11:08… 2019-08-03 11:… In              
3          1 IV       RN01    leftFacing  Door      2019-08… 11:08… 2019-08-03 11:… FarPatient      
4          1 IV       RN01    leftFacing  Door      2019-08… 11:08… 2019-08-03 11:… FarPatient      
5          2 IV       RN01    leftFacing  Door      2019-08… 11:08… 2019-08-03 11:… FarPatient      
6          2 IV       RN01    leftFacing  Other     2019-08… 11:08… 2019-08-03 11:… FarPatient      
7          2 IV       RN01    leftFacing  Other     2019-08… 11:08… 2019-08-03 11:… FarPatient      
8          3 IV       RN01    leftFacing  Table     2019-08… 11:10… 2019-08-03 11:… NearPatient     
9          3 IV       RN01    leftFacing  Table     2019-08… 11:10… 2019-08-03 11:… NearPatient 

Or:

DF %>%
 group_by(ActivityID) %>%
 uncount((row_number() == n()) + 1)
tmfmnk
  • 38,881
  • 4
  • 47
  • 67
  • Thank you for your help. I would like to accept Jaap's for the extra code snippets. – HCAI Aug 20 '19 at 13:51
3

If we want to repeat only last row for each group it is enough to know last row number for each group. We can duplicated with fromLast argument as TRUE to get those row numbers and then add them with current rows. Using @Jaap's data

DF[sort(c(seq_len(nrow(DF)), which(!duplicated(DF$ActivityID, fromLast = TRUE)))),]


#    ActivityID CareType HCWType Orientation    Surface       Date     Time       Dev.Date.Time SurfaceCategories
#1            1       IV    RN01  leftFacing AlcOutside 2019-08-03 11:08:01 2019-08-03 11:08:01       HygieneArea
#2            1       IV    RN01  leftFacing         In 2019-08-03 11:08:12 2019-08-03 11:08:12                In
#3            1       IV    RN01  leftFacing       Door 2019-08-03 11:08:12 2019-08-03 11:08:12        FarPatient
#3.1          1       IV    RN01  leftFacing       Door 2019-08-03 11:08:12 2019-08-03 11:08:12        FarPatient
#4            2       IV    RN01  leftFacing       Door 2019-08-03 11:08:18 2019-08-03 11:08:18        FarPatient
#5            2       IV    RN01  leftFacing      Other 2019-08-03 11:08:22 2019-08-03 11:08:22        FarPatient
#5.1          2       IV    RN01  leftFacing      Other 2019-08-03 11:08:22 2019-08-03 11:08:22        FarPatient
#6            3       IV    RN01  leftFacing      Table 2019-08-03 11:10:26 2019-08-03 11:10:26       NearPatient
#6.1          3       IV    RN01  leftFacing      Table 2019-08-03 11:10:26 2019-08-03 11:10:26       NearPatient
Ronak Shah
  • 377,200
  • 20
  • 156
  • 213
2

Here is a base R solution.

result <- lapply(split(movsdf.rbind, movsdf.rbind$ActivityID), function(DF){
  rbind(DF, DF[nrow(DF), ])
})
result <- do.call(rbind, result)

result
#     ActivityID value
#1.1           1     1
#1.2           1     2
#1.3           1     3
#1.31          1     3
#2.4           2     4
#2.5           2     5
#2.6           2     6
#2.61          2     6
#3.7           3     7
#3.8           3     8
#3.9           3     9
#3.91          3     9

If the new row numbers are ugly, they can be made consecutive with

row.names(result) <- NULL

Data creation code.

movsdf.rbind <- data.frame(ActivityID = rep(1:3, each = 3),
                           value = 1:9)
Rui Barradas
  • 70,273
  • 8
  • 34
  • 66
  • Thank you for your help. I would like to accept Jaap's for the extra code snippets. – HCAI Aug 20 '19 at 13:51
1

We can split then map to bind_rowsing each data frame with its last row

library(dplyr)
library(purrr)
movsdf.rbind %>% 
      split(.$ActivityID) %>% 
      #Use map if you need the output as a list
      #instead of filter you can try slice(.x, n()) 
      map_df(~bind_rows(.x, filter(.x, row_number()==n())))
A. Suliman
  • 12,923
  • 5
  • 24
  • 37
  • Thank you for your help. I would like to accept Jaap's for the extra code snippets. – HCAI Aug 20 '19 at 13:51