1

Here is my dataframe:

sensors_data <- structure(list(timestamp = c(164424852623558, 164424852623558, 
164424855959288, 164424855959288, 164424857931288, 164424857931288, 
164424859915288, 164424859915288, 164424861778558, 164424861903788
), acc_x = c(5.4230323, 5.4230323, 5.4086666, 5.4086666, 5.4086666, 
5.4086666, 5.3895125, 5.3895125, 5.3895125, 5.3895125), acc_y = c(7.9681463, 
7.9681463, 7.9777236, 7.9777236, 7.9777236, 7.9777236, 7.987301, 
7.987301, 7.987301, 7.987301), acc_z = c(2.6097596, 2.6097596, 
2.6337023, 2.6337023, 2.6337023, 2.6337023, 2.6528566, 2.6528566, 
2.6528566, 2.6528566), grav_x = c(-1, -1, -1, -1, -1, -1, -1, 
-1, 5.82316, 5.82316), grav_y = c(-1, -1, -1, -1, -1, -1, -1, 
-1, 7.7164946, 7.7164946), grav_z = c(-1, -1, -1, -1, -1, -1, 
-1, -1, 1.6482342, 1.6482342), gyro_x = c(-1, -0.3230286, -0.3230286, 
-0.3230286, -0.3230286, -0.3230286, -0.3230286, -0.3230286, -0.3230286, 
-0.13915816), gyro_y = c(-1, 0.77723867, 0.77723867, 0.77723867, 
0.77723867, 0.77723867, 0.77723867, 0.77723867, 0.77723867, 1.0466303
), gyro_z = c(-1, -0.365287, -0.365287, -0.365287, -0.365287, 
-0.365287, -0.365287, -0.365287, -0.365287, -0.57420295), press_id = c(-1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1)), .Names = c("timestamp", 
"acc_x", "acc_y", "acc_z", "grav_x", "grav_y", "grav_z", "gyro_x", 
"gyro_y", "gyro_z", "press_id"), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"))

I want to mutate/change the press_id to specific value according other dataframe:

pin_press_time_range <- structure(list(press_id = 1:4, start_time = c(164429106370978, 
164429411618824, 164429837271939, 164430399454284), end_time = c(164429182443824, 
164429512525747, 164429903243169, 164430465927554)), class = c("tbl_df", 
"tbl", "data.frame"), .Names = c("press_id", "start_time", "end_time"
), row.names = c(NA, -4L))

Meaning that for all time stamps that fall between the start and end times for press_id 1, 2, ... mark it according to the press id.

I am trying to do this with mutate_if or mutate_at but without luck.

That is how I would do this manually, but I need it automatic:

sensors_data %>% filter(timestamp >= pin_press_time_range[1,]$start_time & 
           timestamp <= pin_press_time_range[1,]$end_time) %>%
  mutate(press_id = pin_press_time_range[1,]$press_id)

Please advise.

Miha
  • 2,559
  • 2
  • 19
  • 34
SteveS
  • 3,789
  • 5
  • 30
  • 64
  • 2
    Perhaps not the best reproducible example since no `timestamp`'s are actually between `start_time` and `end_time`. – missuse Jul 31 '18 at 12:32
  • @missuse I will try to edit so you'll have them, a bit long but give me a sec. – SteveS Jul 31 '18 at 12:34
  • Possible duplicate of [Find which interval row in a data frame that each element of a vector belongs in](https://stackoverflow.com/questions/41132081/find-which-interval-row-in-a-data-frame-that-each-element-of-a-vector-belongs-in) – missuse Jul 31 '18 at 12:42
  • @steves Still no representative sample data... – Maurits Evers Jul 31 '18 at 12:50

3 Answers3

2

Using sqldf

library(sqldf)
sqldf("select a.*, b.*
      from sensors_data a
          left join pin_press_time_range b
           on a.timestamp >= b.start_time 
           AND a.timestamp <= b.end_time")
A. Suliman
  • 12,923
  • 5
  • 24
  • 37
2

using fuzzyjoin

library(fuzzyjoin)    

sensors_data %>%
  fuzzy_left_join(pin_press_time_range,
                  by = c("timestamp" = "start_time", "timestamp" = "end_time"), 
                  match_fun = list(`>=`, `<=`))
missuse
  • 19,056
  • 3
  • 25
  • 47
2

Using data.table::foverlaps

library(data.table)
setDT(sensors_data)[, `:=`(start_time = timestamp, end_time = timestamp)]
setDT(pin_press_time_range)

setkey(pin_press_time_range, start_time, end_time)
dt <- foverlaps(sensors_data, pin_press_time_range)

Explanation: foverlaps performs an overlap-join, based on the intervals from two data.tables; as foverlaps requires a start and end point, we choose timestamp as the start and end point for sensors_data. We then left overlap-join sensors_data and pin_press_time_range based on the two common keys start_time and end_time.

Maurits Evers
  • 49,617
  • 4
  • 47
  • 68