I've been trying to clean and split a data set that contains a time series of temperature data, using R. I've already been able to take this data frame and split it based on other discrete conditions. I am now trying to split it every time the temperature profile dips near 100 F; each dip would basically signal the start of a new data frame. I've tried splitting based on time intervals, but not all of the smaller data frames/processes last the same amount of time. If anyone has an idea on how to tackle this, it would be extremely appreciated.
Here is a fragment of the data set. It is quite long, but it is the minimum needed for it to be representative:
structure(list(timestamp = structure(c(1416213315, 1416213317,
1416213339, 1416213345, 1416213356, 1416213358, 1416213361, 1416213365,
1416213384, 1416213386, 1416213427, 1416213439, 1416213441, 1416213478,
1416213490, 1416213494, 1416213524, 1416213557, 1416213560, 1416213595,
1416213598, 1416213600, 1416213618, 1416213624, 1416213635, 1416213645,
1416213663, 1416213665, 1416213689, 1416213707, 1416213708, 1416213713,
1416213715, 1416213719, 1416213729, 1416213732, 1416213742, 1416213744,
1416213749, 1416213752, 1416213753, 1416213754, 1416213756, 1416213757,
1416213759, 1416213763, 1416213765, 1416213767, 1416213773, 1416213775,
1416213777, 1416213784, 1416213788, 1416213790, 1416213792, 1416213794,
1416213796, 1416213798, 1416213802, 1416213808, 1416213812, 1416213814,
1416213816, 1416213818, 1416213828, 1416213833, 1416213836, 1416213846,
1416213847, 1416213849, 1416213854, 1416213856, 1416213858, 1416213863,
1416213871, 1416213873, 1416213876, 1416213878, 1416213880, 1416213892,
1416213893, 1416213896, 1416213898, 1416213906, 1416213910, 1416213912,
1416213914, 1416213916, 1416213917, 1416213921, 1416213925, 1416213927,
1416213928, 1416213935, 1416213936, 1416213939, 1416213944, 1416213947,
1416213954, 1416213956), class = c("POSIXct", "POSIXt"), tzone = "CST6CDT"),
averagetemp = c(162.6, 162.6, 162.6, 162.6, 162.6, 162.6,
162.6, 155.5, 155.5, 152.8, 152.8, 147.1, 147.1, 147.1, 147.1,
142.4, 142.4, 139.4, 139.4, 139.4, 139.4, 136.4, 136.4, 135.3,
135.3, 135.3, 133, 133, 133, 131.5, 131.5, 124.8, 123.8,
123.8, 120.3, 120.3, 118.8, 118.7, 118.7, 145.5, 145.1, 143.9,
143.9, 143.9, 134.6, 147.1, 147.1, 147.1, 139.2, 139.2, 144.7,
146.4, 135.9, 135.9, 135.9, 137.2, 140.7, 147.2, 137.1, 137.1,
137.1, 149.3, 148.2, 148.2, 156.8, 151.4, 150, 150, 158.6,
158.6, 162.2, 162.1, 162.1, 160.9, 167.7, 164.5, 162.9, 162.9,
171, 165.6, 165.6, 164.1, 164.1, 164.1, 166.3, 166.3, 165.7,
164.7, 164.5, 164.5, 164.5, 167.3, 167.3, 167.9, 168, 168,
168, 166.2, 166.2, 167.4), fronttemp = c(163.2, 163.2, 159.2,
159.2, 159.2, 157.5, 157.5, 157.5, 157.5, 157.5, 157.5, 157.5,
157.5, 150.8, 150.8, 150.8, 150.8, 150.8, 150.8, 146.4, 146.4,
146.4, 146.4, 146.4, 145.6, 145.6, 145.6, 145.6, 144.9, 144.9,
144.9, 144.9, 144.9, 143.9, 143.9, 143.9, 143.9, 143.9, 144.7,
144.7, 153.3, 155.5, 157.2, 157.2, 157.2, 157.2, 157.2, 130.7,
155.8, 159.9, 159.3, 159.3, 159.3, 174, 172.4, 172.4, 172.4,
181.1, 181.1, 181.1, 173, 174.2, 170.3, 172.1, 172.1, 166.5,
160, 167, 167, 159.6, 162, 165.9, 167.2, 160.3, 160.3, 163.2,
162.5, 162.5, 171.4, 164.7, 164.7, 164.3, 164.3, 164.3, 164.3,
170.7, 170.7, 169.1, 169.1, 167.2, 167.2, 167.2, 170.3, 171,
170.6, 170.6, 170.6, 170.6, 167, 167), recipe = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("a",
"b", "c", "d", "e", "f",
"g", "h", "i", "j"), class = "factor"),
recipenumner = c(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7), dischargetemp = c(166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5,
166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5, 166.5),
backtemp = c(163.4, 161.8, 161.8, 158.1, 155.8, 155.8, 155,
155, 151, 151, 144.8, 144.8, 143.1, 143.1, 137.5, 137.5,
135.7, 135.7, 133.5, 133.5, 129.9, 129.9, 129, 129, 129,
126.1, 126.1, 124.7, 124.7, 124.7, 123, 111.9, 110.4, 110.4,
110.4, 103.7, 102.1, 101.9, 101.9, 141.1, 139.7, 136.2, 136.2,
130.1, 130.1, 130.1, 150.6, 150.6, 127.8, 128.9, 135, 135,
113.5, 113.5, 113.5, 113.7, 119.6, 119.6, 110.9, 126, 126,
132.7, 133.5, 133.5, 146.6, 141.3, 143.4, 143.4, 154.6, 154.6,
162.4, 159.5, 159.5, 159.5, 169.9, 165.4, 163.2, 171.4, 171.4,
171.4, 165.9, 164, 164.3, 162.3, 162.3, 162.3, 163.1, 161.7,
161.7, 161.7, 165.6, 165.6, 161.4, 165.9, 165.9, 166.6, 167.3,
165.5, 165.5, 167.7)), .Names = c("timestamp", "averagetemp",
"fronttemp", "recipe", "recipenumner", "dischargetemp", "backtemp"
), row.names = 168229:168328, class = "data.frame")
This data frame comes from a bigger data frame I was able to split by label names. See code here:
# Step 1: Import the data set
# Interactively choose the tab-delimited file to load.
cooker <- read.delim(file.choose(), header = TRUE, sep = "\t")
# Convert timestamp to POSIXct to enable time calculations. Columns are
# referenced explicitly through `cooker$` instead of attach(), which would
# leave a stale copy of the columns on the search path.
cooker$timestamp <- as.POSIXct(
  cooker$timestamp,
  tz = "CST6CDT",
  format = "%Y-%m-%d %H:%M:%S"
)
# Step 2: Subset the data based on recipe/label
cooker_split <- split(cooker, cooker$recipe) # one data frame per recipe
new_names <- names(cooker_split) # recipe names, reused as variable names
# Create one individual data frame per recipe, named after the recipe.
# seq_along() yields an empty sequence when the split is empty, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(cooker_split)) {
  assign(new_names[i], cooker_split[[i]])
}
After I uploaded the data and had it subset into recipe/label names I tried to create subsets based on time intervals like this:
# Bin each observation into consecutive 10-second windows by its timestamp.
time_interval <- cut(df$timestamp, breaks = "10 sec")
# Append the window label as a new column; check.names = FALSE leaves the
# existing column names untouched.
new_df <- data.frame(df, time_interval = time_interval, check.names = FALSE)
# Split into a list of data frames, one per window label.
batches <- split(new_df, f = new_df[["time_interval"]])
The issue here is that, across the complete data set, each increase and decrease in temperature does not last exactly 10 seconds; this is only an approximation.
What I want to be able to do, if at all possible, is to have R determine when the temperature in the variable backtemp has reached 100 F or close to it (or when some other condition on backtemp has been met), subset at that point, and keep going, subsetting each following section until the condition can no longer be met. It sounds like a for/if loop, but I cannot even begin to think how to do that in R. Any help would be appreciated.
Thanks