I have the datetimes at which samples were taken in one data frame:
# Sample data as produced by dput(): a 36-row tibble with numeric columns
# EC, TP and SRP (SRP is NA after the first five rows) and a POSIXct
# DateTime column stored as seconds since the epoch with tzone "UTC".
samples <- structure(list(EC = c(2.31, 1.225, 1.749, 1.268, 1.904, 2.09,
2.08, 2.08, 2.17, 2.23, 2.29, 2.29, 2.38, 2.44, 2.33, 2.35, 2.23,
2.2, 2.2, 2.3, 2.37, 2.33, 1.301, 1.292, 1.471, 1.888, 1.977,
2.05, 2.13, 2.21, 2.3, 2.34, 2.41, 2.43, 2.48, 2.5), TP = c(25.5,
148, 121, 65.1, 68, 104, 175, 80.7, 75, 68.7, 26.7, 59.6, 114,
39.1, 27.8, 26.8, 34.2, 136, 47.3, 52.7, 30.4, 37.7, 504, 426,
225, 61.2, 133, 167, 109, 58, 46.6, 34.6, 37.2, 27.9, 47.5, 21.8
), SRP = c(14.3, 24, 32.7, 22, 22.5, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), DateTime = structure(c(1615287900,
1615332900, 1615376100, 1615697700, 1615998900, 1616060700, 1616147100,
1616319900, 1616406300, 1616492700, 1616579100, 1616607600, 1616694000,
1616780400, 1616851800, 1616951700, 1617033600, 1617076500, 1617120000,
1617162900, 1617206400, 1617249300, 1617274500, 1617275100, 1617292800,
1617379200, 1617422400, 1617465600, 1617508800, 1617552000, 1617595200,
1617638400, 1617681600, 1617724800, 1617768000, 1617809400), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), row.names = c(NA, -36L), class = c("tbl_df", "tbl",
"data.frame"))
and the datetimes of a flow record, logged every 5 minutes, in another (both are POSIXct):
# Excerpt of the flow record as produced by dput(): a base data.frame with a
# POSIXct DateTime column (tzone "UTC") and a numeric Flow column.
# Note the jump in DateTime from 1616851500 to 1616853000 and the matching
# jump in row names from 5264 to 5266: row names are labels carried over from
# a larger table, not positions within this excerpt.
Flow <- structure(list(DateTime = structure(c(1616848500, 1616848800,
1616849100, 1616849400, 1616849700, 1616850000, 1616850300, 1616850600,
1616850900, 1616851200, 1616851500, 1616853000, 1616853300, 1616853600,
1616853900, 1616854200, 1616854500, 1616854800, 1616855100, 1616855400,
1616855700, 1616856000, 1616856300, 1616856600, 1616856900, 1616857200,
1616857500, 1616857800, 1616858100, 1616858400, 1616858700, 1616859000,
1616859300, 1616859600, 1616859900, 1616860200, 1616860500, 1616860800,
1616861100, 1616861400, 1616861700, 1616862000, 1616862300, 1616862600,
1616862900, 1616863200, 1616863500, 1616863800, 1616864100, 1616864400,
1616864700, 1616865000, 1616865300, 1616865600, 1616865900, 1616866200,
1616866500, 1616866800, 1616867100, 1616867400, 1616867700, 1616868000,
1616868300, 1616868600, 1616868900, 1616869200, 1616869500, 1616869800,
1616870100, 1616870400, 1616870700, 1616871000, 1616871300, 1616871600,
1616871900, 1616872200, 1616872500, 1616872800, 1616873100, 1616873400,
1616873700, 1616874000, 1616874300, 1616874600, 1616874900, 1616875200,
1616875500, 1616875800, 1616876100, 1616876400, 1616876700, 1616877000,
1616877300, 1616877600, 1616877900, 1616878200, 1616878500, 1616878800,
1616879100, 1616879400, 1616879700, 1616880000, 1616880300, 1616880600,
1616880900, 1616881200, 1616881500, 1616881800, 1616882100, 1616882400,
1616882700, 1616883000, 1616883300, 1616883600, 1616883900, 1616884200,
1616884500, 1616884800, 1616885100, 1616885400, 1616885700, 1616886000,
1616886300, 1616886600, 1616886900, 1616887200, 1616887500, 1616887800,
1616888100, 1616888400, 1616888700, 1616889000, 1616889300, 1616889600,
1616889900, 1616890200, 1616890500, 1616890800, 1616891100, 1616891400,
1616891700, 1616892000, 1616892300, 1616892600, 1616892900, 1616893200,
1616893500, 1616893800, 1616894100, 1616894400, 1616894700, 1616895000,
1616895300, 1616895600, 1616895900, 1616896200, 1616896500, 1616896800,
1616897100, 1616897400, 1616897700, 1616898000, 1616898300, 1616898600,
1616898900, 1616899200, 1616899500, 1616899800, 1616900100, 1616900400,
1616900700, 1616901000, 1616901300, 1616901600, 1616901900, 1616902200,
1616902500, 1616902800, 1616903100, 1616903400, 1616903700, 1616904000,
1616904300, 1616904600, 1616904900, 1616905200, 1616905500, 1616905800,
1616906100, 1616906400, 1616906700, 1616907000, 1616907300, 1616907600,
1616907900, 1616908200, 1616908500, 1616908800, 1616909100, 1616909400,
1616909700), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Flow = c(0.125, 0.146, 0.176, 0.219, 0.177, 0.187, 0.183,
0.13, 0.125, 0.14, 0.153, 0.182, 0.162, 0.146, 0.172, 0.218,
0.251, 0.13, 0.177, 0.197, 0.182, 0.156, 0.182, 0.208, 0.219,
0.177, 0.197, 0.224, 0.161, 0.177, 0.182, 0.182, 0.219, 0.171,
0.198, 0.166, 0.156, 0.172, 0.135, 0.182, 0.203, 0.203, 0.192,
0.172, 0.192, 0.234, 0.177, 0.198, 0.187, 0.239, 0.161, 0.213,
0.167, 0.14, 0.192, 0.203, 0.203, 0.161, 0.203, 0.208, 0.172,
0.129, 0.208, 0.198, 0.182, 0.213, 0.218, 0.203, 0.151, 0.161,
0.188, 0.187, 0.166, 0.166, 0.187, 0.187, 0.172, 0.192, 0.229,
0.161, 0.214, 0.203, 0.182, 0.167, 0.14, 0.166, 0.188, 0.166,
0.182, 0.177, 0.146, 0.161, 0.156, 0.182, 0.182, 0.166, 0.136,
0.156, 0.177, 0.213, 0.183, 0.161, 0.162, 0.161, 0.172, 0.141,
0.156, 0.166, 0.125, 0.14, 0.166, 0.157, 0.125, 0.166, 0.151,
0.14, 0.177, 0.172, 0.177, 0.172, 0.125, 0.171, 0.234, 0.187,
0.177, 0.197, 0.156, 0.146, 0.198, 0.182, 0.187, 0.192, 0.167,
0.161, 0.177, 0.151, 0.198, 0.167, 0.208, 0.162, 0.156, 0.182,
0.156, 0.208, 0.172, 0.135, 0.177, 0.156, 0.177, 0.151, 0.13,
0.203, 0.125, 0.145, 0.151, 0.161, 0.14, 0.182, 0.14, 0.197,
0.162, 0.151, 0.208, 0.198, 0.172, 0.166, 0.161, 0.13, 0.166,
0.125, 0.177, 0.182, 0.156, 0.167, 0.156, 0.229, 0.172, 0.13,
0.182, 0.115, 0.176, 0.172, 0.177, 0.198, 0.172, 0.166, 0.161,
0.156, 0.192, 0.208, 0.198, 0.203, 0.161, 0.156, 0.176, 0.188,
0.172, 0.172, 0.14, 0.135, 0.146)), row.names = c(5254L,
5255L, 5256L, 5257L, 5258L, 5259L, 5260L, 5261L, 5262L, 5263L,
5264L, 5266L, 5267L, 5268L, 5269L, 5270L, 5271L, 5272L, 5273L,
5274L, 5275L, 5276L, 5277L, 5278L, 5279L, 5280L, 5281L, 5282L,
5283L, 5284L, 5285L, 5286L, 5287L, 5288L, 5289L, 5290L, 5291L,
5292L, 5293L, 5294L, 5295L, 5296L, 5297L, 5298L, 5299L, 5300L,
5301L, 5302L, 5303L, 5304L, 5305L, 5306L, 5307L, 5308L, 5309L,
5310L, 5311L, 5312L, 5313L, 5314L, 5315L, 5316L, 5317L, 5318L,
5319L, 5320L, 5321L, 5322L, 5323L, 5324L, 5325L, 5326L, 5327L,
5328L, 5329L, 5330L, 5331L, 5332L, 5333L, 5334L, 5335L, 5336L,
5337L, 5338L, 5339L, 5340L, 5341L, 5342L, 5343L, 5344L, 5345L,
5346L, 5347L, 5348L, 5349L, 5350L, 5351L, 5352L, 5353L, 5354L,
5355L, 5356L, 5357L, 5358L, 5359L, 5360L, 5361L, 5362L, 5363L,
5364L, 5365L, 5366L, 5367L, 5368L, 5369L, 5370L, 5371L, 5372L,
5373L, 5374L, 5375L, 5376L, 5377L, 5378L, 5379L, 5380L, 5381L,
5382L, 5383L, 5384L, 5385L, 5386L, 5387L, 5388L, 5389L, 5390L,
5391L, 5392L, 5393L, 5394L, 5395L, 5396L, 5397L, 5398L, 5399L,
5400L, 5401L, 5402L, 5403L, 5404L, 5405L, 5406L, 5407L, 5408L,
5409L, 5410L, 5411L, 5412L, 5413L, 5414L, 5415L, 5416L, 5417L,
5418L, 5419L, 5420L, 5421L, 5422L, 5423L, 5424L, 5425L, 5426L,
5427L, 5428L, 5429L, 5430L, 5431L, 5432L, 5433L, 5434L, 5435L,
5436L, 5437L, 5438L, 5439L, 5440L, 5441L, 5442L, 5443L, 5444L,
5445L, 5446L, 5447L, 5448L, 5449L, 5450L, 5451L, 5452L, 5453L,
5454L, 5455L), class = "data.frame")
I am only including (via dput()) the rows of the flow data that I think are causing the issue.
The sample times are irregular (a sample can be taken at any time), and for each sample I want the flow rate recorded at the closest time. So I first round the sample datetimes to the nearest 5 minutes so that they match the flow data's time step, and then I try to inner join the data frames:
# Snap each sample timestamp onto the 5-minute grid used by the flow record.
samples[["DateTime"]] <- round_date(samples[["DateTime"]], unit = "5 minutes")
# Join on the columns the two tables share and pull out the matched flow
# values. inner_join() keeps only rows whose key appears in BOTH tables, so
# the extracted vector can be shorter than nrow(samples).
samples[["Flow"]] <- inner_join(Flow, samples)[["Flow"]]
This doesn't work because inner_join(Flow, samples)$Flow
has 35 values, whereas samples has 36 rows.
So then I try:
# Keep every row of `samples` (all.y = TRUE); sample rows whose DateTime has
# no match in `Flow` are retained with NA in the Flow-side columns.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
merge(Flow, samples, by = "DateTime", all.y = TRUE)
DateTime Flow Site Sample EC TP SRP Flowrate
1 2021-03-09 11:05:00 0.160 DCS TG-24a 2.310 25.5 14.3 .1X
2 2021-03-09 23:35:00 0.572 DCS BA-53a 1.225 148.0 24.0 0.66
3 2021-03-10 11:35:00 0.249 DCS BA-53b 1.749 121.0 32.7 0.19
4 2021-03-14 04:55:00 0.869 DCS BA-56a 1.268 65.1 22.0 0.89
5 2021-03-17 16:35:00 0.242 DCS TG-26a 1.904 68.0 22.5 0-.3
6 2021-03-18 09:45:00 0.245 DCS BA-57 2.090 104.0 NA 0.2
7 2021-03-19 09:45:00 0.130 DCS BA-58 2.080 175.0 NA 0.2
8 2021-03-21 09:45:00 0.114 DCS BA-59 2.080 80.7 NA 0.2
9 2021-03-22 09:45:00 0.140 DCS BA-60 2.170 75.0 NA 0.2
10 2021-03-23 09:45:00 0.125 DCS BA-61 2.230 68.7 NA 0.2
11 2021-03-24 09:45:00 0.140 DCS BA-62 2.290 26.7 NA 0.2
12 2021-03-24 17:40:00 0.229 DCS BA-72.1 2.290 59.6 NA 0.1
13 2021-03-25 17:40:00 0.213 DCS BA-72.2 2.380 114.0 NA 0.1
14 2021-03-26 17:40:00 0.192 DCS BA-72.3 2.440 39.1 NA 0.1
15 2021-03-27 13:30:00 NA <NA> BA-72.4 2.330 27.8 NA 0.1
16 2021-03-28 17:15:00 0.163 DCS TG-27.1 2.350 26.8 NA 0.1
17 2021-03-29 16:00:00 0.062 DCS BS1 2.230 34.2 NA .0X
18 2021-03-30 03:55:00 0.036 DCS BS2 2.200 136.0 NA .0X
19 2021-03-30 16:00:00 0.084 DCS BS3 2.200 47.3 NA .0X
20 2021-03-31 03:55:00 0.068 DCS BS4 2.300 52.7 NA .0X
21 2021-03-31 16:00:00 0.115 DCS BS5 2.370 30.4 NA .0X
22 2021-04-01 03:55:00 0.108 DCS BS6 2.330 37.7 NA .0X
23 2021-04-01 10:55:00 0.900 DCS BS7 1.301 504.0 NA 0.93
24 2021-04-01 11:05:00 0.915 DCS BS8 1.292 426.0 NA 1.18
25 2021-04-01 16:00:00 0.785 DCS BS9 1.471 225.0 NA 0.79
26 2021-04-02 16:00:00 0.234 DCS BS10 1.888 61.2 NA 0.23
27 2021-04-03 04:00:00 0.166 DCS BS11 1.977 133.0 NA 0.17
28 2021-04-03 16:00:00 0.235 DCS BS12 2.050 167.0 NA 0.23
29 2021-04-04 04:00:00 0.063 DCS BS13 2.130 109.0 NA 0.06
30 2021-04-04 16:00:00 0.161 DCS BS14 2.210 58.0 NA 0.16
31 2021-04-05 04:00:00 0.161 DCS BS15 2.300 46.6 NA 0.16
32 2021-04-05 16:00:00 0.140 DCS BS16 2.340 34.6 NA 0.14
33 2021-04-06 04:00:00 0.109 DCS BS17 2.410 37.2 NA 0.11
34 2021-04-06 16:00:00 0.125 DCS BS18 2.430 27.9 NA 0.13
35 2021-04-07 04:00:00 0.114 DCS BS19 2.480 47.5 NA 0.11
36 2021-04-07 15:30:00 0.095 DCS BS20 2.500 21.8 NA 0.17
You can see that row 15 contains NA, so I check whether I am missing flow data at that datetime:
# Positions (not row names) of the elements of Flow's DateTime column that
# compare equal to the given string.
# NOTE(review): the character string is converted to POSIXct for the
# comparison; without an explicit tz that conversion presumably uses the
# session time zone rather than the column's "UTC" -- confirm.
# NOTE(review): which() returns positional indices, whereas this data frame's
# row names are non-consecutive labels, so the two need not agree -- confirm
# which one the viewer displays.
which(Flow[["DateTime"]] == "2021-03-27 13:30:00 ")
[1] 5255
As far as I understand R, this means the Flow data frame has this datetime at that row. So I open the data frame and look, but that row holds a different datetime:
5255 2021-03-27 12:40:00 0.146
What is even stranger is that if I go to where 2021-03-27 13:30:00 should be, it doesn't even exist:
5264 2021-03-27 13:25:00 0.153
5266 2021-03-27 13:50:00 0.182
Why would R give this wrong row?