0

I have the datetimes at which samples were taken in one dataframe:

samples <- structure(list(EC = c(2.31, 1.225, 1.749, 1.268, 1.904, 2.09, 
2.08, 2.08, 2.17, 2.23, 2.29, 2.29, 2.38, 2.44, 2.33, 2.35, 2.23, 
2.2, 2.2, 2.3, 2.37, 2.33, 1.301, 1.292, 1.471, 1.888, 1.977, 
2.05, 2.13, 2.21, 2.3, 2.34, 2.41, 2.43, 2.48, 2.5), TP = c(25.5, 
148, 121, 65.1, 68, 104, 175, 80.7, 75, 68.7, 26.7, 59.6, 114, 
39.1, 27.8, 26.8, 34.2, 136, 47.3, 52.7, 30.4, 37.7, 504, 426, 
225, 61.2, 133, 167, 109, 58, 46.6, 34.6, 37.2, 27.9, 47.5, 21.8
), SRP = c(14.3, 24, 32.7, 22, 22.5, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA), DateTime = structure(c(1615287900, 
1615332900, 1615376100, 1615697700, 1615998900, 1616060700, 1616147100, 
1616319900, 1616406300, 1616492700, 1616579100, 1616607600, 1616694000, 
1616780400, 1616851800, 1616951700, 1617033600, 1617076500, 1617120000, 
1617162900, 1617206400, 1617249300, 1617274500, 1617275100, 1617292800, 
1617379200, 1617422400, 1617465600, 1617508800, 1617552000, 1617595200, 
1617638400, 1617681600, 1617724800, 1617768000, 1617809400), tzone = "UTC", class = c("POSIXct", 
"POSIXt"))), row.names = c(NA, -36L), class = c("tbl_df", "tbl", 
"data.frame"))

and the datetimes of a flow record logged every 5 minutes in another (both are POSIXct):

Flow <- structure(list(DateTime = structure(c(1616848500, 1616848800, 
1616849100, 1616849400, 1616849700, 1616850000, 1616850300, 1616850600, 
1616850900, 1616851200, 1616851500, 1616853000, 1616853300, 1616853600, 
1616853900, 1616854200, 1616854500, 1616854800, 1616855100, 1616855400, 
1616855700, 1616856000, 1616856300, 1616856600, 1616856900, 1616857200, 
1616857500, 1616857800, 1616858100, 1616858400, 1616858700, 1616859000, 
1616859300, 1616859600, 1616859900, 1616860200, 1616860500, 1616860800, 
1616861100, 1616861400, 1616861700, 1616862000, 1616862300, 1616862600, 
1616862900, 1616863200, 1616863500, 1616863800, 1616864100, 1616864400, 
1616864700, 1616865000, 1616865300, 1616865600, 1616865900, 1616866200, 
1616866500, 1616866800, 1616867100, 1616867400, 1616867700, 1616868000, 
1616868300, 1616868600, 1616868900, 1616869200, 1616869500, 1616869800, 
1616870100, 1616870400, 1616870700, 1616871000, 1616871300, 1616871600, 
1616871900, 1616872200, 1616872500, 1616872800, 1616873100, 1616873400, 
1616873700, 1616874000, 1616874300, 1616874600, 1616874900, 1616875200, 
1616875500, 1616875800, 1616876100, 1616876400, 1616876700, 1616877000, 
1616877300, 1616877600, 1616877900, 1616878200, 1616878500, 1616878800, 
1616879100, 1616879400, 1616879700, 1616880000, 1616880300, 1616880600, 
1616880900, 1616881200, 1616881500, 1616881800, 1616882100, 1616882400, 
1616882700, 1616883000, 1616883300, 1616883600, 1616883900, 1616884200, 
1616884500, 1616884800, 1616885100, 1616885400, 1616885700, 1616886000, 
1616886300, 1616886600, 1616886900, 1616887200, 1616887500, 1616887800, 
1616888100, 1616888400, 1616888700, 1616889000, 1616889300, 1616889600, 
1616889900, 1616890200, 1616890500, 1616890800, 1616891100, 1616891400, 
1616891700, 1616892000, 1616892300, 1616892600, 1616892900, 1616893200, 
1616893500, 1616893800, 1616894100, 1616894400, 1616894700, 1616895000, 
1616895300, 1616895600, 1616895900, 1616896200, 1616896500, 1616896800, 
1616897100, 1616897400, 1616897700, 1616898000, 1616898300, 1616898600, 
1616898900, 1616899200, 1616899500, 1616899800, 1616900100, 1616900400, 
1616900700, 1616901000, 1616901300, 1616901600, 1616901900, 1616902200, 
1616902500, 1616902800, 1616903100, 1616903400, 1616903700, 1616904000, 
1616904300, 1616904600, 1616904900, 1616905200, 1616905500, 1616905800, 
1616906100, 1616906400, 1616906700, 1616907000, 1616907300, 1616907600, 
1616907900, 1616908200, 1616908500, 1616908800, 1616909100, 1616909400, 
1616909700), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    Flow = c(0.125, 0.146, 0.176, 0.219, 0.177, 0.187, 0.183, 
    0.13, 0.125, 0.14, 0.153, 0.182, 0.162, 0.146, 0.172, 0.218, 
    0.251, 0.13, 0.177, 0.197, 0.182, 0.156, 0.182, 0.208, 0.219, 
    0.177, 0.197, 0.224, 0.161, 0.177, 0.182, 0.182, 0.219, 0.171, 
    0.198, 0.166, 0.156, 0.172, 0.135, 0.182, 0.203, 0.203, 0.192, 
    0.172, 0.192, 0.234, 0.177, 0.198, 0.187, 0.239, 0.161, 0.213, 
    0.167, 0.14, 0.192, 0.203, 0.203, 0.161, 0.203, 0.208, 0.172, 
    0.129, 0.208, 0.198, 0.182, 0.213, 0.218, 0.203, 0.151, 0.161, 
    0.188, 0.187, 0.166, 0.166, 0.187, 0.187, 0.172, 0.192, 0.229, 
    0.161, 0.214, 0.203, 0.182, 0.167, 0.14, 0.166, 0.188, 0.166, 
    0.182, 0.177, 0.146, 0.161, 0.156, 0.182, 0.182, 0.166, 0.136, 
    0.156, 0.177, 0.213, 0.183, 0.161, 0.162, 0.161, 0.172, 0.141, 
    0.156, 0.166, 0.125, 0.14, 0.166, 0.157, 0.125, 0.166, 0.151, 
    0.14, 0.177, 0.172, 0.177, 0.172, 0.125, 0.171, 0.234, 0.187, 
    0.177, 0.197, 0.156, 0.146, 0.198, 0.182, 0.187, 0.192, 0.167, 
    0.161, 0.177, 0.151, 0.198, 0.167, 0.208, 0.162, 0.156, 0.182, 
    0.156, 0.208, 0.172, 0.135, 0.177, 0.156, 0.177, 0.151, 0.13, 
    0.203, 0.125, 0.145, 0.151, 0.161, 0.14, 0.182, 0.14, 0.197, 
    0.162, 0.151, 0.208, 0.198, 0.172, 0.166, 0.161, 0.13, 0.166, 
    0.125, 0.177, 0.182, 0.156, 0.167, 0.156, 0.229, 0.172, 0.13, 
    0.182, 0.115, 0.176, 0.172, 0.177, 0.198, 0.172, 0.166, 0.161, 
    0.156, 0.192, 0.208, 0.198, 0.203, 0.161, 0.156, 0.176, 0.188, 
    0.172, 0.172, 0.14, 0.135, 0.146)), row.names = c(5254L, 
5255L, 5256L, 5257L, 5258L, 5259L, 5260L, 5261L, 5262L, 5263L, 
5264L, 5266L, 5267L, 5268L, 5269L, 5270L, 5271L, 5272L, 5273L, 
5274L, 5275L, 5276L, 5277L, 5278L, 5279L, 5280L, 5281L, 5282L, 
5283L, 5284L, 5285L, 5286L, 5287L, 5288L, 5289L, 5290L, 5291L, 
5292L, 5293L, 5294L, 5295L, 5296L, 5297L, 5298L, 5299L, 5300L, 
5301L, 5302L, 5303L, 5304L, 5305L, 5306L, 5307L, 5308L, 5309L, 
5310L, 5311L, 5312L, 5313L, 5314L, 5315L, 5316L, 5317L, 5318L, 
5319L, 5320L, 5321L, 5322L, 5323L, 5324L, 5325L, 5326L, 5327L, 
5328L, 5329L, 5330L, 5331L, 5332L, 5333L, 5334L, 5335L, 5336L, 
5337L, 5338L, 5339L, 5340L, 5341L, 5342L, 5343L, 5344L, 5345L, 
5346L, 5347L, 5348L, 5349L, 5350L, 5351L, 5352L, 5353L, 5354L, 
5355L, 5356L, 5357L, 5358L, 5359L, 5360L, 5361L, 5362L, 5363L, 
5364L, 5365L, 5366L, 5367L, 5368L, 5369L, 5370L, 5371L, 5372L, 
5373L, 5374L, 5375L, 5376L, 5377L, 5378L, 5379L, 5380L, 5381L, 
5382L, 5383L, 5384L, 5385L, 5386L, 5387L, 5388L, 5389L, 5390L, 
5391L, 5392L, 5393L, 5394L, 5395L, 5396L, 5397L, 5398L, 5399L, 
5400L, 5401L, 5402L, 5403L, 5404L, 5405L, 5406L, 5407L, 5408L, 
5409L, 5410L, 5411L, 5412L, 5413L, 5414L, 5415L, 5416L, 5417L, 
5418L, 5419L, 5420L, 5421L, 5422L, 5423L, 5424L, 5425L, 5426L, 
5427L, 5428L, 5429L, 5430L, 5431L, 5432L, 5433L, 5434L, 5435L, 
5436L, 5437L, 5438L, 5439L, 5440L, 5441L, 5442L, 5443L, 5444L, 
5445L, 5446L, 5447L, 5448L, 5449L, 5450L, 5451L, 5452L, 5453L, 
5454L, 5455L), class = "data.frame")

I am only including the dput() output for the rows of the flow data that I think are causing the issue.

The sample times are random (they can occur at any time) and I want the flow rate at the time closest to each sample, so I first round the sample datetimes to the nearest 5 minutes so they match the flow data timestep, then I try to inner join the dataframes:

samples$DateTime<- round_date(samples$DateTime, "5 minutes")

samples$Flow<-inner_join(Flow, samples)$Flow

This doesn't work because inner_join(Flow, samples)$Flow has 35 rows, whereas samples has 36 rows,

so then I try:

merge(Flow, samples, by = "DateTime", all.y = T)

DateTime  Flow Site  Sample    EC    TP  SRP Flowrate
1  2021-03-09 11:05:00 0.160  DCS  TG-24a 2.310  25.5 14.3      .1X
2  2021-03-09 23:35:00 0.572  DCS  BA-53a 1.225 148.0 24.0     0.66
3  2021-03-10 11:35:00 0.249  DCS  BA-53b 1.749 121.0 32.7     0.19
4  2021-03-14 04:55:00 0.869  DCS  BA-56a 1.268  65.1 22.0     0.89
5  2021-03-17 16:35:00 0.242  DCS  TG-26a 1.904  68.0 22.5     0-.3
6  2021-03-18 09:45:00 0.245  DCS   BA-57 2.090 104.0   NA      0.2
7  2021-03-19 09:45:00 0.130  DCS   BA-58 2.080 175.0   NA      0.2
8  2021-03-21 09:45:00 0.114  DCS   BA-59 2.080  80.7   NA      0.2
9  2021-03-22 09:45:00 0.140  DCS   BA-60 2.170  75.0   NA      0.2
10 2021-03-23 09:45:00 0.125  DCS   BA-61 2.230  68.7   NA      0.2
11 2021-03-24 09:45:00 0.140  DCS   BA-62 2.290  26.7   NA      0.2
12 2021-03-24 17:40:00 0.229  DCS BA-72.1 2.290  59.6   NA      0.1
13 2021-03-25 17:40:00 0.213  DCS BA-72.2 2.380 114.0   NA      0.1
14 2021-03-26 17:40:00 0.192  DCS BA-72.3 2.440  39.1   NA      0.1
15 2021-03-27 13:30:00    NA <NA> BA-72.4 2.330  27.8   NA      0.1
16 2021-03-28 17:15:00 0.163  DCS TG-27.1 2.350  26.8   NA      0.1
17 2021-03-29 16:00:00 0.062  DCS     BS1 2.230  34.2   NA      .0X
18 2021-03-30 03:55:00 0.036  DCS     BS2 2.200 136.0   NA      .0X
19 2021-03-30 16:00:00 0.084  DCS     BS3 2.200  47.3   NA      .0X
20 2021-03-31 03:55:00 0.068  DCS     BS4 2.300  52.7   NA      .0X
21 2021-03-31 16:00:00 0.115  DCS     BS5 2.370  30.4   NA      .0X
22 2021-04-01 03:55:00 0.108  DCS     BS6 2.330  37.7   NA      .0X
23 2021-04-01 10:55:00 0.900  DCS     BS7 1.301 504.0   NA     0.93
24 2021-04-01 11:05:00 0.915  DCS     BS8 1.292 426.0   NA     1.18
25 2021-04-01 16:00:00 0.785  DCS     BS9 1.471 225.0   NA     0.79
26 2021-04-02 16:00:00 0.234  DCS    BS10 1.888  61.2   NA     0.23
27 2021-04-03 04:00:00 0.166  DCS    BS11 1.977 133.0   NA     0.17
28 2021-04-03 16:00:00 0.235  DCS    BS12 2.050 167.0   NA     0.23
29 2021-04-04 04:00:00 0.063  DCS    BS13 2.130 109.0   NA     0.06
30 2021-04-04 16:00:00 0.161  DCS    BS14 2.210  58.0   NA     0.16
31 2021-04-05 04:00:00 0.161  DCS    BS15 2.300  46.6   NA     0.16
32 2021-04-05 16:00:00 0.140  DCS    BS16 2.340  34.6   NA     0.14
33 2021-04-06 04:00:00 0.109  DCS    BS17 2.410  37.2   NA     0.11
34 2021-04-06 16:00:00 0.125  DCS    BS18 2.430  27.9   NA     0.13
35 2021-04-07 04:00:00 0.114  DCS    BS19 2.480  47.5   NA     0.11
36 2021-04-07 15:30:00 0.095  DCS    BS20 2.500  21.8   NA     0.17

You can see row 15 contains NA so I check to see if I am missing flow data at that datetime:

which(Flow$DateTime == "2021-03-27 13:30:00 ")
[1] 5255

So according to my R knowledge this means the Flow dataframe has this datetime at this row, so I open the dataframe and look, but at that row there is a different datetime:

5255 2021-03-27 12:40:00 0.146 

What is even stranger is that if I go to where 2021-03-27 13:30:00 should be, it doesn't even exist:

5264 2021-03-27 13:25:00 0.153 
5266 2021-03-27 13:50:00 0.182

Why would R give this wrong row?

MrFlick
  • 195,160
  • 17
  • 277
  • 295
ruggntub
  • 95
  • 5
  • How exactly are you "opening the dataframe and looking"? `which` should give you the index. You should be able to see that row with `Flow[5255, ]`. The value that precedes the output is the row name. Note that row names may be different than row indexes, especially if the data has been filtered or joined in any way. It's easier to help you if you provide data in a [reproducible format](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) like `dput` which will make it clear what's actually in the data. – MrFlick Apr 22 '21 at 00:32
  • @MrFlick I edited the post; this is my first time using dput(), so I'm not sure how much to include. When I do `Flow[5255,]` I get `DateTime Flow 5310 2021-03-27 17:30:00 0.203` – ruggntub Apr 22 '21 at 00:49
  • 1
    I think you are running into a time zone issue. When you specify a date as a string, your local time zone is assumed. But your times in `Flow` are UTC/GMT, so try `which(Flow$DateTime == as.POSIXct("2021-03-27 13:30:00", tz="GMT"))` to be explicit. – MrFlick Apr 22 '21 at 01:07
  • Thank you, it was a time zone issue: when formatting `samples` I did not also set `tz = "UTC"`, which I did for `Flow` to avoid dealing with daylight saving time. Also, thank you for editing the post; I see now how to use dput(). – ruggntub Apr 22 '21 at 01:20

0 Answers0