0

Aloha all,

I've struggled to build a legend for a mix/match of time series data I'm making. Here is some code:

My understanding is that I need to somehow clean my data and put it all in the same data frame, but the time series don't line up very well: some are recorded every 15 minutes, others every hour. Is there any way to force a legend for these datasets? I don't know what else to post here, since the 5 datasets are quite large.

Plot I'm working on:

enter image description here

# Overlay of turbidity and water-level readings from several data frames,
# each restricted to the open time window (startd, endd).
# Every layer sets a fixed colour outside aes(), i.e. colour is not mapped to
# any variable in this plot.
q <- ggplot(
  data = subset(cr200_Auwai1, timedate > startd & timedate < endd),
  mapping = aes(x = timedate, y = Turb_SS)
) +
  geom_point(colour = "coral4") +
  geom_point(
    data = subset(dsloi_wl, timedate > startd & timedate < endd),
    mapping = aes(x = timedate, y = level),
    colour = "blue"
  ) +
  # disabled layer (flow):
  # geom_point(data=subset(flow_data, mdate>startd & mdate<endd), aes(as.POSIXct(mdate), flow_cfs*1000), color="red")+
  # Lvl_m is multiplied by 1000 so it shares the axis labelled in mm below
  # (alternative colour considered: aquamarine3)
  geom_point(
    data = subset(cr300_Wai1, timedate > startd & timedate < endd),
    mapping = aes(x = timedate, y = Lvl_m * 1000),
    colour = "forestgreen",
    size = 1
  ) +
  geom_point(
    data = subset(cr300_Wai1, timedate > startd & timedate < endd),
    mapping = aes(x = timedate, y = Turb_SS),
    colour = "orange"
  ) +
  # disabled layers (second water-level source, rainfall):
  # geom_point(data=subset(hihimanu_wl, timedate>startd & timedate<endd), aes(timedate, level), color="azure4", size=0.1)+
  # geom_point(data=subset(rain_data, timedate>startd & timedate<endd), aes(timedate, rainmm), color="red",size=5)+
  # this data frame names its time column `datetime`, not `timedate`
  geom_point(
    data = subset(haptuk_ysi, datetime > startd & datetime < endd),
    mapping = aes(x = datetime, y = Turb),
    colour = "pink"
  ) +
  # disabled axis formatting:
  # scale_x_date(breaks=date_breaks("month"), labels = date_format("%b-%y"))+
  xlab("Date") +
  ylab("Turbidity (NTU) and Water Level (mm)") +
  # zoom the y axis to 0-1500 without dropping points outside that range
  coord_cartesian(ylim = c(0, 1500)) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 14),
    axis.title = element_text(size = 16, face = "bold"),
    legend.justification = c(1, 1),
    legend.position = c(1, 1),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )

Here is a sample of two of the datasets: Note that the times don't line up at all... since I'm mixing sources.

dsloi_wl:

structure(list(ReceptionTime = c(1533895414.1134, 1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648, 1533896013.87724, 1533896013.8894), d2w = c(776.7, 789.7, 790.2, 777.1, 777.2, 777.7, 778.4, 793.4, 779.6, 794.1, 819.9, 780.7, 794.1, 806.9, 781.9, 781.9, 782.7, 782.8, 783.1, 783.4), timedate = structure(c(1533895414.1134, 1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648, 1533896013.87724, 1533896013.8894), class = c("POSIXct", "POSIXt"), tzone = ""), level = c(723.3, 710.3, 709.8, 722.9, 722.8, 722.3, 721.6, 706.6, 720.4, 705.9, 680.1, 719.3, 705.9, 693.1, 718.1, 718.1, 717.3, 717.2, 716.9, 716.6)), .Names = c("ReceptionTime", "d2w", "timedate", "level"), row.names = c(NA, 20L), class = "data.frame")

cr300_Wai1:

structure(list(RECORD = 73027:73046, Temp_C = c(24.62861, 24.62332, 24.61533, 24.60857, 24.60189, 24.59733, 24.59068, 24.58404, 24.57869, 24.57327, 24.56781, 24.5606, 24.55551, 24.55218, 24.54648, 24.5416, 24.5358, 24.5319, 24.52781, 24.52294), Turb_BS = c(94.50522, 88.65939, 109.354, 57.71527, 134.1903, 46.37191, 78.17719, 52.22319, 58.07111, 96.95719, 51.47488, 44.65616, 70.43825, 99.58217, 93.68374, 87.4787, 175.5395, 167.6757, 110.8119, 132.5971), Turb_SS = c(36.63349, 34.31228, 37.02223, 32.97258, 36.68553, 33.82083, 37.43391, 33.43639, 31.17306, 33.6327, 34.69954, 30.99891, 34.69988, 33.64369, 32.54948, 32.1177, 32.86558, 48.97706, 30.65004, 33.71646), Temp_C_2 = c(24.9014, 24.89474, 24.88837, 24.88279, 24.87574, 24.86852, 24.86357, 24.85751, 24.85236, 24.84759, 24.84091, 24.83577, 24.83192, 24.82713, 24.8229, 24.81832, 24.81237, 24.80821, 24.8051, 24.80015), WD_OBS = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Lvl_m = c(0.6907353, 0.6905226, 0.6896195, 0.6890779, 0.6881586, 0.6878724, 0.6862501, 0.6848835, 0.6844589, 0.6837503, 0.6836612, 0.6831629, 0.6821692, 0.6812283, 0.6799452, 0.6791196, 0.6782504, 0.6772775, 0.6763596, 0.6755115), timedate = structure(c(1533895500, 1533895800, 1533896100, 1533896400, 1533896700, 1533897000, 1533897300, 1533897600, 1533897900, 1533898200, 1533898500, 1533898800, 1533899100, 1533899400, 1533899700, 1533900000, 1533900300, 1533900600, 1533900900, 1533901200), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("RECORD", "Temp_C", "Turb_BS", "Turb_SS", "Temp_C_2", "WD_OBS", "Lvl_m", "timedate"), row.names = c(NA, 20L), class = "data.frame")

Community
  • 1
  • 1
  • please share sample of your data using dput() (not str or head or picture/screenshot) so others can help. See more here https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example?rq=1 – Tung Aug 22 '18 at 00:20
  • I guess a solution is to have a `time` column with the lowest step (15 min in your example) and complete with NA when some variables are recorded with a larger timestep (1 hour for example). Then prefer a long format data frame (see `gather` from `tidyverse` or `melt`), you might have 3 columns : `time`, `variable_name` and `variable_value`, then it's easy to plot, just add `color = variable_name` inside `aes(...)`. – bVa Aug 22 '18 at 07:05
  • @thanhtungmilan I shared a sample of the dataset to help. The problem is the times won't line up on the hour/fifteen etc, since all the clocks are different on all the different sensors. I'll try below and be back. – Kim Falinski Aug 22 '18 at 18:16
  • @KimFalinski : data frame `cr200_Auwai1` is missing. I edited my answer below with the 2 datasets available. – bVa Aug 23 '18 at 11:52
  • great, thank you. success! – Kim Falinski Aug 25 '18 at 06:41

1 Answer

0

Here is a solution using mock data (next time, please provide a sample of your data):

library(tidyverse)
library(lubridate)
#> 
#> Attachement du package : 'lubridate'
#> The following object is masked from 'package:base':
#> 
#>     date

# mock data: the same one-day window sampled every 15, 30 and 60 minutes
time_15m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "15 min"
)
time_30m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "30 min"
)
time_60m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "60 min"
)

# one data frame per sampling step; each value column is an arbitrary
# trigonometric function of the hour and minute of its timestamp
data_1 <- data.frame(time = time_15m)
data_1$var_1 <- cos(hour(time_15m) + minute(time_15m))

data_2 <- data.frame(time = time_30m)
data_2$var_2 <- sin(hour(time_30m) + minute(time_30m))

data_3 <- data.frame(time = time_60m)
data_3$var_3 <- cos(1 - hour(time_60m) + minute(time_60m))

# The kind of plot from the question: one geom_point() layer per data frame,
# each with a fixed colour set outside aes() (prefer the 2nd version below).
ggplot(data = data_1, mapping = aes(x = time, y = var_1)) +
  geom_point(colour = "red") +
  geom_point(
    data = data_2,
    mapping = aes(x = time, y = var_2),
    colour = "green"
  ) +
  geom_point(
    data = data_3,
    mapping = aes(x = time, y = var_3),
    colour = "blue"
  ) +
  theme_bw()

# Long-format version: join everything onto the finest time grid, stack the
# value columns with gather(), and map colour to the variable name.
data_1 %>%
  # timestamps missing from the 30-minute series get NA in var_2
  left_join(data_2) %>%
  # timestamps missing from the 60-minute series get NA in var_3
  left_join(data_3) %>%
  # stack every column except `time` (var_1, var_2, var_3) into name/value pairs
  gather(key = "variable_name", value = "variable_value", -time) %>%
  ggplot(aes(x = time, y = variable_value, color = variable_name)) +
  geom_point(size = 2) +
  theme_bw()
#> Joining, by = "time"
#> Joining, by = "time"
#> Warning: Removed 120 rows containing missing values (geom_point).

Created on 2018-08-22 by the reprex package (v0.2.0).

EDIT 1 (include provided datasets)

library(tidyverse)
# Plot the two provided datasets with a legend.
# The tables share only the `timedate` column and their clocks do not line up,
# so full_join() keeps the timestamps of both tables and leaves NA wherever a
# table has no reading at that time.
dsloi_wl %>%
  full_join(cr300_Wai1) %>%
  # Scale Lvl_m by 1000, as the question's plot does (its y axis is labelled
  # in mm), so the series is on the same scale as `level`.
  # NOTE(review): presumably Lvl_m is in metres and `level` in mm - confirm.
  mutate(Lvl_m = 1000 * Lvl_m) %>%
  # long format: one row per (timedate, variable) pair
  gather(variable_name, variable_value, level, Lvl_m, Turb_SS)  %>%
  ggplot(., aes(x = timedate, y = variable_value, color = variable_name)) +
  geom_point() +
  # colour is mapped to variable_name, so the legend is generated automatically;
  # the manual scale only sets its title and reuses the question's colours
  scale_color_manual("Legend title", 
                     values = c("level" = "blue",
                                "Lvl_m" = "forestgreen",
                                "Turb_SS" = "orange"))
#> Joining, by = "timedate"
#> Warning: Removed 60 rows containing missing values (geom_point).

Created on 2018-08-23 by the reprex package (v0.2.0).

bVa
  • 3,839
  • 1
  • 13
  • 22
  • This only worked for me for "full_join" - and I originally had an issue with dplyr that wouldn't allow gather to work. In the end, I seem to finally be on my way, though. Thanks. – Kim Falinski Aug 25 '18 at 08:26