1

I am working with a soil moisture dataset of the form:

structure(list(DATE = c("1966-09-14", "1966-09-14", "1966-09-14", 
"1966-09-14", "1966-09-14", "1966-09-14"), LOCATION = c("S1S", 
"S2W", "S3E", "S3W", "S4S", "S5"), D_15 = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_), D_46 = c(3.81, 3.05, 
1.52, 1.37, 1.75, 1.6), D_76 = c(2.13, 2.97, 0.91, 0.91, 1.68, 
3.28), D_107 = c(2.67, 2.97, 2.67, 2.51, 2.97, 3.73), D_137 = c(2.44, 
2.74, 3.81, 4.11, 3.28, 3.43), D_168 = c(2.74, 2.9, 1.37, 4.27, 
3.96, 2.67), D_198 = c(2.44, 3.51, 2.97, 3.2, 2.74, 2.59), D_229 = c(2.74, 
3.81, 1.83, 4.88, 3.51, 3.05), D_259 = c(10.36, 3.12, 1.52, 3.43, 
2.67, NA), D_290 = c(11.51, 1.45, 0.46, 6.25, 2.59, NA), D_320 = c(11.05, 
2.9, NA, 5.79, NA, NA)), row.names = c(NA, 6L), class = "data.frame")

where LOCATION indicates the watershed and D_15, D_46, etc. are soil moisture depths. I am trying to create an application that would allow a user to plot various combinations of location and depth so all depths at a site could be compared or the same depths can be compared across watersheds. Where I am getting stuck is implementing the legend. I would like the legend to change depending on the combination of site and depth. Is there any way to map legend aesthetics as a combination of the subset data frame and the user defined soil depth so when the LOCATION is the same but the depth is different (or vice versa) they are drawn and labeled separately in the legend?? Below are my current server.R and ui.R configurations.

# Define server logic ----

df <- read.csv('data/SM_alpha.csv')
df$DATE <- as.Date(df$DATE)

server <- function(input, output) {


    ## filter df by LOCATION
    filtered_data_1 <- reactive({
        dplyr::filter(df, LOCATION == input$subset_to_plot_1)
    })
    
    filtered_data_2 <- reactive({
        dplyr::filter(df, LOCATION == input$subset_to_plot_2)

    })
    
    filtered_data_3 <- reactive({
        dplyr::filter(df, LOCATION == input$subset_to_plot_3)

    })

    filtered_data_4 <- reactive({
        dplyr::filter(df, LOCATION == input$subset_to_plot_4)

    })

    filtered_data_5 <- reactive({
        dplyr::filter(df, LOCATION == input$subset_to_plot_5)

    })
    
    filtered_data_6 <- reactive({
        dplyr::filter(df, LOCATION == input$subset_to_plot_6)

    })
    
    ## ggplot selected data 
       output$Plot <- renderPlot({
        
    ggplot(df, aes_string(x = "DATE")) + 
        geom_line(data = filtered_data_1(), aes(y = !!sym(input$first_variable_to_plot), colour = input$subset_to_plot_1)) +
            geom_line(data = filtered_data_2(), aes(y = !!sym(input$second_variable_to_plot), colour = input$subset_to_plot_2)) +
            geom_line(data = filtered_data_3(), aes(y = !!sym(input$third_variable_to_plot), colour = input$subset_to_plot_3)) + 
            geom_line(data = filtered_data_4(), aes(y = !!sym(input$fourth_variable_to_plot), colour = input$subset_to_plot_4)) + 
            geom_line(data = filtered_data_5(), aes(y = !!sym(input$fifth_variable_to_plot), colour = input$subset_to_plot_5))  +
            geom_line(data = filtered_data_6(), aes(y = !!sym(input$sixth_variable_to_plot), colour = input$subset_to_plot_6))  +
        xlim(input$mindate, input$maxdate) +
        theme_light()
 
        
       })

       
}
## Load packages ----
library(shiny)
library(ggplot2)
## Load data ----
df <- read.csv("data/SM_alpha.csv")
## Format DATE column
df$DATE <- as.Date(df$DATE)


## Define UI ----
ui <- fluidPage(

    titlePanel("MEF Data Explorer"),
        

    title = "MEF Data Explorer",

    plotOutput('Plot'),

    hr(),

    fluidRow(
        column(3,
            h4("SELECT DATE RANGE"),
            
            sliderInput("mindate", "Min date:", min = min(df$DATE), max = max(df$DATE), value = min(df$DATE)),
            sliderInput("maxdate", "Max date:", min = min(df$DATE), max = max(df$DATE), value = max(df$DATE)
        )),
        column(4,
            h4("SELECT SITE"),
            selectInput(inputId = "subset_to_plot_1", label = NULL, choices = c(" ", unique(df$LOCATION))),
            selectInput(inputId = "subset_to_plot_2", label = NULL, choices = c(" ", unique(df$LOCATION))),
            selectInput(inputId = "subset_to_plot_3", label = NULL, choices = c(" ", unique(df$LOCATION))),
            selectInput(inputId = "subset_to_plot_4", label = NULL, choices = c(" ", unique(df$LOCATION))),
            selectInput(inputId = "subset_to_plot_5", label = NULL, choices = c(" ", unique(df$LOCATION))),
            selectInput(inputId = "subset_to_plot_6", label = NULL, choices = c(" ", unique(df$LOCATION)))),
        column(4,
            h4("SELECT DEPTH"),
            selectInput(inputId = "first_variable_to_plot", label = NULL, choices = c("D_15", "D_46", "D_76", "D_107", "D_137", "D_168", "D_198", "D_229", "D_259", "D_290", "D_320")),
            selectInput(inputId = "second_variable_to_plot",label = NULL, choices = c("D_15", "D_46", "D_76", "D_107", "D_137", "D_168", "D_198", "D_229", "D_259", "D_290", "D_320")),
            selectInput(inputId = "third_variable_to_plot",label = NULL, choices = c("D_15", "D_46", "D_76", "D_107", "D_137", "D_168", "D_198", "D_229", "D_259", "D_290", "D_320")),
            selectInput(inputId = "fourth_variable_to_plot",label = NULL, choices = c("D_15", "D_46", "D_76", "D_107", "D_137", "D_168", "D_198", "D_229", "D_259", "D_290", "D_320")),
            selectInput(inputId = "fifth_variable_to_plot",label = NULL, choices = c("D_15", "D_46", "D_76", "D_107", "D_137", "D_168", "D_198", "D_229", "D_259", "D_290", "D_320")),
            selectInput(inputId = "sixth_variable_to_plot",label = NULL, choices = c("D_15", "D_46", "D_76", "D_107", "D_137", "D_168", "D_198", "D_229", "D_259", "D_290", "D_320"))
        )
    )
)


I have tried adding a geom_point for each input with colour mapped by the soil depth but this results in the same issue, namely, when two depths are the same they are mapped the same in the legend.

istheflesh
  • 11
  • 2
  • 1
    Welcome to SO! It would be easier to help you if you provide [a minimal reproducible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) including a snippet of your data or some fake data shared via `dput()`. Please do not post an image of code/data/errors [for these reasons](https://meta.stackoverflow.com/questions/285551/why-not-upload-images-of-code-errors-when-asking-a-question/285557#285557). – stefan Mar 22 '23 at 07:08
  • ggplot, as part of the tidyverse, is designed to work with [tidy](https://cran.r-project.org/web/packages/tidyr/vignettes/tidy-data.html) data. Your data frame is not tidy because information you need is contained in column names. To make it tidy, pivot your data longer so that information on depth is contained in a single column and you have multiple rows, one for each depth, for each of your current observations. I (and others) are not going to take the time to create your current data frame manually to show you how to do it. That's one of the reasons @stefan writes what they do... – Limey Mar 22 '23 at 09:08
  • 1
    @stefan Thanks for the welcome and the heads up! I believe things are in order now. – istheflesh Mar 22 '23 at 20:24
  • @Limey So it looks like I didn't understand how the tidyverse ecosystem operates. I'll transform the data per your suggestions and see where I land. Thanks for pointing that out! – istheflesh Mar 22 '23 at 20:27

1 Answers1

0

Here's a solution to your problem.

Based on the sample data you've posted, here's how to tidy your data:

df <- as_tibble(df) %>% 
        mutate(DATE=as.Date(DATE)) %>% 
        pivot_longer(
          starts_with("D_"), 
          names_to="Depth", 
          values_to="Moisture", 
          names_prefix="D_"
        ) %>% 
        mutate(Depth=as.numeric(Depth))
df
# A tibble: 66 × 4
   DATE       LOCATION Depth Moisture
   <date>     <chr>    <dbl>    <dbl>
 1 1966-09-14 S1S         15    NA   
 2 1966-09-14 S1S         46     3.81
 3 1966-09-14 S1S         76     2.13
 4 1966-09-14 S1S        107     2.67
 5 1966-09-14 S1S        137     2.44
 6 1966-09-14 S1S        168     2.74
 7 1966-09-14 S1S        198     2.44
 8 1966-09-14 S1S        229     2.74
 9 1966-09-14 S1S        259    10.4 
10 1966-09-14 S1S        290    11.5 
# … with 56 more rows

Now your UI can be greatly simplified: you need only a single widget for depth and a single widget for location, provided you add multiple=TRUE to each. You also need only a single filtered dataset.

I strongly urge against using xlim to filter by date. If you want to filter as you produce the plot, use coord_cartesian. xlim filters the dataset and then produces the plot. coord_cartesian produces the plot and then "zooms" to the requested region. Here, it makes no difference, but if you're imputing or modelling in any way, xlim (and ylim) will produce unexpected and incorrect results and the cause can be dificult to debug. It's never too soon to develop good habits.

A third option is to filter by date when constructing your filtered dataset, and that's what I've chosen to do here.

Since you've provided data from only a single date, I've added a geom_point to your plot so that you can actually see something.

Finally, if you wish, you can create a single date range sliderInput by providing a vector of length 2 as its value parameter.

ui <- fluidPage(
  titlePanel(
    "MEF Data Explorer"),
    title = "MEF Data Explorer",
    plotOutput('Plot'),
    hr(),
    fluidRow(
      column(
        3,
        h4("SELECT DATE RANGE"),
        sliderInput(
          "mindate", 
          "Min date:", 
          min = min(df$DATE), 
          max = max(df$DATE), 
          value = min(df$DATE)
        ),
        sliderInput(
          "maxdate", 
          "Max date:", 
          min = min(df$DATE), 
          max = max(df$DATE), 
          value = max(df$DATE)
        )
    ),
    column(
      4,
      h4("SELECT SITE"),
      selectInput(
        inputId = "subset", 
        label = NULL, 
        choices = c(" ", unique(df$LOCATION)), 
        multiple=TRUE
      )
    ),
    column(
      4,
      h4("SELECT DEPTH"),
      selectInput(
        inputId = "depth", 
        label = NULL, 
        choices = unique(df$Depth), 
        multiple=TRUE
      )
    )
  )
)

server <- function(input, output) {
  ## filter df by LOCATION
  filtered_data <- reactive({
    df %>% filter(
             LOCATION %in% input$subset, 
             Depth %in% as.numeric(input$depth),
             between(DATE, input$mindate, input$maxdate)
           )
  })
  
  ## ggplot selected data 
  output$Plot <- renderPlot({
    filtered_data() %>% 
      ggplot(aes(x = DATE, y=Moisture, colour=as.factor(Depth))) + 
        geom_line() +
        geom_point() +
        theme_light()
  })
}

shinyApp(ui, server)

enter image description here

Limey
  • 10,234
  • 2
  • 12
  • 32
  • Wow! So this significantly simplified what I had. Unfortunately, now that the legend is colored according to depth, if I look at a single depth at two different sites the lines will be the same color (this is the opposite of my initial problem). Is there a way to get the legend to respond to the unique combination of site AND depth? Perhaps this is not possible. Would it be simpler to provide two different tools in the same app; one that compares depths across sites and another that compares depths at a single site? – istheflesh Mar 29 '23 at 05:28
  • The greater simplicity is a direct result of tidying your data. It is *always* worth considering wrangling your data to fit your intended analysis functions rather than trying to shoehorn badly fitting data... Yes, you can differentiate sites. Simply add an additonal `aes`. For example, `shape=LOCATION`. – Limey Mar 29 '23 at 11:11