0

I'm very new to using loops and functions with ggplot. I made a function to loop through a column in my data frame called "HUC14". For each unique value of HUC14 it creates a subset data frame to be used in ggplot and it also uses that unique value to use as the title. I'm wondering however if I can also loop through another column in my data frame to add to the title of plot as well as the HUC14 number? The code I have used doesn't change the plot or the HUC14 it only loops through the name..Not sure what I am doing wrong! I want the HUC14 and Name to match with the given value for the two parameters I want to plot!

Sample Data:

structure(list(stdate = structure(c(11359, 16498, 12149, 12437, 
13277, 17536, 16517, 16503, 16134, 16105, 15783, 16470, 14266, 
13566, 14984), class = "Date"), orgid = c("USGS-NJ", "USGS-NJ", 
"USGS-NJ", "21NJDEP1", "21NJDEP1", "USGS-NJ", "NJDEP_BFBM", "NJDEP_BFBM", 
"NJDEP_BFBM", "USGS-NJ", "NJDEP_BFBM", "USGS-NJ", "21NJDEP1", 
"GSWA", "NJDEP_BFBM"), locid = c("USGS-01396030", "USGS-01378560", 
"USGS-01393400", "21NJDEP1-01396030", "21NJDEP1-AN0770", "USGS-01378560", 
"NJDEP_BFBM-01394180", "NJDEP_BFBM-AN0425A", "NJDEP_BFBM-01394180", 
"USGS-01378560", "NJDEP_BFBM-01394180", "USGS-01394500", "21NJDEP1-01379525", 
"GSWA-LB4S", "NJDEP_BFBM-01379525"), sttime = structure(c(34200, 
50400, 80280, 35700, 0, NA, 41400, 45300, 39600, 46800, 40500, 
42300, 34800, 42900, 37380), class = c("hms", "difftime"), units = "secs"), 
    valunit = c("uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", 
    "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", 
    "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", "uS/cm @25C", 
    "uS/cm @25C"), swqs = c("FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", 
    "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", 
    "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT", "FW2-NT"), WMA = c(7L, 
    5L, 7L, 7L, 16L, NA, 7L, 9L, 7L, 5L, 7L, 7L, 6L, 6L, 6L), 
    year = c(2001L, 2015L, 2003L, 2004L, 2006L, NA, 2015L, 2015L, 
    2014L, 2014L, 2013L, 2015L, 2009L, 2007L, 2011L), locid2 = c("01396030", 
    "01378560", "01393400", "01396030", "AN0770", "01378560", 
    "01394180", "AN0425A", "01394180", "01378560", "01394180", 
    "01394500", "01379525", "LB4S", "01379525"), HUC14 = c("HUC02030104050090", 
    "HUC02030103180010", "HUC02030104020020", "HUC02030104050090", 
    "HUC02040206230040", "HUC02030103180010", "HUC02030104050040", 
    "HUC02030105120120", "HUC02030104050040", "HUC02030103180010", 
    "HUC02030104050040", "HUC02030104050040", "HUC02030103010190", 
    "HUC02030103010040", "HUC02030103010140"), MonLocName = c("Rahway R S Br in Merrill Park off Fairview Rd in Woodbridge", 
    "Coles Bk at Hackensack", "Elizabeth R at Hillside", "Rahway R S Br in Merrill Park off Fairview Rd in Woodbridge", 
    "Green Ck on Rt 47 in Middle Twp", "Coles Bk at Hackensack", 
    "Rahway R trib at Springfield", "Ambrose Bk at Behmer Rd in Piscataway", 
    "Rahway R trib at Springfield", "Coles Bk at Hackensack", 
    "Rahway R trib at Springfield", "Rahway R near Springfield", 
    "Canoe Bk on Parsonage Hill Rd in Millburn Twp", "Loantaka Bk at Woodland Ave (upstream)", 
    "Canoe Bk on Parsonage Hill Rd in Millburn Twp"), Chloride = structure(c(903, 
    2100, NA, 1409.3, 151, NA, 1340, 52.062, 1170, 1020, 1240, 
    1030, 1220, 209, 1040), na.action = structure(c(1L, 2L, 3L, 
    4L, 7L, 8L, 9L, 10L), class = "omit")), Specific_conductance = structure(c(7450, 
    7190, 6080, 5550, 4680, 4490, 4250, 4090, 3890, 3710, 3710, 
    3580, 3570, 3570, 3380), na.action = structure(5:10, class = "omit")), 
    tds = structure(c(1620, 3630, NA, 3056, 606, NA, 2530, 141, 
    2590, 1840, 2050, 1970, 57, 604, 1870), na.action = structure(1:6, class = "omit"))), .Names = c("stdate", 
"orgid", "locid", "sttime", "valunit", "swqs", "WMA", "year", 
"locid2", "HUC14", "MonLocName", "Chloride", "Specific_conductance", 
"tds"), class = c("data.table", "data.frame"), row.names = c(NA, 
-15L), .internal.selfref = <pointer: 0x00000000028f0788>)

Code I'm Using:

corr_plots<-function(df,x,y){

  # create list of HUCs in data to loop over 
  HUC_list <- unique(df2$HUC14)
  name_list<-unique(df2$MonLocName)

  for (i in seq_along(HUC_list)) { 
    for(j in seq_along(name_list)){
        x_var <- enquo(x)
        y_var <- enquo(y)

      plot<-ggplot(subset(df2, df2$HUC14==HUC_list[i]),
             aes(x = !!x_var, y = !!y_var))+
      geom_point(size=2,alpha=0.5)+
      geom_smooth(method = "lm", se = FALSE)+ 

      scale_x_continuous(limits = c(0,6200), expand = c(0, 0)) +
      scale_y_continuous(limits = c(0,2000), expand = c(0, 0)) +
        ggtitle(paste(HUC_list[i],as.character(name_list[j])))


      print(plot)

    }
  }
}

Working Example Without Loop:

corr_plots<-function(df,HUC,x,y){

        x_var <- enquo(x)
        y_var <- enquo(y)

      ggplot(subset(df, HUC14 == HUC),
             aes(x = !!x_var, y = !!y_var))+
      geom_point(size=2,alpha=0.5)+
      geom_smooth(method = "lm", se = FALSE)+ 

      scale_x_continuous(limits = c(0,6200), expand = c(0, 0)) +
      scale_y_continuous(limits = c(0,2000), expand = c(0, 0)) 

  }
corr_plots(df2,"HUC02030104020020",Specific_conductance,Chloride)
NBE
  • 641
  • 2
  • 11
  • 33
  • I think your problem is in the subset you are using for the plot, it take only the HUC_list variable and not the name_list. I think that changing that may work – Santiago I. Hurtado Sep 19 '18 at 14:45
  • In your example, one of your `HUC14` has multiple `MonLocName`. What would you do in that case? (See HUC02030104050040). If each HUC had a unique name you could use the `i` index to refer to the `name_list` for each plot and you don't need nested second loop. – aosmith Sep 19 '18 at 14:54
  • @SantiagoHurtado How can I do that? – NBE Sep 19 '18 at 14:55
  • @aosmith Sorry totally forgot about that... Some HUC14s have multipe different MonLocNames. There are 26 unique values for HUC14 and 84 unique values for MonLocName – NBE Sep 19 '18 at 14:57
  • subset(df2, df2$HUC14==HUC_list[i] & df2$MonLocName ==name_list[j]) – Santiago I. Hurtado Sep 19 '18 at 15:00
  • 1
    So do you want a separate plot for each HUC/name combination? If you want that you would need to subset to the appropriate HUC/name combination. Also, if you're comfortable with loops you should def stick with your approach, but this is a situation that I find `split()` and then `lapply()` or `map()` handy for. – aosmith Sep 19 '18 at 15:02
  • Yes I want a separate plot for each HUC/name combination! This is my first time using loops with ggplot. I tried using map() but was unsuccessful. – NBE Sep 19 '18 at 15:05
  • @SantiagoHurtado That didn't work.. It's still giving me the same thing – NBE Sep 19 '18 at 15:09
  • @aosmith anyway you can guide me on how to accomplish this with my current approach? – NBE Sep 19 '18 at 15:23
  • Maybe make a new variable that is the combination of `HUC14` and `MonLocName` and loop through that new variable? You could then use that variable as the plot name, as well. – aosmith Sep 19 '18 at 15:24
  • @aosmith Not sure what you mean by that? How could I combine HUC14 and MonLocname? – NBE Sep 19 '18 at 15:30
  • @KWANGER: can you post one working example (function call + plot) without using the `for` loop? How can you use `lm` when one `MonLocnam` has only 1 NA value? – Tung Sep 19 '18 at 16:27
  • I think this can be useful https://stackoverflow.com/a/50522928/ – Tung Sep 19 '18 at 16:30
  • @Tung Added working example without loop. Also, thanks for link! – NBE Sep 19 '18 at 16:33
  • @KWANGER: how do you call that function? `x = ?`, `y = ?`. What does the plot look like? – Tung Sep 19 '18 at 16:35
  • 1
    I mean literally make a new variable from your two existing variables that is the combination of their levels. You can then use as your variable for looping and subsetting and plot naming. Like `df$combo = with(df, paste(HUC14, MonLocName))`. – aosmith Sep 19 '18 at 16:35

1 Answers1

2

You can do this many ways, I find the tidyverse / purrr approach a nice balance of flexibility and brevity:

library(tidyverse)

corr_plot <- function(df, x, y, title) {
  x_var <- enquo(x)
  y_var <- enquo(y)

  ggplot(df, aes(x = !!x_var, y = !!y_var)) +
    geom_point(size = 2) +
    geom_smooth(method = "lm", se = FALSE) +
    scale_x_continuous(limits = c(0, 6200), expand = c(0, 0)) +
    scale_y_continuous(limits = c(0, 2000), expand = c(0, 0)) +
    labs(
      title = title,
      subtitle = paste(rlang::quo_text(x_var), "vs.", rlang::quo_text(y_var))
    )
}

nested_by_HUC14 <- 
  df %>%
  group_by(HUC14) %>% 
  nest()

nested_by_HUC14 %>%
  mutate(plot = map2(data, HUC14, ~ corr_plot(.x, Specific_conductance, Chloride, .y))) %>%
  walk(print(.$plot))

If you do not want [[1]]...[[2]]...etc printed to the console, you can change the walk statement to pull(plot) %>% walk(print)

JasonAizkalns
  • 20,243
  • 8
  • 57
  • 116
  • I am getting the following error: Error in UseMethod("group_by_") : no applicable method for 'group_by_' applied to an object of class "function" – NBE Sep 19 '18 at 18:16
  • 1
    What does `df` look like for you? It should match your sample data. That is `df <- list(stdate = structure(c...` – JasonAizkalns Sep 19 '18 at 18:45
  • Sorry I was using the wrong dataframe, it works! However, I was looking to also loop through the column "MonLocName" and add that to the title along with the HUC column. Your answer only loops through the HUC column – NBE Sep 19 '18 at 18:53