1

I am quite new to R in general and currently struggling with a specific plot I want to create.

Preferably, I would like to group my boxplots (two per Area: Good Habitat, Average Habitat, etc.) and add a legend. Since I am using two datasets from two separate Excel spreadsheets, using fill = Area in the aesthetics neither groups the boxplots nor gives me a legend. I also tried adding a legend manually, as seen in other posts, but again, since I am using two datasets, it does not work.

This is the base code I use for this plot. The striped boxplots are supposed to show that larvae are present, whereas the empty boxplots show the availability of host plants:

# DATASETS ----
# Each dataset comes from its own workbook: the control plots and the plots
# in which larvae were found.
ControlPlots <- read_excel(
  "ApolloLarva_EnvVari_PerGrid.xlsx",
  sheet = "ControlPlots_InHabitat"
)
LarvaDataAreas <- read_excel(
  "ApolloLarva_M_CombinedAreas.xlsx",
  sheet = "OnlyLarvaPlots"
)

# Renaming Areas for x-axis ----
# The raw area codes differ between the two workbooks, so each dataset is
# recoded separately to the same display labels.

# Control plots
ControlPlots$Area <- gsub("^.*Core", "Good Habitat", ControlPlots$Area)
ControlPlots$Area <- gsub("^.*Ref", "Average Habitat", ControlPlots$Area)
ControlPlots$Area <- gsub(
  "^.*Rest2020", "Targeted Restoration", ControlPlots$Area
)
ControlPlots$Area <- gsub(
  "^.*Rest2021", "Non-targeted Restoration", ControlPlots$Area
)

# Larva plots
LarvaDataAreas$Area <- gsub(
  "^.*Core_Area", "Good Habitat", LarvaDataAreas$Area
)
LarvaDataAreas$Area <- gsub(
  "^.*Ref_Area", "Average Habitat", LarvaDataAreas$Area
)
LarvaDataAreas$Area <- gsub(
  "^.*Rest_Area_2020", "Targeted Restoration", LarvaDataAreas$Area
)

# Plot ----
# Two boxplot layers are drawn on the same axes: a plain layer for the
# control plots and a patterned layer for the larva plots. Each layer gets
# its own `data`, so neither layer maps the data source to an aesthetic.
a <- ggplot(ControlPlots, aes(x = Area, y = Hostplant)) +
  geom_boxplot(data = ControlPlots, colour = "black") +
  geom_boxplot_pattern(
    data = LarvaDataAreas,
    fill = "white",
    colour = "black",
    pattern_density = 0.02,
    pattern_spacing = 0.01,
    pattern_fill = "black",
    pattern_colour = "black",
    alpha = 0.8
  ) +
  theme_bw() +
  labs(
    x = "Areas",
    y = "Hostplant cover [%]",
    title = ""
  ) +
  theme()

What the plot currently looks like

If anyone has any tips, I would be really grateful! Thank you!!

UPDATE: Dataset:

dput(ControlPlots2)
structure(list(Area = c("Rest2020", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Core", "Core", 
"Core", "Core", "Core", "Core", "Core", "Core", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", "Ref", 
"Ref", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", "Rest2020", 
"Rest2020", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", "Rest2021", 
"Rest2021"), GridID = c(13069, 11053, 11053, 11053, 11053, 11053, 
11053, 11053, 11053, 11053, 11053, 11053, 11053, 11053, 11052, 
11052, 11052, 11052, 11052, 11052, 11052, 11052, 11052, 11052, 
11052, 11051, 11051, 11051, 11051, 11051, 11051, 11154, 11154, 
11154, 11154, 11154, 11154, 11154, 11154, 11154, 11154, 11153, 
11153, 11153, 11153, 11153, 11153, 11153, 11153, 11153, 11153, 
11153, 11153, 11153, 11153, 11152, 11152, 11152, 11152, 11152, 
11152, 11152, 11152, 11152, 11152, 11255, 11255, 11255, 11255, 
11255, 11255, 11255, 11255, 11255, 11255, 11255, 11254, 11254, 
11254, 11254, 11254, 11254, 11254, 11254, 11254, 11254, 11254, 
11253, 11253, 11253, 11253, 11253, 11253, 11253, 11253, 11253, 
11253, 11253, 11253, 12161, 12161, 12161, 12161, 12161, 12161, 
12160, 12160, 12160, 12263, 12263, 12263, 12263, 12263, 12263, 
12263, 12262, 12262, 12262, 12262, 12262, 12262, 12262, 12262, 
12262, 12261, 12261, 12261, 12261, 12261, 12261, 12261, 12261, 
12466, 12466, 12466, 12466, 12466, 12466, 12466, 12567, 12567, 
12567, 12567, 12567, 12567, 12668, 12668, 12668, 12668, 12668, 
12668, 12668, 12668, 12667, 12667, 12667, 13272, 13272, 13272, 
13272, 13272, 13272, 13272, 13272, 13171, 13171, 13171, 13171, 
13171, 13171, 13171, 13171, 13171, 13171, 13171, 13171, 13171, 
13171, 13171, 13171, 13171, 13171, 13171, 13170, 13170, 13170, 
13170, 13170, 13170, 13170, 13170, 13170, 13170, 13170, 13170, 
13170, 13170, 13170, 13170, 13069, 13069, 13069, 13069, 13069, 
10867, 10867, 10867, 10867, 10766, 10766, 10766, 10766, 10766, 
10766, 10766, 10767, 10767, 10767, 10767, 10767, 10767, 10767, 
10767, 10767, 10768, 10768, 10768, 10768, 10768, 10768, 10768, 
10666, 10666, 10666, 10666, 10666, 10666, 10666, 10666, 10666, 
10666, 10666, 10666, 10666, 10666, 10666, 10565, 10565, 10565, 
10565, 10565, 10565, 10566, 10566, 10566, 10566, 10566, 10566, 
10566, 10566, 10566, 10566, 10568, 10568, 10568, 10568, 10568, 
10568, 10568, 10568, 10568, 10568, 10668, 10668, 10668, 10668, 
10668, 10668, 10668, 10668, 10668, 10668, 10669, 10669, 10669, 
10669, 10669, 10669, 10669, 10669, 10669, 10669, 10669, 10669, 
10770, 10770, 10770, 10770, 10770, 10770, 10770, 10770, 10770, 
10770, 10770, 10770), Hostplant = c(0, 1, 0, 0, 5, 6, 12, 14, 
0, 5, 12, 13, 16, 3, 1, 0, 0, 14, 0, 2, 2, 10, 6, 0, 0, 0, 0, 
0, 0, 0, 1, 23, 2, 3, 0, 14, 5, 0, 0, 0, 0, 3, 1, 4, 0, 6, 6, 
2, 9, 3, 6, 7, 36, 16, 3, 2, 1, 0, 4, 16, 1, 0, 8, 0, 4, 0, 9, 
7, 0, 2, 8, 4, 0, 9, 2, 7, 16, 1, 0, 0, 5, 2, 5, 0, 4, 13, 4, 
4, 9, 0, 6, 1, 1, 1, 1, 1, 3, 22, 1, 0, 0, 1, 22, 0, 1, 0, 0, 
0, 2, 0, 3, 7, 0, 17, 0, 2, 4, 5, 9, 0, 3, 3, 0, 0, 1, 2, 4, 
0, 1, 6, 0, 6, 6, 6, 15, 2, 25, 0, 5, 21, 14, 0, 3, 2, 3, 0, 
12, 10, 2, 0, 13, 4, 0, 1, 11, 6, 0, 1, 0, 0, 3, 0, 0, 0, 0, 
0, 0, 3, 0, 0, 0, 0, 3, 18, 0, 0, 3, 0, 13, 0, 0, 0, 0, 0, 5, 
0, 5, 0, 5, 10, 7, 0, 3, 21, 4, 0, 3, 0, 0, 0, 0, 0, 3, 15, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), LarvaeCount = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -306L))

Larva Dataset: > dput(LarvaDataAreas2)
structure(list(Area = c("Rest_Area_2020", "Rest_Area_2020", "Rest_Area_2020", 
"Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", 
"Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", "Ref_Area", 
"Ref_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area", "Core_Area", "Core_Area", 
"Core_Area", "Core_Area", "Core_Area"), FID = c(241, 243, 291, 
226, 162, 150, 151, 154, 156, 158, 161, 174, 179, 181, 210, 213, 
24, 1, 2, 3, 5, 7, 11, 12, 13, 14, 16, 17, 18, 19, 23, 29, 30, 
31, 36, 38, 41, 44, 64, 78, 108, 123, 135), Hostplant = c(5, 
9, 15, 17, 24, 4, 9, 6, 8, 12, 12, 11, 19, 18, 11, 12, 17, 5, 
10, 24, 8, 12, 11, 10, 5, 20, 0, 4, 24, 8, 4, 1, 5, 4, 4, 2, 
8, 8, 4, 9, 16, 5, 29)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -43L))
Quinten
  • 35,235
  • 5
  • 20
  • 53
  • Can you make your post [reproducible](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) and provide your datasets using `dput()`? I can't say for certain because I can't access your data, but it would likely be easier to solve this issue if you could merge your two datasets into one, assuming they have some common identifier to merge by. – jrcalabrese Jan 18 '23 at 14:51
  • I see.. I guess I could add a column "Larva Count" to the Larva Dataset and put all values to 1. Then merge the datasets and when making the plot use group or fill = LarvaCount... Is this something you had in mind? Thank you for helping! – beginEcologist Jan 18 '23 at 16:02

1 Answer

1

As suggested in the comments, the easiest approach is to bind the two data frames and map a source-identifying column to an aesthetic. dplyr::bind_rows allows you to easily create such an ID column "on the fly".

library(tidyverse)
library(ggpattern)

## Bind both data sets into a single data frame.
## `.id = "control"` adds a column called "control" that records which input
## each row came from, using the names given in the call: "test" for the
## larva plots and "control" for the control plots.
## NOTE: this assumes `Area` already uses the same labels in both data sets
## (see the renaming step in the question); otherwise the two sources will
## not line up on the x-axis.
df <- bind_rows(test = LarvaDataAreas, control = ControlPlots, .id = "control")

## One patterned boxplot layer; mapping `pattern` to the source column draws
## one box per source within each Area and produces a legend.
ggplot(df, aes(x = Area, y = Hostplant)) +
  geom_boxplot_pattern(
    aes(pattern = control),
    pattern_density = 0.02,
    pattern_spacing = 0.01,
    pattern_colour = "black",
    alpha = 0.8
  ) +
  # Name the pattern values so each source keeps its pattern explicitly,
  # instead of relying on the sort order of the "control"/"test" levels.
  # `NULL` suppresses the legend title.
  scale_pattern_manual(NULL, values = c(control = "none", test = "stripe"))

tjebo
  • 21,977
  • 7
  • 58
  • 94