0

I need to plot my offshore data on top of a map of the US. The ggplot answer by jlhoward was helpful, but I am having problems overlaying my own data on top of it. My data is in a data.frame as follows:

> dput(dat)
 structure(list(YEAR = 1982:2014, Longitude = c(-70.412294, -72.364029, 
-71.718199, -71.026232, -70.057038, -71.256635, -71.33416, -71.921617, 
-70.764959, -69.996114, -70.735911, -70.804216, -70.971903, -70.380952, 
-70.840294, -70.246243, -69.998864, -70.415875, -70.803285, -71.590647, 
-70.911025, -70.893613, -72.290058, -72.178312, -71.545455, -72.59155, 
-72.386746, -72.427943, -72.799752, -72.897847, -71.893173, -72.749717, 
-69.732889), Latitude = c(40.723863, 39.711704, 40.196502, 40.38192, 
40.618407, 40.461734, 40.63319, 40.255586, 40.566896, 40.636625, 
40.658859, 40.600427, 40.496296, 40.886167, 40.597873, 40.74334, 
40.996396, 40.88574, 41.015681, 40.751503, 40.917864, 40.756501, 
40.370116, 40.232457, 40.761132, 39.940429, 40.312277, 40.165928, 
39.83173, 39.814042, 40.530672, 39.967331, 40.730932), biomass = c(0.338144811453591, 
0.279218697044777, 0.237201626514534, 0.336660946471182, 0.342330121351236, 
0.369994373320721, 0.324471285554554, 0.329548963755295, 0.385566422852207, 
0.371231941937523, 0.363874003449712, 0.394639716203538, 0.38023963566932, 
0.399016421268399, 0.386505432090588, 0.42981456156909, 0.343536916969732, 
0.39892372171312, 0.355308306725635, 0.336114098583543, 0.41596425093632, 
0.329576428474592, 0.306697644479785, 0.262190121610882, 0.379213749266384, 
0.362876021807967, 0.409606747502569, 0.358005533873342, 0.308136419268288, 
0.370401008590535, 0.400519345003107, 0.381512816639217, 0.401243046609029
), code = c("late", "early", "average", "average", "average", 
"average", "late", "average", "late", "average", "average", "average", 
"late", "average", "late", "late", "average", "average", "average", 
"average", "average", "average", "average", "average", "average", 
"average", "average", "average", "average", "average", "early", 
"early", "average"), alpha = c(1, 1, 0.5, 0.5, 0.5, 0.5, 1, 0.5, 
1, 0.5, 0.5, 0.5, 1, 0.5, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1, 1, 0.5)), .Names = c("YEAR", 
"Longitude", "Latitude", "biomass", "code", "alpha"), row.names = c(NA, 
-33L), class = "data.frame")

The code I have for plotting is:

library(raster)
library(maps)
library(ggmap)
library(ggplot2)
library(scales)
# Plot window: longitude and latitude limits handed to coord_map().
xlim <- c(-77, -65)
ylim <- c(35, 45)

# Names of the US states and Canadian provinces whose outlines are drawn.
states <- c(
  "Maine", "New Hampshire", "Vermont", "Massachusetts", "Rhode Island",
  "Connecticut", "New York", "Pennsylvania", "West Virginia", "Delaware",
  "New Jersey", "Maryland", "Virginia", "North Carolina"
)
provinces <- c("Nova Scotia", "New Brunswick")

# Fetch the level-1 GADM boundaries for each country, then keep only the
# regions named above (matched on the NAME_1 column).
us <- getData("GADM", country = "USA", level = 1)
canada <- getData("GADM", country = "CAN", level = 1)
us.states <- us[us$NAME_1 %in% states, ]
ca.provinces <- canada[canada$NAME_1 %in% provinces, ]

# Draw the state/province outlines and overlay the biomass points from `dat`.
# NOTE: this is the version that raises "object 'group' not found". The
# top-level aes() maps group=group, which becomes the default mapping for every
# layer below; the geom_point() layers draw from `dat`, whose columns are YEAR,
# Longitude, Latitude, biomass, code and alpha -- there is no `group` column.
ggplot(us.states, aes(x=long, y=lat,group=group))+
  # Outlines of the selected US states (inherits data and aes from ggplot()).
  geom_path()+
  # Outlines of the selected Canadian provinces.
  geom_path(data=ca.provinces)+
  labs(title='title') +
  # Mercator projection, limited to the xlim/ylim window defined earlier.
  coord_map(xlim=xlim, ylim=ylim, project='mercator') +
  # 'average' years after 1982: gray filled circles sized by biomass.
  geom_point(aes(x=Longitude,y=Latitude,size=biomass),
           color='black',fill='gray',
           data=dat[dat$code=='average' & dat$YEAR>1982,],
           shape=21,alpha=1) + 
   scale_size_continuous(name='Biomass') +
   # 'early' years: red filled circles sized by biomass.
   geom_point(aes(x=Longitude,y=Latitude,size=biomass),
           color='black',fill='red',
           data=dat[dat$code=='early',],shape=21,alpha=1) +   
   # 'late' years: cyan filled circles sized by biomass.
   geom_point(aes(x=Longitude,y=Latitude,size=biomass),
           color='black',fill='cyan',data=dat[dat$code=='late',],
           shape=21,alpha=1)+    
   # 'average' rows from 1982 only, added last so they are drawn on top.
   geom_point(aes(x=Longitude,y=Latitude,size=biomass),
           color='black',fill='gray',
           data=dat[dat$code=='average' & dat$YEAR==1982,],
           shape=21,alpha=1) + 
   # to fix the color in the scale
   # NOTE(review): `size = 20` here is a label for the size aesthetic, not a
   # text size -- confirm what this was meant to do.
   labs(x = "Longitude", y = "Latitude", size = 20) +
   theme(axis.text = element_text(size = rel(1.25)),
           axis.title = element_text(size = rel(1.25)),
           plot.title = element_text(size = rel(2))) +
   # NOTE(review): theme_bw() is added after theme(); verify that the custom
   # text sizes above are still in effect once it is applied.
   theme_bw()

I receive the following error message.

Error in eval(expr, envir, enclos) : object 'group' not found

Does anyone have thoughts on what is going wrong here? Everything runs fine if I comment out the code below coord_map(...).

Community
  • 1
  • 1
struggleBus
  • 365
  • 2
  • 5
  • 20
  • 1
    Please use dput(dat) and paste the output where you currently have head(dat). That will allow us to replicate the data. You might have to do dput(head(dat)) if it's large. – Michael Davidson Aug 26 '16 at 17:25
  • please provide a reproducible working example if you want help – Cyrus Mohammadian Aug 26 '16 at 17:26
  • I added the dput(dat) output. Also, I just figured out that it almost works if I take out the 'group=group' part in ggplot(). Almost.. – struggleBus Aug 26 '16 at 18:09
  • That's because you are referring to a column called 'group' in your data, which does not exist. If you want to group by a specific column, you need to enter the correct column name in `group=xxx` – dww Aug 26 '16 at 18:10

1 Answer

1

Because you specify group = group in the main call to aes(), ggplot expects each geom to contain a column called "group". In your geom_point calls, you specify a data set that has no "group" column. This is the main source of your error (the other is that your code contains a chunk of disembodied code that appears to have been pasted incorrectly).

You can tell each call to geom_point to reset the "group" variable by specifying group = 1.

This code:

# State and province outlines with the biomass points overlaid.
#
# The top-level aes() maps group = group for the outline layers. The point
# layers draw from `dat`, which has no `group` column, so each of them replaces
# the inherited mapping with a constant group = 1 inside its own aes().
ggplot(us.states, aes(x = long, y = lat, group = group)) +
  geom_path() +
  geom_path(data = ca.provinces) +
  coord_map(xlim = xlim, ylim = ylim, projection = "mercator") +
  # "average" years after 1982 (gray)
  geom_point(
    aes(x = Longitude, y = Latitude, size = biomass, group = 1),
    data = dat[dat$code == "average" & dat$YEAR > 1982, ],
    color = "black", fill = "gray", shape = 21, alpha = 1
  ) +
  # "early" years (red), as in the question's code
  geom_point(
    aes(x = Longitude, y = Latitude, size = biomass, group = 1),
    data = dat[dat$code == "early", ],
    color = "black", fill = "red", shape = 21, alpha = 1
  ) +
  # "late" years (cyan)
  geom_point(
    aes(x = Longitude, y = Latitude, size = biomass, group = 1),
    data = dat[dat$code == "late", ],
    color = "black", fill = "cyan", shape = 21, alpha = 1
  ) +
  # "average" rows from 1982, added last so they are drawn on top
  geom_point(
    aes(x = Longitude, y = Latitude, size = biomass, group = 1),
    data = dat[dat$code == "average" & dat$YEAR == 1982, ],
    color = "black", fill = "gray", shape = 21, alpha = 1
  ) +
  # The scale name supplies the size legend title.
  scale_size_continuous(name = "Biomass") +
  labs(title = "title", x = "Longitude", y = "Latitude") +
  # Apply the complete theme first: theme_bw() replaces every theme element,
  # so the text-size tweaks must be added after it to take effect.
  theme_bw() +
  theme(
    axis.text = element_text(size = rel(1.25)),
    axis.title = element_text(size = rel(1.25)),
    plot.title = element_text(size = rel(2))
  )

Produces this image:

enter image description here

Edit: An alternate take

The version of the plot below cleans up the code in several ways:

  1. The "dat" data set is now the main data set. The Canada and US map data are referenced only to draw boundary lines, so you save more typing this way. To ensure that all the "late" points are plotted above the others, I use dplyr::arrange on the data set beforehand (this puts all the "late" points alphabetically last, so they will be plotted last).
  2. Points are plotted with a single call to geom_point. Fill and size are defined as aesthetics, saving you the redundancy of multiple geom_point calls.
  3. Plot elements are arranged by order of importance: main call, then geoms, then coordinate specification, then scales, then labels and theming.

Overall this makes the plotting code easier to follow.

library(dplyr)

# Sort rows by `code` (average, early, late). Points are drawn in row order,
# so the "late" points end up on top of the others.
dat <- arrange(dat, code)

# `dat` is the main data set; the map objects are used only for the outlines,
# so `group` is mapped only in the two geom_path() layers that have it.
# All years are plotted, matching the layers in the question's code.
ggplot(data = dat, aes(x = Longitude, y = Latitude)) +
  geom_path(data = us.states, aes(x = long, y = lat, group = group)) +
  geom_path(data = ca.provinces, aes(x = long, y = lat, group = group)) +
  geom_point(aes(fill = code, size = biomass), color = "black", shape = 21) +
  coord_map(xlim = xlim, ylim = ylim, projection = "mercator") +
  # Same fills as the question's separate layers: early = red, late = cyan.
  scale_fill_manual(
    values = c(early = "red", average = "gray", late = "cyan")
  ) +
  scale_size_continuous(name = "Biomass") +
  labs(title = "Title", x = "Longitude", y = "Latitude") +
  # Apply the complete theme first: theme_bw() replaces every theme element,
  # so the text-size tweaks must be added after it to take effect.
  theme_bw() +
  theme(
    axis.text = element_text(size = rel(1.25)),
    axis.title = element_text(size = rel(1.25)),
    plot.title = element_text(size = rel(2))
  )

enter image description here

jdobres
  • 11,339
  • 1
  • 17
  • 37
  • Your suggestion makes sense but I couldn't get your code to work (Also, I fixed the pasting error, thanks for pointing it out). I am still getting that same group error. – struggleBus Aug 26 '16 at 18:54
  • @jdobres It may not be the best fix, but I ended up adding a column called 'group' to my data.frame that was filled with rep(1). And within the aes(...) argument, I added group=group. Thanks for your suggestions! – struggleBus Aug 26 '16 at 19:02
  • I've added an alternate take that significantly cleans up the code. Hopefully this version will run on your system? – jdobres Aug 26 '16 at 19:22