1

I am trying to trim (take difference of) a polygon by another polygon.

I've created SpatialPolygons with the package sp, and I can use rgeos::gDifference (from https://gis.stackexchange.com/a/54146/171788) for a duplicated (but shifted) polygon, but it doesn't work for the polygon of the states from ggplot2 (see below).

## Load data
library("ggplot2") 
states <- map_data("state") ## load state data
states<-states[states$region=="washington"|states$region=="oregon"|states$region=="california",]  ## trim to continental west coast

## load data for primary polygon
WCG<-structure(list(X = c(665004L, 665232L, 661983L, 663266L, 660980L, 
                          666562L, 660979L, 659316L, 661115L, 665803L, 663685L, 666535L, 
                          667728L, 660758L, 661000L, 665903L, 664469L, 659077L, 665725L
), Survey = c("WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", 
              "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", 
              "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS"
), Lat = c(33.07667, 32.91278, 32.70306, 32.57472, 32.0075, 31.99861, 
           32.01028, 32.28583, 32.38222, 32.81528, 40.13528, 40.25611, 48.07222, 
           48.175, 48.42278, 48.44444, 48.45083, 48.41556, 48.37028), Lon = c(-117.3383, 
                                                                              -117.2867, -117.2897, -117.3006, -118.3397, -118.6144, -118.8803, 
                                                                              -119.6567, -119.885, -120.2967, -125.07, -125.1383, -125.9614, 
                                                                              -125.9075, -125.1892, -124.9861, -124.8464, -124.7778, -124.755
           ), Date = c("7/15/2011", "7/17/2012", "7/17/2012", "7/17/2015", 
                       "7/14/2010", "10/12/2016", "7/14/2010", "7/15/2007", "10/13/2010", 
                       "10/9/2017", "6/22/2015", "9/18/2016", "5/29/2019", "5/26/2010", 
                       "8/24/2010", "5/23/2017", "5/29/2009", "8/22/2007", "8/25/2017"
           )), row.names = c(478258L, 478486L, 475237L, 476520L, 474234L, 
                             479816L, 474233L, 472570L, 474369L, 479057L, 476939L, 479789L, 
                             480982L, 474012L, 474254L, 479157L, 477723L, 472331L, 478979L
           ), class = "data.frame")

There is probably a simpler way to make polygons that can be subtracted from one another, but sp::Polygon doesn't work with rgeos::gDifference, so I convert to SpatialPolygons

library(sp)
WCGPoly<-Polygon(as.matrix(cbind(WCG$Lon,WCG$Lat)))
statesPoly<-Polygon(as.matrix(cbind(states$long,states$lat)))

crdref <- CRS('+proj=longlat +datum=WGS84')

p1 <- SpatialPolygons(list(Polygons(list(WCGPoly), "p1")),proj4string=crdref)
p2 <- SpatialPolygons(list(Polygons(list(statesPoly), "p2")),proj4string=crdref)

To visualize the difference we are trying to keep:

plot(p1,col="red")
plot(p2,add=T)

enter image description here

We want to keep the red that is off the west coast (not overlapped by the states).

library("rgeos")
Real <- gDifference(p1,p2)

Here I get a huge output, with errors:

p2 is invalid
Input geom 1 is INVALID: Self-intersection at or near point -122.33795166 48.281641190312918 (-122.33795166000000165 48.2816411903129179)
<A>
...
Error in RGEOSBinTopoFunc(spgeom1, spgeom2, byid, id, drop_lower_td, unaryUnion_if_byid_false,  : 
  TopologyException: Input geom 1 is invalid: Self-intersection at -122.33795166 48.281641190312918
In addition: Warning messages:
1: In RGEOSUnaryPredFunc(spgeom, byid, "rgeos_isvalid") :
  Self-intersection at or near point -122.33795166 48.281641190312918
2: In gDifference(p1, p2) :
  Invalid objects found; consider using set_RGEOS_CheckValidity(2L)

I am assuming this is because the state polygon isn't one cohesive polygon (state borders are intact somehow and lines intersect one another, even though I only fed Lat/Lon to Polygon.

If I create a test polygon just by shifting the initial polygon it works:

 testPoly<-Polygon(as.matrix(cbind(WCG$Lon+1,WCG$Lat)))
 p3 <- SpatialPolygons(list(Polygons(list(testPoly), "p3")),proj4string=crdref)

plot(p1,col="red")
plot(p3,add=T)

enter image description here

Test <- gDifference(p1,p3)
plot(Test,col="red")

enter image description here

I've tried combining the "states" polygon with various forms of "union" functions (https://gis.stackexchange.com/a/63696/171788, https://gis.stackexchange.com/a/385360/171788), but this doesn't really make sense to me because most examples are using two distinct polygons, not a polygon with multiple "separated" areas like these states.

Trying this different answer (https://gis.stackexchange.com/a/169597/171788):

x<- p1-p2
x<- erase(p1,p2)

I get the following error:

x is invalid
Attempting to make x valid by zero-width buffering
Warning message:
In RGEOSUnaryPredFunc(spgeom, byid, "rgeos_isvalid") :
  Self-intersection at or near point -122.33795166 48.281641190312918

Which again is telling me that my states polygon has a self-intersection. Is there a better way to trim my original polygon by the North American continent?

I've posted another (related) question here (How to use `polyclip::polysimplify` on a polygon) to understand how I might get rid of this self-intersection and create a solid polygon.

Any pointers would be greatly appreciated.

Dylan_Gomes
  • 2,066
  • 14
  • 29

2 Answers2

2

In case you are willing to switch from {sp} to {sf}, you might be looking for st_difference():

library(sf)
#> Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(dplyr)

# load data
states <- ggplot2::map_data("state") 

# trim to continental west coast, create sf from df
states <- states[states$region=="washington"|states$region=="oregon"|states$region=="california",] |> 
  st_as_sf(coords = c("long", "lat"), crs = "epsg:4326") 

# point to polygon
states <- states |> 
  group_by(region) |> 
  summarise(geometry = st_combine(geometry)) |> 
  st_cast("POLYGON") |> 
  st_make_valid()

# load data for primary polygon
WCG <- structure(list(X = c(
  665004L, 665232L, 661983L, 663266L, 660980L,
  666562L, 660979L, 659316L, 661115L, 665803L, 663685L, 666535L,
  667728L, 660758L, 661000L, 665903L, 664469L, 659077L, 665725L
), Survey = c(
  "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS",
  "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS",
  "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS"
), Lat = c(
  33.07667, 32.91278, 32.70306, 32.57472, 32.0075, 31.99861,
  32.01028, 32.28583, 32.38222, 32.81528, 40.13528, 40.25611, 48.07222,
  48.175, 48.42278, 48.44444, 48.45083, 48.41556, 48.37028
), Lon = c(
  -117.3383,
  -117.2867, -117.2897, -117.3006, -118.3397, -118.6144, -118.8803,
  -119.6567, -119.885, -120.2967, -125.07, -125.1383, -125.9614,
  -125.9075, -125.1892, -124.9861, -124.8464, -124.7778, -124.755
), Date = c(
  "7/15/2011", "7/17/2012", "7/17/2012", "7/17/2015",
  "7/14/2010", "10/12/2016", "7/14/2010", "7/15/2007", "10/13/2010",
  "10/9/2017", "6/22/2015", "9/18/2016", "5/29/2019", "5/26/2010",
  "8/24/2010", "5/23/2017", "5/29/2009", "8/22/2007", "8/25/2017"
)), row.names = c(
  478258L, 478486L, 475237L, 476520L, 474234L,
  479816L, 474233L, 472570L, 474369L, 479057L, 476939L, 479789L,
  480982L, 474012L, 474254L, 479157L, 477723L, 472331L, 478979L
), class = "data.frame")

# df to sf again
WCG <- st_as_sf(WCG, coords = c("Lon", "Lat"), crs = "epsg:4326") 

# point to polygon
WCG <- WCG |> 
  group_by(Survey) |> 
  summarise(geometry = st_combine(geometry)) |> 
  st_cast("POLYGON") 

# inspect 
plot(st_geometry(states))
plot(st_geometry(WCG), border = "red", add = TRUE)

Note the topological artifact in Washington. I was hoping to remove this via st_make_valid(), but was not able to do so unfortunately. However, I hope this does not cause significant problems in demonstrating the workflow.

I assume this is what causes st_intersection() and st_difference() to fail with the following message, but there seems to be a workaround using sf_use_s2(FALSE) (c.f. r-spatial/sf#1762).

Error in wk_handle.wk_wkb(wkb, s2_geography_writer(oriented = oriented,  : 
  Loop 0 is not valid: Edge 344 crosses edge 346 

So while st_intersection() returns the overlapping area between your two polygon datasets, st_difference() returns the not overlapping area and this should be exactly what you're looking for, as far as I understand.

sf_use_s2(FALSE)

# intersect
x <- st_intersection(states, WCG)
plot(st_geometry(x))

enter image description here

# difference
y <- st_combine(states) |> st_union() |> st_difference(WCG, y = _) 
plot(st_geometry(y))

enter image description here

Update:

Just had to dig a little bit further and I'd say, that geometries acquired by map_data() seem to be broken from the beginning.

# cast MULTIPOLYGON to POLYGON
wash <- states[3, ] |> st_cast("POLYGON")

wash
#> Simple feature collection with 5 features and 1 field
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: -124.6813 ymin: 45.53296 xmax: -116.9235 ymax: 49.00508
#> Geodetic CRS:  WGS 84
#> # A tibble: 5 × 2
#>   region                                                                geometry
#>   <chr>                                                            <POLYGON [°]>
#> 1 washington ((-123.0198 48.56963, -123.0542 48.61547, -123.0943 48.60974, -123…
#> 2 washington ((-123.0198 48.56963, -122.825 48.40347, -122.7677 48.60974, -122.…
#> 3 washington ((-122.825 48.40347, -122.7963 48.40921, -122.7677 48.40921, -122.…
#> 4 washington ((-122.7677 48.60974, -122.7505 48.63839, -122.7161 48.65557, -122…
#> 5 washington ((-116.9521 46.06581, -116.9235 46.16894, -116.9579 46.20905, -116…

# get coordinates of doubtful polygon
c <- wash[2, ] |> st_coordinates() 

head(c)
#>              X        Y L1 L2
#> [1,] -123.0198 48.56963  1  1
#> [2,] -122.8250 48.40347  1  1
#> [3,] -122.7677 48.60974  1  1
#> [4,] -122.6703 48.17429  1  1
#> [5,] -116.9292 45.99705  1  1
#> [6,] -123.0198 48.56963  1  1

plot(c)
lines(c, type = "l", col = "red")

As you can clearly see, there are at least four displaced points, hence the artifacts, especially because of the fifth one (-116.9292 45.99705).

However, I do not get why most of this (you can still see the little triangle...) disappears when you plot the sf object instead of its geometry. Thought you'd just drop attributes this way without changing (just getting) geometry information.

plot(states[3, ])

dimfalk
  • 853
  • 1
  • 5
  • 15
0

I wasn't able to solve this with the original states dataset from ggplot2, which is what is causing issues.

Instead I followed the beginning of this post (https://pjbartlein.github.io/REarthSysSci/RMaps2.html) to get a spatialpolygon of world land from naturalearthdata.com (code below)

Shape file: https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_land.zip

## download shape file: https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_land.zip

library(rgdal)
layer <- ogrListLayers("../../../Downloads/ne_10m_land/ne_10m_land.shp")
ogrInfo("../../../Downloads/ne_10m_land/ne_10m_land.shp", layer=layer)
coast_lines <- readOGR("../../../Downloads/ne_10m_land/ne_10m_land.shp", layer=layer)

WCG<-structure(list(X = c(665004L, 665232L, 661983L, 663266L, 660980L, 
                          666562L, 660979L, 659316L, 661115L, 665803L, 663685L, 666535L, 
                          667728L, 660758L, 661000L, 665903L, 664469L, 659077L, 665725L
), Survey = c("WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", 
              "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", 
              "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS", "WCGBTS"
), Lat = c(33.07667, 32.91278, 32.70306, 32.57472, 32.0075, 31.99861, 
           32.01028, 32.28583, 32.38222, 32.81528, 40.13528, 40.25611, 48.07222, 
           48.175, 48.42278, 48.44444, 48.45083, 48.41556, 48.37028), Lon = c(-117.3383, 
                                                                              -117.2867, -117.2897, -117.3006, -118.3397, -118.6144, -118.8803, 
                                                                              -119.6567, -119.885, -120.2967, -125.07, -125.1383, -125.9614, 
                                                                              -125.9075, -125.1892, -124.9861, -124.8464, -124.7778, -124.755
           ), Date = c("7/15/2011", "7/17/2012", "7/17/2012", "7/17/2015", 
                       "7/14/2010", "10/12/2016", "7/14/2010", "7/15/2007", "10/13/2010", 
                       "10/9/2017", "6/22/2015", "9/18/2016", "5/29/2019", "5/26/2010", 
                       "8/24/2010", "5/23/2017", "5/29/2009", "8/22/2007", "8/25/2017"
           )), row.names = c(478258L, 478486L, 475237L, 476520L, 474234L, 
                             479816L, 474233L, 472570L, 474369L, 479057L, 476939L, 479789L, 
                             480982L, 474012L, 474254L, 479157L, 477723L, 472331L, 478979L
           ), class = "data.frame")


library(sp)
WCGPoly<-Polygon(as.matrix(cbind(WCG$Lon,WCG$Lat)))

crdref <- CRS('+proj=longlat +datum=WGS84')
p1 <- SpatialPolygons(list(Polygons(list(WCGPoly), "p1")),proj4string=crdref)

plot(p1)
plot(coast_lines,add=T)

enter image description here

library(rgeos)
trim<-gDifference(p1,coast_lines)

plot(trim)

enter image description here

Dylan_Gomes
  • 2,066
  • 14
  • 29