
I have 100 sampling points located across my study area as well as spatial polygons and rasters describing the environmental context of this study area. I'm interested in calculating a weighted sum of the area (for polygons) or weighted mean (for rasters) around each sampling point based on a non-linear function that describes how the relative importance of the environmental context variable decreases as you get further away from the sampling point.

This is the non-linear equation, where r is the distance (km) from the sampling point: w(r) = 0.0382131 × exp(-0.49r)
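In R, that weight function and its behaviour at a few illustrative distances look like this (a minimal sketch; the distances are arbitrary):

w <- function(r) 0.0382131 * exp(-0.49 * r)  # r in km
w(c(0.1, 1, 5))  # roughly 0.0364, 0.0234, 0.0033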

I haven't had much luck finding previous examples of this being attempted on spatial data. At this point, my strategy is to create many buffer zones of increasing size around each sampling point, quantify the area of the polygons or mean of the raster in each one, then apply the above function on the resulting numeric vector.

Here's a toy example where I calculate the weighted values in the simplistic way mentioned above:

library(raster)
library(mapview)
library(sp)
library(rgeos)
library(maptools)

#loading a raster from the raster package
filename <- system.file("external/test.grd", package="raster")
r <- raster(filename)

r<-projectRaster(r, crs="+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")

#generating some coordinates
coords_df<-data.frame( long=c(5.75718, 5.74224, 5.73521),lat=c(50.98469, 50.97551, 50.96372))

xy<-SpatialPoints(coords_df, proj4string = CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))

xy_df<-SpatialPointsDataFrame(coords_df,data=data.frame(ID=c(1,2,3), row.names=c(1,2,3)), match.ID=T, proj4string = CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"))

#creating some polygons
poly1_xcoord<-c(5.757752, 5.758310, 5.756508, 5.755950)

poly1_ycoord<-c(50.986693,50.985828,50.985072,50.986017)

poly2_xcoord<-c(5.739311,5.740770,5.741907,  5.740480)
poly2_ycoord<-c(50.976658,50.975942,50.976253,50.976929)

poly3_xcoord<-c(5.734416,5.734759,5.735425,5.735510)
poly3_ycoord<-c(50.964193,50.963706,50.963652,50.964315)

poly4_xcoord<-c(5.737270,5.738643,5.760530, 5.759328)
poly4_ycoord<-c(50.961017,50.960368,50.983555, 50.983231)

poly1_coords <- cbind(poly1_xcoord, poly1_ycoord)
poly1 = Polygon(poly1_coords, hole = F)
poly2_coords <- cbind(poly2_xcoord, poly2_ycoord)
poly2 = Polygon(poly2_coords, hole=F)
poly3_coords <- cbind(poly3_xcoord, poly3_ycoord)
poly3 = Polygon(poly3_coords, hole=F)
poly4_coords <- cbind(poly4_xcoord, poly4_ycoord)
poly4 = Polygon(poly4_coords, hole=F)


polys_spatial = SpatialPolygons(list(Polygons(list(poly1,poly2,poly3,poly4), 1)))

proj4string(polys_spatial) = CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")


#mapview(r) +mapview(xy)+ mapview(polys_spatial)

#bad way of calculating weighted mean for raster

#extracting mean raster value within buffers around each point
raster_vals_100<-sapply(extract(r, xy, buffer=c(100)), mean)
raster_vals_200<-sapply(extract(r, xy, buffer=c(200)), mean)
raster_vals_300<-sapply(extract(r, xy, buffer=c(300)), mean)
#and so on..

raster_vals<-list(raster_vals_100, raster_vals_200, raster_vals_300)

#non-linear weighting function (r in km); named w() since that is how it is called below
w<-function(r){0.0382131 * exp(-.49*r)}

#buffer distances in km
dist<-c(.1,.2,.3)

#calculating weighted mean of raster values
site1_raster_mean<-mean(w(dist) * sapply(raster_vals, function(x) x[1]))
site2_raster_mean<-mean(w(dist) * sapply(raster_vals, function(x) x[2]))
site3_raster_mean<-mean(w(dist) * sapply(raster_vals, function(x) x[3]))
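As an aside, a conventionally normalized weighted mean would divide by the summed weights; a one-line sketch for site 1 (the unnormalized version above is kept to match the question):

site1_raster_wmean<-sum(w(dist) * sapply(raster_vals, function(x) x[1])) / sum(w(dist))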


#bad way of calculating weighted sum for polygons

#calculating weighted sum of area for polygons (I know the width argument isn't in proper units, but that doesn't affect the main question)
site_buffers_100<-gBuffer(xy_df, width=.001, byid=T)
site_buffers_200<-gBuffer(xy_df, width=.002, byid=T)
site_buffers_300<-gBuffer(xy_df, width=.003, byid=T)
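One way around the width-units caveat would be to buffer in a projected CRS instead; a minimal sketch, assuming UTM zone 31N suits these coordinates and that rgdal is installed so spTransform() works:

xy_utm<-spTransform(xy_df, CRS("+proj=utm +zone=31 +datum=WGS84"))
site_buffers_100m<-gBuffer(xy_utm, width=100, byid=T)  # width now in metres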

#preventing weird orphaned hole issue
slot(polys_spatial, "polygons") <- lapply(slot(polys_spatial, "polygons"), checkPolygonsHoles)

#extracting intersection between polygons and buffers around sites
poly_intersect_100<-raster::intersect(site_buffers_100, polys_spatial)
poly_intersect_200<-raster::intersect(site_buffers_200, polys_spatial)
poly_intersect_300<-raster::intersect(site_buffers_300, polys_spatial)

#summing the area of the intersecting polygons
poly_intersect_100_area<-gArea(poly_intersect_100, byid = TRUE)
poly_intersect_200_area<-gArea(poly_intersect_200, byid = TRUE)
poly_intersect_300_area<-gArea(poly_intersect_300, byid = TRUE)

area_list<-list(poly_intersect_100_area,poly_intersect_200_area,poly_intersect_300_area)

#calculating the weighted sum by site
dist<-c(.1,.2,.3)
site1_polygon_sum<-sum(w(dist) * sapply(area_list, function(x) x[1]))
site2_polygon_sum<-sum(w(dist) * sapply(area_list, function(x) x[2]))
site3_polygon_sum<-sum(w(dist) * sapply(area_list, function(x) x[3]))
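The repeated per-radius blocks could also be collapsed into a loop; a minimal sketch for the raster side, assuming the same three radii (extract()'s buffer is in metres for a lon/lat raster):

dists_km<-c(.1,.2,.3)
raster_vals<-lapply(dists_km * 1000, function(d) sapply(extract(r, xy, buffer=d), mean))
site_raster_means<-sapply(1:3, function(i) mean(w(dists_km) * sapply(raster_vals, function(x) x[i])))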
    Please provide a [*reproducible example*](https://stackoverflow.com/a/5963610/6574038), thank you. – jay.sf Mar 06 '18 at 20:00
  • ok, I added an example, although it's not very realistic given the constraints of generating/sharing spatial data. – Nick_89 Mar 07 '18 at 09:33
  • I don't understand why someone down-voted this, what else can I do to improve it? – Nick_89 Mar 07 '18 at 21:29

1 Answer


I tried to use a continuous measure of distance rather than buffers, although I wasn't sure whether this makes sense for the polygons, since there is no overlap per se. You could probably rasterize the polygons to get around that.

The general idea is to weight the raster values by distance to polygonized raster cells. I used the spex package to polygonize just because it is fastest:

library(sf)
library(spex)
library(fasterize)
library(lwgeom)
library(mapview)

xy_sf   <- st_as_sf(xy_df)
r_poly  <- st_as_sf(spex::polygonize(r))  # one polygon per raster cell
r_poly  <- r_poly[,-1]
r_poly$vals <- r[]  # attach the cell values

r_dists <- st_distance(xy_sf, r_poly)  # distance from each site to every cell polygon
units(r_dists) <- "km"                 # convert to km so the distances match w()
r_dists <- data.frame(matrix(t(r_dists), ncol = 3))  # one column per site
r_dists <- data.frame(apply(r_dists, 2, function(x) w(x) * r_poly$vals))  # weighted cell values
r_dists <- dplyr::bind_cols(r_poly, r_dists)

par(mfrow = c(1, 3))
plot(fasterize(r_dists, r, field = "X1"))
plot(fasterize(r_dists, r, field = "X2"))
plot(fasterize(r_dists, r, field = "X3"))
par(mfrow = c(1,1))

polys_sf             <- st_cast(st_as_sf(polys_spatial), "POLYGON")
polys_sf$vals        <- st_area(polys_sf)
polys_dists          <- st_distance(xy_sf, polys_sf)
units(polys_dists)   <- "km"  # convert to km to match the km-based w()
polys_dists          <- w(matrix(polys_dists, nrow = 3))
xy_sf$polys_weights  <- rev(colSums(polys_sf$vals * t(polys_dists)))
xy_sf$raster_weights <- colSums(st_set_geometry(r_dists[,c("X1", "X2", "X3")], NULL), na.rm = TRUE)

mapview(polys_sf) +  mapview(xy_sf)
  • Thanks for putting this together! I'm running into a boat-load of problems installing stars off of github b/c of dependencies on a dev version of sf which won't install b/c 'gdal-config not found or not executable'. I'll have more time to figure it out early next week. – Nick_89 Mar 17 '18 at 22:11
  • It looks like `stars` was unnecessary. I edited the post. – jsta Mar 19 '18 at 14:01
  • Thanks, got it running! Out of curiosity, why do you need to polygonize the raster in order to calculate distances between the xy sites and the raster squares? Eventually I may rasterize the polygons, polygonize that object, then calculate distances between all the grid components of the original polygon in order to account for the fact that some parts of the polygon are closer to the xy sites than others. – Nick_89 Mar 19 '18 at 18:41
  • I just did it to keep all my calculations in the `sf` package. It might be more efficient to use `raster::distanceFromPoints` if you have a really big raster. – jsta Mar 19 '18 at 18:46
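For reference, a minimal sketch of the raster-based alternative mentioned in that last comment, assuming a single site and the w() function from the question (distanceFromPoints() returns metres for a lon/lat raster, hence the conversion to km):

d_km <- distanceFromPoints(r, xy[1, ]) / 1000  # distance from site 1 to every cell, in km
w_r  <- mask(calc(d_km, w), r)                 # per-cell weights, NA wherever r is NA
site1_wmean <- cellStats(w_r * r, "sum") / cellStats(w_r, "sum")  # distance-weighted mean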