0

I'm trying to figure out the USDA CropScape crop value at a GPS location for the calendar year of the location, the previous calendar year, and the following calendar year (e.g., if the date of the GPS location was 10/1/2016, I'd want to extract the crop value in that location for 2015, 2016, and 2017).

Here's the code I used to prepare my GPS and CropScape data:

library(rgdal)
library(raster)
library(sf)
library(sp)
library(rgeos)
library(adehabitatHR)
library(lubridate)

head(data)
proj <- 32612
data <- st_as_sf(data, coords=c("easting","northing"), dim="XY", crs=proj)

cropScape_2014 <- raster("./data/_raw/cropScape/cropScape_2014.tif")
cropScape_2015 <- raster("./data/_raw/cropScape/cropScape_2015.tif")
cropScape_2016 <- raster("./data/_raw/cropScape/cropScape_2016.tif")
cropScape_2017 <- raster("./data/_raw/cropScape/cropScape_2017.tif")
cropScape_2018 <- raster("./data/_raw/cropScape/cropScape_2018.tif")
cropScape_2019 <- raster("./data/_raw/cropScape/cropScape_2019.tif")
cropScape_2020 <- raster("./data/_raw/cropScape/cropScape_2020.tif")
cropScape_2021 <- raster("./data/_raw/cropScape/cropScape_2021.tif")

cropScape <- stack(cropScape_2014, cropScape_2015, cropScape_2016,
                   cropScape_2017, cropScape_2018, cropScape_2019,
                   cropScape_2020, cropScape_2021)

data <- as(data, "Spatial")

popMCP <- mcp(data, percent = 100)
popMCPbuf <- raster::buffer(popMCP, 1000)
cropScape2 <- mask(cropScape, popMCPbuf)
cropScape_cropped <- crop(cropScape2, popMCPbuf)

tz(data$date)
data$calYr <- as.numeric(strftime(data$date, format = "%Y", tz = "MST7MDT"))

dput(data[sample(nrow(data), size = 10),])
dput(head(cropScape_cropped))

Here's a small subset of my GPS data that I took at this point:

new("SpatialPointsDataFrame", data = structure(list(id = c("GT74", 
"GT48", "GT52", "GT82", "GT74", "GT52", "GT82", "GT9", "GT45", 
"GT43"), date = structure(c(1607320849, 1544097667, 1588500050, 
1554854505, 1618632063, 1549476050, 1577080837, 1449784850, 1542376850, 
1548309669), class = c("POSIXct", "POSIXt"), tzone = "MST7MDT"), 
    id_bioYr = c("GT74_2020", "GT48_2018", "GT52_2019", "GT82_2018", 
    "GT74_2020", "GT52_2018", "GT82_2019", "GT9_2015", "GT45_2018", 
    "GT43_2018"), state = c(1L, 1L, 3L, 2L, 1L, 1L, 2L, 2L, 2L, 
    1L), used = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), landcover = c("agricultural", 
    "agricultural", "agricultural", "agricultural", "agricultural", 
    "shadow", "water", "xericGrass", "agricultural", "agricultural"
    ), calYr = c(2020, 2018, 2020, 2019, 2021, 2019, 2019, 2015, 
    2018, 2019)), row.names = c(471519L, 253950L, 310790L, 49469L, 
479572L, 293620L, 573180L, 627720L, 238551L, 151426L), class = "data.frame"), 
    coords.nrs = numeric(0), coords = structure(c(483309.821132995, 
    480304.323982496, 479889.104783906, 469493, 458709.035703965, 
    475920.645473197, 470714.848866241, 474984.326637862, 473390.631355455, 
    479941.915819768, 4866298.15517148, 4867412.65105531, 4865629.72751859, 
    4865043, 4864209.38410713, 4863155.30805073, 4865581.17021548, 
    4865450.06432659, 4865829.2649267, 4861889.90054007), .Dim = c(10L, 
    2L), .Dimnames = list(NULL, c("coords.x1", "coords.x2"))), 
    bbox = structure(c(458709.035703965, 4861889.90054007, 483309.821132995, 
    4867412.65105531), .Dim = c(2L, 2L), .Dimnames = list(c("coords.x1", 
    "coords.x2"), c("min", "max"))), proj4string = new("CRS", 
        projargs = "+proj=utm +zone=12 +datum=WGS84 +units=m +no_defs"))

and the crop data (not sure if this is helpful/what people would need?):

structure(c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_), .Dim = c(20L, 8L), .Dimnames = list(NULL, c("cropScape_2014", 
"cropScape_2015", "cropScape_2016", "cropScape_2017", "cropScape_2018", 
"cropScape_2019", "cropScape_2020", "cropScape_2021")))

I've been trying to write a loop that would extract my three values into new columns in data, but I don't really know how to get started.

Ideally, at the end of the loop, there would be 3 new columns added to data ("crop_calYr-1", "crop_calYr", "crop_calYr+1") and they'd be populated from the correct layers of "cropScape_cropped." I do know that I'll have to add "NA" to "crop_calYr+1" when the GPS location is from 2021.

b786
  • 3
  • 3
  • 2
    Did you use `CropScapeR` to get your above data?. and if so, you might as well add that `library(` and calls you used to get everyone rowing in the same direction. And welcome to Stackoverflow. – Chris Jan 04 '23 at 01:28
  • It'd be great if you shared some of your data. You can do that easily with the GPS points with `dput()`. Since the CDL data are big, that's going to be tricky, but you can try `dput(head())` and add that info to your question. Please also read these other suggested guidelines: https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example – John Polo Jan 04 '23 at 02:06
  • Thank you - post is edited to include code and subset of data. – b786 Jan 04 '23 at 20:28
  • Example data is important, but do *not* use `dput`, especially with spatial data. Create example data in code instead or use a file that ships with R (see all the examples in raster, terra, sf....). – Robert Hijmans Jan 11 '23 at 17:39

2 Answers2

0

I couldn't recreate your raster data. I just downloaded a few years of data for Fremont County, ID., since it looked like that was where your points were located. I used raster package here, but I'd have preferred to use terra.

library(raster)

l1 <- list.files("Downloads/polygonclip", pattern="tif$", full.names
=T)
cd1 <- raster::stack(l1)

# Should be able to change the categories in the stack
# so that the extract returns character value, e.g. 
# "barley" instead of 152 with something like this. 
# Get levels
ca1 <- levels(cd1) 
# MOdify to get desired factors
ca2 <- ca1[[1]][,c(1,5)]
# Assign
levels(cd2, layer=c(1:4)) <- ca2

# ... But that way doesn't work for me for some reason, 
# so I did it this way. You'll need to do this for each
# year/layer in the stack
levels(cd1[[1]]) <- ca2
levels(cd1[[2]]) <- ca2
levels(cd1[[3]]) <- ca2
levels(cd1[[4]]) <- ca2

# Recreate the points data shared
pts1 <- new("SpatialPointsDataFrame", data = structure(list(id = c("GT74", 
"GT48", "GT52", "GT82", "GT74", "GT52", "GT82", "GT9", "GT45", 
"GT43"), date = structure(c(1607320849, 1544097667, 1588500050, 
1554854505, 1618632063, 1549476050, 1577080837, 1449784850, 1542376850, 
1548309669), class = c("POSIXct", "POSIXt"), tzone = "MST7MDT"), 
    id_bioYr = c("GT74_2020", "GT48_2018", "GT52_2019", "GT82_2018", 
    "GT74_2020", "GT52_2018", "GT82_2019", "GT9_2015", "GT45_2018", 
    "GT43_2018"), state = c(1L, 1L, 3L, 2L, 1L, 1L, 2L, 2L, 2L, 
    1L), used = c(0, 0, 0, 1, 0, 0, 0, 0, 0, 0), landcover = c("agricultural", 
    "agricultural", "agricultural", "agricultural", "agricultural", 
    "shadow", "water", "xericGrass", "agricultural", "agricultural"
    ), calYr = c(2020, 2018, 2020, 2019, 2021, 2019, 2019, 2015, 
    2018, 2019)), row.names = c(471519L, 253950L, 310790L, 49469L, 
479572L, 293620L, 573180L, 627720L, 238551L, 151426L), class = "data.frame"), 
    coords.nrs = numeric(0), coords = structure(c(483309.821132995, 
    480304.323982496, 479889.104783906, 469493, 458709.035703965, 
    475920.645473197, 470714.848866241, 474984.326637862, 473390.631355455, 
    479941.915819768, 4866298.15517148, 4867412.65105531, 4865629.72751859, 
    4865043, 4864209.38410713, 4863155.30805073, 4865581.17021548, 
    4865450.06432659, 4865829.2649267, 4861889.90054007), .Dim = c(10L, 
    2L), .Dimnames = list(NULL, c("coords.x1", "coords.x2"))), 
    bbox = structure(c(458709.035703965, 4861889.90054007, 483309.821132995, 
    4867412.65105531), .Dim = c(2L, 2L), .Dimnames = list(c("coords.x1", 
    "coords.x2"), c("min", "max"))), proj4string = new("CRS", 
        projargs = "+proj=utm +zone=12 +datum=WGS84 +units=m +no_defs"))

# Removed one of the points because it was for the year 2015 
# which threw everything off. 
pts1 <- pts1[-8,]

# Need same projection for extract        
pts1 <- spTransform(pts1, crs(cd1))

# Set Z dimension (years) for matching in next function
# Note the years used
cd1 <- setZ(cd1, 2018:2021, "years")

# This takes a year and returns the indices of that year
# from the list of years. The indices are for the year
# prior to the selected year, the year selected, and the
# year after selected year.
# Note the years used. 2018 was my oldest, but I think
# your data goes to 2015, so you'll have to change each
# 2018
rysel <- function(yind) {
    if(yind==2018) {
        yex = c(1, 2)
    } else if(yind==2021) {
        yex = c(3, 4)
    } else if(2018 < yind & yind < 2021) {
        yex = c(yind-2018, yind-2018+1, yind-2018+2)
    }
    return(yex)
}

# This takes the index of the row in the points data
# and uses the rysel function to subset the stack and 
# then extract.
extryr <- function(x) {
pty = pts1$calYr[x]

ytmp = rysel(pty)

yextr = extract(cd1[[ytmp]], pts1[x,])

# Pad the extract for either end
# Note the years used
if(pty==2018){
    yextr = c(yextr, "NA")
} else if(pty==2021) {
    yextr = c("NA",yextr)
}

return(yextr)
}

# Data frame with column labels for holding extract values
df2 <- data.frame(calYrprior = NA, calYrcur = NA, calYrnext = NA)

# 
for(i in 1:nrow(pts1)) df2[i,] = extryr(i)

# Convert the values in df2 to numeric
# You can ignore the warning
df2 <- sapply(df2, as.numeric)
Warning messages:
1: In extryr(i) : NAs introduced by coercion
2: In extryr(i) : NAs introduced by coercion
3: In extryr(i) : NAs introduced by coercion

> df2
      calYrprior calYrcur calYrnext
 [1,]        152      152       152
 [2,]         21       21        NA
 [3,]        152      152       152
 [4,]        152      152        21
 [5,]         NA       23        43
 [6,]          0        0         0
 [7,]          0        0         0
 [8,]        152      152        NA
 [9,]         NA       NA        NA


# Combine the pts1 data and the extract data
pts1 <- cbind(pts1, df2)

You'll need to change the years where I noted. If there is a difference in the years that are in raster and in the points, this won't work. For example, if the stack has 2014, 2016-2019 and the points cover the years 2014-2019, you'll have to revise this.

The extract still returns the numeric value instead of the character/factor value. But you can perform a substitution to change those as needed.

John Polo
  • 547
  • 1
  • 8
  • 25
0

As John Polo suggests, it is much easier to use "terra" (the replacement of "raster").

Example data

library(terra)
f <- system.file("ex/elev.tif", package="terra")
r <- rast(f)
x <- c(r*1, r*2, r*3, r*4, r*5)
time(x, "years") <- 2001:2005
names(x) <- paste0("Y", 2001:2005)

d <- data.frame(lon=c(5.867235,  6.319561), 
       lat=c(49.85114, 49.55199), year=c(2003, 2004))

Solution

# match the year in your data.frame with that of the SpatRaster layers
i <- match(d$year, time(x))
## or 
# i <- match(d$year, 2001:2005)

If you only needed the values for the matching year, you could do

ey <- extract(x, d[, c("lon","lat")], layer=i)
ey
#  ID layer value
#1  1 Y2003  1473
#2  2 Y2004   692

But to also get the year before and after:

## extract values for all years
e <- extract(x, d[, c("lon","lat")], ID=FALSE)

## find the layers (years) for each point
j <- cbind(rep(1:length(d$year), each=3), 
           rep(i, each=3) + c(-1:1))

## subset the extracted data
e[j]
#[1]  982 1473 1964  519  692  865

## to keep track of what is what
b <- data.frame(point=j[,1], year=time(x)[j[,2]], value=e[j])
b
#  point year value
#1     1 2002   982
#2     1 2003  1473
#3     1 2004  1964
#4     2 2003   519
#5     2 2004   692
#6     2 2005   865

reshape(b, direction="wide", idvar="point", timevar="year")
#  point value.2002 value.2003 value.2004 value.2005
#1     1        982       1473       1964         NA
#4     2         NA        519        692        865 

Here is a similar (but more complex) question from a couple of days ago.

Robert Hijmans
  • 40,301
  • 4
  • 55
  • 63
  • Thank you - this is what I have used in the past when I only need to extract a single value for each GPS location. However, I'm trying to extract the cropscape value for the year the GPS location was recorded, as well as the value in the same geographic location during the previous year and the following year (3 cropscape values total). Do you have any suggestions on how I can accomplish this? – b786 Jan 11 '23 at 18:20
  • Sorry, I missed that. I have expanded my answer to cover that. – Robert Hijmans Jan 11 '23 at 20:01
  • Thanks for the help! A couple of follow-up questions: 1. Was something supposed to be assigned to time(cropscape) <- ? 2. If I comment out time(cropscape) <- and run the subsequent lines as written, I get the following error for j <- cbind(rep(1:length(calYr), each=3), rep(calYr, each=3) + c(-1:1)): Error in rep(calYr, each = 3) + c(-1:1) : non-numeric argument to binary operator Any suggestions? – b786 Jan 12 '23 at 00:02
  • I have edited the dangling `time(cropscape) <-` code. The error your are getting suggests that `calYr` is a character vector and that you need to do `as.numeric(calYr)` – Robert Hijmans Jan 12 '23 at 00:05
  • It is difficult to get these tings right without a repex. I have now included one and fixed the code. – Robert Hijmans Jan 12 '23 at 02:14