Here I compare Forrest's approach with a thin plate spline (TPS). Their performance is about the same -- depending on the sample. The TPS could be preferable if the gaps were larger such that focal could not estimate anymore --- but in that case you could also use a a larger (and perhaps Gaussian, see ?focalWeight
) filter.
d <- matrix(c(
1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,2,1,1,1,1,1,
1,1,1,1,2,2,2,1,1,1,1,
1,1,2,2,2,3,2,2,2,1,1,
2,2,2,2,3,3,3,2,2,2,2,
1,1,2,2,2,3,2,2,2,1,1,
1,1,1,1,2,2,2,1,1,1,1,
1,1,1,1,1,2,1,1,1,1,1), ncol=11, byrow=TRUE)
library(raster)
d <- raster(d)
plot(d, col=colorRampPalette(c("blue","yellow","red"))(255))
## Simulate 30% missing data:
set.seed(1)
d_m <- d
d_m[ sample(1:length(d), length(d)/3) ] <- NA
plot(d_m, col=colorRampPalette(c("blue","yellow","red"))(255))
# Forrest's solution:
filter <- matrix(1, nrow=3, ncol=3)
r <- focal(d_m, filter, mean, na.rm=T, NAonly=T, pad=T)
#an alterative:
rp <- rasterToPoints(d_m)
library(fields)
# thin plate spline interpolation
#(for a simple pattern like this, IDW might work, see ?interpolate)
tps <- Tps(rp[,1:2], rp[,3])
# predict
x <- interpolate(d_m, tps)
# use the orginal values where available
m <- cover(d_m, x)
i <- is.na(d_m)
cor(d[i], m[i])
## [1] 0.8846869
cor(d[i], r[i])
## [1] 0.8443165