Here is a strategy:
library(UScensus2000tract)
library(spdep)
library(ggplot2)
library(dplyr)
# Load the Oregon census-tract polygons.
data("oregon.tract")

# Quick visual check of the tract boundaries.
plot(oregon.tract)

# Neighbour list built from shared boundaries (queen contiguity, which is
# the poly2nb() default, spelled out here).
spatmatrix <- poly2nb(oregon.tract, queen = TRUE)

# Turn the neighbour list into a spatial weights list.
listw <- nb2listw(neighbours = spatmatrix)

# Local Moran's I of the `white` population variable, one row per tract.
lmoran <- localmoran(x = oregon.tract$white, listw = listw)
summary(lmoran)

# Standardise the variable and store it as a new column; as.vector() drops
# the matrix attributes returned by scale().
oregon.tract$s_white <- as.vector(scale(oregon.tract$white))

# Spatial lag of the standardised variable, stored as a new column.
oregon.tract$lag_s_white <- lag.listw(x = listw, var = oregon.tract$s_white)

# Summaries of both columns, to inform the analysis that follows.
summary(oregon.tract$s_white)
summary(oregon.tract$lag_s_white)
# Moran scatterplot in base graphics (moran.plot() also identifies the
# influential observations).
x <- oregon.tract$s_white
y <- as.vector(oregon.tract$lag_s_white)
xx <- data.frame(x, y)
moran.plot(x, listw)

# Moran scatterplot in ggplot2: standardised value on the x axis, its spatial
# lag on the y axis, with a fitted regression line. The dashed lines at zero
# split the plot into the four quadrants.
# (Influential observations are not identified here - that is possible but
# requires more effort.)
ggplot(xx, aes(x, y)) +
  geom_point() +
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  geom_smooth(method = "lm", se = FALSE) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed")
# Classify every tract by its Moran scatterplot quadrant, keeping the quadrant
# label only for tracts whose local Moran statistic is significant.

# Column 5 of the localmoran() result is used as the p-value, significant at
# the 5% level. (NOTE(review): confirm the column position against the
# installed spdep version.)
is_signif <- lmoran[, 5] <= 0.05

# Own value and neighbourhood (lagged) value relative to the mean of the
# standardised variable. A value of exactly zero counts as "high", so the four
# quadrants are mutually exclusive and the result no longer depends on the
# order in which the labels are assigned.
own_high <- oregon.tract$s_white >= 0
lag_high <- oregon.tract$lag_s_white >= 0

quadrant <- ifelse(
  own_high,
  ifelse(lag_high, "high-high", "high-low"),
  ifelse(lag_high, "low-high", "low-low")
)

# Non-significant tracts get a single label; a missing p-value yields NA
# instead of aborting the assignment.
quadrant <- ifelse(is_signif, quadrant, "not signif.")

oregon.tract$quad_sig <- as.factor(quadrant)

# Row names become an explicit key column, used to join the attributes back
# onto the polygon coordinates when mapping.
oregon.tract@data$id <- rownames(oregon.tract@data)
# Plot the cluster map.
# fortify() converts the polygons into a plain data frame of coordinates,
# tagged with the "id" key of the tract each one belongs to.
df <- fortify(oregon.tract, region = "id")

# Attach the tract attributes (including quad_sig). The key is given
# explicitly so the join cannot silently pick up any other column name the
# two tables happen to share.
df <- left_join(df, oregon.tract@data, by = "id")

df %>%
  ggplot(aes(long, lat, group = group, fill = quad_sig)) +
  geom_polygon(color = "white", size = .05) +
  coord_equal() +
  theme_void() +
  scale_fill_brewer(palette = "Set1")
This answer was based on this page, suggested by Eli Knaap on twitter, and also borrowed from the answer by @timelyportfolio to this question.
I used the variable white
instead of black
because black
had less explicit results.
Concerning NAs, localmoran()
includes the argument na.action
, about which the documentation says:
na.action is a function (default na.fail), can also be na.omit or na.exclude - in these cases the weights list will be subsetted to remove NAs in the data. It may be necessary to set zero.policy to TRUE because this subsetting may create no-neighbour observations. Note that only weights lists created without using the glist argument to nb2listw may be subsetted. If na.pass is used, zero is substituted for NA values in calculating the spatial lag.
I tried:
# Blank out three tracts, then recompute the local Moran statistic while
# excluding the missing values.
oregon.tract@data[3:5, "white"] <- NA
lmoran <- localmoran(
  x = oregon.tract@data$white,
  listw = listw,
  zero.policy = TRUE,
  na.action = na.exclude
)
But I ran into problems in lag.listw
and did not have time to look into it. Sorry.