I was asked to impute a dataset with both the LOCF and the NOCB methods using the na.locf() function from the zoo package, and I'm now trying to plot both the observed and the imputed values. The dataset I'm working with is the following:
# Example dataset in wide format (this looks like dput() output of a readr
# tibble): 27 rows with columns id, sex, d8, d10, d12 and d14. Missing values
# (NA) occur in sex, d8 and d12.
# NOTE(review): the expression is not assigned to a name here; the code further
# down refers to the data as `dati`, so presumably it has to be stored under
# that name before the remaining snippets can run.
structure(list(id = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27),
sex = c("F", "F", NA, "F", "F", "F", "F", "F", "F", "F",
"F", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M",
"M", "M", "M", "M", "M"), d8 = c(21, 21, NA, 23.5, 21.5,
20, 21.5, 23, NA, 16.5, 24.5, 26, 21.5, 23, 25.5, 20, 24.5,
22, 24, 23, 27.5, 23, 21.5, 17, 22.5, 23, 22), d10 = c(20,
21.5, 24, 24.5, 23, 21, 22.5, 23, 21, 19, 25, 25, 22.5, 22.5,
27.5, 23.5, 25.5, 22, 21.5, 20.5, 28, 23, 23.5, 24.5, 25.5,
24.5, 21.5), d12 = c(21.5, 24, NA, 25, 22.5, 21, 23, 23.5,
NA, 19, 28, 29, 23, NA, 26.5, 22.5, 27, 24.5, 24.5, 31, 31,
23.5, 24, 26, 25.5, 26, 23.5), d14 = c(23, 25.5, 26, 26.5,
23.5, 22.5, 25, 24, 21.5, 19.5, 28, 31, 26.5, 27.5, 27, 26,
28.5, 26.5, 25.5, 26, 31.5, 25, 28, 29.5, 26, 30, 25)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -27L), spec = structure(list(
cols = list(id = structure(list(), class = c("collector_double",
"collector")), sex = structure(list(), class = c("collector_character",
"collector")), d8 = structure(list(), class = c("collector_double",
"collector")), d10 = structure(list(), class = c("collector_double",
"collector")), d12 = structure(list(), class = c("collector_double",
"collector")), d14 = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
I imputed the missing values by converting the original wide format to a long format and then following these steps:
# Reshape to long format: one row per subject (id) and age (d8, d10, d12, d14).
# gather() stacks the age columns, so rows are ordered by age and then by id.
data_long <- tidyr::gather(dati, age, measurements, d8:d14, factor_key = TRUE)
data_locf <- data_long

# Remember which cells were missing BEFORE imputation. Once the NAs are filled
# this information is gone, and it is exactly what is needed to draw observed
# and imputed values differently in a plot.
data_locf$imputed <- is.na(data_long$measurements)

# Fill within each subject. In the long table neighbouring rows belong to
# different subjects, so filling the whole column in one go would copy the
# value of the previous/next id rather than the subject's own previous/next
# visit. na.rm = FALSE keeps every result the same length as its input, so the
# filled vectors stay aligned with the rows of the data frame.
locf <- ave(
  data_locf$measurements, data_locf$id,
  FUN = function(v) na.locf(v, na.rm = FALSE, fromLast = FALSE)
)
nocb <- ave(
  data_locf$measurements, data_locf$id,
  FUN = function(v) na.locf(v, na.rm = FALSE, fromLast = TRUE)
)

# d12 is carried forward (LOCF); every other age is carried backward (NOCB).
data_locf$measurements <- ifelse(data_locf$age == "d12", locf, nocb)

# A subject with missing sex has no own value to borrow, so the value of the
# following row is used, as before.
data_locf$sex <- na.locf(data_locf$sex, na.rm = FALSE, fromLast = TRUE)

# The table is already filled at this point, so no complete() call is needed:
# tidyr::complete() expects `fill` to be a named list of single values, and
# mice::complete() expects a `mids` object rather than a data frame.
data_complete <- data_locf
Does anyone know a way to plot the imputed values together with the observed ones? Below are a couple of functions that were recommended to me and that I have started to modify, so far unsuccessfully.
#1 plot
# Plot 1 setup: pull the `measurements` column of data_complete out as a plain
# vector and switch the graphics device to a single-panel layout.
measurements <- data_complete[["measurements"]]
par(mfrow = c(1, 1))
# Last observation carried forward.
#
# Replaces every NA in `x` with the most recent preceding non-NA value and
# returns the filled vector, which has the same length as `x`. Leading NAs
# have no earlier value to copy and are therefore left as NA.
locf <- function(x) {
  a <- x[1]
  # seq_along(x)[-1] is empty for vectors of length 0 or 1, whereas
  # 2:length(x) would count backwards in those cases and index past the end
  # of `x`.
  for (i in seq_along(x)[-1]) {
    if (is.na(x[i])) {
      x[i] <- a
    } else {
      a <- x[i]
    }
  }
  x
}
meas1 <- na.locf(measurements)
# `measurements` has already been imputed, so is.na(measurements) is FALSE for
# every row and all points would receive the "observed" colour. The
# missingness pattern is taken from the pre-imputation column instead.
# NOTE(review): assumes data_complete keeps the row order of data_long - confirm.
was_missing <- is.na(data_long$measurements)
# mdc(1) is used for observed values and mdc(2) for imputed ones.
colvec <- ifelse(was_missing, mdc(2), mdc(1))
plot(
  measurements,
  col = colvec, type = "l", xlab = "sex", ylab = "measurements"
)
points(measurements, col = colvec, pch = 20, cex = 1)
which does not produce a plot that is properly separated by gender, and:
#2 plot
# Two side-by-side panels for base-graphics output.
par(mfrow = c(1, 2))

# Histogram bin edges plus the drawing constants used below.
breaks <- seq(from = -20, to = 200, by = 10)
nudge <- 1
lwd <- 1.5

# Two-column matrix: bin edges shifted left (column 1) and right (column 2)
# by `nudge`.
x <- matrix(c(breaks - nudge, breaks + nudge), ncol = 2)

# Ozone column of airquality, and the first column of imp$imp$Ozone
# (presumably the first set of imputed Ozone values of a mice `mids` object
# named `imp` - confirm).
obs <- airquality$Ozone
mis <- imp[["imp"]][["Ozone"]][, 1]

# Counts per bin for both series, each with a trailing 0 appended so the
# vectors have one entry per element of `breaks`.
fobs <- c(hist(obs, breaks = breaks, plot = FALSE)$counts, 0)
fmis <- c(hist(mis, breaks = breaks, plot = FALSE)$counts, 0)
y <- matrix(c(fobs, fmis), ncol = 2)

# Scatterplot of Ozone against Solar.R, grouped by ici(imp).
tp <- xyplot(
  imp, Ozone ~ Solar.R,
  na.groups = ici(imp),
  ylab = "Ozone (ppb)",
  xlab = "Solar Radiation (lang)",
  cex = 0.75,
  lex = lwd,
  pch = 19,
  ylim = c(-20, 180),
  xlim = c(0, 350)
)
print(tp)
which reproduces a nice scatterplot for the airquality dataset from the mice package. The crucial point is that I'm not able to extract the imputed values when using the na.locf function.
To be specific, I need to plot age/measurements as the response variable against sex, which is why I need a separation between the two genders.