I would like to extend the example given here
How to plot a contour line showing where 95% of values fall within, in R and in ggplot2
to data with three dimensions (x, y and z), and instead of plotting the contour line I'd like to get the limits of the x, y and z values.
This is the example from the previous post.
library(ggplot2)
set.seed(1001)
d <- data.frame(x=rnorm(1000),y=rnorm(1000))
kd <- ks::kde(d, compute.cont=TRUE)
contour_95 <- with(kd, contourLines(x=eval.points[[1]], y=eval.points[[2]],
z=estimate, levels=cont["5%"])[[1]])
contour_95 <- data.frame(contour_95)
ggplot(data=d, aes(x, y)) +
geom_point() +
geom_path(aes(x, y), data=contour_95) +
theme_bw()
and then, it's possible to get the limits of the contour like this:
range(contour_95$x)
range(contour_95$y)
I would love to know how to get the x, y and z ranges of 3-D contours at specified percentiles.
ks:kde can deal with higher dimensions, but contourLines() cant.
This is what I've tried...
set.seed(1001)
d <- data.frame(x=rnorm(1000),y=rnorm(1000), y=rnorm(1000))
kd <- ks::kde(d, compute.cont=TRUE)
#what kd$estimates are > 95th percentile?
#make function that can extract from 3d array
multi.which <- function(A){
if ( is.vector(A) ) return(which(A))
d <- dim(A)
T <- which(A) - 1
nd <- length(d)
t( sapply(T, function(t){
I <- integer(nd)
I[1] <- t %% d[1]
sapply(2:nd, function(j){
I[j] <<- (t %/% prod(d[1:(j-1)])) %% d[j]
})
I
}) + 1 )
}
#extract those estimates that have >density than 95th percentile
ests <- multi.which(kd$estimate > kd$cont["5%"])
#make into a long dataframe with column number in the second column and row number in first column
col1=rep(1, nrow(ests))
col2=rep(2, nrow(ests))
col3=rep(3, nrow(ests))
rows=c(ests[,1], ests[,2], ests[,3])
cols=c(col1,col2,col3)
index=cbind(rows,cols)#this is the index so we can extract the coordinates in multi-D space
car::some(index)
#get coordinates with this function
fExtract <- function(dat, indexDat){
dat[as.matrix(indexDat)]
}
#pull three coordinates (x,y,z) from eval.points into 3 columns
eval.pts <- cbind(kd$eval.points[[1]], kd$eval.points[[2]], kd$eval.points[[3]])
v <- fExtract(eval.pts, index) #one long vector
#re-create the three columns of x, y and z coordinates of points at higher density than 95th percentile
x1 <- v[1:nrow(ests)]
y1 <- v[(nrow(ests)+1):(2*nrow(ests))]
z1 <- v[(2*nrow(ests)+1):(3*nrow(ests))]
#the three coordinates.
fin <- cbind(x1,y1,z1)
#get range of each dimension
range(x1)
range(y1)
range(z1)
But I'm not confident it's right.