I am writing a function that performs PCA on pairs of variables in an xts object until the correlation between all of the variables is less than 0.1. Here is the function that I wrote:
PCA_Selection <- function(X, r=0.1){
M <- cor(X) # Creating corrolation matrix
M[M==1] <- 0 # filling the diagnal with 0s so that pairs of the same variables are not considered
while(max(abs(M)) > r){
M <- cor(X)
PCA_vars <- matrix(,nrow = (nrow(M))^2 ,ncol = 2)
for(i in 1:ncol(M)){ # Selects variables that will be use for PCA
for(j in 1:nrow(M)){
if(M[j,i] > r & M[j,i] < 1){
PCA_vars[c(i*j),] <- c(row.names(M)[i],colnames(M)[j])
}}} # works
PCA_vars <- na.omit(PCA_vars) # works
for (i in 1:nrow(PCA_vars)) {
PCA_pre <- prcomp(X[,c(names(X) %in% PCA_vars[i,])])
Sum_PCA <- summary(PCA_pre)
tmp <- data.frame()
if (Sum_PCA[["importance"]][2,1] > 0.95){ # if the first component captures 95% of variance
tmp <- data.frame(predict(PCA_pre, X)[,1]) # then only use the first component for predictions
names(tmp) <- c(paste0("Com_",PCA_vars[i,1],"_",PCA_vars[i,2],"_1"))
}else { # else use all both of the component and do not reduce the dimensions
tmp <- predict(PCA_pre,X)
colnames(tmp) <- c(paste0("Com_",PCA_vars[i,1],"_",PCA_vars[i,2],"_1"),
paste0("Com_",PCA_vars[i,1],"_",PCA_vars[i,2],"_2"))
}
Xnew <- cbind(X,tmp)
X <- Xnew
}
PCA_vars <- unique(as.vector(PCA_vars)) # Variables to be removed
X <- X[, -which(colnames(X) %in% PCA_vars)]
M <- cor(X)
M[M==1] <- 0
}
return(Xnew)
}
However, when I run the function r returns a strange error:
Error in colMeans(x, na.rm = TRUE): 'x' must be numeric
The data that I am testing the function with is an xts object that does not have any missing observations. Furthermore, all of the variables have non-zero variance and there are only continuous numeric variables in the data.