I am performing stacking ensembling and I keep getting the error
"invalid argument to unary operator" at
-ncol(test)
I have tried every fix I could find on Google, but nothing has worked. (For context: R raises this error when `ncol(test)` evaluates to `NULL` — i.e. when `test` is not a two-dimensional object such as a data frame or matrix — because `-NULL` is not a valid unary operation.)
#----- Stacking -----#
# NOTE(review): truncated excerpt from the full script further below; the
# function body is not closed here. The reported "invalid argument to unary
# operator" at -ncol(test) most likely means ncol(test) is NULL, i.e. `test`
# arrived as a plain vector rather than a data frame/matrix — TODO confirm
# what split_data() actually returns for this dataset.
stacking_et0 <- function(train, test, nmeta_mdls=1){
nmdls <- 3
test <- test[,-ncol(test)]
out_train <- train[,ncol(train)]
train <- train[,-ncol(train)]
The full code is below:
#------ On the suitability of stacking-based ensembles in smart agriculture for evapotranspiration prediction ------#
#===================================================================================================================#
#----- libraries -----#
library(gbm)
library(e1071)
library(xgboost)
library(randomForest)
library(caTools)
dataset<-read.csv("aln.csv")
#' Chronological 80/20 split of a data frame.
#'
#' The first 80% of rows form the training set, the remainder the test set.
#'
#' @param dataset A data frame to split.
#' @param train If TRUE return the training portion, otherwise the test portion.
#' @return A data frame (never collapsed to a vector).
split_data <- function(dataset, train = TRUE) {
  # floor() makes the integer intent explicit; seq_len() is safe when the
  # count is 0 (1:0 would yield c(1, 0)).
  n_train <- floor(nrow(dataset) * 0.8)
  idx <- seq_len(n_train)
  # drop = FALSE keeps a one-column data frame from collapsing to a plain
  # vector; without it, a single-feature dataset makes ncol(test) NULL
  # downstream and triggers "invalid argument to unary operator" at
  # -ncol(test).
  if (isTRUE(train)) {
    dataset[idx, , drop = FALSE]
  } else {
    dataset[-idx, , drop = FALSE]
  }
}
# NOTE(review): stacking_et0() is called here but defined further DOWN in
# this file. Sourcing the script top-to-bottom fails at this line with
# "could not find function 'stacking_et0'". Move the function definition
# above this driver code (define all functions before calling them).
train <- split_data(dataset, train = TRUE)
test <- split_data(dataset, train = FALSE)
ans <- stacking_et0(train, test)
View(ans)
#----- Stacking -----#
#' Two-level stacking ensemble for ET0 prediction.
#'
#' Level 1 trains nmdls randomized instances each of RF, SVM, GBM and XGBoost;
#' their predictions are appended as extra features. Level 2 trains
#' nmeta_mdls XGBoost meta-learners on the augmented features and averages
#' their test-set predictions.
#'
#' @param train Data frame whose LAST column is the target.
#' @param test Data frame with the same columns as `train` (its last column
#'   is dropped and never used).
#' @param nmeta_mdls Number of second-level XGBoost models to average.
#' @return Numeric vector of ensemble predictions, one per test row.
stacking_et0 <- function(train, test, nmeta_mdls = 1) {
  nmdls <- 3
  # Coerce to data frames so ncol() is never NULL. If a caller passes a
  # plain vector (e.g. a 1-column subset collapsed by `[`), ncol() returns
  # NULL and test[, -ncol(test)] fails with
  # "invalid argument to unary operator".
  train <- as.data.frame(train)
  test <- as.data.frame(test)
  # Strip the target column; drop = FALSE keeps data-frame shape even with
  # a single remaining feature.
  test <- test[, -ncol(test), drop = FALSE]
  out_train <- train[, ncol(train)]
  train <- train[, -ncol(train), drop = FALSE]
  #==========================================================#
  #----- First-level Algorithms (RF, SVM, GBM, XGBoost) -----#
  #==========================================================#
  # 4 learner families x nmdls randomized instances each
  # (4 * 3 = 12 columns, matching the original hard-coded layout).
  pd_train <- matrix(0, nrow(train), 4 * nmdls)
  pd_test <- matrix(0, nrow(test), 4 * nmdls)
  train_with_out <- cbind(train, out_train)
  frm <- as.formula(paste(names(train_with_out)[ncol(train_with_out)], "~.", sep = ""))
  for (i in seq_len(nmdls)) {
    #=== RandomForest ===#
    trees <- sample(350:600, 1)
    node <- sample(5:15, 1)
    model_rdm <- randomForest(frm, data = train_with_out, ntree = trees, nodesize = node)
    pd_train[, i] <- predict(model_rdm, train)
    pd_test[, i] <- predict(model_rdm, test)
    #======= SVM ========#
    tole <- round(runif(1, 0.0001, 0.01), 4)
    reg <- sample(2:14, 1)
    model_svm <- e1071::svm(x = train, y = out_train, tolerance = tole, cost = reg, scale = TRUE, type = "eps-regression", kernel = "radial")
    pd_train[, i + nmdls] <- predict(model_svm, train)
    pd_test[, i + nmdls] <- predict(model_svm, test)
    #======= GBM ========#
    trees <- sample(550:650, 1)
    model_gbm <- gbm::gbm(formula = frm, data = train_with_out, n.trees = trees, interaction.depth = 3, bag.fraction = 1, distribution = "gaussian")
    # Use generic predict() (S3 dispatch) rather than calling predict.gbm()
    # directly.
    pd_train[, i + 2 * nmdls] <- predict(model_gbm, train, n.trees = trees)
    pd_test[, i + 2 * nmdls] <- predict(model_gbm, test, n.trees = trees)
    #===== XGBoost ======#
    num_par <- sample(50:200, 1)
    iter_bos <- sample(40:50, 1)
    depth <- sample(3:5, 1)
    weight <- sample(seq(5, 7, 0.1), 1)
    model_xgb <- xgboost(data = as.matrix(train), label = as.matrix(out_train), num_parallel_tree = num_par, nrounds = iter_bos, max_depth = depth, min_child_weight = weight, subsample = 0.9, eta = 0.1, verbose = 0)
    pd_train[, i + 3 * nmdls] <- predict(model_xgb, as.matrix(train))
    pd_test[, i + 3 * nmdls] <- predict(model_xgb, as.matrix(test))
  }
  # Augment original features with first-level predictions.
  new_train <- cbind(train, pd_train)
  new_test <- cbind(test, pd_test)
  #==========================================================#
  #------------ Second-level Algorithm (XGBoost) ------------#
  #==========================================================#
  pred_meta <- matrix(0, nrow = nrow(new_test), ncol = nmeta_mdls)
  for (nm in seq_len(nmeta_mdls)) {
    num_par <- sample(50:200, 1)
    iter_bos <- sample(40:50, 1)
    depth <- sample(3:5, 1)
    weight <- sample(seq(5, 7, 0.1), 1)
    # BUG FIX: the meta-learner must be trained on the AUGMENTED training
    # features (new_train), not the raw `train` — otherwise the model is fit
    # on p columns but asked to predict on the p + 4*nmdls columns of
    # new_test, a feature-count mismatch (and not stacking at all).
    meta_model <- xgboost(data = as.matrix(new_train), label = as.matrix(out_train), num_parallel_tree = num_par, nrounds = iter_bos, max_depth = depth, min_child_weight = weight, subsample = 0.9, eta = 0.1, verbose = 0)
    pred_meta[, nm] <- predict(meta_model, as.matrix(new_test))
  }
  # Average the meta-learners' predictions.
  rowMeans(pred_meta)
}