1

I am performing stack ensembling and I am continuously getting error at -ncol(test) as invalid argument to unary operator. I tried all the possible ways in google but nothing worked.

#----- Stacking -----#
stacking_et0 <- function(train, test, nmeta_mdls=1){
  nmdls <- 3
  test <- test[,-ncol(test)]
  out_train <- train[,ncol(train)]
  train <- train[,-ncol(train)]

Full Code:


#------ On the suitability of stacking-based ensembles in smart agriculture for evapotranspiration prediction ------#
#===================================================================================================================#
#----- libraries -----#
library(gbm)
library(e1071)
library(xgboost)
library(randomForest)
library(caTools)
dataset<-read.csv("aln.csv")

split_data <- function(dataset, train = TRUE){
  length<- nrow(dataset)
  total_row <- length *0.8
  split <- 1:total_row
  if (train ==TRUE){ 
    train_df <- dataset[split, ] 
    return(train_df)        
  } else {
    test_df <- dataset[-split, ] 
    return(test_df)     
  }
}
train <- split_data(dataset, train = TRUE)
test <- split_data(dataset, train = FALSE)


ans <- stacking_et0(train, test)
View(ans)

#----- Stacking -----#
stacking_et0 <- function(train, test, nmeta_mdls=1){
  nmdls <- 3
  test <- test[,-ncol(test)]
  out_train <- train[,ncol(train)]
  train <- train[,-ncol(train)]
  
  #==========================================================#
  #----- First-level Algorithms (RF, SVM, GBM, XGBoost) -----#
  #==========================================================#
  pd_train <- matrix(0,nrow(train),12)
  pd_test <- matrix(0,nrow(test),12)
  
  train_with_out <- cbind(train, out_train)
  frm <- as.formula(paste(names(train_with_out)[ncol(train_with_out)],"~.",sep = ""))
  
  for (i in 1:nmdls) {
    #=== RandomForest ===#
    trees <- sample(350:600,1)
    node <- sample(5:15,1)
    
    model_rdm <- randomForest(frm, data=train_with_out, ntree=trees, nodesize = node)
    pd_train[,i] <- predict(model_rdm, train)
    pd_test[,i] <- predict(model_rdm, test)
    
    #======= SVM ========#
    tole <- round(runif(1, 0.0001,0.01), 4)
    reg <- sample(2:14,1)
    
    model_svm <- e1071::svm(x = train, y = out_train, tolerance=tole, cost= reg, scale = TRUE, type = "eps-regression", kernel = "radial")
    pd_train[,i+3] <- predict(model_svm, train)
    pd_test[,i+3] <- predict(model_svm, test)
    
    #======= GBM ========#
    trees <- sample(550:650,1)
    
    model_gbm <- gbm::gbm(formula = frm,  data = train_with_out, n.trees = trees, interaction.depth = 3, bag.fraction = 1, distribution = "gaussian")
    pd_train[,i+6] <- predict.gbm(model_gbm, train, n.trees = trees)
    pd_test[,i+6] <- predict.gbm(model_gbm, test, n.trees = trees)
    
    #===== XGBoost ======#
    num_par <- sample(50:200,1)
    iter_bos <- sample(40:50,1)
    depth <- sample(3:5,1)
    weight <- sample(seq(5,7,0.1),1)
    
    model_xgb <- xgboost(data = as.matrix(train), label = as.matrix(out_train), num_parallel_tree=num_par, nrounds = iter_bos, max_depth= depth, min_child_weight=weight, subsample=0.9, eta= 0.1, verbose = 0)
    pd_train[,i+9] <- predict(model_xgb, as.matrix(train))
    pd_test[,i+9] <- predict(model_xgb, as.matrix(test))
  }
  
  new_train <- cbind(train,pd_train)
  new_test <- cbind(test,pd_test)
  
  #==========================================================#
  #------------ Second-level Algorithm (XGBoost) ------------#
  #==========================================================#
  pred_meta <- matrix(0,nrow = nrow(new_test), ncol = nmeta_mdls)
  for(nm in 1:nmeta_mdls){
    num_par <- sample(50:200,1)
    iter_bos <- sample(40:50,1)
    depth <- sample(3:5,1)
    weight <- sample(seq(5,7,0.1),1)
    
    meta_model <- xgboost(data = as.matrix(train), label = as.matrix(out_train), num_parallel_tree=num_par, nrounds = iter_bos, max_depth= depth, min_child_weight=weight, subsample=0.9, eta= 0.1, verbose = 0)
    pred_meta[,nm] <- predict(meta_model, as.matrix(new_test))
  }
  
  end_pred <- rowMeans(pred_meta)
  return(end_pred)
  
}  
camille
  • 16,432
  • 18
  • 38
  • 60
  • HI, welcome to SO, you should provide a [minimal reproductible example](https://stackoverflow.com/questions/5963269/how-to-make-a-great-r-reproducible-example) with a simple dataset so we could help you find the problem. You probably have a problem of type (see [here for example, similar error](https://stackoverflow.com/questions/61624076/error-in-title-invalid-argument-to-unary-operator-execution-halted)). What is the result of `ncol(test)` if you execute it? – julien.leroux5 Oct 26 '21 at 09:35

1 Answers1

1

The error "Error in -ncol(test) : invalid argument to unary operator" indicates that you cannot use - whatever ncol is returning.

If test is a data frame, matrix or array, ncol will return an integer and - that will make sense.

I suspect in your case, that whatever test you are passing into the function is not one of the above, in which case ncol(test) returns NULL. And -NULL is non-sensible:

> -NULL
Error in -NULL : invalid argument to unary operator
MrGumble
  • 5,631
  • 1
  • 18
  • 33