0

I tried the answers posted in response to this question, but the error did not change. I am trying to preprocess both the training and test sets in the same way. They come from two different files and I am not sure if my instructor would be alright with me mixing up the sets so preprocessing before splitting them is not really an option. Why does the predict line work the first time with the training set but not with the test set? the two dataframes should be identical in their structure except for the individual values in the rows and the total number of rows.

##### Load libraries #####
library(readr)
library(caret)

###### Read in data ######

training = read_csv("~/Machine Learning 2/M1/buad5132-m1-training-data.csv")
test = read_csv("~/Machine Learning 2/M1/buad5132-m1-test-data.csv")

##### Preprocessing #####

### Change column classes 

#Training
training$INDEX = as.factor(training$INDEX)
training$TARGET_FLAG = as.factor(training$TARGET_FLAG)
training$PARENT1 = as.factor(training$PARENT1)
training$MSTATUS = as.factor(training$MSTATUS)
training$SEX = as.factor(training$SEX)
training$EDUCATION = as.factor(ifelse(training$EDUCATION == "<High School", "Less than High School", training$EDUCATION))
training$JOB = as.factor(training$JOB)
training$CAR_USE = as.factor(training$CAR_USE)
training$CAR_TYPE = as.factor(training$CAR_TYPE)
training$RED_CAR = as.factor(training$RED_CAR)
training$REVOKED = as.factor(training$REVOKED)
training$INCOME = suppressWarnings(as.numeric(gsub("[^0-9.]", "", training$INCOME)))
training$HOME_VAL = suppressWarnings(as.numeric(gsub("[^0-9.]", "", training$HOME_VAL)))
training$OLDCLAIM = suppressWarnings(as.numeric(gsub("[^0-9.]", "", training$HOME_VAL)))
training$BLUEBOOK = suppressWarnings(as.numeric(gsub("[^0-9.]", "", training$BLUEBOOK)))
training$URBANICITY = ifelse(grepl("Urban", training$URBANICITY), "Urban", "Rural")
training$URBANICITY = as.factor(training$URBANICITY)

#Test
test$INDEX = as.factor(test$INDEX)
test$TARGET_FLAG = as.factor(test$TARGET_FLAG)
test$PARENT1 = as.factor(test$PARENT1)
test$MSTATUS = as.factor(test$MSTATUS)
test$SEX = as.factor(test$SEX)
test$EDUCATION = as.factor(ifelse(test$EDUCATION == "<High School", "Less than High School", test$EDUCATION))
test$JOB = as.factor(test$JOB)
test$CAR_USE = as.factor(test$CAR_USE)
test$CAR_TYPE = as.factor(test$CAR_TYPE)
test$RED_CAR = as.factor(test$RED_CAR)
test$REVOKED = as.factor(test$REVOKED)
test$INCOME = suppressWarnings(as.numeric(gsub("[^0-9.]", "", test$INCOME)))
test$HOME_VAL = suppressWarnings(as.numeric(gsub("[^0-9.]", "", test$HOME_VAL)))
test$OLDCLAIM = suppressWarnings(as.numeric(gsub("[^0-9.]", "", test$HOME_VAL)))
test$BLUEBOOK = suppressWarnings(as.numeric(gsub("[^0-9.]", "", test$BLUEBOOK)))
test$URBANICITY = ifelse(grepl("Urban", test$URBANICITY), "Urban", "Rural")
test$URBANICITY = as.factor(test$URBANICITY)

### Dummy variables

#Training
trainDmyParams = dummyVars(~., training[,-c(1,2)])
training.dmy = as.data.frame(predict(trainDmyParams, training[,-c(1,2)]))
training.dmy$TARGET_FLAG = training$TARGET_FLAG 
names(training.dmy) = make.names(names(training.dmy))

#Test
testDmyParams = dummyVars(~., test[,-c(1,2)])
test.dmy = as.data.frame(predict(testDmyParams, test[,-c(1,2)]))
test.dmy$TARGET_FLAG = test$TARGET_FLAG
names(test.dmy) = make.names(names(test.dmy))

### Standardization and imputation

#Training
preProcessTrain = preProcess(training.dmy, method = c("center", "scale", "bagImpute"))
training.prepped = predict(preProcessTrain, training.dmy)

#Test
preProcessTest = preProcess(test.dmy, method = c("center", "scale", "bagImpute"))    
test.prepped = predict(preProcessTest, test.dmy)    # <--- error occurs on this line

Error in UseMethod("predict") : no applicable method for 'predict' applied to an object of class "NULL"
StupidWolf
  • 45,075
  • 17
  • 40
  • 72
Jacob Myer
  • 479
  • 5
  • 22
  • Does this answer your question? [UseMethod("predict") : no applicable method for 'predict' applied to an object of class "train"](https://stackoverflow.com/questions/38623624/usemethodpredict-no-applicable-method-for-predict-applied-to-an-object-o) – Nareman Darwish Jan 11 '20 at 17:02
  • What is the output of `str(preProcessTest)`? – duckmayr Jan 11 '20 at 17:36
  • @Nareman Darwish I have tried `library(caret)` before preProcessing the test data. This did not change the error. I could be wrong but I believe that `bagImpute` belongs to the caret package. That thread also made me wonder why preprocessing the training set worked without error. – Jacob Myer Jan 11 '20 at 17:48

0 Answers0