[I looked into similar threads here and in github, and none of the issues suggested by Max and others seem to relate to my case.]
I have seen some reports here of the formula interface failing while the non-formula interface works fine. My problem is the opposite. The `train()`
call below, using the formula interface, works perfectly:
# Formula interface: `class` is the response, every other column of
# trainData is a predictor.
glmTune <- train(
  class ~ .,
  data = trainData,
  method = "glmnet",
  tuneGrid = tune.grid,
  trControl = train.control
)
This one below gives NA errors:
# Non-formula interface: pass the predictor columns and the response to
# train() separately instead of through a formula.
predictors <- trainData[, names(trainData) != "class"]
response <- trainData$class
glmTune <- train(
  x = predictors,
  y = response,
  method = "glmnet",
  tuneGrid = tune.grid,
  trControl = train.control
)
This happens with both `glmnet` and `xgboost`, and regardless of whether `y` is a factor or numeric, but `x` has a lot of factor variables. Thanks for any help.
I wanted to add that the error for a factor `y` is this:
Something is wrong; all the Accuracy metric values are missing:
Accuracy Kappa
Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA
Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA
NA's :243 NA's :243
Error: Stopping
In addition: Warning message:
In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
There were missing values in resampled performance measures.
And for a numeric `y` it is only slightly different (a different performance metric):
Something is wrong; all the RMSE metric values are missing:
RMSE Rsquared
Min. : NA Min. : NA
1st Qu.: NA 1st Qu.: NA
Median : NA Median : NA
Mean :NaN Mean :NaN
3rd Qu.: NA 3rd Qu.: NA
Max. : NA Max. : NA
NA's :100 NA's :100
Error: Stopping
In addition: Warning message:
In nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
There were missing values in resampled performance measures.
Here is the code:
library(caret)
library(dplyr)
library(glmnet)

# Reproduction script: tune a glmnet model for `lnprice` with caret.
# `df` must already exist in the session;
# see dput(droplevels(head(df, 20))) output of data below.

# 70%/30% split
set.seed(42)
inTrain <- createDataPartition(df$lnprice, p = 0.7, list = FALSE)
trainData <- df[inTrain, ]
testData <- df[-inTrain, ]

# train model
# Resampling: 10-fold cross-validation repeated 5 times, no parallelism.
train.control <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  allowParallel = FALSE
)

# Tuning grid: 20 lambda values crossed with 3 alpha values.
tune.grid <- expand.grid(
  lambda = seq(0.0001, 0.1, length.out = 20),
  alpha = c(0, 0.5, 1)
)

# Predictors and response for the non-formula interface (every column except
# `lnprice`, and `lnprice` itself). X keeps its factor columns as factors.
X <- trainData[, !(names(trainData) %in% "lnprice")]
Y <- trainData$lnprice

# NOTE(review): the formula interface expands factor predictors into dummy
# variables, whereas train(x = , y = ) appears to pass X through unchanged,
# and glmnet needs numeric input -- the likely cause of the all-NA metrics.
# Confirm against the caret documentation. A dummy-encoded alternative would
# be:
#   x = model.matrix(lnprice ~ ., data = trainData)[, -1], y = Y,
fit <- train(
  # x = X, y = Y, # non-formula
  lnprice ~ ., data = trainData, # formula
  method = "glmnet",
  preProcess = c("zv", "center", "scale"),
  tuneGrid = tune.grid,
  trControl = train.control
)

# plot model
print(plot(fit))
> dput(droplevels(head(df,20)))
structure(list(fuel.type = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "gas", class = "factor"),
aspiration = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("std",
"turbo"), class = "factor"), num.of.doors = structure(c(2L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
2L, 1L, 2L, 2L), .Label = c("four", "two"), class = "factor"),
body.style = structure(c(1L, 1L, 2L, 3L, 3L, 3L, 3L, 4L,
3L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L), .Label = c("convertible",
"hatchback", "sedan", "wagon"), class = "factor"), drive.wheels = structure(c(2L,
2L, 2L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L), .Label = c("fwd", "rwd", "X4wd"), class = "factor"),
engine.location = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "front", class = "factor"),
wheel.base = c(88.6, 88.6, 94.5, 99.8, 99.4, 99.8, 105.8,
105.8, 105.8, 99.5, 101.2, 101.2, 101.2, 101.2, 103.5, 103.5,
103.5, 110, 88.4, 94.5), length = c(168.8, 168.8, 171.2,
176.6, 176.6, 177.3, 192.7, 192.7, 192.7, 178.2, 176.8, 176.8,
176.8, 176.8, 189, 189, 193.8, 197, 141.1, 155.9), width = c(64.1,
64.1, 65.5, 66.2, 66.4, 66.3, 71.4, 71.4, 71.4, 67.9, 64.8,
64.8, 64.8, 64.8, 66.9, 66.9, 67.9, 70.9, 60.3, 63.6), height = c(48.8,
48.8, 52.4, 54.3, 54.3, 53.1, 55.7, 55.7, 55.9, 52, 54.3,
54.3, 54.3, 54.3, 55.7, 55.7, 53.7, 56.3, 53.2, 52), curb.weight = c(2548L,
2548L, 2823L, 2337L, 2824L, 2507L, 2844L, 2954L, 3086L, 3053L,
2395L, 2395L, 2710L, 2765L, 3055L, 3230L, 3380L, 3505L, 1488L,
1874L), engine.type = structure(c(1L, 1L, 4L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L), .Label = c("dohc",
"l", "ohc", "ohcv"), class = "factor"), num.of.cylinders = structure(c(2L,
2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L), .Label = c("five.six", "four.or.less"), class = "factor"),
engine.size = c(130L, 130L, 152L, 109L, 136L, 136L, 136L,
136L, 131L, 131L, 108L, 108L, 164L, 164L, 164L, 209L, 209L,
209L, 61L, 90L), fuel.system = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L), .Label = c("mpfi", "X2bbl"), class = "factor"), bore = c(3.47,
3.47, 2.68, 3.19, 3.19, 3.19, 3.19, 3.19, 3.13, 3.13, 3.5,
3.5, 3.31, 3.31, 3.31, 3.62, 3.62, 3.62, 2.91, 3.03), stroke = c(2.68,
2.68, 3.47, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 3.4, 2.8, 2.8,
3.19, 3.19, 3.19, 3.39, 3.39, 3.39, 3.03, 3.11), compression.ratio = c(9,
9, 9, 10, 8, 8.5, 8.5, 8.5, 8.3, 7, 8.8, 8.8, 9, 9, 9, 8,
8, 8, 9.5, 9.6), horsepower = c(111, 111, 154, 102, 115,
110, 110, 110, 140, 160, 101, 101, 121, 121, 121, 182, 182,
182, 48, 70), peak.rpm = c(5000L, 5000L, 5000L, 5500L, 5500L,
5500L, 5500L, 5500L, 5500L, 5500L, 5800L, 5800L, 4250L, 4250L,
4250L, 5400L, 5400L, 5400L, 5100L, 5400L), city.mpg = c(21L,
21L, 19L, 24L, 18L, 19L, 19L, 19L, 17L, 16L, 23L, 23L, 21L,
21L, 20L, 16L, 16L, 15L, 47L, 38L), highway.mpg = c(27L,
27L, 26L, 30L, 22L, 25L, 25L, 25L, 20L, 22L, 29L, 29L, 28L,
28L, 25L, 22L, 22L, 20L, 53L, 43L), make = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L), .Label = c("alfa.romero", "audi", "bmw",
"chevrolet"), class = "factor"), lnprice = c(9.5101, 9.7111,
9.7111, 9.5432, 9.7671, 9.6323, 9.7819, 9.848, 10.0806, 9.69176,
9.7069, 9.7365, 9.9508, 9.9573, 10.1091, 10.334, 10.629,
10.5154, 8.5469, 8.7475)), .Names = c("fuel.type", "aspiration",
"num.of.doors", "body.style", "drive.wheels", "engine.location",
"wheel.base", "length", "width", "height", "curb.weight", "engine.type",
"num.of.cylinders", "engine.size", "fuel.system", "bore", "stroke",
"compression.ratio", "horsepower", "peak.rpm", "city.mpg", "highway.mpg",
"make", "lnprice"), row.names = c(NA, 20L), class = "data.frame")