I would like to use LASSO to train the dataset below, using 10-fold CV repeated 5 times for each method.
This is my current code, but I get an error message and I think I'm doing it wrong. Please let me know how it should be changed!
Error message:
Error in if (nulldev == 0) stop("y is constant; gaussian glmnet fails at standardization step") : missing value where TRUE/FALSE needed
Code:
set.seed(150847)
model.mat <- model.matrix( Class ~ ., train.dat)
x.train <- model.mat [, -1]
model.mat <- model.matrix( Class ~ ., test.dat)
x.test <- model.mat [, -1] #
dv = "Class"
y.train <- as.matrix(train.dat[, dv])
y.test <- as.matrix(test.dat[, dv])
#-----------------------------------------
train.means <- colMeans(x.train)
train.sd <- apply(x.train, 2, sd, na.rm = TRUE)
x.train <- as.matrix( scale(x.train, center=TRUE, scale=TRUE))
x.test <- scale(x.test, center = train.means, scale = train.sd)
tr.Control <- trainControl(
method = "repeatedcv",
number = 10,
repeats = 5)
folds.k = 10
cv.lasso = cv.glmnet (x.train,y.train,alpha=1, nfolds=folds.k,
standardize = FALSE , intercept=TRUE)
Training data "train.dat"
structure(list(x1 = c(-2.48941991263215, -0.338448010439568,
-1.07796826066294, 1.47833943928667, -0.19013864138727), x2 = c(-1.05660014431803,
-1.75938416652951, -1.94445363537753, 2.65603302304451, -0.818464313993987
), x3 = c(-0.928819609794076, -0.24431689960579, -0.26055539595143,
-0.500006066823682, 0.19947842697796), x4 = c(0.167674885884102,
-0.714651010370962, 0.501841366660604, -0.261356553409404, -0.121081806911108
), x5 = c(0.826293680351228, -0.0522530856542289, 0.456970179919153,
-0.483860304113477, 0.827117071952671), x6 = c(0.229410925647244,
0.367363323224708, 0.0097867208532989, 0.6599692159798, 0.454895325470716
), x7 = c(0.277445634594187, 0.00411403737962246, 0.912381467409432,
0.0911673668306321, 0.0729619956109673), x8 = c(0.403632419444111,
-1.76177968998027, 0.818339220424296, 0.77257524859948, -1.45634200383022
), x9 = c(0.666298305218494, 1.28068782733132, 0.243489971387096,
0.00907678612957343, 0.0688231437305274), x10 = c(-0.674113519037765,
-0.221583500325269, 0.555570222138564, 0.572105515491289, 2.32224808146226
), x11 = c(-0.503906052691753, -0.170463238913734, 1.81239693119702,
-0.310259330876175, 0.373355276436323), x12 = c(0.569346066655445,
0.665270271264321, -1.04590277174209, -1.08749423169221, -0.717326819631265
), Class = c("No", "Yes", "Yes", "No", "Yes")), row.names = c(NA,
5L), class = "data.frame")
test data "test.dat"
structure(list(x1 = c(-1.64667008195797, -1.12098964581992, 0.473422701448559,
-1.60461690923768, -0.00749172927415004), x2 = c(-1.16286992117132,
-0.141705544905757, 1.51853911670816, -0.424087214057948, 0.377124786278201
), x3 = c(1.19241045039945, 1.48443779149667, -1.88482327525843,
0.534626743634202, 0.310818572560298), x4 = c(-0.930735300584522,
-1.7670393982441, -1.14191107118164, 0.61126176594059, 0.155931701957036
), x5 = c(-0.820323897991329, -0.926557129248977, 0.965568253770471,
-0.599971735384315, -0.512967912014574), x6 = c(0.29261250467971,
0.269901459803805, 0.9167238867376, 0.000339579302817583, 0.398331164848059
), x7 = c(0.262839384144172, 0.246979274321347, 0.105181680992246,
0.170856263954192, 0.310768554685637), x8 = c(-0.547301867028384,
0.888728318998235, 0.289208399599502, -1.37035914659536, -1.25498394079555
), x9 = c(1.91475599789737, -0.663301448358402, 0.73492523418078,
0.252558835925375, -1.13201069045815), x10 = c(-1.56005878668401,
0.47042681670553, -0.294760033296374, -3.26819220678081, -0.921147419029862
), x11 = c(0.287604446919617, -0.161270837465456, -2.23402479016399,
-2.25198777628389, -0.61977925827879), x12 = c(1.21464666058049,
-0.752948562276805, -1.03149583160279, 0.618987295189923, 0.274782272114187
), Class = c("No", "No", "No", "Yes", "Yes")), row.names = c(NA,
5L), class = "data.frame")