I am doing feature selection using LASSO regression with tidymodels and glmnet. In glmnet it is possible to force variables into the model by using the penalty.factor argument (see here and here, for example). Is it possible to do the same using tidymodels?
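For reference, this is roughly how it works in plain glmnet (a minimal sketch: a penalty factor of 0 exempts a variable from the LASSO penalty, so it is never shrunk out of the model; here I force wt in, as an example):

library(glmnet)

x = as.matrix(mtcars[, -1])
y = mtcars$mpg

# A penalty factor of 0 means "never penalise this variable", so it is
# forced into the model; 1 is the default penalty.
pf = ifelse(colnames(x) == "wt", 0, 1)

fit = glmnet(x, y, alpha = 1, penalty.factor = pf)

And here is my current tidymodels workflow: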
library(tidymodels)
library(vip)
library(forcats)
library(dplyr)
library(ggplot2)
library(data.table)
# Define data split
datasplit = rsample::initial_split(mtcars, prop = 0.8)
data_training = rsample::training(datasplit)
data_testing = rsample::testing(datasplit)
# Model specification - should penalty.factor go here?
model_spec = parsnip::linear_reg(penalty = tune::tune(),
                                 mixture = 1) %>%
  parsnip::set_engine("glmnet")
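# My guess (a hypothetical, untested sketch): parsnip forwards extra
# engine arguments from set_engine() to glmnet, so perhaps penalty.factor
# can be supplied there. The vector would have to follow the predictor
# column order (here forcing wt in with a zero penalty), but I could not
# confirm this is the supported way:
model_spec_forced = parsnip::linear_reg(penalty = tune::tune(),
                                        mixture = 1) %>%
  parsnip::set_engine("glmnet",
                      penalty.factor = ifelse(names(mtcars)[-1] == "wt", 0, 1))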
# Model recipe, built on the training data
rec = recipes::recipe(mpg ~ ., data = data_training)
# Model workflow
wf = workflows::workflow() %>%
  workflows::add_recipe(rec) %>%
  workflows::add_model(model_spec)
# Resampling
data_resample = rsample::vfold_cv(data_training,
                                  repeats = 3,
                                  v = 2)
hyperparam_grid = dials::grid_regular(dials::penalty(),
                                      levels = 100)
# Define metrics
metrics = yardstick::metric_set(yardstick::rsq,
                                yardstick::mape,
                                yardstick::mpe)
# Tune the model
tune_grid_results = tune::tune_grid(
  wf,
  resamples = data_resample,
  grid = hyperparam_grid,
  metrics = metrics
)
# Collect and finalise best model
selected_model = tune_grid_results %>%
  tune::select_best(metric = "mape")
final_model = tune::finalize_workflow(wf, selected_model)
final_model_fit = final_model %>%
  parsnip::fit(data_training) %>%
  workflows::extract_fit_parsnip()
# Plot variables importance
t_importance = final_model_fit %>%
  vip::vi(lambda = selected_model$penalty) %>%
  dplyr::mutate(Variable = forcats::fct_reorder(Variable, Importance)) %>%
  data.table() %>%
  setorder(-Importance)
t_importance %>%
  ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
  geom_col() +
  scale_x_continuous(expand = c(0, 0)) +
  labs(y = NULL) +
  theme_minimal()
Created on 2022-03-14 by the reprex package (v2.0.1)