I am attempting to use the tidymodels stacks package to perform ensemble modeling. Following the instructions provided in their article, I was able to reproduce the example successfully.
However, when I added parallelization during hyperparameter tuning for the "knn_res" section of the code:
library(doParallel)
library(parallel)
set.seed(2020)
cls <- makePSOCKcluster(parallelly::availableCores())
registerDoParallel(cls)
knn_res <-
tune_grid(
knn_wflow,
resamples = folds,
metrics = metric,
grid = 4,
control = ctrl_grid
)
stopCluster(cls)
I encountered an error when running the "tree_frogs_model_st" section of the code:
tree_frogs_model_st <-
tree_frogs_data_st %>%
blend_predictions()
The error message states:
Error in summary.connection(connection) : invalid connection
I believe this issue may be related to the stacks::control_stack_grid() function, but I am unsure of how to resolve it. Please advice.
UPDATE (full reprex)
I excluded the linear model for brevity.
library(tidymodels)
library(stacks)
data("tree_frogs")
# subset the data
tree_frogs <- tree_frogs %>%
filter(!is.na(latency)) %>%
select(-c(clutch, hatched))
# some setup: resampling and a basic recipe
set.seed(1)
tree_frogs_split <- initial_split(tree_frogs)
tree_frogs_train <- training(tree_frogs_split)
tree_frogs_test <- testing(tree_frogs_split)
set.seed(1)
folds <- rsample::vfold_cv(tree_frogs_train, v = 5)
tree_frogs_rec <-
recipe(latency ~ ., data = tree_frogs_train)
metric <- metric_set(rmse)
ctrl_grid <- control_stack_grid()
ctrl_res <- control_stack_resamples()
# create a model definition
knn_spec <-
nearest_neighbor(
mode = "regression",
neighbors = tune("k")
) %>%
set_engine("kknn")
knn_spec
#> K-Nearest Neighbor Model Specification (regression)
#>
#> Main Arguments:
#> neighbors = tune("k")
#>
#> Computational engine: kknn
knn_rec <-
tree_frogs_rec %>%
step_dummy(all_nominal_predictors()) %>%
step_zv(all_predictors()) %>%
step_impute_mean(all_numeric_predictors()) %>%
step_normalize(all_numeric_predictors())
knn_rec
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 4
#>
#> Operations:
#>
#> Dummy variables from all_nominal_predictors()
#> Zero variance filter on all_predictors()
#> Mean imputation for all_numeric_predictors()
#> Centering and scaling for all_numeric_predictors()
knn_wflow <-
workflow() %>%
add_model(knn_spec) %>%
add_recipe(knn_rec)
knn_wflow
#> ══ Workflow ════════════════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: nearest_neighbor()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 4 Recipe Steps
#>
#> • step_dummy()
#> • step_zv()
#> • step_impute_mean()
#> • step_normalize()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#> K-Nearest Neighbor Model Specification (regression)
#>
#> Main Arguments:
#> neighbors = tune("k")
#>
#> Computational engine: kknn
library(doParallel)
#> Loading required package: foreach
#>
#> Attaching package: 'foreach'
#> The following objects are masked from 'package:purrr':
#>
#> accumulate, when
#> Loading required package: iterators
#> Loading required package: parallel
library(parallel)
set.seed(2020)
cls <- makePSOCKcluster(parallelly::availableCores())
registerDoParallel(cls)
knn_res <-
tune_grid(
knn_wflow,
resamples = folds,
metrics = metric,
grid = 4,
control = ctrl_grid
)
stopCluster(cls)
knn_res
#> # Tuning results
#> # 5-fold cross-validation
#> # A tibble: 5 × 5
#> splits id .metrics .notes .predictions
#> <list> <chr> <list> <list> <list>
#> 1 <split [343/86]> Fold1 <tibble [4 × 5]> <tibble [0 × 3]> <tibble [344 × 5]>
#> 2 <split [343/86]> Fold2 <tibble [4 × 5]> <tibble [0 × 3]> <tibble [344 × 5]>
#> 3 <split [343/86]> Fold3 <tibble [4 × 5]> <tibble [0 × 3]> <tibble [344 × 5]>
#> 4 <split [343/86]> Fold4 <tibble [4 × 5]> <tibble [0 × 3]> <tibble [344 × 5]>
#> 5 <split [344/85]> Fold5 <tibble [4 × 5]> <tibble [0 × 3]> <tibble [340 × 5]>
# create a model definition -----
svm_spec <-
svm_rbf(
cost = tune("cost"),
rbf_sigma = tune("sigma")
) %>%
set_engine("kernlab") %>%
set_mode("regression")
# extend the recipe
svm_rec <-
tree_frogs_rec %>%
step_dummy(all_nominal_predictors()) %>%
step_zv(all_predictors()) %>%
step_impute_mean(all_numeric_predictors()) %>%
step_corr(all_predictors()) %>%
step_normalize(all_numeric_predictors())
# add both to a workflow
svm_wflow <-
workflow() %>%
add_model(svm_spec) %>%
add_recipe(svm_rec)
# tune cost and sigma and fit to the 5-fold cv
set.seed(2020)
cls <- makePSOCKcluster(parallelly::availableCores())
registerDoParallel(cls)
svm_res <-
tune_grid(
svm_wflow,
resamples = folds,
grid = 6,
metrics = metric,
control = ctrl_grid
)
stopCluster(cls)
svm_res
#> # Tuning results
#> # 5-fold cross-validation
#> # A tibble: 5 × 5
#> splits id .metrics .notes .predictions
#> <list> <chr> <list> <list> <list>
#> 1 <split [343/86]> Fold1 <tibble [6 × 6]> <tibble [0 × 3]> <tibble [516 × 6]>
#> 2 <split [343/86]> Fold2 <tibble [6 × 6]> <tibble [0 × 3]> <tibble [516 × 6]>
#> 3 <split [343/86]> Fold3 <tibble [6 × 6]> <tibble [0 × 3]> <tibble [516 × 6]>
#> 4 <split [343/86]> Fold4 <tibble [6 × 6]> <tibble [0 × 3]> <tibble [516 × 6]>
#> 5 <split [344/85]> Fold5 <tibble [6 × 6]> <tibble [0 × 3]> <tibble [510 × 6]>
tree_frogs_data_st <-
stacks() %>%
add_candidates(knn_res) %>%
add_candidates(svm_res)
tree_frogs_data_st
#> # A data stack with 2 model definitions and 10 candidate members:
#> # knn_res: 4 model configurations
#> # svm_res: 6 model configurations
#> # Outcome: latency (numeric)
tree_frogs_model_st <-
tree_frogs_data_st %>%
blend_predictions()
#> Error in summary.connection(connection): invalid connection
tree_frogs_model_st
#> Error in eval(expr, envir, enclos): object 'tree_frogs_model_st' not found
Created on 2023-01-27 by the reprex package (v2.0.1)
Session infosessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.1.2 (2021-11-01)
#> os Ubuntu 18.04.6 LTS
#> system x86_64, linux-gnu
#> ui X11
#> language (EN)
#> collate C.UTF-8
#> ctype C.UTF-8
#> tz Asia/Tokyo
#> date 2023-01-27
#> pandoc 2.14.0.3 @ /usr/lib/rstudio-server/bin/pandoc/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.1.2)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.2)
#> broom * 1.0.1 2022-08-29 [1] CRAN (R 4.1.2)
#> butcher 0.1.5 2021-06-28 [1] CRAN (R 4.1.2)
#> class 7.3-19 2021-05-03 [4] CRAN (R 4.0.5)
#> cli 3.6.0 2023-01-09 [1] CRAN (R 4.1.2)
#> codetools 0.2-18 2020-11-04 [4] CRAN (R 4.0.3)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.1.2)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.1.2)
#> DBI 1.1.2 2021-12-20 [1] CRAN (R 4.1.2)
#> dials * 1.1.0 2022-11-04 [1] CRAN (R 4.1.2)
#> DiceDesign 1.9 2021-02-13 [1] CRAN (R 4.1.2)
#> digest 0.6.29 2021-12-01 [2] CRAN (R 4.1.2)
#> doParallel * 1.0.17 2022-02-07 [1] CRAN (R 4.1.2)
#> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.1.2)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.1.2)
#> evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.2)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.1.2)
#> fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.1.2)
#> foreach * 1.5.2 2022-02-02 [1] CRAN (R 4.1.2)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.2)
#> furrr 0.3.1 2022-08-15 [1] CRAN (R 4.1.2)
#> future 1.25.0 2022-04-24 [1] CRAN (R 4.1.2)
#> future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.1.2)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.1.2)
#> ggplot2 * 3.4.0 2022-11-04 [1] CRAN (R 4.1.2)
#> glmnet 4.1-4 2022-04-15 [1] CRAN (R 4.1.2)
#> globals 0.15.0 2022-05-09 [1] CRAN (R 4.1.2)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.2)
#> gower 1.0.0 2022-02-03 [1] CRAN (R 4.1.2)
#> GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.1.2)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.1.2)
#> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.1.2)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.1.2)
#> htmltools 0.5.2 2021-08-25 [2] CRAN (R 4.1.2)
#> igraph 1.3.1 2022-04-20 [1] CRAN (R 4.1.2)
#> infer * 1.0.0 2021-08-13 [1] CRAN (R 4.1.2)
#> ipred 0.9-12 2021-09-15 [1] CRAN (R 4.1.2)
#> iterators * 1.0.14 2022-02-05 [1] CRAN (R 4.1.2)
#> kernlab 0.9-30 2022-04-02 [1] CRAN (R 4.1.2)
#> kknn 1.3.1 2016-03-26 [1] CRAN (R 4.1.2)
#> knitr 1.38 2022-03-25 [1] CRAN (R 4.1.2)
#> lattice 0.20-45 2021-09-22 [4] CRAN (R 4.1.1)
#> lava 1.6.10 2021-09-02 [1] CRAN (R 4.1.2)
#> lhs 1.1.5 2022-03-22 [1] CRAN (R 4.1.2)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.1.2)
#> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.1.2)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.1.2)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.1.2)
#> MASS 7.3-54 2021-05-03 [4] CRAN (R 4.0.5)
#> Matrix 1.3-4 2021-06-01 [4] CRAN (R 4.1.0)
#> modeldata * 0.1.1 2021-07-14 [1] CRAN (R 4.1.2)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.1.2)
#> nnet 7.3-16 2021-05-03 [4] CRAN (R 4.0.5)
#> parallelly 1.31.1 2022-04-22 [1] CRAN (R 4.1.2)
#> parsnip * 1.0.3 2022-11-11 [1] CRAN (R 4.1.2)
#> pillar 1.7.0 2022-02-01 [1] CRAN (R 4.1.2)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.2)
#> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.1.2)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.1.2)
#> R.cache 0.15.0 2021-04-30 [1] CRAN (R 4.1.2)
#> R.methodsS3 1.8.1 2020-08-26 [1] CRAN (R 4.1.2)
#> R.oo 1.24.0 2020-08-26 [1] CRAN (R 4.1.2)
#> R.utils 2.11.0 2021-09-26 [1] CRAN (R 4.1.2)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.2)
#> Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.1.2)
#> recipes * 1.0.3 2022-11-09 [1] CRAN (R 4.1.2)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.2)
#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.1.2)
#> rmarkdown 2.13 2022-03-10 [1] CRAN (R 4.1.2)
#> rpart 4.1-15 2019-04-12 [4] CRAN (R 4.0.0)
#> rsample * 1.1.1 2022-12-07 [1] CRAN (R 4.1.2)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.2)
#> scales * 1.2.0 2022-04-13 [1] CRAN (R 4.1.2)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.2)
#> shape 1.4.6 2021-05-19 [1] CRAN (R 4.1.2)
#> stacks * 1.0.1 2022-12-14 [1] CRAN (R 4.1.2)
#> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.1.2)
#> styler 1.7.0 2022-03-13 [1] CRAN (R 4.1.2)
#> survival 3.2-13 2021-08-24 [4] CRAN (R 4.1.1)
#> tibble * 3.1.7 2022-05-03 [1] CRAN (R 4.1.2)
#> tidymodels * 0.2.0 2022-03-19 [1] CRAN (R 4.1.2)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.1.2)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.1.2)
#> timeDate 3043.102 2018-02-21 [1] CRAN (R 4.1.2)
#> tune * 1.0.1 2022-10-09 [1] CRAN (R 4.1.2)
#> usethis 2.1.5 2021-12-09 [1] CRAN (R 4.1.2)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.2)
#> vctrs 0.5.1 2022-11-16 [1] CRAN (R 4.1.2)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.2)
#> workflows * 1.1.2 2022-11-16 [1] CRAN (R 4.1.2)
#> workflowsets * 0.2.1 2022-03-15 [1] CRAN (R 4.1.2)
#> xfun 0.31 2022-05-10 [1] CRAN (R 4.1.2)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.2)
#> yardstick * 1.1.0 2022-09-07 [1] CRAN (R 4.1.2)
#>
#> [1] /home/ubuntu/R/x86_64-pc-linux-gnu-library/4.1
#> [2] /usr/local/lib/R/site-library
#> [3] /usr/lib/R/site-library
#> [4] /usr/lib/R/library
#>
#> ──────────────────────────────────────────────────────────────────────────────