1

I would like to use tidymodels to fit a C5.0 rule-based classification model. I have specified the model as follows:

# Model specification only: a C5.0 rule-based classifier. Nothing is fit here.
c5_spec <- C5_rules()
c5_spec <- set_engine(c5_spec, "C5.0")
c5_spec <- set_mode(c5_spec, "classification")

In the documentation for the C5_rules() function, I read the following:

The model is not trained or fit until the fit.model_spec() function is used with the data.

I'm not quite sure what I need to do with the parsnip model object after that. Every time I try to fit the model, I get the following error:

preprocessor 1/1, model 1/1 (predictions): Error in predict.C5.0(object = object$fit, newdata = new_data, type = "class"): either a tree or rules must be provided

What am I missing?

Thank you very much!

cyn
  • 11
  • 1

2 Answers

4

That's a good start! You've defined your model spec, but if you want to fit it using a workflow, you'll also need to create a recipe and a workflow. Julia Silge's blog is hands down the best resource for getting used to working with tidymodels. Here's a reprex that fits a C5 classifier once to training data:

# load tidymodels & rules
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip
library(rules)
#> Warning: package 'rules' was built under R version 4.1.1
#> 
#> Attaching package: 'rules'
#> The following object is masked from 'package:dials':
#> 
#>     max_rules

# example training dataset
cars_train <- as_tibble(mtcars)

# convert the number of cylinders to a factor: a classification outcome should
# be a factor, and yardstick's conf_mat() expects `truth` to be a factor
# (a character column triggers a coercion warning, or an error in newer
# yardstick versions)
cars_train <-
  cars_train %>%
  mutate(cyl = as.factor(cyl))

# training df
cars_train
#> # A tibble: 32 x 11
#>      mpg cyl    disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  21   6      160    110  3.9   2.62  16.5     0     1     4     4
#>  2  21   6      160    110  3.9   2.88  17.0     0     1     4     4
#>  3  22.8 4      108     93  3.85  2.32  18.6     1     1     4     1
#>  4  21.4 6      258    110  3.08  3.22  19.4     1     0     3     1
#>  5  18.7 8      360    175  3.15  3.44  17.0     0     0     3     2
#>  6  18.1 6      225    105  2.76  3.46  20.2     1     0     3     1
#>  7  14.3 8      360    245  3.21  3.57  15.8     0     0     3     4
#>  8  24.4 4      147.    62  3.69  3.19  20       1     0     4     2
#>  9  22.8 4      141.    95  3.92  3.15  22.9     1     0     4     2
#> 10  19.2 6      168.   123  3.92  3.44  18.3     1     0     4     4
#> # ... with 22 more rows

# setup recipe with no preprocessing: cyl is the outcome, every other column
# is a predictor
cars_rec <-
  recipe(cyl ~ ., data = cars_train)

# specify c5 model; no need to set mode (can only be used for classification)
cars_spec <-
  C5_rules() %>%
  set_engine("C5.0")

# create workflow bundling the recipe and the model spec
cars_wf <-
  workflow() %>%
  add_recipe(cars_rec) %>%
  add_model(cars_spec)

# fit workflow (this is the step that actually trains the model)
cars_fit <- fit(cars_wf, data = cars_train)

# add predictions to df, keeping only the predicted and the true class
cars_preds <-
  predict(cars_fit, new_data = cars_train) %>%
  bind_cols(cars_train) %>%
  select(.pred_class, cyl)

cars_preds
#> # A tibble: 32 x 2
#>    .pred_class cyl  
#>    <fct>       <fct>
#>  1 6           6    
#>  2 6           6    
#>  3 4           4    
#>  4 6           6    
#>  5 8           8    
#>  6 6           6    
#>  7 8           8    
#>  8 4           4    
#>  9 4           4    
#> 10 6           6    
#> # ... with 22 more rows

# confusion matrix (both columns are factors, so no coercion is needed)
cars_preds %>%
  conf_mat(truth = cyl,
           estimate = .pred_class)
#>           Truth
#> Prediction  4  6  8
#>          4 11  0  0
#>          6  0  7  0
#>          8  0  0 14

Created on 2021-09-30 by the reprex package (v2.0.1)

Mark Rieke
  • 306
  • 3
  • 13
0

I tried the reprex by Mark Rieke and got an error on the last command (conf_mat).

# load tidymodels & rules

library(tidymodels)
library(rules)
#> 
#> Attaching package: 'rules'
#> The following object is masked from 'package:dials':
#> 
#>     max_rules

# example training dataset
cars_train <- as_tibble(mtcars)

# change the number of cylinders to character for predicting as a class
# NOTE: this leaves cyl as a character column (<chr> in the printout below),
# which is what conf_mat() rejects at the end of this reprex
cars_train <- 
  cars_train %>%
  mutate(cyl = as.character(cyl))

# training df
cars_train
#> # A tibble: 32 × 11
#>      mpg cyl    disp    hp  drat    wt  qsec    vs    am  gear  carb
#>    <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  21   6      160    110  3.9   2.62  16.5     0     1     4     4
#>  2  21   6      160    110  3.9   2.88  17.0     0     1     4     4
#>  3  22.8 4      108     93  3.85  2.32  18.6     1     1     4     1
#>  4  21.4 6      258    110  3.08  3.22  19.4     1     0     3     1
#>  5  18.7 8      360    175  3.15  3.44  17.0     0     0     3     2
#>  6  18.1 6      225    105  2.76  3.46  20.2     1     0     3     1
#>  7  14.3 8      360    245  3.21  3.57  15.8     0     0     3     4
#>  8  24.4 4      147.    62  3.69  3.19  20       1     0     4     2
#>  9  22.8 4      141.    95  3.92  3.15  22.9     1     0     4     2
#> 10  19.2 6      168.   123  3.92  3.44  18.3     1     0     4     4
#> # … with 22 more rows

# setup recipe with no preprocessing
cars_rec <-
  recipe(cyl ~ ., data = cars_train)

# specify c5 model; no need to set mode (can only be used for classification)
cars_spec <- 
  C5_rules() %>%
  set_engine("C5.0")

# create workflow
cars_wf <-
  workflow() %>%
  add_recipe(cars_rec) %>%
  add_model(cars_spec)

# fit workflow
cars_fit <- fit(cars_wf, data = cars_train)

# add predictions to df
# (cyl is taken from cars_train, so it is still a character column here)
cars_preds <- 
  predict(cars_fit, new_data = cars_train) %>%
  bind_cols(cars_train) %>%
  select(.pred_class, cyl)

cars_preds
#> # A tibble: 32 × 2
#>    .pred_class cyl  
#>    <fct>       <chr>
#>  1 6           6    
#>  2 6           6    
#>  3 4           4    
#>  4 6           6    
#>  5 8           8    
#>  6 6           6    
#>  7 8           8    
#>  8 4           4    
#>  9 4           4    
#> 10 6           6    
#> # … with 22 more rows


# confusion matrix
# NOTE: the call below fails because `truth` (cyl) is a character column while
# this yardstick version requires a factor (see the error message). Converting
# cyl to a factor beforehand, e.g. mutate(cyl = factor(cyl)), should avoid it.
cars_preds %>%
  conf_mat(truth = cyl, 
           estimate = .pred_class)
#> Error in `yardstick_table()`:
#> ! `truth` must be a factor.
#> ℹ This is an internal error in the yardstick package, please report it to the package authors.

#> Backtrace:
#>     ▆
#>  1. ├─cars_preds %>% conf_mat(truth = cyl, estimate = .pred_class)
#>  2. ├─yardstick::conf_mat(., truth = cyl, estimate = .pred_class)
#>  3. └─yardstick:::conf_mat.data.frame(., truth = cyl, estimate = .pred_class)
#>  4.   └─yardstick:::yardstick_table(truth = truth, estimate = estimate, case_weights = case_weights)
#>  5.     └─rlang::abort("`truth` must be a factor.", .internal = TRUE)
Marc Kees
  • 206
  • 2
  • 15