I get this error only when adding error matrix to the formula: Call: C5.0.default(x = heart_train, y = heart_train_results, trials = 1, costs = error_cost)
C5.0 [Release 2.07 GPL Edition] Sat Mar 4 17:43:23 2023
Class specified by attribute `outcome'
Read 820 cases (14 attributes) from undefined.data
*** line 1 of undefined.costs': bad class
no'
Error limit exceeded
data is from https://www.kaggle.com/datasets/johnsmith88/heart-disease-dataset/code?datasetId=216167&language=R
it works fine with
heart_classifier = C5.0(heart_train, heart_train_results, trials=1, costs=Null)
The code is
library(libcoin)
library(Cubist)
# Import packages
library(gmodels)
library(C50)
#install.packages("ggplot2")
library(ggplot2)
library(corrplot)
#install.packages("tidyverse")
library(tidyverse)
library(Amelia)
library(corrgram)
#install.packages('fastDummies')
library(fastDummies)
#Stage 1 exploration-----------------------------------
setwd("C:/R_homework")
getwd()
heart<-read.csv("heart.csv")
str(heart) #1025 obs. of 14 variables
#Stage 2 convert target to factor-----------------------------------
heart$target<-as.factor(heart$target)
#Create a train and test set
set.seed(2410)
length<-length(heart$target) #1025
1025*0.8 #820
train_obs = sample(1:length, size=length*0.8, replace=FALSE)
heart_train<-heart[train_obs, !(names(heart) %in% "target")]
heart_test<-heart[-train_obs, !(names(heart) %in% "target")]
length(heart_train$age) #820
length(heart_test$age) # 205
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# stage5
#-----------------------------------results/labels
heart_train_results<-heart[train_obs, "target"]
heart_test_results<-heart[-train_obs, "target"]
length(heart_train_results) #820 rows
length(heart_test_results) #205 rows
matrix_dimensions = list(c("no", "yes"), c("no", "yes"))
names(matrix_dimensions) = c("predicted", "actual")
matrix_dimensions
# matrix cost
# provide a penalty more to the chance to loose customer
error_cost = matrix(c(0, 1, 4, 0), nrow = 2, ncol=2, byrow=FALSE, dimnames = matrix_dimensions)
error_cost
#b
heart_classifier3 = C5.0(heart_train, heart_train_results, trials=1, costs=error_cost)
summary(heart_classifier3)