0

I am trying to plot my SVM classification with usinf e1071 library.However, the classification plot shows only single value for my parameters.Even though I change the selected parameters to create 2d classification plot it is wrong.

require(caTools)
library(caret)


dataset <-read.csv("income_evaluation.csv")


# fnlwgt row remowed since it is not necessary 
df_income <- subset(dataset,select=-c(fnlwgt))

# turn binary attribute into 0 and 1 
df_income$income <-ifelse(df_income$income==" >50K",1,0)

df_income$native.country
apply(X=df_income,2,FUN=function(x) length(which(x==' ?')))

# handling missing values 


#define function to calculate mode
find_mode <- function(x) {
  u <- unique(x)
  tab <- tabulate(match(x, u))
  u[tab == max(tab)]
}


mod_workclass_df  = find_mode(df_income$workclass)
mod_occupation_df  = find_mode(df_income$occupation)
mod_country_df  = find_mode(df_income$native.country)

# replacing the missing values with the mod values
df_income$workclass[df_income$workclass == ' ?'] <- mod_workclass_df
df_income$occupation[df_income$occupation == ' ?'] <- mod_occupation_df
df_income$native.country[df_income$native.country == ' ?'] <- mod_country_df


# one hot encoding for train set 

dmy <- dummyVars(" ~ .", data = df_income, fullRank = T)
df_income <-  data.frame(predict(dmy, newdata = df_income))



# sampling 
set.seed(101) 
sample = sample.split(df_income$income, SplitRatio = .75)
trainingSet = subset(df_income, sample == TRUE)
testSet  = subset(df_income, sample == TRUE)

# isolaate y cariable 
Y_train <- trainingSet$income
Y_test <- testSet$income

#isolate x cariable 

X_test <- subset(testSet,select=-c(income))




# evalution of svm 
library(e1071)
svm_classifier = svm(formula=income ~ .,data=trainingSet,type="C-classification",kernel="radial",scale=TRUE,cost=10)
Y_pred = predict(svm_classifier,newdata= X_test)
confusionMatrix(table(Y_test,Y_pred))



# cross validation 

# in creating the folds we specify the target feature (dependent variable) and # of folds
folds = createFolds(trainingSet$income, k = 10)
# in cv we are going to applying a created function to our 'folds'
cv = lapply(folds, function(x) { # start of function
  # in the next two lines we will separate the Training set into it's 10 pieces
  training_fold = trainingSet[-x, ] # training fold =  training set minus (-) it's sub test fold
  test_fold = trainingSet[x, ] # here we describe the test fold individually
  # now apply (train) the classifer on the training_fold
  classifier = svm_classifier
  Y_pred = predict(svm_classifier,newdata= test_fold[-97])
  cm = table(test_fold[, 97], Y_pred)
  accuracy = (cm[1,1] + cm[2,2]) / (cm[1,1] + cm[2,2] + cm[1,2] + cm[2,1])
  return(accuracy)
  
  })
accuracy = mean(as.numeric(cv))
accuracy


trainingSet$income <-as.factor(trainingSet$income)

# Visualising the Training set results

plot(svm_classifier,trainingSet,education.num~age)
  
  library(ggplot2)
svm_classifier
table(predicted=svm_classifier$fitted,actual=trainingSet$income)

Here is my code above and the plot below.I could not find the problem why there is only one color background and why there is any red color in the background. plot

0 Answers0