I am trying to fit an SVM model on a dataset that has 125,973 records in the training set and 22,544 records in the test set. Training keeps computing forever and never completes. Can anyone help me? Below is my Python code so far:
import pandas as pd
import sklearn.svm as s
import sklearn.metrics as m
import sklearn.preprocessing as pp
# Load the train and test splits.
traindata = pd.read_csv("E:\\a-train.csv")
testdata = pd.read_csv("E:\\a-test.csv")

# Integer-encode the object (string) columns.
#
# A single encoder per column is fitted on the union of the train and test
# values, so a given category gets the same integer code in both frames.
# Fitting two independent encoders (one per frame) only agrees by accident,
# when both frames happen to contain exactly the same set of categories.
for col in traindata.columns:
    if not pd.api.types.is_object_dtype(traindata[col]):
        continue
    le = pp.LabelEncoder()
    shared = (
        col in testdata.columns
        and pd.api.types.is_object_dtype(testdata[col])
    )
    if shared:
        le.fit(pd.concat([traindata[col], testdata[col]], ignore_index=True))
        testdata[col] = le.transform(testdata[col])
    else:
        le.fit(traindata[col])
    traindata[col] = le.transform(traindata[col])

# Object columns of the test frame that were not handled above (absent from
# the train frame, or not object-typed there) are encoded on their own.
for col in testdata.columns:
    if pd.api.types.is_object_dtype(testdata[col]):
        le = pp.LabelEncoder()
        testdata[col] = le.fit_transform(testdata[col])
# Columns 0..12 are the features, column 13 is the label.
featsTrain = traindata.values[:, 0:13]
featsTest = testdata.values[:, 0:13]
lblsTrain = traindata.values[:, 13]
lblsTest = testdata.values[:, 13]

# Standardise the features before fitting. Kernel SVMs are not scale
# invariant: unscaled inputs make the solver converge very slowly (it can
# look like it never finishes) and usually hurt accuracy as well.
# The scaler is fitted on the training split only and then reused for the
# test split, so no test-set statistics leak into training.
scaler = pp.StandardScaler()
featsTrain = scaler.fit_transform(featsTrain)
featsTest = scaler.transform(featsTest)

# NOTE(review): SVC training cost grows at least quadratically with the
# number of samples. If this is still too slow on the full training set,
# consider sklearn.svm.LinearSVC or fitting on a subsample.
modelS = s.SVC(cache_size=7000)
modelS.fit(featsTrain, lblsTrain)
lblsPredS = modelS.predict(featsTest)

# Number of correct predictions, then accuracy as a percentage with one
# decimal place. Multiplying before rounding avoids float artefacts such
# as 77.10000000000001 in the printed value.
countS = int((lblsTest == lblsPredS).sum())
accS = round(100.0 * countS / len(featsTest), 1)

print(m.confusion_matrix(lblsTest, lblsPredS))
print(m.classification_report(lblsTest, lblsPredS))
print("\nAccuracy = ", accS, "%")