I'm trying to train an SVM in opencv with a list of numpy images, but I keep getting the error TypeError: samples is not a numpy array, neither a scalar
for the last line of code. The error is most likely from the way I setup my data in the samplePrep
function. Here is my code.
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np
import cv2
Preparing the Data
class dataPrep:
def import_from_XML(self, path):
xml_list = []
for xml_file in glob.glob(path + '/*.xml'):
tree = ET.parse(xml_file)
root = tree.getroot()
for member in root.findall('object'):
value = (root.find('filename').text,
int(root.find('size')[0].text),
int(root.find('size')[1].text),
member[0].text,
int(member[4][0].text),
int(member[4][1].text),
int(member[4][2].text),
int(member[4][3].text)
)
xml_list.append(value)
column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
xml_df = pd.DataFrame(xml_list, columns=column_name)
return xml_df
def samplePrep(self, xml_df, path, yscaling = 36, xscaling = 81):
sample_list = []
label_list = []
with open(xml_df) as xml_df:
i = 0
for xml in xml_df:
if i == 0:
i =+ 1
continue
else:
xml = list(xml.split(','))
filename = xml[0]
width = int(xml[1])
height = int(xml[2])
if xml[3] == "Red_TL":
label = 1
elif xml[3] == "Orange_TL":
label = 2
elif xml[3] == "Green_TL":
label = 3
label_list.append(np.array(label))
xmin = int(xml[4])
ymin = int(xml[5])
xmax = int(xml[6])
ymax = int(xml[7])
image = cv2.imread(path + "/" +filename)
image = image[ymin:ymax, xmin:xmax]
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray_image = cv2.resize(gray_image, None,fx=xscaling/(xmax-xmin),fy=yscaling/(ymax-ymin), interpolation = cv2.INTER_NEAREST)
np_image = np.array(gray_image)
flat_image = np_image.ravel()
sample_list.append(flat_image)
i =+ 1
return sample_list, label_list
SVM Wrapper
class Learn:
def __init__(self):
self.est = cv2.ml.SVM_create()
def train(self, X, y, params = dict(kernel_type=cv2.ml.SVM_LINEAR, svm_type=cv2.ml.SVM_C_SVC) ):
self.est.trainAuto(X, responses = y,layout = cv2.ml.ROW_SAMPLE, kFold = 3) #kfold=3 (default: 10)
print("Model has been trained")
def load(self, file):
with file as f:
self.est.load(f)
print("SVM File Loaded")
def save(self, file):
with file as f:
self.est.save(f)
print("SVM File Saved")
def predict(self, X):
return self.est.predict_all(X).ravel()
Training SVM
path = "/"
xml_df = dataPrep().import_from_XML(path)
xml_df.to_csv('xml_test.csv',index=False)
sample_list, label_list = dataPrep().samplePrep(xml_df = 'xml_test.csv', path = path)
print(np.shape(sample_list))
Learn().train(sample_list, label_list)
Errors
TypeError: samples is not a numpy array, neither a scalar
self.est.train(samples, responses, params = params)
TypeError: an integer is required (got type list)