I want to evaluate my face recognition system using a trained SVM classifier. The script below loads a trained FaceNet model and its associated weights and uses it to generate face embeddings. The embeddings are then used to train an SVM classifier to recognize known faces, and the trained classifier is then used to perform face recognition on test images. I am not sure whether the arguments I pass to the SVM classifier's predict call are correct, because the accuracy I get on the test data is very low.
import os
import pickle
import random

import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow.keras.models import load_model
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from sklearn.metrics import (accuracy_score, classification_report, confusion_matrix,
                             precision_score, recall_score, f1_score,
                             average_precision_score, roc_curve, auc)

# required files, model and weights
MODEL_PATH = ".\\models\\FaceNetModel.h5"
WEIGHTS_PATH = ".\\models\\facenet_keras_weights.h5"
ENCONDINGS_PATH = ".\\models\\face_encoding.pickle"
PROTOTXT_PATH = ".\\models\\deploy.prototxt.txt"
CAFFE_MODEL = ".\\models\\res10_300x300_ssd_iter_140000.caffemodel"

net = cv2.dnn.readNetFromCaffe(PROTOTXT_PATH, CAFFE_MODEL)
encoder = load_model(MODEL_PATH)
encoder.load_weights(WEIGHTS_PATH)

# test folder
test_folder = ".\\Evaluation\\test_Georgia_1"

# load the known faces and embeddings from the encoding file and train the svm classifier
encoding_file = ".\\Evaluation\\face_encoding_evaluate_Georgia_1.pickle"
data = pickle.loads(open(encoding_file, "rb").read())

# convert the list of encodings to a NumPy array and flatten each encoding
encodings = np.array(data["encodings"])
encodings = encodings.reshape(len(encodings), -1)

# encode the string labels to integers
le = LabelEncoder()
labels = le.fit_transform(data["names"])

# train the svm classifier on the encodings and labels
print("[INFO] training model...")
print(encodings.shape)
print(labels.shape)
print(labels)
svm = SVC(C=1.0, kernel="linear", probability=True)
svm.fit(encodings, labels)
print("Accuracy on training set:")
print(svm.score(encodings, labels))

# test the svm classifier on the training data
print("[INFO] testing model...")
predictions = svm.predict(encodings)
print("Accuracy: {:.2f}%".format(accuracy_score(labels, predictions) * 100))

# save the svm model
svm_file = ".\\Evaluation\\svm_evaluate_Georgia.pickle"
with open(svm_file, "wb") as f:
    f.write(pickle.dumps(svm))


def load_dataset(test_folder):
    encodings_dict = {}
    for root, dirs, files in os.walk(test_folder):
        # shuffle the files to prevent bias
        random.shuffle(files)
        for file in files:
            if file.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".gif")):
                image_path = os.path.join(root, file)
                image = cv2.imread(image_path)
                image = imutils.resize(image, width=800)
                (h, w) = image.shape[:2]
                blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                             (300, 300), (104, 177, 123))
                net.setInput(blob)
                detections = net.forward()
                for i in range(0, detections.shape[2]):
                    confidence = detections[0, 0, i, 2]
                    if confidence > 0.9:
                        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                        (startX, startY, endX, endY) = box.astype("int")
                        face = image[startY:endY, startX:endX]
                        if face.size != 0:
                            pixels = np.asarray(face)
                            cv2.normalize(face, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
                            face = cv2.resize(face, (160, 160))
                            pixels = np.expand_dims(face, axis=0)
                            encoding = encoder.predict(pixels)
                            encoding = encoding.flatten()
                            name = os.path.basename(file)
                            # add the encoding and name to encodings_dict
                            if root not in encodings_dict:
                                encodings_dict[root] = []
                            encodings_dict[root].append((encoding, name))

    encodings = []
    names = []
    for root, encodings_names in encodings_dict.items():
        # shuffle the encodings and names in each folder to prevent bias
        np.random.shuffle(encodings_names)
        # take all encodings and names per folder
        for encoding, name in encodings_names:
            encodings.append(encoding)
            names.append(name)
    encodings = np.array(encodings)
    names = np.array(names)
    # encode the names to numeric labels
    le = LabelEncoder()
    names = le.fit_transform(names)
    print(encodings.shape)
    print(names.shape)
    return encodings, names


# load the testing dataset
test_encodings, test_names = load_dataset(test_folder)
print(encodings.shape)
print(test_encodings.shape)
print(test_names.shape)

# load the trained svm to perform predictions on the test set
svm_file = ".\\Evaluation\\svm_evaluate_Georgia.pickle"
svm = pickle.loads(open(svm_file, "rb").read())

# make predictions on the test set
predictions_y = svm.predict(test_encodings)
print("Accuracy: {:.2f}%".format(accuracy_score(test_names, predictions_y) * 100))

# encode the labels in test_names to numeric labels
le = LabelEncoder()
test_names_encoded = le.fit_transform(test_names)
print(test_names_encoded.shape)
print(test_names_encoded)

# classification report
print("\nClassification Report")
predictions = svm.predict(test_encodings)
print(classification_report(test_names_encoded, predictions))

# confusion matrix with seaborn
cm = confusion_matrix(test_names_encoded, predictions)
print(cm)
df_cm = pd.DataFrame(cm, index=[i for i in "0123456789"],
                     columns=[i for i in "0123456789"])
plt.figure(figsize=(10, 7))
sns.heatmap(df_cm, annot=True)
plt.show()
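To make the question concrete, here is a minimal, self-contained sketch of how I understand fit/predict and the label encoding are supposed to line up. The names and arrays (train_names, test_names, the random 128-dimensional embeddings) are placeholders, not my real data; I would like to confirm whether my script above is meant to follow this pattern:

# minimal sketch with made-up data, showing how I believe fit/predict and
# the label encoding are meant to line up (all names here are hypothetical)
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

train_names = ["alice"] * 5 + ["bob"] * 5        # placeholder person names
train_embeddings = np.random.rand(10, 128)       # stand-in for FaceNet embeddings, shape (n_samples, 128)
test_names = ["bob", "alice", "bob", "alice"]
test_embeddings = np.random.rand(4, 128)         # same embedding dimension as in training

le = LabelEncoder()
y_train = le.fit_transform(train_names)          # fit the encoder once, on the training names

clf = SVC(C=1.0, kernel="linear", probability=True)
clf.fit(train_embeddings, y_train)

# predict takes the 2-D array of test embeddings; the labels it is compared
# against come from the SAME encoder (transform, not a new fit_transform)
y_test = le.transform(test_names)
y_pred = clf.predict(test_embeddings)
print(accuracy_score(y_test, y_pred))

If this is the expected usage, my specific doubt is that in my script the test labels are produced by a second LabelEncoder fitted only on the test file names (taken from os.path.basename), so I am not sure the integer labels I compare against svm.predict(test_encodings) actually correspond to the classes the classifier was trained on.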