I want to evaluate my face recognition system using a trained SVM classifier. The script below loads a trained FaceNet model and its associated weights and uses it to generate face embeddings. The embeddings are then used to train an SVM classifier to recognize known faces, and the trained classifier is then used to perform face recognition on test images. I am not sure whether the arguments I pass to the SVM classifier's predict call are correct, because the accuracy I get on the test data is very low.
import os
import pickle
import random

import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow.keras.models import load_model
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from sklearn.metrics import (accuracy_score, classification_report, confusion_matrix,
                             precision_score, recall_score, f1_score,
                             average_precision_score, roc_curve, auc)

# required files, model and weights
MODEL_PATH = ".\\models\\FaceNetModel.h5"
WEIGHTS_PATH = ".\\models\\facenet_keras_weights.h5"
ENCONDINGS_PATH = ".\\models\\face_encoding.pickle"
PROTOTXT_PATH = ".\\models\\deploy.prototxt.txt"
CAFFE_MODEL = ".\\models\\res10_300x300_ssd_iter_140000.caffemodel"

net = cv2.dnn.readNetFromCaffe(PROTOTXT_PATH, CAFFE_MODEL)
encoder = load_model(MODEL_PATH)
encoder.load_weights(WEIGHTS_PATH)

# test folder
test_folder = ".\\Evaluation\\test_Georgia_1"

# load the known faces and embeddings from the encoding file and train the svm classifier
encoding_file = ".\\Evaluation\\face_encoding_evaluate_Georgia_1.pickle"
data = pickle.loads(open(encoding_file, "rb").read())

# convert the list of encodings to a NumPy array and flatten each encoding
encodings = np.array(data["encodings"])
encodings = encodings.reshape(len(encodings), -1)

# encode the string labels to integers
le = LabelEncoder()
labels = le.fit_transform(data["names"])

# train the svm classifier on the encodings and labels
print("[INFO] training model...")
print(encodings.shape)
print(labels.shape)
print(labels)
svm = SVC(C=1.0, kernel="linear", probability=True)
svm.fit(encodings, labels)
print("Accuracy on training set:")
print(svm.score(encodings, labels))

# test the svm classifier on the training data
print("[INFO] testing model...")
predictions = svm.predict(encodings)
print("Accuracy: {:.2f}%".format(accuracy_score(labels, predictions) * 100))

# save the svm model
svm_file = ".\\Evaluation\\svm_evaluate_Georgia.pickle"
with open(svm_file, "wb") as f:
    f.write(pickle.dumps(svm))


def load_dataset(test_folder):
    encodings_dict = {}
    for root, dirs, files in os.walk(test_folder):
        # shuffle the files to prevent bias
        random.shuffle(files)
        for file in files:
            if file.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".gif")):
                image_path = os.path.join(root, file)
                image = cv2.imread(image_path)
                image = imutils.resize(image, width=800)
                (h, w) = image.shape[:2]
                blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                             (300, 300), (104, 177, 123))
                net.setInput(blob)
                detections = net.forward()
                for i in range(0, detections.shape[2]):
                    confidence = detections[0, 0, i, 2]
                    if confidence > 0.9:
                        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                        (startX, startY, endX, endY) = box.astype("int")
                        face = image[startY:endY, startX:endX]
                        if face.size != 0:
                            pixels = np.asarray(face)
                            cv2.normalize(face, None, 0, 1.0, cv2.NORM_MINMAX, dtype=cv2.CV_32F)
                            face = cv2.resize(face, (160, 160))
                            pixels = np.expand_dims(face, axis=0)
                            encoding = encoder.predict(pixels)
                            encoding = encoding.flatten()
                            name = os.path.basename(file)
                            # add the encoding and name to encodings_dict
                            if root not in encodings_dict:
                                encodings_dict[root] = []
                            encodings_dict[root].append((encoding, name))

    encodings = []
    names = []
    for root, encodings_names in encodings_dict.items():
        # shuffle the encodings and names in each folder to prevent bias
        np.random.shuffle(encodings_names)
        # take all encodings and names per folder
        for encoding, name in encodings_names:
            encodings.append(encoding)
            names.append(name)
    encodings = np.array(encodings)
    names = np.array(names)
    # encode the names to numeric labels
    le = LabelEncoder()
    names = le.fit_transform(names)
    print(encodings.shape)
    print(names.shape)
    return encodings, names


# load the testing dataset
test_encodings, test_names = load_dataset(test_folder)
print(encodings.shape)
print(test_encodings.shape)
print(test_names.shape)

# load the trained svm to perform predictions on the test set
svm_file = ".\\Evaluation\\svm_evaluate_Georgia.pickle"
svm = pickle.loads(open(svm_file, "rb").read())

# make predictions on the test set
predictions_y = svm.predict(test_encodings)
print("Accuracy: {:.2f}%".format(accuracy_score(test_names, predictions_y) * 100))

# encode the labels in test_names to numeric labels
le = LabelEncoder()
test_names_encoded = le.fit_transform(test_names)
print(test_names_encoded.shape)
print(test_names_encoded)

# classification report
print("\nClassification Report")
predictions = svm.predict(test_encodings)
print(classification_report(test_names_encoded, predictions))

# confusion matrix with seaborn
cm = confusion_matrix(test_names_encoded, predictions)
print(cm)
df_cm = pd.DataFrame(cm, index=[i for i in "0123456789"],
                     columns=[i for i in "0123456789"])
plt.figure(figsize=(10, 7))
sns.heatmap(df_cm, annot=True)
plt.show()
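To make the question concrete, here is a minimal, self-contained sketch of how I understand fit/predict and the label encoding are supposed to line up. The names and arrays (train_names, test_names, the random 128-dimensional embeddings) are placeholders, not my real data; I would like to confirm whether my script above is meant to follow this pattern:

# minimal sketch with made-up data, showing how I believe fit/predict and
# the label encoding are meant to line up (all names here are hypothetical)
import numpy as np
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

train_names = ["alice"] * 5 + ["bob"] * 5        # placeholder person names
train_embeddings = np.random.rand(10, 128)       # stand-in for FaceNet embeddings, shape (n_samples, 128)
test_names = ["bob", "alice", "bob", "alice"]
test_embeddings = np.random.rand(4, 128)         # same embedding dimension as in training

le = LabelEncoder()
y_train = le.fit_transform(train_names)          # fit the encoder once, on the training names

clf = SVC(C=1.0, kernel="linear", probability=True)
clf.fit(train_embeddings, y_train)

# predict takes the 2-D array of test embeddings; the labels it is compared
# against come from the SAME encoder (transform, not a new fit_transform)
y_test = le.transform(test_names)
y_pred = clf.predict(test_embeddings)
print(accuracy_score(y_test, y_pred))

If this is the expected usage, my specific doubt is that in my script the test labels are produced by a second LabelEncoder fitted only on the test file names (taken from os.path.basename), so I am not sure the integer labels I compare against svm.predict(test_encodings) actually correspond to the classes the classifier was trained on.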