I'm trying to get metrics for a classifier I built using scikit-learn's OneVsRestClassifier to solve a multilabel classification problem. However, I'm having trouble getting the metrics library to work, as the binary indicators that I am trying to compare for my true labels and my predicted labels are different size. Here's the code, most of which is taken from use scikit-learn to classify into multiple categories
import numpy as np
import collections
import csv
import os
import sys
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
import sklearn.metrics as metrics
np.set_printoptions(threshold=sys.maxsize)
csv_read_args = ({'mode': 'rb'} if sys.version_info[0] < 3 else
{'mode': 'rt', 'newline': '', 'encoding': 'latin1'})
with open(os.path.abspath('somefilepath'), **csv_read_args) as myfile:
reader = csv.reader(myfile)
next(reader)
a, b = [], []
# feed generator expression into a zero-length deque to consume it
generator = ((a.append(row[2]), b.append(row[1].split(";"))) for row in reader)
collections.deque(generator, maxlen=0)
X_train = np.array(a)
y_train_text = b
with open(os.path.abspath('some filepath'), **csv_read_args) as myfile:
reader = csv.reader(myfile)
next(reader)
c, d = [], []
generator = ((c.append(row[2]), d.append(row[1].split(";"))) for row in reader)
collections.deque(generator, maxlen=0)
X_test = np.array(c)
mlb = MultiLabelBinarizer()
Y = mlb.fit_transform(y_train_text)
classifier = Pipeline([
('vectorizer', CountVectorizer()),
('tfidf', TfidfTransformer()),
('clf', OneVsRestClassifier(LinearSVC()))])
classifier.fit(X_train, Y)
predicted = classifier.predict(X_test)
all_labels = mlb.inverse_transform(predicted)
mlb = MultiLabelBinarizer()
true = mlb.fit_transform(d)
print(true.shape)
print(predicted.shape)
print(metrics.f1_score(true, predicted, average="micro"))
On this last line, I get an error message: ValueError: Multi-label binary indicator input with different numbers of labels
Why are my true and predicted indicators coming out with a different number of labels? Is it because my train dataset may have labels that do not exist in the test dataset, or vice-versa? If so, how should I account for this?