I'm having issues with a Keras model, with the predictions and confusion matrix.
I want to adapt this Keras tutorial to a multi-class target.
https://www.tensorflow.org/tutorials/structured_data/feature_columns
I read in the data and encode the 6 string
target classes as integers:
# Load the raw table; `target` names the column that holds the class labels.
dataframe = pd.read_csv("my_csv.csv")
target = 'some_target'
# Replace the string class labels in the target column with integer codes.
labelencoder = LabelEncoder()
dataframe[target] = labelencoder.fit_transform(dataframe[target])
Then I split the data, create the feature columns, build the model, and fit it:
# Hold out 20% of the rows as the test set, then 20% of the remaining rows
# as the validation set.
train, test = train_test_split(dataframe, test_size=0.2)
train, val = train_test_split(train, test_size=0.2)
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, target, shuffle=True, batch_size=32):
    """Build a batched ``tf.data.Dataset`` of (features, label) pairs.

    Args:
        dataframe: pandas DataFrame holding the feature columns and the
            label column. It is copied first, so the caller's frame is
            not modified.
        target: name of the label column to split off from the features.
        shuffle: when true, shuffle the examples with a buffer as large
            as the frame.
        batch_size: number of examples per batch.

    Returns:
        A ``(ds, labels)`` tuple: the batched dataset and the label column
        in the frame's original row order. The two only line up
        element-for-element when ``shuffle`` is False, because shuffling
        reorders the dataset but not the returned labels.
    """
    dataframe = dataframe.copy()
    # pop() removes the label column, leaving only features in the frame.
    labels = dataframe.pop(target)
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds, labels
# Collect the feature columns that the model's input layer will consume.
# NOTE(review): `field` and `unique_categories` are not defined in this
# excerpt; presumably each group below runs once per input column inside a
# loop that was omitted. Confirm against the full script.
feature_columns = []
# Categorical field represented as a learned 8-dimensional embedding.
f = feature_column.categorical_column_with_vocabulary_list(
field, unique_categories)
feature_columns.append(feature_column.embedding_column(f, dimension=8))
# Categorical field represented as an indicator column.
f2 = feature_column.categorical_column_with_vocabulary_list(
field, unique_categories)
indicator_column = feature_column.indicator_column(f2)
feature_columns.append(indicator_column)
# Numeric field added as a numeric column.
feature_columns.append(feature_column.numeric_column(field))
# Wrap every collected column in a single Keras input layer.
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
batch_size = 32
# Only the training split keeps the default shuffle=True; validation and test
# are built with shuffle=False, so their returned labels follow dataset order.
train_ds, train_labels = df_to_dataset(train, target, batch_size=batch_size)
val_ds, val_labels = df_to_dataset(val, target, shuffle=False, batch_size=batch_size)
test_ds, test_labels = df_to_dataset(test, target, shuffle=False, batch_size=batch_size)
# Feed-forward classifier stacked on top of the feature layer.
model = tf.keras.Sequential([
feature_layer,
layers.Dense(128, activation='relu'),
layers.Dense(128, activation='relu'),
layers.Dropout(.1),
# NOTE(review): softmax normalises across the units of this layer, so with a
# single unit the output is always 1.0 -- consistent with the constant [1.]
# predictions reported below. A 6-class target needs 6 output units.
layers.Dense(1, activation='softmax')
])
# Training hyperparameters
optimizer = 'adam'
# NOTE(review): BinaryCrossentropy is a two-class loss, and from_logits=True
# expects raw scores even though the final layer already applies softmax.
# The target here is a 1-D integer class id for one of six classes.
loss_function = tf.keras.losses.BinaryCrossentropy(from_logits=True)
metrics = ['accuracy']
epochs = 1
model.compile(optimizer=optimizer,
loss=loss_function,
metrics=metrics)
model.fit(train_ds,
validation_data=val_ds,
epochs=epochs)
# Score the fitted model on the held-out test split.
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)
# NOTE(review): `predicted` holds the raw model outputs, not class ids, and is
# passed to confusion_matrix unchanged.
predicted = model.predict(test_ds)
cf = confusion_matrix(test_labels, predicted)
When I run model.predict the output is strange
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
And the confusion matrix is also not correct
[ 0 33 0 0 0 0]
[ 0 499 0 0 0 0]
[ 0 14 0 0 0 0]
[ 0 1089 0 0 0 0]
[ 0 360 0 0 0 0]
[0 4 0 0 0 0]
I have tried a different encoding for the target and a different loss, but to no avail:
# Alternative target encoding that was tried (left disabled):
# mlb = MultiLabelBinarizer()
# dataframe[target] = mlb.fit_transform(dataframe[target])
# Alternative loss that was tried.
# NOTE(review): categorical_crossentropy expects one-hot encoded targets --
# confirm which target encoding it was paired with.
loss='categorical_crossentropy'
What am I doing wrong here?
I also tried with 6 output neurons:
# Same network, but the final layer has 6 units, one per target class.
model = tf.keras.Sequential([
feature_layer,
layers.Dense(128, activation='relu'),
layers.Dense(128, activation='relu'),
layers.Dropout(.1),
layers.Dense(6, activation='softmax')
])
but got this error:
ValueError: logits and labels must have the same shape ((None, 6) vs (None, 1))
EDIT:
# Inspect what df_to_dataset returned for the training split; the observed
# output is recorded in the comment under each call.
print(type(train_ds))
# <class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>
print(train_ds)
# <BatchDataset shapes: ({feature1: (None,), feature2: (None,), feature3: (None,), feature4: (None,) ...
print(type(train_labels))
# <class 'pandas.core.series.Series'>
EDIT: Making a bit of progress. It turns out the loss function and the shape of the target depend on each other (see "Tensorflow : logits and labels must have the same first dimension"):
if you have a 1-D integer-encoded target, you can use sparse_categorical_crossentropy as the loss function.
So changed loss to:
sparse_categorical_crossentropy
Now when I run model.predict the output looks better
[0.02313532 0.39231667 0.0117254 0.42083895 0.15037686 0.00160678]
[2.3085043e-02 3.3588389e-01 8.1730038e-03 4.8321337e-01 1.4923279e-01
4.1199493e-04]
[8.1658429e-03 3.3901721e-01 2.3666199e-03 5.3861737e-01 1.1167890e-01
1.5400720e-04]
[8.6198252e-04 1.2048376e-01 1.3487167e-02 4.1729528e-01 4.4759643e-01
2.7547608e-04]
[0.06842247 0.31534496 0.02852604 0.40057638 0.17933881 0.0077913 ]
[0.05149424 0.34782204 0.02664029 0.34621894 0.22060096 0.00722347]
Then I get the index of the highest predicted probability in each row and pass it into the confusion matrix:
# Reduce each row of per-class scores to the index of its largest entry,
# i.e. the predicted class id.
predictions_index = np.argmax(predicted, axis=1)
# Compare the true integer labels with the predicted class ids.
cf = confusion_matrix(test_labels, predictions_index)
And confusion matrix looks better
[ 0 3 0 27 2 0]
[ 0 37 0 386 54 0]
[ 0 0 0 14 1 0]
[ 0 13 0 968 124 0]
[ 0 4 0 309 49 0]
[0 0 0 6 2 0]