2

I've got a working CNN model that classifies images from a custom dataset that is loaded with a csv file. The dataset is split up into training, validation and test dataset after being shuffled. Now I want to expand the image input by four extra input classes containing info / metadata about the images.

I've already learnt that I should split up my cnn model into two branches, one for the images and one for the extra input. My question is, how must I modify my data input so that the model can correctly process both images and additional input?

I'm very new to creating neural networks in tensorflow. My entire code is basically from this website. However, none of the topics could solve the problem for my code.

This is my code: (additional metadata are called usages, completions, heights, constructions)

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from keras.callbacks import History
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.metrics import confusion_matrix
import seaborn as sns
import io

# READ IMAGES, METADATA AND LABELS
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1)
file_paths = df['file_name'].values
labels = df['label'].values
usages = df['usage'].values
completions = df['completion'].values
heights = df['height'].values
constructions = df['construction'].values

# SPLITTING THE DATASET INTO 80 % TRAINING DATA, 10 % VALIDATION DATA, 10 % TEST DATA
dataset_size = len(df.index)
train_size = int(0.8 * dataset_size)
val_size = int(0.1 * dataset_size)
test_size = int(0.1 * dataset_size)

img_height = 350
img_width = 350
batch_size = 16
autotune = tf.data.experimental.AUTOTUNE

# FUNCTION TO READ AND NORMALIZE THE IMAGES
def read_image(image_file, label, usg, com, hei, con):
    image = tf.io.read_file(image_file)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, (img_width, img_height))
    return tf.cast(image, tf.float32) / 255.0, label, \
           tf.cast(usg, tf.float32), tf.cast(com, tf.float32), \
           tf.cast(hei, tf.float32), tf.cast(con, tf.float32)

# FUNCTION FOR DATA AUGMENTATION
def augment(image, labeL, usg, com, hei, con):
    if tf.random.uniform((), minval=0, maxval=1) < 0.1:
        image = tf.tile(tf.image.rgb_to_grayscale(image), [1, 1, 3])
    image = tf.image.random_brightness(image, max_delta=0.25)
    image = tf.image.random_contrast(image, lower=0.75, upper=1.25)
    image = tf.image.random_saturation(image, lower=0.75, upper=1.25)
    image = tf.image.random_flip_left_right(image)
    return image, label, usg, com, hei, con

# SETUP FOR TRAINING, VALIDATION & TEST DATASET
ds_train = ds_train.map(read_image, num_parallel_calls=autotune)
ds_train = ds_train.cache()
ds_train = ds_train.map(augment, num_parallel_calls=autotune)
ds_train = ds_train.batch(batch_size)
ds_train = ds_train.prefetch(autotune)

ds_val = ds_val.map(read_image, num_parallel_calls=autotune)
ds_val = ds_val.batch(batch_size)
ds_val = ds_val.prefetch(autotune)

ds_test = ds_test.map(read_image, num_parallel_calls=autotune)
ds_test = ds_test.batch(batch_size)
ds_test = ds_test.prefetch(autotune)


## HOW TO SPLIT UP THE DATASET FOR THE MODEL FROM HERE? ##


# DEFINING FUNCTIONAL MODEL
input_img = keras.Input(shape=(img_width, img_height, 3))
input_dat = keras.Input(shape=(4,))                              # how is this shape supposed to be?

x = layers.Conv2D(16, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(input_img)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.02), padding='same')(x)
x = layers.BatchNormalization(momentum=0.9)(x)
x = layers.MaxPooling2D()(x)
out1 = layers.Flatten()(x)

out2 = layers.Dense(128, activation='relu')(input_dat)

merge = layers.concatenate([out1, out2])
x = layers.Dense(256, activation='relu')(merge)
x = layers.Dropout(0.35)(x)
output = layers.Dense(8, activation='sigmoid')(x)
model = keras.Model(inputs=[input_img, input_dat], outputs=output)

history = History() 

no_overfit = keras.callbacks.EarlyStopping(monitor='val_loss',              # stop training when overfitting occurs
                                           min_delta=0.015, patience=1,
                                           verbose=2, mode='auto')


# TRAINING STEP
model.compile(
    optimizer=keras.optimizers.Adam(3e-5),
    loss=[keras.losses.SparseCategoricalCrossentropy()],
    metrics=["accuracy"])


model.fit(ds_train, epochs=30, callbacks=[no_overfit, history],
          verbose=1, validation_data=ds_val)

So far I've only added the extra inputs to the dataset tensor and changed the model structure. How exactly do I split my dataset into input_img and input_dat so that each model branch will receive their proper input?

Also I have a custom test step in order to plot a confusion matrix. How is this supposed to be modified? Here the working code, for just the image input:

y_true = []
y_pred = []

for x, y in ds_test:
    y_true.append(y)
    predicts = model.predict(x)                                             # compute model predictions for test step
    y_pred.append(np.argmax(predicts, axis=-1))                                              
true = tf.concat([item for item in y_true], axis=0)
pred = tf.concat([item for item in y_pred], axis=0)

cm = confusion_matrix(true, pred)                                           # confusion matrix from seaborn
testacc = np.trace(cm) / float(np.sum(cm))                                  # calculating test accuracy
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
fig, ax = plt.subplots(figsize=(10, 10))
color = sns.light_palette("seagreen", as_cmap=False)
sns.heatmap(cm, annot=True, square=True, cmap=color, fmt=".3f",
            linewidths=0.6, linecolor='k', cbar_kws={"shrink": 0.8})
plt.yticks(rotation=0)
plt.xlabel('\nPredicted Labels', fontsize=18)
plt.ylabel('True Labels\n', fontsize=18)
plt.title('Multiclass Model - Confusion Matrix (Test Step)\n', fontsize=24)
plt.text(10, 1.1, 'Accuracy = {:0.4f}'.format(testacc), fontsize=20)
ax.axhline(y=8, color='k', linewidth=1.5)                                   # depending on amount of classes
ax.axvline(x=8, color='k', linewidth=1.5)
plt.show()
print('\naccuracy: {:0.4f}'.format(testacc))

Any help is greatly appreciated!!

Ruzezol
  • 21
  • 1

0 Answers0