I currently have a TensorFlow model (ResNet50) that takes a single image as input and outputs a continuous value via regression (ranging from 0.8 to 2.0). The dataset covers 3000 different patients, each of whom has an image and several numerical data points (specifically age, gender, and weight). I'm able to get decent accuracy by training on the images alone, but I was wondering how I could also add the numerical data points as separate inputs. The numerical data is in a CSV file where each row is a separate patient and the columns contain the different values. Here is what I have for the single-image input:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.applications.resnet_rs import ResNetRS50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, GlobalMaxPooling2D
from tensorflow.keras.optimizers import Adam
import warnings
# Load the CSV into a DataFrame for a quick inspection of its contents.
labels = pd.read_csv('[path goes here]')
# Preview the first rows. As a bare expression, the result is only shown in an
# interactive session/notebook; in a plain script it is discarded.
labels.head()
def load_train(path):
    """Build the training-subset image iterator.

    Reads the target CSV, then streams images from the image folder with
    ``flow_from_dataframe``, using the ``ID`` column as the image filename
    and the ``Value`` column as the target (``class_mode='raw'``).

    Args:
        path: Currently unused. The CSV and image-folder locations are
            hard-coded placeholders inside this function.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``
        for ``subset='training'`` (with ``validation_split=0.2``).
    """
    target_frame = pd.read_csv('[Target Dataframe path]')

    # Pixels are deliberately left unscaled (rescale=None); a 1 / 255 rescale
    # was noted as an alternative in the original code.
    generator = ImageDataGenerator(rescale=None, validation_split=0.2)

    flow_options = {
        'dataframe': target_frame,
        'directory': '[Images folder path]',
        'x_col': 'ID',
        'y_col': 'Value',
        'target_size': (224, 224),
        'batch_size': 32,
        'class_mode': 'raw',
        'subset': 'training',
        'seed': 1234,
    }
    return generator.flow_from_dataframe(**flow_options)
def load_test(path):
    """Build the validation-subset image iterator.

    Reads the target CSV, then streams images from the image folder with
    ``flow_from_dataframe``, using the ``ID`` column as the image filename
    and the ``Value`` column as the target (``class_mode='raw'``).

    Args:
        path: Currently unused. The CSV and image-folder locations are
            hard-coded placeholders inside this function.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``
        for ``subset='validation'`` (with ``validation_split=0.2``).
    """
    target_frame = pd.read_csv('[Target Dataframe path]')

    # Same generator settings as the training loader: no pixel rescaling and
    # a 20% validation split.
    generator = ImageDataGenerator(rescale=None, validation_split=0.2)

    flow_options = {
        'dataframe': target_frame,
        'directory': '[Images folder path]',
        'x_col': "ID",
        'y_col': "Value",
        'class_mode': "raw",
        'target_size': (224, 224),
        'batch_size': 32,
        'subset': "validation",
        'seed': 1234,
    }
    return generator.flow_from_dataframe(**flow_options)
def create_model(input_shape):
    """Build and compile the image-regression model.

    The network is a ``Sequential`` stack of:
      1. a ``ResNetRS50`` backbone (ImageNet weights, no classification top),
      2. ``Dropout(0.3)``,
      3. ``GlobalMaxPooling2D``,
      4. a single linear ``Dense`` unit producing the regression output.

    It is compiled with Adam (learning rate 0.0003) and mean absolute error
    as both the loss and the reported metric. A layer summary is printed as
    a side effect.

    Args:
        input_shape: Shape passed to the backbone's ``input_shape`` argument,
            e.g. ``(224, 224, 3)``.

    Returns:
        The compiled Keras model.
    """
    backbone = ResNetRS50(input_shape=input_shape, weights='imagenet', include_top=False)

    model = Sequential()
    model.add(backbone)
    model.add(Dropout(0.3))
    model.add(GlobalMaxPooling2D())
    model.add(Dense(1, activation='linear'))

    optimizer = Adam(learning_rate=0.0003)
    model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line after the summary.
    model.summary()
    return model
def train_model(model, train_data, test_data, batch_size=32, epochs=100,
                steps_per_epoch=None, validation_steps=None):
    """Fit the model and plot its training/validation loss curves.

    Args:
        model: Compiled model exposing ``fit``.
        train_data: Training data passed as the first argument to ``fit``.
        test_data: Data passed to ``fit`` as ``validation_data``.
        batch_size: Forwarded to ``fit`` unchanged.
            NOTE(review): the Keras docs advise against specifying
            ``batch_size`` when the data is a generator -- confirm this is
            intended for the generator inputs used in this file.
        epochs: Number of epochs forwarded to ``fit``.
        steps_per_epoch: Forwarded to ``fit``.
        validation_steps: Forwarded to ``fit``.

    Returns:
        The same ``model`` object, after fitting.
    """
    fit_options = {
        'validation_data': test_data,
        'batch_size': batch_size,
        'epochs': epochs,
        'steps_per_epoch': steps_per_epoch,
        'validation_steps': validation_steps,
        'verbose': 2,
    }
    history = model.fit(train_data, **fit_options)

    # Pull both loss series out of the fit history before plotting.
    train_losses = history.history['loss']
    val_losses = history.history['val_loss']
    _plot_loss_history(train_losses, val_losses)

    return model


def _plot_loss_history(train_losses, val_losses):
    """Plot per-epoch training (red dashed) and test (blue solid) loss."""
    # Epochs are numbered from 1 on the x-axis.
    epochs_axis = range(1, len(train_losses) + 1)

    for series, line_format in ((train_losses, 'r--'), (val_losses, 'b-')):
        plt.plot(epochs_axis, series, line_format)

    plt.legend(['Training Loss', 'Test Loss'])
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.show()
# Placeholder for the dataset root folder.
# NOTE(review): load_train/load_test accept this argument but do not use it;
# they read from their own hard-coded placeholder paths.
path = '[Full set path (contains image folder, target csv, and numerical feature csv]'
# Build the training and validation iterators.
train_data = load_train(path)
test_data = load_test(path)
# Build the model for 224x224 RGB inputs (matches the loaders' target_size).
model = create_model(input_shape = (224, 224, 3))
# Train with the default settings of train_model and show the loss curves.
model = train_model(model, train_data, test_data)