I am trying to get the mean absolute error (MAE) for each split of the data using 5-fold (KFold) cross validation. I have built a custom model on top of Xception that takes an X-ray hand image as input and outputs the estimated age in months. When I run the for loop over kf.split(X_train) in the code below (under the cv_mae part), the first CV run completes and produces output. However, after the first CV run, I get the following error:
640/640 [==============================] - 86s 114ms/step - loss: 0.3346 - mae_months: 17.8703
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
Input In [15], in <cell line: 3>()
3 for train_index, val_index in kf.split(X_train):
4 model.fit(X_train[train_index], y_train[train_index], batch_size = 10)
----> 5 pred = model.predict(X_train[val_index], batch_size = 2)
6 err = mean_absolute_error(y_train[val_index], pred)
7 cv_mae.append(err)
File ~\anaconda3\lib\site-packages\keras\wrappers\scikit_learn.py:364, in KerasRegressor.predict(self, x, **kwargs)
350 """Returns predictions for the given test data.
351
352 Args:
(...)
361 Predictions.
362 """
363 kwargs = self.filter_sk_params(Sequential.predict, kwargs)
--> 364 return np.squeeze(self.model.predict(x, **kwargs))
File ~\anaconda3\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~\anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py:102, in convert_to_eager_tensor(value, ctx, dtype)
100 dtype = dtypes.as_dtype(dtype).as_datatype_enum
101 ctx.ensure_initialized()
--> 102 return ops.EagerTensor(value, ctx.device_name, dtype)
InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.
It seems the error appears every time execution reaches model.predict(), because the traceback points to this line:
----> 5 pred = model.predict(X_train[val_index], batch_size = 2)
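For scale, the slice handed to model.predict() in each fold is fairly large. Below is only a rough illustration of its size, assuming X_train ends up as a float32 array of shape (8000, 224, 224, 3) as in the code further down (val_slice is just an illustrative name):

val_slice = X_train[val_index]
print(val_slice.shape)                                # (1600, 224, 224, 3) for a 5-fold split
print(round(val_slice.nbytes / 1024 ** 3, 2), 'GiB')  # about 0.9 GiB in float32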
Code:
# Checking the GPU availability
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)
#---------------------------------------------------------------------------------
# Root path for the image files:
root = 'P:/BoneDataset/0-Dataset/ba-trainset/'
age_df = pd.read_csv(os.path.join(root, 'ba-training-dataset.csv'))
# Converting 'male' column to have male and female instead of true and false:
age_df['gender'] = age_df['male'].map(lambda x: 'male' if x else 'female')
# Checking that each image path exists
age_df['path'] = age_df['id'].map(lambda x: os.path.join(root, 'ba-trainset', '{}.png'.format(x)))
age_df['exists'] = age_df['path'].map(os.path.exists)
print(age_df['exists'].sum(), 'images found of total of', age_df.shape[0], 'images.')
#---------------------------------------------------------------------------------
# Oldest children age in the dataset:
print('Maximum age: ' + str(age_df['boneage'].max()) + ' months')
# Youngest children age in the dataset:
print('Minimum age: ' + str(age_df['boneage'].min()) + ' months')
# Mean of children age in the dataset:
boneage_mean = age_df['boneage'].mean()
print('Mean BA: ' + str(boneage_mean))
# Median of children age in the dataset:
print('Median BA: ' + str(age_df['boneage'].median()))
# Standard deviation of children age in the dataset:
boneage_div = age_df['boneage'].std()
# Normalizing the target to zero mean and unit standard deviation (z-score),
# since models generally train better on standardized values:
age_df['boneage_zscore'] = age_df['boneage'].map(lambda x: (x-boneage_mean)/boneage_div)
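# For clarity: since the model is trained on these z-scores, its raw predictions are
# also in z-score units. Converting a prediction back to months is just the inverse
# transform (illustration only; pred_zscore is a hypothetical variable):
#     pred_months = pred_zscore * boneage_div + boneage_mean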
#---------------------------------------------------------------------------------
# Trimming data size to 10000 from 12000
age_df['boneage_category'] = pd.cut(age_df['boneage'], 10)
new_age_df = age_df.groupby(['boneage_category', 'male']).apply(lambda x: x.sample(500, replace = True)).reset_index(drop = True)
print('New Data Size:', new_age_df.shape[0], 'Old Size:', age_df.shape[0])
#---------------------------------------------------------------------------------
train_df, valid_df = train_test_split(new_age_df, test_size = 0.20, stratify = new_age_df['boneage_category'])
#---------------------------------------------------------------------------------
## Image preprocessing:
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.imagenet_utils import preprocess_input
IMG_SIZE = (224, 224)
core_idg = ImageDataGenerator(samplewise_center = True,
                              samplewise_std_normalization = True,
                              height_shift_range = 0.05,
                              width_shift_range = 0.05,
                              rotation_range = 10,
                              fill_mode = 'nearest',
                              rescale = 1. / 255,
                              preprocessing_function = preprocess_input)
#---------------------------------------------------------------------------------
def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    base_dir = os.path.dirname(in_df[path_col].values[0])
    print('## Ignore next message from keras, values are replaced anyways')
    df_gen = img_data_gen.flow_from_directory(base_dir, class_mode = 'sparse', **dflow_args)
    df_gen.filenames = in_df[path_col].values
    # filepaths starts out as an empty list, so the full image paths are added here:
    df_gen.filepaths.extend(df_gen.filenames)
    df_gen.classes = np.stack(in_df[y_col].values)
    df_gen.samples = in_df.shape[0]
    df_gen.n = in_df.shape[0]
    df_gen._set_index_array()
    df_gen.directory = ''  # since we have the full path
    print('Reinserting dataframe: {} images'.format(in_df.shape[0]))
    return df_gen
#---------------------------------------------------------------------------------
# Data Generators:
train_gen = flow_from_dataframe(core_idg, train_df,
                                path_col = 'path',
                                y_col = 'boneage_zscore',
                                target_size = IMG_SIZE,
                                color_mode = 'rgb',
                                batch_size = len(train_df),
                                shuffle = True)
X_train, y_train = next(train_gen)
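# Note: with batch_size = len(train_df), this single next() call materialises all
# 8000 training images as one float32 array of shape (8000, 224, 224, 3),
# i.e. roughly 4.5 GiB in host memory.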
def boneage_model():
    base_model = Xception(input_shape = X_train.shape[1:], include_top = False, weights = 'imagenet')
    base_model.trainable = True

    model = Sequential()
    model.add(base_model)
    model.add(GlobalMaxPooling2D())
    model.add(Flatten())
    model.add(Dense(16, activation = 'relu'))
    model.add(Dense(1, activation = 'linear'))

    # MAE reported in months by undoing the z-score scaling:
    def mae_months(in_gt, in_pred):
        return mean_absolute_error(boneage_div * in_gt, boneage_div * in_pred)

    # Compile model
    adam = Adam(learning_rate = 0.0005)
    model.compile(loss = 'mse', optimizer = adam, metrics = [mae_months])
    return model
#---------------------------------------------------------------------------------
# KFold
n_splits = 5
kf = KFold(n_splits = n_splits, shuffle = True, random_state = 42)
# create model
model = KerasRegressor(build_fn = boneage_model)
#---------------------------------------------------------------------------------
#### THIS IS WHERE THE ERROR STARTS
cv_mae = []
for train_index, val_index in kf.split(X_train):
    model.fit(X_train[train_index], y_train[train_index], batch_size = 16)
    pred = model.predict(X_train[val_index], batch_size = 2)
    # Note: this MAE is in z-score units; multiply by boneage_div to get months
    err = mean_absolute_error(y_train[val_index], pred)
    cv_mae.append(err)
cv_mae
Note! train_df contains 8000 X-ray hand images.
According to the post here, trimming the batch_size down was suggested, which is why batch_size = 2 is used in model.predict() above. However, it still prints the same error message. Please help!
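For clarity on what I expect batch_size = 2 to achieve: my understanding (which may be wrong) is that it should be roughly equivalent to pushing the validation slice through the model in small pieces, something like the sketch below. This chunked version is only illustrative and not part of my actual script; the step size of 2 and the preds/chunk names are made up:

preds = []
for start in range(0, len(val_index), 2):
    chunk = X_train[val_index[start:start + 2]]        # two images at a time
    preds.append(np.atleast_1d(model.predict(chunk)))  # atleast_1d guards against squeeze on a single sample
pred = np.concatenate(preds)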