I am using keras-tuner in order to obtain the best set of hyperparameters for my model. I can reproduce my problem for a random dataset:
def generate_data(n_windows, n_timesteps, n_batches=10):
    """Generate random feature batches with random two-element binary labels.

    Args:
        n_windows: Number of windows (rows) in every generated batch.
        n_timesteps: Number of timesteps (columns) in every window.
        n_batches: Number of batches to generate. Defaults to 10, the value
            that used to be hard-coded in the loop.

    Returns:
        A ``(feature_vector_list, label_list)`` tuple of two lists holding
        ``n_batches`` entries each. Every feature entry is a one-element list
        wrapping a ``tf.random.normal`` tensor of shape
        ``(n_windows, n_timesteps)``; every label entry is a NumPy array with
        one row per window, each row picked at random from
        ``[1, 0]``, ``[0, 1]``, ``[0, 0]`` and ``[1, 1]``.
    """
    # The candidate labels never change, so build them once instead of on
    # every loop iteration.
    choices = [np.array([1, 0]), np.array([0, 1]),
               np.array([0, 0]), np.array([1, 1])]

    feature_vector_list = []
    label_list = []
    for _ in range(n_batches):
        x = tf.random.normal((n_windows, n_timesteps))
        labels = np.array([random.choice(choices) for _ in range(n_windows)])
        # Each feature entry is a list so that it matches the "list of model
        # inputs" format the original code produced.
        feature_vector_list.append([x])
        label_list.append(labels)
    return feature_vector_list, label_list
def custom_generator(feat_vector_list, label_list):
    """Endlessly yield ``(features, labels)`` pairs, cycling over the inputs.

    Because this is a generator function, the argument checks below run on
    the first ``next()`` call, not when the generator object is created.

    Args:
        feat_vector_list: Indexable sequence of feature batches.
        label_list: Indexable sequence of label batches; must have the same
            length as ``feat_vector_list``.

    Yields:
        ``(feat_vector_list[i], label_list[i])`` for ``i = 0, 1, ..., n - 1``
        and then starting over from ``0``; the generator never terminates.

    Raises:
        ValueError: If the two sequences differ in length or are empty.
    """
    n_batches = len(feat_vector_list)
    # Raise explicitly instead of using ``assert``: assertions are removed
    # when Python runs with ``-O``, which would silently disable the check.
    if n_batches != len(label_list):
        raise ValueError(
            "Number of feature vectors inconsistent with the number of labels")
    # Without this guard an empty input fails later with an opaque IndexError.
    if n_batches == 0:
        raise ValueError("Cannot generate batches from empty inputs")

    counter = 0
    while True:
        yield feat_vector_list[counter], label_list[counter]
        # Wrap around so the generator can be consumed indefinitely.
        counter = (counter + 1) % n_batches
Here is the model:
def model_builder(hp):
    """Build and compile the 1D-CNN classifier for a single tuner trial.

    Args:
        hp: keras-tuner hyperparameter container; every ``hp.Int`` /
            ``hp.Float`` / ``hp.Choice`` call below both registers a
            hyperparameter and returns the value to use for this trial.

    Returns:
        A compiled Keras ``Model`` mapping a ``(60, 1)`` input window to a
        2-unit softmax output.
    """
    n_timesteps, n_features, n_outputs = 60, 1, 2

    hp_units = hp.Int("units", min_value=50, max_value=500, step=50)
    hp_filters = hp.Int("filters", 4, 32, step=4, default=8)
    hp_kernel_size = hp.Int("kernel_size", 3, 50, step=1)
    hp_pool_size = hp.Int("pool_size", 2, 8, step=1)
    hp_dropout = hp.Float("dropout", 0.1, 0.5, step=0.1)

    input1 = Input(shape=(n_timesteps, n_features))
    conv1 = Conv1D(filters=hp_filters,
                   kernel_size=hp_kernel_size,
                   activation='relu')(input1)
    drop1 = Dropout(hp_dropout)(conv1)
    if hp.Choice("pooling", ["max", "avg"]) == "max":
        pool1 = MaxPooling1D(pool_size=hp_pool_size)(drop1)
    else:
        pool1 = AveragePooling1D(pool_size=hp_pool_size)(drop1)
    flatten1 = Flatten()(pool1)

    # hidden layers
    dense1 = Dense(hp_units, activation='relu')(flatten1)
    # NOTE(review): generate_data in this file also emits [0, 0] and [1, 1]
    # labels, which are not one-hot; softmax + categorical_crossentropy
    # assumes one-hot targets. Confirm whether sigmoid + binary_crossentropy
    # is what is actually intended.
    outputs = Dense(n_outputs, activation='softmax')(dense1)

    # The model has a single input; the previous ``inputs=[input1, input2]``
    # referenced an ``input2`` that is never defined and raised NameError.
    model = Model(inputs=input1, outputs=outputs)

    # The previous ``step=0.2`` was larger than the whole [0.01, 0.1] range,
    # so the minimum was the only value on the grid. Sample on a log scale
    # instead so the full range is actually explored.
    hp_learning_rate = hp.Float("learning_rate",
                                min_value=0.01,
                                max_value=0.1,
                                sampling="log")
    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(
                      learning_rate=hp_learning_rate),
                  metrics=['accuracy'])
    return model
Here is the training script:
if __name__ == '__main__':
    # Random training / validation batches: 350 and 80 windows per batch,
    # 60 timesteps per window.
    train_features, train_labels = generate_data(350, 60)
    val_features, val_labels = generate_data(80, 60)

    # Stops a trial early based on the validation loss.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.002,
        patience=5,
    )

    hyperband = kt.Hyperband(
        model_builder,
        objective="val_accuracy",
        max_epochs=70,
        factor=3,
        directory="Results",
        project_name="cnn_tunning",
    )

    # NOTE(review): N_WINDOWS is not defined in the visible part of this
    # file; presumably a module-level constant. Verify it exists.
    search_options = dict(
        steps_per_epoch=N_WINDOWS,
        validation_data=custom_generator(val_features, val_labels),
        validation_steps=75,
        callbacks=[early_stopping],
    )
    hyperband.search(
        custom_generator(train_features, train_labels),
        **search_options,
    )
What I have found is that once Hyperband reaches trials that run for a decent number of epochs, so that the early-stopping callback I set up should come into play, I get this error:
W tensorflow/core/framework/op_kernel.cc:1733] INVALID_ARGUMENT: ValueError: Could not find callback with key=pyfunc_530 in the registry.
Traceback (most recent call last):
File "/home/diogomota/.cache/pypoetry/virtualenvs/WUAle-Z1-py3.7/lib/python3.7/site-packages/tensorflow/python/ops/script_ops.py", line 259, in __call__
raise ValueError(f"Could not find callback with key={token} in the "
ValueError: Could not find callback with key=pyfunc_530 in the registry.
W tensorflow/core/kernels/data/generator_dataset_op.cc:107] Error occurred when finalizing GeneratorDataset iterator: INVALID_ARGUMENT: ValueError: Could not find callback with key=pyfunc_530 in the registry.
Traceback (most recent call last):
File "/home/diogomota/.cache/pypoetry/virtualenvs/WUAle-Z1-py3.7/lib/python3.7/site-packages/tensorflow/python/ops/script_ops.py", line 259, in __call__
raise ValueError(f"Could not find callback with key={token} in the "
ValueError: Could not find callback with key=pyfunc_530 in the registry.
However, it just proceeds to the next trial, so I'm not sure what is going on. Can someone explain why it can't find the callback?
I'm using tensorflow 2.8
and keras-tuner 1.1.2
I could only find one place online with a similar issue, but no solution provided: https://issuemode.com/issues/tensorflow/tensorflow/72982126
EDIT:
- Provided full error message
- After further debugging, the problem comes solely from using a generator as input for `.search()`. I do not know why this is an issue; regular training using `.fit()` works without any issues.
- Added dataset generation code for reproducibility