I'm trying to compile and fit a model using the EpochModelCheckpoint class from this thread - I want the model to save regularly after each epoch.
But I get the following error which I absolutely don't understand:
Epoch 1/1000
Epoch 1: val_loss improved from inf to -0.86435, saving model to Models/HM0001/01
WARNING:absl:Found untraced functions such as _update_step_xla while saving (showing 1 of 1). These functions will not be directly callable after loading.
Traceback (most recent call last):
File "/home/au/find/Ex.py", line 92, in <module>
model = CompileFitModel (xTrain, yTrain, epochs, batchSize, optimizer, loss, activation1, activation2, verbose)
File "/home/au/find/Ex.py", line 71, in CompileFitModel
model.fit(xTrain, yTrain, epochs=epochs, verbose=verbose, batch_size=batchSize,validation_data=(xTrain, yTrain),
File "/home/au/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/home/au/find/Ex.py", line 30, in on_epoch_end
self._save_model(epoch=epoch, batch=None, logs=logs)
File "/home/au/.local/lib/python3.10/site-packages/tensorflow/dtensor/python/d_variable.py", line 60, in __init__
original_layout = api.fetch_layout(dvariable)
File "/home/au/.local/lib/python3.10/site-packages/tensorflow/dtensor/python/api.py", line 353, in fetch_layout
return _dtensor_device().fetch_layout(tensor)
File "/home/au/.local/lib/python3.10/site-packages/tensorflow/dtensor/python/dtensor_device.py", line 312, in fetch_layout
raise core._status_to_exception(e) from None # pylint: disable=protected-access
tensorflow.python.framework.errors_impl.InvalidArgumentError: FetchLayout expects a tensor placed on the layout device.
Any idea? Full code follows. Source data - Meh100.npy (936K) https://github.com/velkyvont/velkyvont/blob/main/Meh100.npy
import numpy as np
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential
from keras.layers import AlphaDropout, Dense
from keras import backend as K
from tensorflow.keras.utils import to_categorical
class EpochModelCheckpoint(tf.keras.callbacks.ModelCheckpoint):
def __init__(self,
filepath,
frequency=1,
monitor='val_loss',
verbose=2,
save_best_only=True,
save_weights_only=False,
mode='auto',
loss='MyLoss.hdf5',
options=None,
**kwargs):
super(EpochModelCheckpoint, self).__init__(filepath, monitor, verbose, save_best_only, save_weights_only, mode, "epoch", options)
self.epochs_since_last_save = 0
self.frequency = frequency
def on_epoch_end(self, epoch, logs=None):
self.epochs_since_last_save += 1
# pylint: disable=protected-access
if self.epochs_since_last_save % self.frequency == 0:
self._save_model(epoch=epoch, batch=None, logs=logs)
def on_train_batch_end(self, batch, logs=None):
pass
def GetXYFromDataIncludingOdds(data):
favOdds = data.T[6]
dogOdds = data.T[7]
drawOdds = data.T[8]
odds = np.array(list(zip(favOdds, dogOdds, drawOdds)))
y = data.T[1]
y = np.asarray(y).astype('float32')
y = to_categorical(y, 3)
y = np.hstack([y ,odds])
x = data.T[2:].T
x = tf.keras.utils.normalize(x)
return x, y
def MyLoss(yTrue, yPred):
favWin = yTrue[:,1:2]
dogWin = yTrue[:, 2:3]
draw = yTrue[:, 0:1]
favOdds = yTrue[:, 3:4]
dogOdds = yTrue[:, 4:5]
drawOdds = yTrue[:, 5:6]
gainLossVector = K.concatenate([
draw*drawOdds,
favWin*favOdds,
dogWin*dogOdds,
], axis=1)
return -1 * K.mean(K.sum(gainLossVector * yPred, axis=1))
def CompileFitModel(xTrain, yTrain, epochs, batchSize, optimizer, loss, activation1, activation2, verbose):
model = Sequential()
model.add(AlphaDropout(2000, input_dim=1191))
model.add(Dense(1000, activation=activation1))
model.add(AlphaDropout(500))
model.add(Dense(3, activation=activation2))
model.compile(optimizer=optimizer, loss = loss)
model.fit(xTrain, yTrain, epochs=epochs, verbose=verbose, batch_size=batchSize,validation_data=(xTrain, yTrain),
callbacks=[EarlyStopping(patience=100), EpochModelCheckpoint("Models/HM0001/{epoch:02d}", frequency=1)
])
return model
learningRate = 0.00001
batchSize = 128
loss = MyLoss
activation1 = 'elu'
activation2 = 'softmax'
verbose = 2
epochs = 1000
batchSize = 128
optimizer = tf.keras.dtensor.experimental.optimizers.RMSprop(learning_rate=learningRate,rho=0.9,momentum=0.0, epsilon=1e-07, centered=False, gradients_clip_option=None, ema_option=None, jit_compile=True, name='RMSprop', mesh=None)
numpyFilename = "Meh100.npy"
data = np.load(numpyFilename, allow_pickle=True)
xTrain, yTrain = GetXYFromDataIncludingOdds (data)
model = CompileFitModel (xTrain, yTrain, epochs, batchSize, optimizer, loss, activation1, activation2, verbose)