After training completes successfully, I cannot save this model with the line keras.models.save_model(factoring, "basic_test_case"). Why is this? I added shape arguments to the constructor to keep the model from being ill-defined, but that does not satisfy the Keras API.
Per this answer here, the model may be missing an input layer. I tried adding the line factoring.add(keras.layers.Flatten(input_shape=(5, 100))), but it threw AttributeError: 'SumNet' object has no attribute 'add'.
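If I understand correctly, .add is a method of keras.Sequential only, not of subclassed keras.Model instances, which would explain the AttributeError; a minimal comparison (the Sequential model here is only for illustration, not part of my code):

seq = keras.Sequential()
seq.add(keras.layers.Flatten(input_shape=(5, 100)))  # Sequential exposes .add
sub = SumNet(units=1, input_shape=(5, 100))
# sub.add(...)  # AttributeError: 'SumNet' object has no attribute 'add'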
So how can I add a well-defined input layer so that the model saves? Note that I am restricted to the standalone Keras API for backend reasons (PlaidML).
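One direction I have considered, though I am not sure it is correct, is to force the model to build itself by pushing a dummy batch through it before saving, so that the input shape becomes defined:

dummy = np.zeros((1, 100, 5), dtype="float32")  # same (batch, Nt, features) layout as X below
_ = factoring.predict(dummy)  # presumably forces Keras to build the graph
keras.models.save_model(factoring, "basic_test_case")

Would that be the right approach, or is there a proper input layer I should declare inside the class?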
My sense is that I am missing an input layer somehow, but I don't know how to add one to the class. As you can see, the submodels g and f do the actual work, but somehow Keras cannot see how they are connected.
import os
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
import numpy as np
import keras
from keras import callbacks
def mlp2(size_in, size_out):
    # simple MLP: three hidden ReLU layers of width 128
    hidden = 128
    inputs = keras.Input(shape=(size_in,))
    x = keras.layers.Dense(hidden, name='layer1', activation='relu')(inputs)
    x = keras.layers.Dense(hidden, name='layer2', activation='relu')(x)
    x = keras.layers.Dense(hidden, name='layer3', activation='relu')(x)
    outputs = keras.layers.Dense(size_out, name='layer4', activation='relu')(x)
    m = keras.Model(inputs, outputs)
    return m
# to make the model well-defined, pass the dimensions to the constructor:
# https://keras.io/guides/making_new_layers_and_models_via_subclassing/
class SumNet(keras.models.Model):
    def __init__(self, units, input_shape):
        super(SumNet, self).__init__()
        # these submodels encode our inductive bias:
        # g is applied per element, f to the mean of g's outputs
        self.g = mlp2(input_shape[0], 1)
        self.f = mlp2(1, units)

    def call(self, x):
        # g maps (batch, Nt, 5) -> (batch, Nt, 1); drop the trailing axis
        y_i = self.g(x)[:, :, 0]
        # average over the Nt axis; the batch dimension is kept as always
        y = keras.backend.sum(y_i, axis=1, keepdims=True) / float(keras.backend.int_shape(y_i)[1])
        # f reduces the result to the final output dimension
        z = self.f(y)
        return z
N = 100000  # number of examples
Nt = 100    # number of elements per example
X = 6 * np.random.rand(N, Nt, 5) - 3
y_i = X[..., 0] ** 2 + 6 * np.cos(2 * X[..., 2])
y = np.sum(y_i, axis=1, keepdims=False) / y_i.shape[1]
z = y ** 2
X.shape, y.shape  # (100000, 100, 5), (100000,)
# np.random.seed(0)
# importantly, the input shape is always the same, regardless of how many examples I have
factoring = SumNet(units=1, input_shape=(5, 100))
# factoring.add(keras.layers.Flatten(input_shape=(5, 100)))  # <- raises the AttributeError above
# TODO: check whether there is an argument for a maximum learning rate; default is 1e-3
# TODO: check how epochs relate to total steps in a PyTorch "scheduler" object
optimizer = keras.optimizers.Adam(lr=1e-3)
factoring.compile(optimizer, loss=keras.losses.mean_squared_error)
# 80/20 train/test split along the first dimension of X
# https://stackoverflow.com/questions/44747343/keras-input-explanation-input-shape-units-batch-size-dim-etc
f_dim = np.arange(len(X))
training_indices = np.random.choice(f_dim, int(.8 * f_dim.shape[0]), replace=False)
# include_idx = set(training_indices)  # a set is more efficient, but doesn't reorder your elements if that is desirable
mask = np.array([(i in training_indices) for i in np.arange(len(X))])
Xtrain = X[mask]
ztrain = z[mask]
Xtest = X[~mask]
ztest = z[~mask]
earlystopping = callbacks.EarlyStopping(monitor="val_loss", mode="min",
                                        patience=5, restore_best_weights=True)
factoring.fit(Xtrain, ztrain, batch_size=64, epochs=10, validation_split=.15, callbacks=[earlystopping])
results = factoring.evaluate(Xtest, ztest, batch_size=64)
print("test loss:", results)
keras.models.save_model(factoring, "basic_test_case")
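In case it matters: I have also read that subclassed models may need a get_config so Keras can re-instantiate them on load. Something like the sketch below is what I would try next, but I do not know whether it addresses the real problem (the attribute name self.in_shape is just my guess, chosen to avoid the built-in input_shape property):

# hypothetical sketch: store the constructor arguments and expose them via get_config
class SumNet(keras.models.Model):
    def __init__(self, units, input_shape, **kwargs):
        super(SumNet, self).__init__(**kwargs)
        self.units = units
        self.in_shape = input_shape  # avoid clashing with the built-in input_shape property
        self.g = mlp2(input_shape[0], 1)
        self.f = mlp2(1, units)

    def call(self, x):
        y_i = self.g(x)[:, :, 0]
        y = keras.backend.sum(y_i, axis=1, keepdims=True) / float(keras.backend.int_shape(y_i)[1])
        return self.f(y)

    def get_config(self):
        return {"units": self.units, "input_shape": self.in_shape}

Is something along these lines required, or is the missing input layer the actual issue?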