I am building a model in Python based on a simple autoencoder example that I found online. The example was written for Keras. With the recommended transition to tensorflow.keras, I modified the program's imports, expecting that no other changes would be needed.
With the keras imports
from keras.layers import Input, Dense
from keras.models import Model
from keras.datasets import mnist
the autoencoder works fine: you can see it converge in the standard output, and the recovered images make sense. When I use the tensorflow imports
from tensorflow.python.keras.layers import Input, Dense
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.datasets import mnist
the training no longer converges and the recovered images just look like noise.
Below is a minimal working example of my problem. Switching between the two import blocks above is enough to reproduce the difference in behavior (a small toggle that does the same swap is sketched after the listing).
import numpy as np
import matplotlib.pyplot as plt
def prepModel(inputShape, outputShape, numNeurons):
    input_image = Input(shape=(inputShape,))
    #encoded representation of input
    encoded = Dense(numNeurons, activation='relu')(input_image)
    #decoded lossy reconstruction
    decoded = Dense(outputShape, activation='sigmoid')(encoded)
    #model mapping an input to its reconstruction
    autoencoder = Model(input_image, decoded)
    encoder = Model(input_image, encoded)
    encoded_input = Input(shape=(numNeurons,)) #placeholder
    decoder_layer = autoencoder.layers[-1] #last layer of model
    decoder = Model(encoded_input, decoder_layer(encoded_input)) #decoder model
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
    return autoencoder, encoder, decoder
def prepData():
    #import / set data
    (x_train, _), (x_test, _) = mnist.load_data()
    x_train = x_train.astype('float32')/255
    x_test = x_test.astype('float32')/255
    x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
    x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
    return x_train, x_test
def runModel(autoencoder, encoder, decoder, x_train, x_test):
    #train the autoencoder
    autoencoder.fit(x_train, x_train,
                    epochs=50,
                    batch_size=256,
                    shuffle=True,
                    validation_data=(x_test, x_test))
    encoded_images = encoder.predict(x_test)
    decoded_images = decoder.predict(encoded_images)
    return encoded_images, decoded_images
def plotComparison(x_test, decoded_images):
    #plot original image
    n = 10
    plt.figure(figsize=(20,4))
    for i in range(n):
        ax = plt.subplot(2, n, i+1)
        plt.imshow(x_test[i].reshape(28,28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        #plot decompressed image
        ax = plt.subplot(2, n, i+1+n)
        plt.imshow(decoded_images[i].reshape(28,28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()
x_train, x_test = prepData()
autoencoder, encoder, decoder = prepModel(784, 784, 16)
encoded_images, decoded_images = runModel(autoencoder, encoder, decoder, x_train, x_test)
plotComparison(x_test, decoded_images)
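In case it helps with reproducing this, swapping between the two cases amounts to the toggle below (the USE_TF_KERAS flag is just a convenience for switching and is not part of the original example):
USE_TF_KERAS = True  #flip to False to run the plain keras version

if USE_TF_KERAS:
    from tensorflow.python.keras.layers import Input, Dense
    from tensorflow.python.keras.models import Model
    from tensorflow.python.keras.datasets import mnist
else:
    from keras.layers import Input, Dense
    from keras.models import Model
    from keras.datasets import mnist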
I'm running Python 3.8.3, Keras 2.3.1, and TensorFlow 2.2.0. I've fooled around with rescaling the input data and other naive tricks, to no avail, and I've verified the behavior on two other computers. What could explain why the performance between the two sets of imports is so different?
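For reference, the version numbers above come from a quick check along these lines (a minimal sketch, independent of the autoencoder code):
import sys
import tensorflow as tf
import keras

print(sys.version)        #3.8.3
print(keras.__version__)  #2.3.1
print(tf.__version__)     #2.2.0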