I've been trying to implement a convolutional autoencoder in TensorFlow, similar to how it was done in Keras in this tutorial.
So far, this is what my code looks like:
import tensorflow as tf
import matplotlib.pyplot as plt

# Assuming MNIST as in the tutorial, scaled to [0, 1] to match the sigmoid output
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

filter1 = tf.Variable(tf.random_normal([3, 3, 1, 16]))
filter2 = tf.Variable(tf.random_normal([3, 3, 16, 8]))
filter3 = tf.Variable(tf.random_normal([3, 3, 8, 8]))

d_filter1 = tf.Variable(tf.random_normal([3, 3, 8, 8]))
d_filter2 = tf.Variable(tf.random_normal([3, 3, 8, 8]))
d_filter3 = tf.Variable(tf.random_normal([3, 3, 8, 16]))
d_filter4 = tf.Variable(tf.random_normal([3, 3, 16, 1]))
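For reference, Keras's Conv2D defaults to Glorot-uniform kernel initialization plus a bias term per filter, while tf.random_normal draws unit-variance weights and I use no biases. A minimal sketch of a Glorot-initialized filter in TF 1.x (not part of my code above; just for comparison) would be:

# Sketch only: one filter initialized the way Keras initializes Conv2D kernels by default
glorot_filter1 = tf.get_variable('filter1', shape=[3, 3, 1, 16],
                                 initializer=tf.glorot_uniform_initializer())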
def encoder(input_img):
    conv1 = tf.nn.relu(tf.nn.conv2d(input_img, filter1, strides=[1, 1, 1, 1], padding='SAME'))  # [-1, 28, 28, 16]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=2, strides=2, padding='SAME')       # [-1, 14, 14, 16]
    conv2 = tf.nn.relu(tf.nn.conv2d(pool1, filter2, strides=[1, 1, 1, 1], padding='SAME'))      # [-1, 14, 14, 8]
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=2, strides=2, padding='SAME')       # [-1, 7, 7, 8]
    conv3 = tf.nn.relu(tf.nn.conv2d(pool2, filter3, strides=[1, 1, 1, 1], padding='SAME'))      # [-1, 7, 7, 8]
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=2, strides=2, padding='SAME')       # [-1, 4, 4, 8]
    return pool3
def decoder(encoded):
    d_conv1 = tf.nn.relu(tf.nn.conv2d(encoded, d_filter1, strides=[1, 1, 1, 1], padding='SAME'))    # [-1, 4, 4, 8]
    d_pool1 = tf.keras.layers.UpSampling2D((2, 2))(d_conv1)                                         # [-1, 8, 8, 8]
    d_conv2 = tf.nn.relu(tf.nn.conv2d(d_pool1, d_filter2, strides=[1, 1, 1, 1], padding='SAME'))    # [-1, 8, 8, 8]
    d_pool2 = tf.keras.layers.UpSampling2D((2, 2))(d_conv2)                                         # [-1, 16, 16, 8]
    d_conv3 = tf.nn.relu(tf.nn.conv2d(d_pool2, d_filter3, strides=[1, 1, 1, 1], padding='VALID'))   # [-1, 14, 14, 16]
    d_pool3 = tf.keras.layers.UpSampling2D((2, 2))(d_conv3)                                         # [-1, 28, 28, 16]
    decoded = tf.nn.sigmoid(tf.nn.conv2d(d_pool3, d_filter4, strides=[1, 1, 1, 1], padding='SAME'))  # [-1, 28, 28, 1]
    return decoded
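To double-check the shape comments above, the static shapes can be printed (a quick sketch; x_check is just a throwaway placeholder):

x_check = tf.placeholder(tf.float32, [None, 28, 28, 1])
print(encoder(x_check).get_shape())           # (?, 4, 4, 8)
print(decoder(encoder(x_check)).get_shape())  # (?, 28, 28, 1)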
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
encoded = encoder(x)
autoencoder = decoder(encoded)
loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true=x, y_pred=autoencoder))
optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.1).minimize(loss)
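One difference worth flagging: the tutorial compiles with optimizer='adadelta', and standalone Keras's Adadelta uses a much larger default learning rate (1.0, if I'm reading the Keras docs right) than tf.train.AdadeltaOptimizer's 0.001, while I pass 0.1 here. A sketch matching the assumed Keras defaults would be:

optimizer = tf.train.AdadeltaOptimizer(learning_rate=1.0, rho=0.95).minimize(loss)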
batch_size = 128
epochs = 50
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    num_batches = int(x_train.shape[0] / batch_size)
    for epoch in range(epochs):
        avg_epoch_loss = 0.0
        for k in range(num_batches):
            batch_x = x_train[k * batch_size:(k + 1) * batch_size]
            feed_dict = {x: batch_x.reshape([-1, 28, 28, 1])}
            _, l = sess.run([optimizer, loss], feed_dict=feed_dict)
            avg_epoch_loss += l
            if k % 100 == 0:
                print('Step {}/{} of epoch {}/{} completed with loss {}'.format(k, num_batches, epoch, epochs, l))
        avg_epoch_loss /= num_batches
        print('Epoch {}/{} completed with average loss {}'.format(epoch, epochs, avg_epoch_loss))
    saver.save(sess=sess, save_path='./model.ckpt')

    # Reconstruct one test image to eyeball the result
    img = sess.run(autoencoder, feed_dict={x: x_test[0].reshape([1, 28, 28, 1])}).reshape(28, 28)
    plt.imshow(img, cmap='gray')
    plt.show()
When I train this, the loss tends to go down at first but then stays around the same (high) value. However, when I replace the encoder and decoder functions with the following, which uses the Keras methods from the link above, the loss decreases at a reasonable rate and converges to a low value.
from keras.layers import Conv2D, MaxPooling2D, UpSampling2D

def encoder(input_img):
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    encoded = MaxPooling2D((2, 2), padding='same')(x)
    return encoded
def decoder(encoded):
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = UpSampling2D((2, 2))(x)
    x = Conv2D(16, (3, 3), activation='relu')(x)  # 'valid' padding here: 16x16 -> 14x14
    x = UpSampling2D((2, 2))(x)
    decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
    return decoded
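For completeness, the linked tutorial wires these layers into a standalone Keras model rather than a TF session; a sketch of that pattern (using the same batch size and epoch count as my loop above) looks like:

from keras.layers import Input
from keras.models import Model

input_img = Input(shape=(28, 28, 1))
autoencoder = Model(input_img, decoder(encoder(input_img)))
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
autoencoder.fit(x_train.reshape(-1, 28, 28, 1), x_train.reshape(-1, 28, 28, 1),
                epochs=50, batch_size=128, shuffle=True)

In my case, though, I simply dropped these two functions into the same placeholder/session training loop shown above.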
I'm trying to figure out what the difference is between these two approaches. I've looked over them several times, and it seems like my version should be doing the exact same thing as the Keras one. Any help in figuring out what's going on would be appreciated!