I'm trying to implement a five-layer convolutional neural network in TensorFlow: three convolutional layers followed by two fully connected layers. My current implementation is below.
import pickle
import tensorflow as tf

# FLAGS (img_width, img_height, img_channels, num_classes, momentum) are
# defined elsewhere via tf.app.flags.

def deepnn(x):
    x_image = tf.reshape(x, [-1, FLAGS.img_width, FLAGS.img_height, FLAGS.img_channels])
    img_summary = tf.summary.image('Input_images', x_image)

    with tf.variable_scope('Conv_1'):
        W_conv1 = weight_variable([5, 5, FLAGS.img_channels, 32])
        tf.add_to_collection('decay_weights', W_conv1)
        b_conv1 = bias_variable([32])
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1, 2) + b_conv1)
        h_pool1 = avg_pool_3x3(h_conv1)

    with tf.variable_scope('Conv_2'):
        W_conv2 = weight_variable([5, 5, 32, 32])
        tf.add_to_collection('decay_weights', W_conv2)
        b_conv2 = bias_variable([32])
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, 2) + b_conv2)
        h_pool2 = avg_pool_3x3(h_conv2)

    with tf.variable_scope('Conv_3'):
        W_conv3 = weight_variable([5, 5, 32, 64])
        tf.add_to_collection('decay_weights', W_conv3)
        b_conv3 = bias_variable([64])
        h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3, 2) + b_conv3)
        h_pool3 = max_pool_3x3(h_conv3)

    with tf.variable_scope('FC_1'):
        h_pool3_flat = tf.reshape(h_pool3, [-1, 4 * 4 * 64])
        W_fc1 = weight_variable([4 * 4 * 64, 64])
        tf.add_to_collection('decay_weights', W_fc1)
        b_fc1 = bias_variable([64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)

    with tf.variable_scope('FC_2'):
        W_fc2 = weight_variable([64, FLAGS.num_classes])
        tf.add_to_collection('decay_weights', W_fc2)
        b_fc2 = bias_variable([FLAGS.num_classes])
        y_fc2 = tf.matmul(h_fc1, W_fc2) + b_fc2

    with tf.variable_scope('softmax'):
        y_conv = tf.nn.softmax(y_fc2)

    return y_conv, img_summary
def conv2d(x, W, p):
    # VALID convolution, then zero-pad the output by p on each spatial side
    # (with a 5x5 kernel and p=2 this restores the input's spatial size)
    output = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID', name='convolution')
    return tf.pad(output, tf.constant([[0, 0], [p, p], [p, p], [0, 0]]), "CONSTANT")

def avg_pool_3x3(x):
    # 3x3 average pooling with stride 2, then pad one row/column of zeros
    # on the bottom and right so the output size is even
    output = tf.nn.avg_pool(x, ksize=[1, 3, 3, 1],
                            strides=[1, 2, 2, 1], padding='VALID', name='pooling')
    return tf.pad(output, tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]]), "CONSTANT")

def max_pool_3x3(x):
    # Same as avg_pool_3x3, but with max pooling
    output = tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                            strides=[1, 2, 2, 1], padding='VALID', name='pooling2')
    return tf.pad(output, tf.constant([[0, 0], [0, 1], [0, 1], [0, 0]]), "CONSTANT")

def weight_variable(shape):
    # Weights initialised uniformly in [-0.05, 0.05]
    weight_init = tf.random_uniform(shape, -0.05, 0.05)
    return tf.Variable(weight_init, name='weights')

def bias_variable(shape):
    bias_init = tf.random_uniform(shape, -0.05, 0.05)
    return tf.Variable(bias_init, name='biases')
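For reference, here is how the spatial dimensions work out, assuming 32x32 inputs (which is what the 4*4*64 reshape in FC_1 implies):

# Shape trace per layer, assuming FLAGS.img_width = FLAGS.img_height = 32:
# Conv_1: 5x5 VALID conv 32 -> 28, pad 2 each side -> 32; pool (32-3)//2+1 = 15, pad -> 16
# Conv_2: 5x5 VALID conv 16 -> 12, pad 2 each side -> 16; pool (16-3)//2+1 = 7,  pad -> 8
# Conv_3: 5x5 VALID conv 8  -> 4,  pad 2 each side -> 8;  pool (8-3)//2+1  = 3,  pad -> 4
# Flatten: 4 * 4 * 64 = 1024, matching the reshape in FC_1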
def main(_):
    tf.reset_default_graph()

    dataset = pickle.load(open('dataset.pkl', 'rb'), encoding='latin1')
    train_dataset = dataset[0]
    learning_rate = 0.01
    current_validation_acc = 1

    with tf.variable_scope('inputs'):
        x = tf.placeholder(tf.float32, [None, FLAGS.img_width * FLAGS.img_height * FLAGS.img_channels])
        y_ = tf.placeholder(tf.float32, [None, FLAGS.num_classes])

    y_conv, img_summary = deepnn(x)

    with tf.variable_scope('softmax_loss'):
        # Manual cross-entropy: -log of the predicted probability of the true class
        softmax_loss = tf.reduce_mean(tf.negative(tf.log(tf.reduce_sum(tf.multiply(y_conv, y_), 1))))
        tf.add_to_collection('losses', softmax_loss)

    loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    train_step = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

    loss_summary = tf.summary.scalar('Loss', loss)
    acc_summary = tf.summary.scalar('Accuracy', accuracy)
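The rest of main is the usual session boilerplate; a condensed, illustrative sketch of my training loop (get_batch, num_steps, and batch_size are placeholders, not my real names):

# Condensed sketch of the training loop (helper names are placeholders)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(num_steps):
        batch_xs, batch_ys = get_batch(train_dataset, batch_size)  # placeholder helper
        _, loss_val = sess.run([train_step, loss], feed_dict={x: batch_xs, y_: batch_ys})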
For some reason, the model's accuracy never improves above 10%. I've been banging my head against the wall trying to figure out why. I'm using a softmax loss function (as described here) and a momentum optimiser, and I'm training on the GTSRB (German Traffic Sign Recognition Benchmark) dataset.
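To clarify what the manual loss in softmax_loss is meant to compute: since y_ is one-hot, tf.reduce_sum(tf.multiply(y_conv, y_), 1) picks out the predicted probability of the true class, so the loss is the mean of -log(p_true). As far as I understand, it should be equivalent to the built-in op:

# What my manual loss is intended to be equivalent to. Note the built-in op
# takes the pre-softmax activations, so y_fc2 would need to be returned from deepnn.
softmax_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_fc2))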
While I could add various deep-learning refinements (adaptive learning rates, etc.) to improve the accuracy, I'm suspicious as to why even the basic CNN model is performing so poorly.
Is there anything obvious that could explain why it's not learning as expected? Alternatively, is there anything I could try to help diagnose the problem?
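For example, would something like the following be a sensible first check? (A sketch; I haven't settled on a diagnostic approach yet.)

import numpy as np

# Sanity check on one batch: is the loss finite, and do the outputs contain NaNs?
loss_val, out_val = sess.run([loss, y_conv], feed_dict={x: batch_xs, y_: batch_ys})
print('loss:', loss_val, '| finite:', np.isfinite(loss_val),
      '| NaNs in outputs:', np.isnan(out_val).any())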
Any help would be much appreciated!