I've implemented a CNN in TensorFlow following the cs231n class. I want to reproduce the same result on every run, so I've set the seed everywhere it seemed necessary, but my results are still not stable. I don't know whether this is because of the optimizer I'm using. The code is below; any help is appreciated.
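For context, this is the seeding recipe I've been trying to follow (a sketch based on what I've read about TF 1.x determinism; the PYTHONHASHSEED and single-threaded ConfigProto parts are suggestions I found, not something I've verified fixes the problem):

import os
import random

import numpy as np
import tensorflow as tf

# PYTHONHASHSEED supposedly needs to be set before Python starts to
# affect hashing; setting it here is what I've seen suggested.
os.environ['PYTHONHASHSEED'] = '0'
random.seed(0)
np.random.seed(0)
tf.set_random_seed(0)  # graph-level seed for the graph actually in use

# Single-threaded execution is often suggested to remove nondeterminism
# from parallel reductions, at the cost of speed.
config = tf.ConfigProto(intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1)
sess = tf.Session(config=config)

And here is the full training code: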
import math
import random

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
def train_network(lr, ksz, kst, plsz, plst, bs):
    ksz = int(ksz)
    kst = int(kst)
    plsz = int(plsz)
    plst = int(plst)
    tf.reset_default_graph()
    # seed every source of randomness I know of
    random.seed(0)
    tf.set_random_seed(0)
    np.random.seed(0)
    with tf.Graph().as_default():
        # the graph-level seed has to be set inside the new graph as well
        tf.set_random_seed(0)
        with tf.Session() as sess:
            X = tf.placeholder(tf.float32, [None, 32, 32, 3])
            y = tf.placeholder(tf.int64, [None])
            is_training = tf.placeholder(tf.bool)
            # Conv-ReLU-BN
            conv1act = tf.contrib.layers.conv2d(
                inputs=X, num_outputs=32, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn1act = tf.contrib.layers.batch_norm(inputs=conv1act, is_training=is_training)
            # Conv-ReLU-BN
            conv2act = tf.contrib.layers.conv2d(
                inputs=bn1act, num_outputs=64, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn2act = tf.contrib.layers.batch_norm(inputs=conv2act, is_training=is_training)
            # Conv-ReLU-BN
            c3 = tf.contrib.layers.conv2d(
                inputs=bn2act, num_outputs=128, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            b3 = tf.contrib.layers.batch_norm(inputs=c3, is_training=is_training)
            # Conv-ReLU-BN
            c4 = tf.contrib.layers.conv2d(
                inputs=b3, num_outputs=256, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            b4 = tf.contrib.layers.batch_norm(inputs=c4, is_training=is_training)
            # Maxpool
            maxpool1act = tf.contrib.layers.max_pool2d(inputs=b4, stride=plst, kernel_size=plsz)
            # Flatten
            Rsize = maxpool1act.get_shape().as_list()
            Rsize1 = Rsize[1] * Rsize[2] * Rsize[3]
            flatten1 = tf.reshape(maxpool1act, shape=[-1, Rsize1])
            # FC-ReLU-BN
            fc1 = tf.contrib.layers.fully_connected(
                inputs=flatten1, num_outputs=1024, activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn3act = tf.contrib.layers.batch_norm(inputs=fc1, is_training=is_training)
            # Output FC
            y_out = tf.contrib.layers.fully_connected(
                inputs=bn3act, num_outputs=10, activation_fn=None,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            mean_loss = tf.losses.softmax_cross_entropy(logits=y_out, onehot_labels=tf.one_hot(y, 10))
            # optimizer = tf.train.AdamOptimizer(learning_rate=0.001)  # also tried Adam
            optimizer = tf.train.RMSPropOptimizer(learning_rate=lr)
            # batch normalization in TensorFlow requires this extra dependency
            # so the moving mean/variance get updated during training
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(extra_update_ops):
                train_step = optimizer.minimize(mean_loss)
            with tf.device("/cpu:0"):  # "/cpu:0" or "/gpu:0"
                sess.run(tf.global_variables_initializer())
                print('Training')
                run_model(X, y, mean_loss, is_training, sess, y_out, mean_loss,
                          X_train, y_train, 10, int(bs), 100, train_step, True)
                print('Validation')
                loss, val = run_model(X, y, mean_loss, is_training, sess, y_out, mean_loss,
                                      X_val, y_val, 1, int(bs))
            return 1 - val
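To narrow down whether the run-to-run difference comes from initialization or from training, I've been thinking of adding a check like this right after sess.run(tf.global_variables_initializer()) (a sketch; picking the first variable is arbitrary):

# Sketch: compare initial weights across runs. If these five numbers are
# identical run-to-run but the final accuracy differs, the nondeterminism
# is introduced during training rather than at initialization.
first_var = tf.trainable_variables()[0]
print(first_var.name, sess.run(first_var).flatten()[:5])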
The run_model function:
def run_model(X, y, mean_loss, is_training, session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    random.seed(0)
    np.random.seed(0)
    # have TensorFlow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # shuffling is disabled while I debug reproducibility
    train_indices = np.arange(Xd.shape[0])
    # np.random.shuffle(train_indices)
    training_now = training is not None
    # set up the variables we want to compute (and optimize);
    # if we have a training op, run it in place of accuracy
    variables = [mean_loss, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0] / batch_size))):
            # generate indices for the batch
            start_idx = (i * batch_size) % Xd.shape[0]
            idx = train_indices[start_idx:start_idx + batch_size]
            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: yd[idx],
                         is_training: training_now}
            # the last batch may be smaller than batch_size
            actual_batch_size = yd[idx].shape[0]
            # have TensorFlow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)
            # aggregate performance stats
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)
            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"
                      .format(iter_cnt, loss, float(np.sum(corr)) / actual_batch_size))
            iter_cnt += 1
        total_correct = float(correct) / Xd.shape[0]
        total_loss = np.sum(losses) / Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"
              .format(total_loss, total_correct, e + 1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss, total_correct
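For completeness, this is roughly how I invoke it (X_train/y_train and X_val/y_val are the CIFAR-10 splits from the assignment, used as globals inside train_network; the hyperparameter values here are just an example):

# Two identical calls: with everything seeded I expected identical
# validation error, but the two numbers differ from run to run.
err1 = train_network(lr=1e-3, ksz=3, kst=1, plsz=2, plst=2, bs=64)
err2 = train_network(lr=1e-3, ksz=3, kst=1, plsz=2, plst=2, bs=64)
print(err1, err2)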