I've implemented a CNN in TensorFlow following the cs231n class. I want to reproduce the same result on every run, so I've set the seed everywhere it seemed necessary, but my results are still not stable. I don't know whether this is because of the optimizer I'm using. The code is below; any help is appreciated.
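For context, this is the seeding recipe I've been trying to follow (a sketch based on what I've read about TF 1.x determinism; the PYTHONHASHSEED and single-threaded ConfigProto parts are suggestions I found, not something I've verified fixes the problem):

import os
import random

import numpy as np
import tensorflow as tf

# PYTHONHASHSEED supposedly needs to be set before Python starts to
# affect hashing; setting it here is what I've seen suggested.
os.environ['PYTHONHASHSEED'] = '0'
random.seed(0)
np.random.seed(0)
tf.set_random_seed(0)  # graph-level seed for the graph actually in use

# Single-threaded execution is often suggested to remove nondeterminism
# from parallel reductions, at the cost of speed.
config = tf.ConfigProto(intra_op_parallelism_threads=1,
                        inter_op_parallelism_threads=1)
sess = tf.Session(config=config)

And here is the full training code: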
import math
import random

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
def train_network(lr, ksz, kst, plsz, plst, bs):
    ksz = int(ksz)
    kst = int(kst)
    plsz = int(plsz)
    plst = int(plst)
    tf.reset_default_graph()
    # seed every source of randomness I know of
    random.seed(0)
    tf.set_random_seed(0)
    np.random.seed(0)
    with tf.Graph().as_default():
        # the graph-level seed has to be set inside the new graph as well
        tf.set_random_seed(0)
        with tf.Session() as sess:
            X = tf.placeholder(tf.float32, [None, 32, 32, 3])
            y = tf.placeholder(tf.int64, [None])
            is_training = tf.placeholder(tf.bool)
            # Conv-ReLU-BN
            conv1act = tf.contrib.layers.conv2d(
                inputs=X, num_outputs=32, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn1act = tf.contrib.layers.batch_norm(inputs=conv1act, is_training=is_training)
            # Conv-ReLU-BN
            conv2act = tf.contrib.layers.conv2d(
                inputs=bn1act, num_outputs=64, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn2act = tf.contrib.layers.batch_norm(inputs=conv2act, is_training=is_training)
            # Conv-ReLU-BN
            c3 = tf.contrib.layers.conv2d(
                inputs=bn2act, num_outputs=128, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            b3 = tf.contrib.layers.batch_norm(inputs=c3, is_training=is_training)
            # Conv-ReLU-BN
            c4 = tf.contrib.layers.conv2d(
                inputs=b3, num_outputs=256, padding='SAME', kernel_size=ksz, stride=kst,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            b4 = tf.contrib.layers.batch_norm(inputs=c4, is_training=is_training)
            # Maxpool
            maxpool1act = tf.contrib.layers.max_pool2d(inputs=b4, stride=plst, kernel_size=plsz)
            # Flatten
            Rsize = maxpool1act.get_shape().as_list()
            Rsize1 = Rsize[1] * Rsize[2] * Rsize[3]
            flatten1 = tf.reshape(maxpool1act, shape=[-1, Rsize1])
            # FC-ReLU-BN
            fc1 = tf.contrib.layers.fully_connected(
                inputs=flatten1, num_outputs=1024, activation_fn=tf.nn.relu,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            bn3act = tf.contrib.layers.batch_norm(inputs=fc1, is_training=is_training)
            # Output FC
            y_out = tf.contrib.layers.fully_connected(
                inputs=bn3act, num_outputs=10, activation_fn=None,
                weights_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0),
                biases_initializer=tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG", seed=0))
            mean_loss = tf.losses.softmax_cross_entropy(logits=y_out, onehot_labels=tf.one_hot(y, 10))
            # optimizer = tf.train.AdamOptimizer(learning_rate=0.001)  # also tried Adam
            optimizer = tf.train.RMSPropOptimizer(learning_rate=lr)
            # batch normalization in TensorFlow requires this extra dependency
            # so the moving mean/variance get updated during training
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(extra_update_ops):
                train_step = optimizer.minimize(mean_loss)
            with tf.device("/cpu:0"):  # "/cpu:0" or "/gpu:0"
                sess.run(tf.global_variables_initializer())
                print('Training')
                run_model(X, y, mean_loss, is_training, sess, y_out, mean_loss,
                          X_train, y_train, 10, int(bs), 100, train_step, True)
                print('Validation')
                loss, val = run_model(X, y, mean_loss, is_training, sess, y_out, mean_loss,
                                      X_val, y_val, 1, int(bs))
            return 1 - val
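To narrow down whether the run-to-run difference comes from initialization or from training, I've been thinking of adding a check like this right after sess.run(tf.global_variables_initializer()) (a sketch; picking the first variable is arbitrary):

# Sketch: compare initial weights across runs. If these five numbers are
# identical run-to-run but the final accuracy differs, the nondeterminism
# is introduced during training rather than at initialization.
first_var = tf.trainable_variables()[0]
print(first_var.name, sess.run(first_var).flatten()[:5])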
The run_model function:
def run_model(X, y, mean_loss, is_training, session, predict, loss_val, Xd, yd,
              epochs=1, batch_size=64, print_every=100,
              training=None, plot_losses=False):
    random.seed(0)
    np.random.seed(0)
    # have TensorFlow compute accuracy
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # shuffling is disabled while I debug reproducibility
    train_indices = np.arange(Xd.shape[0])
    # np.random.shuffle(train_indices)
    training_now = training is not None
    # set up the variables we want to compute (and optimize);
    # if we have a training op, run it in place of accuracy
    variables = [mean_loss, correct_prediction, accuracy]
    if training_now:
        variables[-1] = training
    iter_cnt = 0
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(int(math.ceil(Xd.shape[0] / batch_size))):
            # generate indices for the batch
            start_idx = (i * batch_size) % Xd.shape[0]
            idx = train_indices[start_idx:start_idx + batch_size]
            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: yd[idx],
                         is_training: training_now}
            # the last batch may be smaller than batch_size
            actual_batch_size = yd[idx].shape[0]
            # have TensorFlow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(variables, feed_dict=feed_dict)
            # aggregate performance stats
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)
            # print every now and then
            if training_now and (iter_cnt % print_every) == 0:
                print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"
                      .format(iter_cnt, loss, float(np.sum(corr)) / actual_batch_size))
            iter_cnt += 1
        total_correct = float(correct) / Xd.shape[0]
        total_loss = np.sum(losses) / Xd.shape[0]
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"
              .format(total_loss, total_correct, e + 1))
        if plot_losses:
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e + 1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
    return total_loss, total_correct
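For completeness, this is roughly how I invoke it (X_train/y_train and X_val/y_val are the CIFAR-10 splits from the assignment, used as globals inside train_network; the hyperparameter values here are just an example):

# Two identical calls: with everything seeded I expected identical
# validation error, but the two numbers differ from run to run.
err1 = train_network(lr=1e-3, ksz=3, kst=1, plsz=2, plst=2, bs=64)
err2 = train_network(lr=1e-3, ksz=3, kst=1, plsz=2, plst=2, bs=64)
print(err1, err2)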