I recently trained a convolutional neural network, and I would like to load the trained weights and use them on an external dataset, but I am having an issue.

I will post my code for training and testing, but in summary: my weights and biases are being loaded in exactly as they were originally defined, not as trained. I defined my weights as random tensors and all biases as a small constant (0.01 in the code below) to ensure no nodes would start off dead. When I load the network, I observe that ALL biases are still at that initial constant and, more troublingly, if I run my prediction twice in a row on the same data, I get two different results, suggesting that I am somehow re-randomizing the weights during testing!

Anyway, here's the training code; my network and weights/biases are defined like this:

def conv3dWrap(x, W, b, strides=1, activation='yes'):
    x = tf.nn.conv3d(x, W, strides=[1, strides, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    if activation == 'no' or activation == 0:
        return x
    return tf.nn.relu(x)

def conv_net(x, weights, biases, dropout):

    # Sixteen stacked conv layers followed by a linear output layer
    conv1 = conv3dWrap(x,weights['wConv1'],biases['bConv1'])
    conv2 = conv3dWrap(conv1, weights['wConv2'], biases['bConv2']) 
    conv3 = conv3dWrap(conv2, weights['wConv3'], biases['bConv3']) 
    conv4 = conv3dWrap(conv3, weights['wConv4'], biases['bConv4']) 
    conv5 = conv3dWrap(conv4, weights['wConv5'], biases['bConv5']) 
    conv6 = conv3dWrap(conv5, weights['wConv6'], biases['bConv6']) 
    conv7 = conv3dWrap(conv6, weights['wConv7'], biases['bConv7']) 
    conv8 = conv3dWrap(conv7, weights['wConv8'], biases['bConv8']) 
    conv9 = conv3dWrap(conv8, weights['wConv9'], biases['bConv9']) 
    conv10 = conv3dWrap(conv9, weights['wConv10'], biases['bConv10']) 
    conv11 = conv3dWrap(conv10, weights['wConv11'], biases['bConv11']) 
    conv12 = conv3dWrap(conv11, weights['wConv12'], biases['bConv12']) 
    conv13 = conv3dWrap(conv12, weights['wConv13'], biases['bConv13']) 
    conv14 = conv3dWrap(conv13, weights['wConv14'], biases['bConv14']) 
    conv15 = conv3dWrap(conv14, weights['wConv15'], biases['bConv15']) 
    conv16 = conv3dWrap(conv15, weights['wConv16'], biases['bConv16']) 
    convOUT = conv3dWrap(conv16, weights['wConvOUT'], biases['bConvOUT'], 1, 0)  # strides=1, no activation

    return convOUT

# Store layers weight & bias
weights = {
#'wConv1': tf.Variable(tf.random_normal([3, 3, 3, 1, 16],0,0.1), name='wC1'),
'wConv1': tf.Variable(tf.ones([3, 3, 3, 1, 16]), name='wC1'),
# 3x3x3 conv, 16 inputs, 16 outputs
'wConv2': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC2'),
'wConv3': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC3'),
'wConv4': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC4'),
'wConv5': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC5'),
'wConv6': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC6'),
'wConv7': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC7'),
'wConv8': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC8'),
'wConv9': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.1), name='wC9'),
'wConv10': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.01), name='wC10'),
'wConv11': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.01), name='wC11'),
'wConv12': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.01), name='wC12'),
'wConv13': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.01), name='wC13'),
'wConv14': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.01), name='wC14'),
'wConv15': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.01), name='wC15'),
'wConv16': tf.Variable(tf.random_normal([3, 3, 3, 16, 16],0,0.01), name='wC16'),
'wConvOUT': tf.Variable(tf.random_normal([3, 3, 3, 16, 1],0,0.01), name='wCOUT')
}

biases = {
'bConv1': tf.Variable(16*[0.01],name='bC1'),
'bConv2': tf.Variable(16*[0.01],name='bC2'),
'bConv3': tf.Variable(16*[0.01],name='bC3'),
'bConv4': tf.Variable(16*[0.01],name='bC4'),
'bConv5': tf.Variable(16*[0.01],name='bC5'),
'bConv6': tf.Variable(16*[0.01],name='bC6'),
'bConv7': tf.Variable(16*[0.01],name='bC7'),
'bConv8': tf.Variable(16*[0.01],name='bC8'),
'bConv9': tf.Variable(16*[0.01],name='bC9'),
'bConv10': tf.Variable(16*[0.01],name='bC10'),
'bConv11': tf.Variable(16*[0.01],name='bC11'),
'bConv12': tf.Variable(16*[0.01],name='bC12'),
'bConv13': tf.Variable(16*[0.01],name='bC13'),
'bConv14': tf.Variable(16*[0.01],name='bC14'),
'bConv15': tf.Variable(16*[0.01],name='bC15'),
'bConv16': tf.Variable(16*[0.01],name='bC16'),
'bConvOUT': tf.Variable([0.01],name='bCOUT')
} 

And then I train the network by feeding in patches of some reference data like this:

def train_cnn(x):
    epochLosses=[]
    print('Beginning Training!')
    pred = conv_net(x, weights, biases, KEEP_PROB)        
    cost = tf.reduce_mean(tf.squared_difference(pred, y))
    saver = tf.train.Saver()
    optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    train_op=optimizer.minimize(cost)
    valLosses=[]
    with tf.Session() as sess:
        print('Beginning Session!')
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('./graphs', sess.graph)
        for epoch in range(NUM_EPOCHS):
            t0=time.time()
            print('Epoch #:', epoch)
            epoch_loss=0
            for pid in pidList: #get a patient's data
                os.chdir(mriDir+pid)       
                file=glob.glob('cr*')
                mri = nib.load(file[0])
                mri = mri.get_data()
                mri = mri-mri.mean()
                #mri = mri[:63,:63,:]
                mri = np.expand_dims(mri, 0)
                mri = np.expand_dims(mri, -1)
                os.chdir(txDir+pid)
                file=glob.glob('input.n*')
                tx = nib.load(file[0])
                tx = tx.get_data()
                #tx = tx[:63,:63,:]
                tx = np.expand_dims(tx, 0)
                tx = np.expand_dims(tx, -1) 
                tx = np.transpose(tx,(0,3,1,2,4))
                mri = np.transpose(mri,(0,3,1,2,4))
                os.chdir(launch)
                for zSlice in range(1, mri.shape[2] - 1):
                    for ud in range(2):
                        for lr in range(2): 
                            inPatch=mri[:,zSlice-1:zSlice+1,(ud*63):((ud+1)*63),(lr*63):((lr+1)*63),:]   
                            testPatch=tx[:,zSlice-1:zSlice+1,(ud*63):((ud+1)*63),(lr*63):((lr+1)*63),:]
                            _, c = sess.run([train_op, cost], feed_dict = {x: inPatch,y: testPatch})
                            epoch_loss += c
                save_path = saver.save(sess, "./SIMPLE_OUT/simple_test_cnn.ckpt")  
            mean_epoch_loss=epoch_loss/len(pidList) 
            print('Epoch', epoch, 'completed out of',NUM_EPOCHS,'loss:',epoch_loss) 
            print('Mean loss for epoch:', mean_epoch_loss)               
            epochLosses.append(epoch_loss)
            #Validation
            print('Running validation for', epoch, 'will cut training if mean MSE <', VALIDATION_CUTOFF)
            valLoss=0
            t1=time.time()
            t=t1-t0
            print('EPOCH #:', epoch, 'is complete')
            print('It took this much time:', t)

The function goes on to do validation and some other bookkeeping, but this is the important part for this question.

Then I try to test the network like this:

def conv3dWrap(x, W, b, strides=1, activation='yes'):
    x = tf.nn.conv3d(x, W, strides=[1, strides, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    if activation == 'no' or activation == 0:
        return x
    return tf.nn.relu(x)  # same activation as in training

def conv_net(x, weights, biases, dropout):

    #First Conv Layer
    conv1 = conv3dWrap(x,weights['wConv1'],biases['bConv1'])
    conv2 = conv3dWrap(conv1, weights['wConv2'], biases['bConv2']) 
    conv3 = conv3dWrap(conv2, weights['wConv3'], biases['bConv3']) 
    conv4 = conv3dWrap(conv3, weights['wConv4'], biases['bConv4']) 
    conv5 = conv3dWrap(conv4, weights['wConv5'], biases['bConv5']) 
    conv6 = conv3dWrap(conv5, weights['wConv6'], biases['bConv6']) 
    conv7 = conv3dWrap(conv6, weights['wConv7'], biases['bConv7'])
    conv8 = conv3dWrap(conv7, weights['wConv8'], biases['bConv8'])
    conv9 = conv3dWrap(conv8, weights['wConv9'], biases['bConv9'])
    conv10 = conv3dWrap(conv9, weights['wConv10'], biases['bConv10'])
    conv11 = conv3dWrap(conv10, weights['wConv11'], biases['bConv11'])
    conv12 = conv3dWrap(conv11, weights['wConv12'], biases['bConv12'])
    conv13 = conv3dWrap(conv12, weights['wConv13'], biases['bConv13'])
    conv14 = conv3dWrap(conv13, weights['wConv14'], biases['bConv14'])
    conv15 = conv3dWrap(conv14, weights['wConv15'], biases['bConv15'])
    conv16 = conv3dWrap(conv15, weights['wConv16'], biases['bConv16'])
    convOUT = conv3dWrap(conv16, weights['wConvOUT'], biases['bConvOUT'], 1, 0)  # strides=1, no activation

    return convOUT

sess = tf.Session()
x = tf.placeholder(dtype=tf.float32)
new_saver = tf.train.import_meta_graph('./SIMPLE_OUT/simple_test_cnn.ckpt.meta')
sess.run(tf.global_variables_initializer())
#pred = tf.get_default_graph().get_tensor_by_name("pred:0")
weights = {
'wConv1':  sess.run('wC1:0'),
'wConv2':  sess.run('wC2:0'),
'wConv3':  sess.run('wC3:0'),
'wConv4':  sess.run('wC4:0'),
'wConv5':  sess.run('wC5:0'),
'wConv6':  sess.run('wC6:0'),
'wConv7':  sess.run('wC7:0'),
'wConv8':  sess.run('wC8:0'),
'wConv9':  sess.run('wC9:0'),
'wConv10':  sess.run('wC10:0'),
'wConv11':  sess.run('wC11:0'),
'wConv12':  sess.run('wC12:0'),
'wConv13':  sess.run('wC13:0'),
'wConv14':  sess.run('wC14:0'),
'wConv15':  sess.run('wC15:0'),
'wConv16':  sess.run('wC16:0'),
'wConvOUT': sess.run('wCOUT:0')
}
biases = {
'bConv1': sess.run('bC1:0'),
'bConv2': sess.run('bC2:0'),
'bConv3': sess.run('bC3:0'),
'bConv4': sess.run('bC4:0'),
'bConv5': sess.run('bC5:0'),
'bConv6': sess.run('bC6:0'),
'bConv7': sess.run('bC7:0'),
'bConv8': sess.run('bC8:0'),
'bConv9': sess.run('bC9:0'),
'bConv10': sess.run('bC10:0'),
'bConv11': sess.run('bC11:0'),
'bConv12': sess.run('bC12:0'),
'bConv13': sess.run('bC13:0'),
'bConv14': sess.run('bC14:0'),
'bConv15': sess.run('bC15:0'),
'bConv16': sess.run('bC16:0'),
'bConvOUT': sess.run('bCOUT:0')
}

t0=time.time()
pred = conv_net(x, weights, biases, 0)
print("Loaded Weights")
b1=sess.run(pred,feed_dict={x: mri})
b=b1[0,:,:,:,0]
a=b[80,:,:]
t=time.time()-t0

And I run into the previously mentioned issues: the biases are ALL still at their initial constant, and running this code twice in a row yields two different results! I am not sure how or why I am messing this up, but I've spent a long time trying to figure it out and I think it's time to ask for help.
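
For the record, the in-session values can be compared directly against what the checkpoint actually stores on disk. This is a minimal diagnostic sketch using TF's checkpoint reader (it was not part of my original script; `sess` is the test session from the code above):

import numpy as np
import tensorflow as tf

# Compare a variable's current in-session value with the trained value that
# the checkpoint stores on disk. These should match if the trained values
# were actually loaded into the session.
reader = tf.train.NewCheckpointReader('./SIMPLE_OUT/simple_test_cnn.ckpt')
ckpt_w = reader.get_tensor('wC2')   # trained weights as saved on disk
live_w = sess.run('wC2:0')          # weights currently held by the session
print(np.allclose(ckpt_w, live_w))  # prints False when the session holds fresh values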

Edit: Since this would be my first guess: I'd like to confirm that the weights and biases do change during training. This is confirmed by the fact that the cost changes and, more specifically, that the L2 norm of each weight and bias changes on every backpropagation step (I am not using regularization on my biases; I computed the norms only to confirm they were changing).
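
The check was along these lines (a minimal sketch; the ops should be built once, outside the training loop, to avoid growing the graph):

import tensorflow as tf

# Build one L2-norm op per weight and bias, once, before training starts.
norm_ops = {name: tf.nn.l2_loss(var)  # 0.5 * sum(var ** 2)
            for name, var in list(weights.items()) + list(biases.items())}

# Inside the training loop, e.g. once per epoch:
#     norms = sess.run(norm_ops)
#     print(norms['wConv1'], norms['bConv1'])  # values drift as training updates them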

Comments:
  • I think that when you run global_variables_initializer, you reset all the variables. You should use saver.restore instead: https://www.tensorflow.org/api_docs/python/tf/train/Saver – Conchylicultor Nov 07 '17 at 06:13
  • Conchylicultor is absolutely right. See this question: https://stackoverflow.com/q/33759623/712995 – Maxim Nov 07 '17 at 09:00
  • Yes, this is exactly what was happening. Thank you! – Karl Nov 07 '17 at 15:35
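
Per the comments, the fix is to restore the saved variables rather than re-initialize them: tf.global_variables_initializer() overwrites every variable with freshly drawn initial values, which is why the biases reverted to their initial constant and the predictions changed between runs. A minimal sketch of the restore flow, assuming the checkpoint path used above:

import tensorflow as tf

sess = tf.Session()
# Rebuild the graph from the checkpoint's meta file...
new_saver = tf.train.import_meta_graph('./SIMPLE_OUT/simple_test_cnn.ckpt.meta')
# ...then load the trained values into it. Do NOT call
# tf.global_variables_initializer() here: it would overwrite every
# variable with new random initial values.
new_saver.restore(sess, './SIMPLE_OUT/simple_test_cnn.ckpt')

print(sess.run('bC1:0'))  # now prints the trained biases, not the initial constant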
