0

Here is my feature-extracting network.

class vgg16:
    # Siamese feature-extraction network: two weight-sharing VGG16 towers
    # plus a contrastive-style loss.
    # NOTE(review): the indentation in this snippet is mangled (e.g.
    # `self.imgs2` and the first `load_weights` block) and the `.....`
    # lines are code elided by the author -- not runnable as pasted.
    def __init__(self, imgs1,imgs2, weights=None, sess=None):
        # imgs1/imgs2: the two input batches of the pair (placeholders).
        # weights: path to a pretrained .npz file; sess: session used to
        # assign pretrained values into the graph variables.

        self.imgs1 = imgs1
    self.imgs2 = imgs2

        with tf.variable_scope("siamese") as scope:
            # First tower creates the variables under scope "siamese".
            self.o1 = self.convlayers(self.imgs1)

        .....
        # NOTE(review): pretrained weights appear to be loaded BEFORE the
        # second tower exists; `scope.reuse_variables()` and the second
        # convlayers call must run inside the same variable_scope block
        # for the two towers to actually share weights -- verify this in
        # the elided code.
        if weights is not None and sess is not None:
                self.load_weights(weights, sess)
            scope.reuse_variables()
            self.o2 = self.convlayers(self.imgs2)

        .......

        # Pretrained weights loaded (again) once both towers are built.
        if weights is not None and sess is not None:
             self.load_weights(weights, sess)
        #create loss function
        with tf.variable_scope("loss") as scope:

                # NOTE(review): cos_dist's output is passed to loss() as the
                # pair *label* `la`; a label is normally 0/1 while a cosine
                # distance is continuous -- confirm this is intended.
                self.cd=self.cos_dist(self.o1,self.o2,sess)
                self.loss(self.cd,self.o1,self.o2)

    def convlayers(self,_image):
        # Builds the VGG16 convolutional stack for one tower from `_image`
        # and returns its feature tensor (body elided in the original post).
        .....

    def load_weights(self, weight_file, sess):
       # Assigns pretrained values from `weight_file` (a .npz archive, see
       # 'vgg16_weights.npz' in __main__) into the graph variables through
       # `sess` (body elided in the original post).
       ....
    def loss(self,la,i1,j2):
        """Contrastive loss between the two tower embeddings.

        Args:
            la: pair label per example (1 = similar, 0 = dissimilar);
                cast to float32 here.
            i1, j2: embeddings from the two towers -- assumed shape
                (batch, dim) so that axis-1 reduction is per example;
                TODO confirm against convlayers' output.

        Side effects: sets self.left, self.right, self.final_loss, and
        builds a gradient-descent step (gradients in self.l, update op
        in self.o), matching the original attribute interface.
        """
        label = tf.cast(la, tf.float32)
        # L2-normalise each embedding so the pairwise distance is bounded.
        i = tf.nn.l2_normalize(i1, 1, epsilon=1e-2, name='normed_i')
        j = tf.nn.l2_normalize(j2, 1, epsilon=1e-2, name='normed_j')

        # Squared Euclidean distance PER EXAMPLE: reduce over the feature
        # axis only.  The original reduce_sum collapsed the batch axis too,
        # mixing all pairs into one scalar distance.
        d2 = tf.reduce_sum(tf.square(tf.subtract(i, j)), axis=1)

        # BUG FIX (NaN weights): the original did square(abs(sqrt(sum)))
        # -- a redundant round-trip, and tf.sqrt's gradient is infinite at
        # 0, so identical/near-identical pairs drive the gradients to NaN.
        # Use the squared distance directly for the positive term, and a
        # numerically safe sqrt (epsilon inside) for the margin term.
        d = tf.sqrt(d2 + 1e-12)

        # Similar pairs (label=1) are pulled together...
        self.left = label * d2
        # ...dissimilar pairs (label=0) are pushed apart up to the margin T.
        margin = tf.constant(0.675, dtype=tf.float32, name='T')
        self.right = (1.0 - label) * tf.maximum(0.0, margin - d)
        # Mean over the batch gives a well-defined scalar loss for the
        # optimizers (both this one and the one built in __main__).
        self.final_loss = tf.reduce_mean(self.left + self.right)

        # NOTE(review): this in-method optimizer duplicates the one created
        # in the training script (two optimizers on the same loss); kept
        # here so self.l / self.o stay available to existing callers.
        o = tf.train.GradientDescentOptimizer(0.09)
        self.l = o.compute_gradients(self.final_loss)
        self.o = o.apply_gradients(self.l)




    def cos_dist(self,net_1,net_2,sess):
        # Cosine distance between the two tower outputs (body elided in the
        # original post).  NOTE(review): its result is fed to loss() as the
        # pair label `la` -- confirm that is the intended wiring, since a
        # contrastive label is normally binary.
    ......


if __name__ == '__main__':
  # for batch processing

    # NOTE(review): likely causes of the reported NaN / "layers not
    # updating" symptoms, visible right here:
    #  1. No sess.run(tf.global_variables_initializer()) anywhere -- the
    #     optimizer's slot variables (and any variables load_weights does
    #     not cover) are never initialized.
    #  2. Raw uint8 pixels in [0, 255] are fed straight in; VGG16 expects
    #     mean-subtracted (or at least rescaled) float input.  Large raw
    #     activations blowing up through the towers is a classic NaN
    #     source -- and the asker reports the matrices contain only 255.
    #  3. train_step is built but never passed to sess.run, so that
    #     optimizer performs no updates at all; only vgg.o (the 0.09-lr
    #     optimizer built inside loss()) runs.
    sess = tf.InteractiveSession()
    imgs1 = tf.placeholder(tf.float32, [None, 224, 224, 3])
    imgs2 = tf.placeholder(tf.float32, [None, 224, 224, 3])
    vgg = vgg16(imgs1,imgs2, 'vgg16_weights.npz', sess)
    train_step = tf.train.GradientDescentOptimizer(0.001).minimize(vgg.final_loss)



    #try:
    # imread/imresize presumably come from scipy.misc (import not shown).
    img1 = imread('c2.jpeg', mode='RGB')

    img1 = imresize(img1,(224, 224))
    img2 = imread('c7.jpeg', mode='RGB')

    img2 = imresize(img2,(224, 224))

    # NOTE(review): vgg.k is fetched here but never defined anywhere in the
    # posted class -- this would raise AttributeError; confirm where k is
    # set in the elided code.
    M= sess.run([vgg.k,vgg.o,vgg.final_loss],feed_dict={vgg.imgs1: [img1],vgg.imgs2: [img2]})

It is a Siamese-fashioned CNN. The problem is that the later layers do not get updated, and the first ones are updated to NaN. Is there something wrong with sess.run(), or with the way I am calling the attributes? Usually one of the reasons for this is faulty input, so I tried printing the values of img1 and img2 — the matrices contain only the value 255.

user3102085
  • 459
  • 3
  • 8
  • 19
  • If you are updated to `NaN`, then something went wrong. Take a look at http://stackoverflow.com/questions/38810424/how-does-one-debug-nan-values-in-tensorflow for more info. – drpng Feb 18 '17 at 19:49
  • But why are the values in last layers not updated? What could be the general cause for this? – user3102085 Feb 19 '17 at 07:42
  • If the kernel values are printed in the graph and after in the session are different. For example in graph value is -0.012.. and in the session it is -1.24.. Why is that? – user3102085 Feb 19 '17 at 07:58
  • And this is only forward pass. – user3102085 Feb 19 '17 at 07:58
  • One more thing: I printed the value of the images and the values in the matrices are only 255 – user3102085 Feb 19 '17 at 08:30

0 Answers