
I have extracted CNN features of size 4096 from a pretrained VGG19. I am then using a shallower architecture to train a classifier with a combined softmax and center loss. Unfortunately, the softmax loss function returns nan. There is a detailed discussion available here, however I am not able to resolve the problem with clipping because the labels and logits are in two different data formats (int64, float32). Furthermore, I also changed the learning rate but still got the same error.

Can someone please let me know how to resolve this situation?
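For reference, the clipping suggested in that discussion would only have to touch the float32 logits; the int64 labels can stay as they are, so the dtype mismatch should not block it. A minimal sketch, assuming the logits tensor is the `feat` that `softmax_loss` below receives (the clip bounds and the name `clipped_softmax_loss` are illustrative only):

def clipped_softmax_loss(feat, im_labels):
    # Labels stay int64 and untouched; only the float32 logits are clipped.
    label = tf.reshape(im_labels, [-1])
    logits = tf.clip_by_value(feat, -10.0, 10.0)
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits))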

from __future__ import division
from __future__ import print_function

import csv

import numpy as np
import tensorflow as tf

from retrieval_model import setup_train_model

FLAGS = None
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def get_name(read_file):
    feat_lst = []
    identifier_lst = []
    with open(read_file, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            feat = row[:-1]
            s_feat = [float(i) for i in feat]
            identifier = row[-1]
            feat_lst.append(s_feat)
            identifier_lst.append(identifier)
    return feat_lst, identifier_lst


def get_batch(batch_index, batch_size, labels, f_lst):
    start_ind = batch_index * batch_size
    end_ind = start_ind + batch_size

    return f_lst[start_ind:end_ind], labels[start_ind:end_ind]


def creat_dict(orig_labels):
    # Map each distinct label to a consecutive integer id.
    label_map = {}
    count = 0
    for x in orig_labels:
        n_label = label_map.get(x, None)
        if n_label is None:
            label_map[x] = count
            count += 1
    return label_map




def main(_):

    save_dir = 'model/one-branch-ckpt'
    train_file = 'gtrain.csv'
    img_feat, img_labels = get_name(train_file)

    map_dict = creat_dict(img_labels)
    img_labels = [map_dict.get(x) for x in img_labels]
    num_classes = len(map_dict)


    im_feat_dim = 4096
    batch_size = 50
    max_num_epoch = 10
    steps_per_epoch = len(img_feat) // batch_size
    num_steps = steps_per_epoch * max_num_epoch

    # Setup placeholders for input variables.
    im_feat_plh = tf.placeholder(tf.float32, shape=[batch_size, im_feat_dim])
    label_plh = tf.placeholder(tf.int64, shape=[batch_size], name='labels')

    train_phase_plh = tf.placeholder(tf.bool)

    # Setup training operation.
    t_l = setup_train_model(im_feat_plh, train_phase_plh, label_plh, num_classes)

    # Setup optimizer.
    global_step = tf.Variable(0, trainable=False)
    init_learning_rate = 0.0001
    learning_rate = tf.train.exponential_decay(init_learning_rate, global_step,
                                               steps_per_epoch, 0.794, staircase=True)
    optim = tf.train.AdamOptimizer(learning_rate)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optim.minimize(t_l, global_step=global_step)

    # Setup model saver.
    saver = tf.train.Saver(save_relative_paths=True, max_to_keep=1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for i in range(num_steps):
            im_feats, labels = get_batch(
                i % steps_per_epoch, batch_size, img_labels, img_feat)
            feed_dict = {
                im_feat_plh: im_feats,
                label_plh: labels,
                train_phase_plh: True,
            }
            [_, loss_val] = sess.run([train_step, t_l], feed_dict=feed_dict)
            if i % 100 == 0:
                print('Epoch: %d Step: %d Loss: %f' % (i // steps_per_epoch, i, loss_val))
            if i % steps_per_epoch == 0 and i > 0:
                print('Saving checkpoint at step %d' % i)
                saver.save(sess, save_dir, global_step=global_step)


if __name__ == '__main__':
    np.random.seed(0)
    tf.set_random_seed(0)
    tf.app.run(main=main)

**************************retrieval_model********************************

# Assuming TF 1.x; fully_connected below comes from tf.contrib.layers.
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected


def setup_train_model(im_feats, train_phase, im_labels, nrof_classes):
    alfa = 0.9
    # nrof_classes = 28783
    i_embed = embedding_model(im_feats, train_phase, im_labels)
    c_l   = embedding_loss(i_embed, im_labels, alfa, nrof_classes)
    loss = softmax_loss(i_embed, im_labels)
    total_loss = loss + c_l
    return total_loss

def add_fc(inputs, outdim, train_phase, scope_in):
    fc = fully_connected(inputs, outdim, activation_fn=None, scope=scope_in + '/fc')
    fc_bnorm = tf.layers.batch_normalization(fc, momentum=0.1, epsilon=1e-5,
                                             training=train_phase, name=scope_in + '/bnorm')
    fc_relu = tf.nn.relu(fc_bnorm, name=scope_in + '/relu')
    fc_out = tf.layers.dropout(fc_relu, seed=0, training=train_phase, name=scope_in + '/dropout')
    return fc_out

def embedding_loss(features, label, alfa, nrof_classes):
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    #centers = tf.scatter_sub(centers, label, diff)
    center_loss = tf.reduce_mean(tf.square(features - centers_batch))
    #softmax_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=features))
    #total_loss = softmax_loss + center_loss

    return  center_loss

def embedding_model(im_feats, train_phase, im_labels,
                    fc_dim=2048, embed_dim=512):

    # Image branch.
    im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    im_fc2 = fully_connected(im_fc1, embed_dim, activation_fn=None,
                             scope='im_embed_2')

    return tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)


def softmax_loss(feat, im_labels):
    label = tf.reshape(im_labels, [-1])
    softmax = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=feat))

    return softmax
  • You say: "I am not able to resolve the problem with clipping because labels and logits are in two different data formats (int64, float32)." Would converting one of the two so that their formats match solve the issue? – CAPSLOCK Mar 18 '19 at 12:17
  • It seems illogical to convert labels from int to float values, doesn't it? @Gio – cswah Mar 18 '19 at 12:28
  • Actually, `logits and labels must have the same dtype (either float16, float32, or float64)` as per the official docs. – Sharky Mar 18 '19 at 13:07
  • Well, I would personally convert it to float if that solved the problem straight away. I don't think the memory cost would be so detrimental as to outweigh the time saved by not having to come up with another solution. @cswah – CAPSLOCK Mar 18 '19 at 15:13
  • I found that I need to add a fully connected layer to the network to resolve the `nan` (sketched below). – cswah Mar 20 '19 at 10:11
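Following up on that last comment: one possible reason the loss goes to nan is that `softmax_loss` treats the 512-dim l2-normalized embedding itself as logits, so any label index >= 512 falls outside the logits range. A hedged sketch of the extra fully connected layer the comment refers to, producing one logit per class (the function name and scope are illustrative, not from the original code):

def classification_logits(i_embed, nrof_classes):
    # Map the embedding to one unnormalized logit per class.
    return fully_connected(i_embed, nrof_classes, activation_fn=None,
                           scope='class_logits')

In `setup_train_model`, the softmax loss would then consume these logits instead of the raw embedding:

    logits = classification_logits(i_embed, nrof_classes)
    loss = softmax_loss(logits, im_labels)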

0 Answers