1

I'm trying to use tf.nn.sparse_softmax_cross_entropy_with_logits, and I have followed the answer by user Olivier Moindrot [here][1], but I'm getting a dimension error.

I'm building a segmentation network, so the input image is 200x200 and the output image is 200x200. The classification is binary, so foreground and background.

After I build the CNN with pred = conv_net(x, weights, biases, keep_prob),

pred looks like this: <tf.Tensor 'Add_1:0' shape=(?, 40000) dtype=float32>

The CNN has a couple of conv layers followed by a fully connected layer. The fully connected layer is 40000 because it is 200x200 flattened.

According to the above link, I reshape pred like so...

(side note: I also tried using tf.pack() to pack two preds together -- like above -- but I thought that was wrong)

pred = tf.reshape(pred, [-1, 200, 200, 2])

...so that there are 2 classifications. Continuing the above link...

temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

I have the following placeholders and batch data...

x = tf.placeholder(tf.float32, [None, 200, 200])
y = tf.placeholder(tf.int64, [None, 200, 200])
(Pdb) batch_x.shape
(10, 200, 200)
(Pdb) batch_y.shape
(10, 200, 200)

When I run a training session, I get the following dimension error:

tensorflow.python.framework.errors.InvalidArgumentError: logits first
dimension must match labels size.  logits shape=[3200000,2] labels 
shape=[400000]

My full code looks like this:

import tensorflow as tf
import pdb
import numpy as np

# Import MNIST data
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)


# Parameters
learning_rate = 0.001    # step size handed to the Adam optimizer
training_iters = 200000  # training stops once step * batch_size reaches this
batch_size = 10          # examples requested from next_batch() per step
display_step = 1         # report loss/accuracy every display_step steps

# Network Parameters
n_input = 200 # side length of the square input image (200x200 pixels)
n_classes = 2 # foreground / background; not referenced by the rest of the script
# Width of the final fully connected layer: n_input * n_input, i.e. ONE value
# per pixel. NOTE(review): the loss code reshapes the output to
# [-1, n_input, n_input, 2], which needs TWO values per pixel (80000).
n_output = 40000
#n_input = 200

dropout = 0.75 # Dropout, probability to keep units

# tf Graph input
# Images and per-pixel integer labels, both [batch, n_input, n_input].
x = tf.placeholder(tf.float32, [None, n_input, n_input])
y = tf.placeholder(tf.int64, [None, n_input, n_input])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)


# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """Apply a SAME-padded 2-D convolution, add the bias, then ReLU.

    `strides` is used for both spatial dimensions; the batch and channel
    strides are fixed at 1.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)

def maxpool2d(x, k=2):
    """Max-pool `x` with a k-by-k window and a matching k-by-k stride.

    SAME padding is used, and the batch and channel dimensions are not pooled.
    """
    window = [1, k, k, 1]
    pooled = tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
    return pooled


# Create model
def conv_net(x, weights, biases, dropout):
    """Build the network graph and return the output of the last dense layer.

    Args:
        x: input batch; reshaped here to [-1, 200, 200, 1].
        weights: dict of variables read under the keys 'wc1', 'wc2', 'wc3',
            'wd1' and 'out'.
        biases: dict of variables read under the keys 'bc1', 'bc2', 'bc3',
            'bd1' and 'out'.
        dropout: keep probability handed to tf.nn.dropout.

    Returns:
        matmul(fc1, weights['out']) + biases['out'], with no softmax or
        sigmoid applied.
    """
    # Reshape input picture
    x = tf.reshape(x, shape=[-1, 200, 200, 1])

    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    # Max Pooling (down-sampling) -- currently disabled
    # conv1 = tf.nn.local_response_normalization(conv1)
    # conv1 = maxpool2d(conv1, k=2)

    # Convolution Layer
    conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling) -- currently disabled
    # conv2 = tf.nn.local_response_normalization(conv2)
    # conv2 = maxpool2d(conv2, k=2)

    # Convolution Layer
    # NOTE(review): conv3 is added to the graph but nothing below reads it;
    # the dense layer is fed from conv2.
    conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # # Max Pooling (down-sampling)
    # conv3 = tf.nn.local_response_normalization(conv3)
    # conv3 = maxpool2d(conv3, k=2)

    # return conv3

    # Fully connected layer
    # Reshape conv2 output to fit fully connected layer input
    # NOTE(review): conv2d uses SAME padding with stride 1 and both pooling
    # calls above are commented out, so conv2 is still 200x200 per example.
    # With this script's weights (64 channels out of 'wc2', 50*50*64 rows in
    # 'wd1') each example holds 200*200*64 values and is therefore spread
    # over 16 rows here, so the leading dimension is 16x the batch size.
    # For a batch of 10 that gives 160 rows of 40000 outputs, which matches
    # the logits shape [3200000, 2] in the reported error.
    fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    # Raw outputs of the last layer, one column per entry of biases['out'].
    return tf.add(tf.matmul(fc1, weights['out']), biases['out'])

    # Unreachable, commented-out earlier attempt kept for reference:
    # Output, class prediction
    # output = []
    # for i in xrange(2):
    #     # output.append(tf.nn.softmax(tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    #     output.append((tf.add(tf.matmul(fc1, weights['out']), biases['out'])))
    #
    # return output

# Store layers weight & bias
# All variables are initialised from tf.random_normal with its default
# parameters.
weights = {
    # 5x5 conv, 1 input channel, 32 output channels
    'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
    # 5x5 conv, 32 input channels, 64 output channels
    'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
    # 5x5 conv, 64 input channels, 128 output channels
    'wc3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
    # fully connected, 50*50*64 inputs, 1024 outputs
    # NOTE(review): 50x50 is what a 200x200 map would be after two 2x2
    # poolings, but the pooling calls in conv_net are commented out.
    'wd1': tf.Variable(tf.random_normal([50*50*64, 1024])),
    # 1024 inputs, n_output outputs (one per pixel)
    'out': tf.Variable(tf.random_normal([1024, n_output]))
}

# One bias vector per layer, sized to that layer's number of outputs.
biases = {
    'bc1': tf.Variable(tf.random_normal([32])),
    'bc2': tf.Variable(tf.random_normal([64])),
    'bc3': tf.Variable(tf.random_normal([128])),
    'bd1': tf.Variable(tf.random_normal([1024])),
    'out': tf.Variable(tf.random_normal([n_output]))
}

# Construct model
# pred has n_output (40000) columns: one value per pixel.
pred = conv_net(x, weights, biases, keep_prob)
pdb.set_trace()  # debugging breakpoint; pauses graph construction
# pred = tf.pack(tf.transpose(pred,[1,2,0]))
# NOTE(review): this view needs 2 * n_input * n_input = 80000 values per
# example, but each row of pred only has 40000, so the inferred leading
# dimension is half the number of rows of pred rather than the batch size.
pred = tf.reshape(pred, [-1, n_input, n_input, 2])
# One row of 2 logits per pixel, and one integer label per pixel.
temp_pred = tf.reshape(pred, [-1,2])
temp_y = tf.reshape(y, [-1])
# Define loss and optimizer
# The number of rows in temp_pred must equal the number of entries in temp_y;
# the reported InvalidArgumentError is raised when they differ.
cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(temp_pred, temp_y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
# correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Drops the trailing axis of size 2, which doubles the leading dimension.
temp_pred2 = tf.reshape(pred, [-1,n_input,n_input])
# NOTE(review): this tests y == temp_pred2 - y elementwise, i.e. whether the
# raw output equals 2*y. That does not look like a classification accuracy;
# presumably an argmax over the class axis was intended -- confirm.
correct_pred = tf.equal(tf.cast(y,tf.float32),tf.sub(temp_pred2,tf.cast(y,tf.float32)))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Write the graph definition to /tmp/logdir/.
    summ = tf.train.SummaryWriter('/tmp/logdir/', sess.graph_def)
    step = 1
    # Dataset loader imported here rather than at the top of the file.
    from tensorflow.contrib.learn.python.learn.datasets.scroll import scroll_data
    data = scroll_data.read_data('/home/kendall/Desktop/')
    # Keep training until reach max iterations
    while step * batch_size < training_iters:
        batch_x, batch_y = data.train.next_batch(batch_size)
        # Run optimization op (backprop)
        # Reshape both arrays to (batch_size, n_input, n_input) to match the
        # x and y placeholders; labels are cast to int64 like y.
        batch_x = batch_x.reshape((batch_size, n_input, n_input))
        batch_y = batch_y.reshape((batch_size, n_input, n_input))
        batch_y = np.int64(batch_y)
        # y = tf.reshape(y, [-1,n_input,n_input])
        pdb.set_trace()  # debugging breakpoint before every training step
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # Calculate batch loss and accuracy
            # (keep_prob is fed as 1. so dropout is disabled for evaluation)
            pdb.set_trace()  # debugging breakpoint before every report
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
            print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc)
        step += 1
    print "Optimization Finished!"

    # Calculate accuracy on the first 256 test images
    # NOTE(review): unlike the training batches, these arrays are fed without
    # being reshaped to (N, n_input, n_input) or cast to int64 -- confirm that
    # read_data already returns them in that form.
    print "Testing Accuracy:", \
        sess.run(accuracy, feed_dict={x: data.test.images[:256],
                                      y: data.test.labels[:256],
                                      keep_prob: 1.})



  [1]: http://stackoverflow.com/questions/35317029/how-to-implement-pixel-wise-classification-for-scene-labeling-in-tensorflow/37294185?noredirect=1#comment63253577_37294185
Kendall Weihe
  • 2,021
  • 4
  • 27
  • 53

2 Answers2

1

Let's forget about softmax and use a simpler tf.nn.sigmoid_cross_entropy_with_logits here:

  • with sigmoid, you only need one prediction per pixel
    • if sigmoid(pred[pixel]) > 0.5 (i.e. the logit pred[pixel] > 0), you predict 1
    • if sigmoid(pred[pixel]) < 0.5 (i.e. the logit pred[pixel] < 0), you predict 0
  • the shape of prediction and target should then be [batch_size, 40000]
pred = conv_net(x, weights, biases, keep_prob)  # shape [batch_size, 40000]
flattened_y = tf.reshape(y, [-1, 40000])  # shape [batch_size, 40000]

loss = tf.nn.sigmoid_cross_entropy_with_logits(pred, flattened_y)
Olivier Moindrot
  • 27,908
  • 11
  • 92
  • 91
0

Using sparse softmax is only going to help if, after the last layer, you want to resize the image back to the original size (200*200). In that case, using reshape as you have would ensure that the code is error free. But in your case you don't have to use sparse softmax. To see why, check the dimensions of "pred".