I'm new to Stack Overflow, so please excuse any typical newbie mistakes.
I want to set up a CNN with a U-Net architecture in Python and TensorFlow. I tried to reuse some code that works for binary classification and wanted to adapt it to detect 3 classes. The code works great with 2 output layers (channels) and a binary image as the ground-truth label.
Now my question is: are there any conventions for what multiclass labels should look like? Should I use a label image with only one layer (grayscale) with three values for my different classes (like 0, 127, 255)? Or should I use an RGB image with one colour for every class (like 255, 0, 0 for class 0; 0, 255, 0 for class 1; and so on)?
""" 0) Creating placeholders for input images and labels """
# Number of output classes. Every class-dependent shape below (label
# placeholder, label reshape, final deconv/conv depth) is derived from this
# single value, so moving to e.g. 3 classes is a one-line change.
num_classes = 2
# Placeholder for input images: one flattened 3-channel image per row
# (assumes img_size == img_height * img_width -- TODO confirm)
x = tf.placeholder(tf.float32, [None, 3*img_size]) # None = arbitrary (Number of images)
# Arranging images in 4D format: (image, height, width, channel)
x_shaped = tf.reshape(x, [-1, img_height, img_width, 3]) # 3 for 3 channels RGB
# Placeholder for labels of input images (ground truth):
# num_classes values per pixel, flattened per image
# (presumably a one-hot class indicator per pixel -- confirm against the label loader)
y = tf.placeholder(tf.float32, [None, num_classes*img_size])
# Arranging labels in 3D format: (image, pixel, class)
y_shaped = tf.reshape(y, [-1, img_size, num_classes])
""" 1) Defining FCN-8 VGGNet-16 """
# Stage 1: two 3x3 convolutions with 64 filters, then pooling
network = conv_layer(x_shaped, 64, filter_size=[3, 3], name='conv1a')
network = conv_layer(network, 64, filter_size=[3, 3], name='conv1b')
network = max_pool_layer(network, name='pool1')
# Stage 2: two 3x3 convolutions with 128 filters, then pooling
network = conv_layer(network, 128, filter_size=[3, 3], name='conv2a')
network = conv_layer(network, 128, filter_size=[3, 3], name='conv2b')
network = max_pool_layer(network, name='pool2')
# Stage 3: three 3x3 convolutions with 256 filters, then pooling
network = conv_layer(network, 256, filter_size=[3, 3], name='conv3a')
network = conv_layer(network, 256, filter_size=[3, 3], name='conv3b')
network = conv_layer(network, 256, filter_size=[3, 3], name='conv3c')
network = max_pool_layer(network, name='pool3')
# Keep the pool3 output; it is concatenated back in after deconv2
net_pool3 = network
# Stage 4: three 3x3 convolutions with 512 filters, then pooling
network = conv_layer(network, 512, filter_size=[3, 3], name='conv4a')
network = conv_layer(network, 512, filter_size=[3, 3], name='conv4b')
network = conv_layer(network, 512, filter_size=[3, 3], name='conv4c')
network = max_pool_layer(network, name='pool4')
# Keep the pool4 output; it is concatenated back in after deconv1
net_pool4 = network
# Stage 5: three 3x3 convolutions with 512 filters, then pooling
network = conv_layer(network, 512, filter_size=[3, 3], name='conv5a')
network = conv_layer(network, 512, filter_size=[3, 3], name='conv5b')
network = conv_layer(network, 512, filter_size=[3, 3], name='conv5c')
network = max_pool_layer(network, name='pool5')
# Decoder step 1: deconvolve, then join with the pool4 features along the
# channel axis (axis 3 of the 4D layout) and mix them with a 5x5 convolution
network = deconv_layer(network, 256, filter_size=[3, 3], name='deconv1')
network = tf.concat([network, net_pool4], 3)
network = conv_layer(network, 256, filter_size=[5, 5], name='conv6')
# Decoder step 2: same pattern with the pool3 features
network = deconv_layer(network, 128, filter_size=[3, 3], name='deconv2')
network = tf.concat([network, net_pool3], 3)
network = conv_layer(network, 128, filter_size=[5, 5], name='conv7')
# Output head: one channel per class (depth follows num_classes defined above)
network = deconv_layer(network, num_classes, filter_size=[7, 7], strides=[8, 8], name='deconv3')
# NOTE(review): activation=' ' presumably tells conv_layer to apply no
# nonlinearity so that softmax receives raw scores -- confirm in conv_layer
network = conv_layer(network, num_classes, filter_size=[7, 7], activation=' ', name='conv8')
# Per-pixel class probabilities: softmax normalises over the last (class) axis
y_ = tf.nn.softmax(network)
After computing, I generate an output image (in the test phase, after training is completed):
# Stitch the network outputs into one mosaic: tile (i, j) of the rows x cols
# grid is cnn_output[cols*i + j]; the first `layers` channels are copied.
for i in range(rows):
    for j in range(cols):
        imdata[i*img_height:(i+1)*img_height, j*img_width:(j+1)*img_width, :layers] = cnn_output[cols*i+j, :, :, :layers]
# Crop the mosaic back to the size of the source image
imdata = imdata[0:im.height, 0:im.width]
# Colour every pixel by its highest-scoring channel: channel 0 -> red,
# channel 1 -> green, anything else -> blue. np.argmax returns the first
# maximum, so ties go to the lowest channel index exactly like an
# if/elif chain testing channel 0 first. Done as one vectorized lookup
# instead of a Python loop over every pixel.
class_colours = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]], dtype=imdata.dtype)
winning_channel = np.argmax(imdata[:real_height, :real_width, :], axis=2)
imdata[:real_height, :real_width, :] = class_colours[winning_channel]
# Save the image
# NOTE(review): scipy.misc.imsave was deprecated in SciPy 1.0 and removed in
# SciPy 1.2; on newer SciPy this call must be replaced (e.g. imageio.imwrite).
scipy.misc.imsave(out_file, imdata)
im.close()
imdata has the shape of my image with 3 layers: (1080, 1920, 3).