
This question has already been discussed here and here, but I still can't find a satisfactory answer.

I'm passing the data to the RNN without using any batches, and it gives the following error:

Traceback (most recent call last):
File "/home/wolborg/PycharmProjects/speech_to_text/source/rnn_train_without_batch_bbb.py", line 127, in <module>
incorrect = sess.run(error,{input_data: test_input, target: test_output})
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow    /python/client/session.py", line 895, in run
run_metadata_ptr)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1321, in _do_run
options, run_metadata)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [0] vs. [10]
   [[Node: NotEqual = NotEqual[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](ArgMax, ArgMax_1)]]

Caused by op u'NotEqual', defined at:
File "/home/wolborg/PycharmProjects/speech_to_text/source /rnn_train_without_batch_bbb.py", line 119, in <module>
mistakes = tf.not_equal(tf.argmax(target, 1, output_type=tf.int32), tf.argmax(prediction, 1, output_type=tf.int32))
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1485, in not_equal
result = _op_def_lib.apply_op("NotEqual", x=x, y=y, name=name)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Incompatible shapes: [0] vs. [10]
 [[Node: NotEqual = NotEqual[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](ArgMax, ArgMax_1)]]
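
If I read this first error correctly, the two argmax results going into NotEqual end up with batch dimensions 0 and 10, which can't be compared element-wise. Here is a minimal NumPy sketch of what I think is happening (the shapes are taken from the error message, not from my actual graph):

import numpy as np

# Hypothetical stand-ins for what reaches tf.not_equal, based on the error message
target_batch = np.zeros((0, 400), dtype=np.float32)       # empty batch
prediction_batch = np.zeros((10, 400), dtype=np.float32)  # batch of 10

a = np.argmax(target_batch, axis=1)      # shape (0,)
b = np.argmax(prediction_batch, axis=1)  # shape (10,)
print(a.shape)   # (0,)
print(b.shape)   # (10,)
# a != b would fail here as well: batch dimensions 0 and 10 don't match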

And when I pass the data in batches of size 10, I get the error below:

File "/home/wolborg/PycharmProjects/speech_to_text/source/rnn_train_without_batch_bbb.py", line 127, in <module>
Starting with testing data...
incorrect = sess.run(error,{input_data: test_input, target: test_output})
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/home/wolborg/anaconda2/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1100, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (0, 400) for Tensor u'Placeholder_1:0', which has shape '(10, 400)'
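
As a standalone check, the same ValueError can be reproduced by feeding an array whose batch dimension doesn't match the placeholder's fixed batch dimension (this is just an illustration, not my actual graph):

import numpy as np
import tensorflow as tf

ph = tf.placeholder(tf.float32, [10, 400])
with tf.Session() as s:
    # Feeding a (0, 400) array into a (10, 400) placeholder raises:
    # ValueError: Cannot feed value of shape (0, 400) for Tensor ...
    s.run(tf.shape(ph), {ph: np.zeros((0, 400), dtype=np.float32)})

So the value being fed for the target placeholder apparently has 0 rows, i.e. the test_output slice seems to be empty.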

Following is the code:

import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np

#for taking MFCC and label input
import rnn_input_data_bbb
import sound_constants

# input constants

# Training Parameters
learning_rate = 0.0001 # for large training set, it can be set 0.001
training_steps = 10000
batch_size = 10
display_step = 200
test_data_size = 29

# Network Parameters
num_input = 58 # mfcc data input
timesteps = 80000 # timesteps
num_hidden = 200 # hidden layer num of features
num_classes = 28 # total alphabet classes (a-z) + extra symbols (', ' ')



mfcc_coeffs, text_data = rnn_input_data_bbb.mfcc_and_text_encoding() #custom dataset

input_data = tf.placeholder(tf.float32, [None, sound_constants.MAX_ROW_SIZE_IN_DATA, sound_constants.MAX_COLUMN_SIZE_IN_DATA])
target = tf.placeholder(tf.float32, [None, sound_constants.MAX_ROW_SIZE_IN_TXT])

input_data_np = mfcc_coeffs[:test_data_size]
target_np = text_data[:test_data_size]

weight = tf.Variable(tf.truncated_normal([num_hidden * 2, sound_constants.MAX_ROW_SIZE_IN_TXT]))
bias = tf.Variable(tf.constant(0.1, shape=[sound_constants.MAX_ROW_SIZE_IN_TXT]))

input_data_np_tensor = tf.convert_to_tensor(input_data_np)

def BiRNN():

    print ('Inside BiRNN....')
    ipt_data = tf.unstack(input_data_np_tensor)

    # creating one forward cell
    fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)

    # creating one backward cell
    bkwd_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, state_is_tuple=True, forget_bias=1.0)

    # creating bidirectional RNN
    val, _, _ = tf.nn.static_bidirectional_rnn(fwd_cell, bkwd_cell, ipt_data, dtype = tf.float32)

    print "Val:", val

    val = tf.transpose(val, [1, 0, 2])
    last = tf.gather(val, int(val.get_shape()[0]) - 1)

    return last


last = BiRNN()

print ('BiRNN finished successfully !!')

# mapping to 28 output classes
prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)

print ('Predicted successfully !!')


# cross-entropy loss (clipped to avoid log(0))
cross_entropy = -tf.reduce_sum(target * tf.log(tf.clip_by_value(prediction,1e-10,1.0)))
optimizer = tf.train.AdamOptimizer()
minimize = optimizer.minimize(cross_entropy)

print ('Cross entropy done successfully !!')


# identifying correctness of test dataset
# print ("Arg 1:", str(tf.argmax(target, 1, output_type=tf.int32)))
# print ("Arg 2:", str(tf.argmax(prediction, 1, output_type=tf.int32)))

# initialize tensorflow session and all variables
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)

print ('Session created successfully !!')

# begin the training
no_of_batches = int(num_input/batch_size)
epoch = 3
for i in range(epoch):
    ptr = 0
    err = sess.run(minimize,{input_data: input_data_np, target: target_np})

    print "Epoch - ", (i + 1)


test_input = mfcc_coeffs[test_data_size:]
test_output = text_data[test_data_size:]

mistakes = tf.not_equal(tf.argmax(target, 1, output_type=tf.int32), tf.argmax(prediction, 1, output_type=tf.int32))
error = tf.reduce_mean(tf.cast(mistakes, tf.float32))

# print (input_data.get_shape())
# print (target.get_shape())
# print (prediction.get_shape())

print ('Starting with testing data...')
incorrect = sess.run(error,{input_data: test_input, target: test_output})
print('Epoch {:2d} error {:3.1f}%'.format(i + 1, 100 * incorrect))

print ('Done with testing data !!')
sess.close()

The error originates from this line:

mistakes = tf.not_equal(tf.argmax(target, 1, output_type=tf.int32), tf.argmax(prediction, 1, output_type=tf.int32))

The input consists of 58 matrices of size 80000 x 1, and the output consists of 58 matrices of size 400 x 1.
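
Both errors seem to point at the slices used for testing. To verify the actual shapes, I intend to print them just before the final sess.run, along these lines (a sketch, assuming mfcc_coeffs and text_data are NumPy arrays or lists of samples with consistent shapes):

import numpy as np

print(np.asarray(mfcc_coeffs).shape)    # all input samples
print(np.asarray(text_data).shape)      # all target samples
print(np.asarray(test_input).shape)     # slice fed as test input
print(np.asarray(test_output).shape)    # a leading 0 here would explain the (0, 400) feed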

System details:

  • Ubuntu 14.04 LTS

  • Python 2.7.14

  • TensorFlow 1.3.0
