I am training an autoencoder with TensorFlow, but the cost becomes NaN. I have modified the learning rate and the optimizer, but it does not help. Search results suggest that decreasing the learning rate may fix it, but even after lowering it to 0.00001 the cost is still NaN. Here is my code:
import tensorflow as tf

learning_rate = 0.00001
training_epochs = 2
batch_size = 900
display_step = 1
examples_to_show = 10
nextbatch = 0
# network parameters
n_input = 500
# tf Graph input
X = tf.placeholder("float", [None, n_input])
# hidden layer setting
n_hidden_1 = 400  # 1st layer num features
n_hidden_2 = 300  # 2nd layer num features
n_hidden_3 = 200  # 3rd layer num features
n_hidden_4 = 100  # 4th layer num features
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),     # 500 * 400
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),  # 400 * 300
    'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),  # 300 * 200
    'encoder_h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4])),  # 200 * 100
    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_4, n_hidden_3])),  # 100 * 200
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_2])),  # 200 * 300
    'decoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),  # 300 * 400
    'decoder_h4': tf.Variable(tf.random_normal([n_hidden_1, n_input])),     # 400 * 500
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),  # 400
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),  # 300
    'encoder_b3': tf.Variable(tf.random_normal([n_hidden_3])),  # 200
    'encoder_b4': tf.Variable(tf.random_normal([n_hidden_4])),  # 100
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_3])),  # 200
    'decoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),  # 300
    'decoder_b3': tf.Variable(tf.random_normal([n_hidden_1])),  # 400
    'decoder_b4': tf.Variable(tf.random_normal([n_input])),     # 500
}

# Building the encoder
def encoder(x):
    print("i am encoder")
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['encoder_h3']),
                                   biases['encoder_b3']))
    layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['encoder_h4']),
                                   biases['encoder_b4']))
    return layer_4

# Building the decoder
def decoder(x):
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    print("layer1:", layer_1)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    print("layer2:", layer_2)
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['decoder_h3']),
                                   biases['decoder_b3']))
    print("layer3:", layer_3)
    layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['decoder_h4']),
                                   biases['decoder_b4']))
    print("layer4:", layer_4)
    return layer_4

# Scale the input data to the [0, 1] range
def normalize(x):
    amin, amax = x.min(), x.max()
    x = (x - amin) / (amax - amin)
    return x

def main():
    # Construct model
    encoder_op = encoder(X)
    print("encoder_op:", encoder_op)
    decoder_op = decoder(encoder_op)
    print("decoder_op:", decoder_op)
    # Prediction
    y_pred = decoder_op
    # Targets (Labels) are the input data.
    y_true = X
    # Define loss and optimizer, minimize the squared error
    #cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
    #cost = tf.reduce_sum(tf.pow((y_true - y_pred), 2))
    cost = tf.reduce_mean(tf.squared_difference(y_true, y_pred))
    #optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost)
    # Launch the graph
    with tf.Session() as sess:
        if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
            init = tf.initialize_all_variables()
        else:
            init = tf.global_variables_initializer()
        sess.run(init)
        lenx_train = len(loadvector("x", "train"))
        total_batch = int(lenx_train / batch_size)
        # Training cycle
        for epoch in range(training_epochs):
            # Loop over all batches
            for i in range(total_batch):
                batch_xs, batch_ys = get_next_batch(batch_size)  # max(x) = 1, min(x) = 0
                # Run optimization op (backprop) and cost op (to get loss value)
                op, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1), "cost=", c)
                print("op:", '%04d' % (epoch + 1), "op=", op)
        print("Optimization Finished!")

if __name__ == "__main__":
    x_train = loadvector("x", "train")
    #x_train = scale(x_train)
    x_train = normalize(x_train)
    y_train = loadvector("y", "train")
    main()
Here is the output when I run it:
Epoch: 0001 cost= 0.373359
op: 0001 op= None
Epoch: 0002 cost= nan
op: 0002 op= None
Optimization Finished!
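As mentioned above, I also tried the other optimizers (the commented-out lines in main()) together with the lower learning rate. The sketch below just shows the variants I switched between, using the same learning_rate and cost as in the code above; none of them avoids the NaN cost:

# Optimizer variants I have tried (only one line active at a time),
# reusing the learning_rate and cost defined in main() above:
optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost)
# optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)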