
In Keras, each training run has high variance and unstable performance. To combat this, following https://keras.io/getting-started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development, I set the seeds as shown below.

Unfortunately, this does not help and I continue to get mixed results. Any guidance would be appreciated.

# Seed value (can actually be different for each attribution step)
seed_value= 0

# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
from itertools import permutations
import keras
from keras import optimizers
from keras.callbacks import Callback
from keras.initializers import glorot_uniform
from keras.layers import Input, LSTM, Dense, concatenate, Lambda, Multiply, Add, Dropout, multiply, TimeDistributed, Conv1D, GlobalMaxPooling1D, LeakyReLU
from keras.activations import softmax, sigmoid
from keras.models import Model
from keras_sequential_ascii import keras2ascii

from keras.callbacks import ModelCheckpoint, EarlyStopping

# Custom loss: take the full batch (of size beam_size) and apply a mask to calculate the true loss within the beam
beam_size = 10

K.clear_session()
def create_mask(y, yhat):
    # Build all ordered pairs of beam positions, gather the corresponding
    # labels/predictions, and keep only the pairs whose labels differ.
    idxs = list(permutations(range(beam_size), r=2))
    perms_y = tf.gather(y, idxs)
    perms_yhat = tf.gather(yhat, idxs)
    mask = tf.where(tf.not_equal(perms_y[:, 0], perms_y[:, 1]))
    mask = tf.reduce_sum(mask, 1)
    uneq = tf.squeeze(tf.gather(perms_y, mask))
    yhat_uneq = tf.squeeze(tf.gather(perms_yhat, mask))
    return uneq, yhat_uneq

def mask_acc(y, yhat):
    # Accuracy over the pairs with unequal labels: argmax each side and compare.
    uneq, yhat_uneq = create_mask(y, yhat)
    uneq = tf.argmax(uneq, 1)
    yhat_uneq = tf.argmax(yhat_uneq, 1)
    return tf.reduce_mean(tf.cast(tf.equal(uneq, yhat_uneq), tf.float32))

def beam_acc(y, yhat):
    # Beam accuracy: does the argmax prediction fall on any position labelled 1?
    yhat_uneq = tf.argmax(yhat, 0)
    y = tf.reshape(y, [-1])
    possible = tf.where(tf.equal(y, tf.constant(1.0, dtype=tf.float32)))
    yhat_uneq = tf.Print(yhat_uneq, [yhat_uneq], 'prediction')  # debug print
    possible = tf.reshape(possible, [-1])
    mean = tf.reduce_mean(tf.cast(tf.reduce_any(tf.equal(possible, yhat_uneq)), tf.float32))
    return mean

def mask_loss(y, yhat):
    # Consider a weighted loss.
    # Build all permutations, keep only pairs whose labels differ, and take the
    # softmax cross-entropy over those pairs.
    uneq, yhat_uneq = create_mask(y, yhat)
    total_loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(onehot_labels=tf.cast(uneq, tf.int32), logits=yhat_uneq))
    return total_loss


x = Input((19,78))
lstm1 = LSTM(64, batch_input_shape=(10, 19, 78),return_sequences=True, unroll=True, activation='relu')(x)
#mult = multiply([encoded, ff])
#cat = concatenate([encoded, squeezed])
#dense2 = Dense(10)(encoded)
dense = Dense(1)(lstm1)
#mult = multiply([dense, prob])
#dense2 = Dense(1)(mult)
#print(dense2.shape)
output = Lambda(lambda x: K.sum(x, axis=1))(dense)
#output = Lambda(lambda x: K.squeeze(x, -1))(added)
#lam2 = Lambda(lambda x: K.sum(x, axis=1))(lam)
#probs_aug = Lambda(lambda x: x * .01)(probs)
#output = Add()([lam, probs])
sgd = optimizers.SGD(lr=0.01, nesterov=True, momentum=.9, decay=1e-5)
adam = optimizers.Adam(lr=0.001,decay=1e-5)#, nesterov=True, momentum=.9, decay=1e-5)
lstm_model = Model(inputs=[x], outputs=output)
lstm_model.compile(sgd, loss=mask_loss, metrics=[mask_acc, beam_acc])
filepath="weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_beam_acc', verbose=1, save_best_only=True, mode='max')
stop = EarlyStopping(monitor='val_beam_acc', patience=3) 
lstm_model.fit(X_train, y_train, batch_size=10, epochs=10, verbose=1, shuffle=False,
               validation_data=(X_dev, y_dev), callbacks=[checkpoint, stop])
Jacob B
2 Answers


This was solved by setting PYTHONHASHSEED at the OS level (see Reproducible results using Keras with TensorFlow backend):

# Seed value (can actually be different for each attribution step)
seed_value= 0

# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
Jacob B
  • Very good find. The particular [document](https://keras.io/getting-started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development) you used states that this should be set from the environment, but it is also possible from within the code: `os.environ['PYTHONHASHSEED'] = '0'` – prosti Jan 30 '19 at 18:55

Make sure you have both these lines:

np.random.seed(0) 
tf.set_random_seed(0)

The document you mentioned also states that you can run your script like this to set the Python hash seed: `PYTHONHASHSEED=0 python3 yourcode.py`

Possibly this is the best way to eliminate the hash-seed randomness.

This variable needs to be set before the Python process launches; setting it via os.putenv() or os.environ may not work in some Python versions.
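
Because the hash seed cannot be changed once the interpreter is running, one option (a minimal sketch, not from the original answer) is to check it at startup and abort if it was not fixed:

# Minimal sketch (assumption): verify that PYTHONHASHSEED was fixed before launch,
# since hash randomization cannot be disabled after the interpreter has started.
import os
import sys

if os.environ.get('PYTHONHASHSEED') != '0':
    sys.exit("PYTHONHASHSEED is not fixed; re-run as: PYTHONHASHSEED=0 python3 yourcode.py")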

prosti
  • I updated my post; unfortunately this was not the issue. The seeds are set and I am using SGD with no momentum. – Jacob B Jan 30 '19 at 18:36