I am new to Bayesian neural network using tensorflow probability. Here is a toy example I created, but the neural network is not predicting accurate results.
I am creating a linear x,y
data and using sklearn.model_selection.train_test_split()
x_1 = np.linspace(0.0, 1.0, 100)
y_1 = np.linspace(0.0, 1.0, 100)
x_train, x_test, y_train, y_test = train_test_split(x_1, y_1, test_size=0.2, random_state=36, shuffle=True)
def negative_loglikelihood1(targets, estimated_distribution):
return -estimated_distribution.log_prob(targets)
def prior_trainable(kernel_size, bias_size=0, dtype=None):
n = kernel_size + bias_size
prior_model = keras.Sequential([
tfp.layers.DistributionLambda(lambda t:
tfp.distributions.MultivariateNormalDiag(loc=tf.zeros(n),
scale_diag=tf.ones(n)))])
return prior_model
def posterior_mean_field2(kernel_size, bias_size=0, dtype=None):
n = kernel_size + bias_size
c = np.log(np.expm1(1.))
return tf.keras.Sequential([
tfp.layers.VariableLayer(2 * n, dtype=dtype),
tfp.layers.DistributionLambda(lambda t: tfp.distributions.Independent(
tfp.distributions.Normal(loc=t[..., :n],
scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
reinterpreted_batch_ndims=1)), ])
And now creating DenseVariational
NN.
model_prob = tf.keras.Sequential()
model_prob.add(tfp.layers.DenseVariational(units=12, make_prior_fn=prior_trainable,
make_posterior_fn=posterior_mean_field2, kl_weight=1 / x_train.shape[0],
activation='relu', input_shape=(1,))) # input
model_prob.add(tfp.layers.DistributionLambda(
lambda t: tfp.distributions.Normal(
loc=t[..., :1], scale=1e-3 + tf.math.softplus(0.05 * t[..., 1:])))) # output
opt2 = tf.keras.optimizers.Adam(learning_rate=0.001)
model_prob.compile(loss=negative_loglikelihood1, optimizer=opt2,
metrics=[tf.keras.metrics.MeanSquaredError(), tf.keras.metrics.RootMeanSquaredError()])
model_prob.build(input_shape=(1,))
Training and prediction:
history_BNN = model_prob.fit(x_train, y_train, batch_size=8, epochs=64)
y_pred1 = model_prob.predict(x_test)
y_pred2 = np.mean(y_pred1, axis=1)
y_pred3 = np.std(y_pred1, axis=1)
for i in range(10):
print("Actual={}, Predicted Mean= {}, Range= [ {}, {} ]".format(
y_test[i], y_pred2[i], y_pred2[i] + 1.0 * y_pred3[i], y_pred2[i] - 1.0 * y_pred3[i]))
Here are some examples of results:
Actual=0.595959595959596, Predicted Mean= 0.06185102462768555, Range= [ 0.7597668766975403, -0.6360648274421692 ]
Actual=0.5858585858585859, Predicted Mean= -0.2560698688030243, Range= [ 0.3110552132129669, -0.8231949508190155 ]
Clearly, predictions are far from actuals. Any idea what is not correct?