In the below code, I am comparing the Predicted Output
of the TF Keras Model
with the respective value which is calculated Manually
(Softmax Activation
implemented using Numpy
).
Surprisingly, they are not same. Am I missing something?
Also, there is a Warning,
UserWarning: "
sparse_categorical_crossentropy
receivedfrom_logits=True
, but theoutput
argument was produced by a sigmoid or softmax activation and thus does not represent logits. Was this intended?" '"sparse_categorical_crossentropy
receivedfrom_logits=True
, but '
What does that warning mean? And is that warning the reason for the mismatch?
import tensorflow as tf
import numpy as np
inputs = tf.keras.Input(shape=(784,), name="digits")
x1 = tf.keras.layers.Dense(64, activation="relu")(inputs)
x2 = tf.keras.layers.Dense(64, activation="relu")(x1)
outputs = tf.keras.layers.Dense(10, name="predictions", activation = 'softmax')(x2)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
# Instantiate an optimizer.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Prepare the training dataset.
batch_size = 64
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))
# Normalize the values of Pixels of Image. Else, Calculation of Softmax results in NaN
x_train = x_train/255.0
x_test = x_test/255.0
# Reserve 10,000 samples for validation.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]
# Prepare the training dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)
epochs = 2
for epoch in range(epochs):
print("\nStart of epoch %d" % (epoch,))
# Iterate over the batches of the dataset.
for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
x_batch_train = tf.cast(x_batch_train, tf.float32)
with tf.GradientTape() as tape:
logits = model(x_batch_train, training=True) # Logits for this minibatch
# Compute the loss value for this minibatch.
loss_value = loss_fn(y_batch_train, logits)
grads = tape.gradient(loss_value, model.trainable_weights)
Initial_Weights_1st_Hidden_Layer = model.trainable_weights[0]
Initial_Weights_2nd_Hidden_Layer = model.trainable_weights[2]
Initial_Weights_Output_Layer = model.trainable_weights[4]
Initial_Bias_1st_Hidden_Layer = model.trainable_weights[1]
Initial_Bias_2nd_Hidden_Layer = model.trainable_weights[3]
Initial_Bias_Output_Layer = model.trainable_weights[5]
# Implementing Relu Activation Function using Numpy
def Relu_Activation(Input):
return np.maximum(Input, 0)
#Compute Softmax Activation Function using Numpy
def Softmax_Activation(Input):
return np.exp(Input) / np.sum(np.exp(Input), axis=0)
# Calculations
Input_to_1st_Hidden_Layer = x_batch_train @ Initial_Weights_1st_Hidden_Layer + \
Initial_Bias_1st_Hidden_Layer
Output_Of_1st_Hidden_Layer = Relu_Activation(Input_to_1st_Hidden_Layer)
Input_to_2nd_Hidden_Layer = Output_Of_1st_Hidden_Layer @ Initial_Weights_2nd_Hidden_Layer + \
Initial_Bias_2nd_Hidden_Layer
Output_Of_2nd_Hidden_Layer = Relu_Activation(Input_to_2nd_Hidden_Layer)
Input_to_Final_Layer = Output_Of_2nd_Hidden_Layer @ Initial_Weights_Output_Layer + \
Initial_Bias_Output_Layer
# Softmax Activation Function has been used in the Output/Final Layer
Calculated_Y_Pred = Softmax_Activation(Input_to_Final_Layer)
# Log every 200 batches.
if step == 200:
print('\n Y_Pred = ', logits[0:2])
print('\n Calculated_Y_Pred = ', Calculated_Y_Pred[0:2])
The output of the above code is shown below:
Start of epoch 0
/home/mothukuru/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:4930: UserWarning: "`sparse_categorical_crossentropy` received `from_logits=True`, but the `output` argument was produced by a sigmoid or softmax activation and thus does not represent logits. Was this intended?"
'"`sparse_categorical_crossentropy` received `from_logits=True`, but '
Y_Pred = tf.Tensor(
[[0.07784345 0.13746074 0.09005958 0.08652461 0.07746054 0.12440132
0.10698392 0.07508533 0.07116801 0.15301245]
[0.0656803 0.08119027 0.09362638 0.10353054 0.12599334 0.10456354
0.1271341 0.08623642 0.08971243 0.12233265]], shape=(2, 10), dtype=float32)
Calculated_Y_Pred = [[0.01511016 0.02304603 0.01961761 0.01425961 0.01025286 0.02124614
0.01223315 0.01411171 0.01178642 0.01445299]
[0.01271159 0.01357185 0.02033444 0.01701196 0.01662761 0.01780546
0.01449438 0.01615969 0.01481383 0.01152103]]
Start of epoch 1
Y_Pred = tf.Tensor(
[[0.12411885 0.08815324 0.05189805 0.07208851 0.11877609 0.06383732
0.13067529 0.08087374 0.09073243 0.17884655]
[0.07584718 0.079349 0.06285123 0.1089478 0.09581042 0.09398626
0.12189291 0.10832074 0.08284932 0.17014521]], shape=(2, 10), dtype=float32)
Calculated_Y_Pred = [[0.02525741 0.01648222 0.01210153 0.012623 0.01642019 0.01224833
0.01583157 0.01587343 0.01606088 0.01728726]
[0.01414648 0.01359805 0.01343262 0.01748529 0.01214003 0.01652816
0.01353526 0.01948644 0.01344168 0.01507382]]