Here is the end-to-end code to capture the gradient using the keras backend. I have called the gradient capturing function from callbacks of model.fit to capture the gradient after end of every epoch. This code is Compatible in both tensorflow 1.x and tensorflow 2.x versions and also I have ran it in colab. If you would like to run in tensorflow 1.x, then replace the first statement in the program with %tensorflow_version 1.x
and restart the runtime.
Capturing Gradient of the model -
# Importing dependency
%tensorflow_version 2.x
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras import datasets
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import numpy as np
import tensorflow as tf
tf.keras.backend.clear_session() # For easy reset of notebook state.
tf.compat.v1.disable_eager_execution()
# Import Data
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
# Build Model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10))
# Model Summary
model.summary()
# Model Compile
model.compile(optimizer='adam',
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
# Define the Gradient Fucntion
epoch_gradient = []
# Define the Gradient Function
def get_gradient_func(model):
grads = K.gradients(model.total_loss, model.trainable_weights)
inputs = model._feed_inputs + model._feed_targets + model._feed_sample_weights
func = K.function(inputs, grads)
return func
# Define the Required Callback Function
class GradientCalcCallback(keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs=None):
get_gradient = get_gradient_func(model)
grads = get_gradient([train_images, train_labels, np.ones(len(train_labels))])
epoch_gradient.append(grads)
epoch = 4
model.fit(train_images, train_labels, epochs=epoch, validation_data=(test_images, test_labels), callbacks=[GradientCalcCallback()])
# (7) Convert to a 2 dimensiaonal array of (epoch, gradients) type
gradient = np.asarray(epoch_gradient)
print("Total number of epochs run:", epoch)
print("Gradient Array has the shape:",gradient.shape)
Output -
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 30, 30, 32) 896
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 32) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 13, 13, 64) 18496
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 4, 4, 64) 36928
_________________________________________________________________
flatten (Flatten) (None, 1024) 0
_________________________________________________________________
dense (Dense) (None, 64) 65600
_________________________________________________________________
dense_1 (Dense) (None, 10) 650
=================================================================
Total params: 122,570
Trainable params: 122,570
Non-trainable params: 0
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/4
50000/50000 [==============================] - 73s 1ms/sample - loss: 1.8199 - accuracy: 0.3834 - val_loss: 1.4791 - val_accuracy: 0.4548
Epoch 2/4
50000/50000 [==============================] - 357s 7ms/sample - loss: 1.3590 - accuracy: 0.5124 - val_loss: 1.2661 - val_accuracy: 0.5520
Epoch 3/4
50000/50000 [==============================] - 377s 8ms/sample - loss: 1.1981 - accuracy: 0.5787 - val_loss: 1.2625 - val_accuracy: 0.5674
Epoch 4/4
50000/50000 [==============================] - 345s 7ms/sample - loss: 1.0838 - accuracy: 0.6183 - val_loss: 1.1302 - val_accuracy: 0.6083
Total number of epochs run: 4
Gradient Array has the shape: (4, 10)
Hope this answers your question. Happy Learning.