1

I want to get the activation values for a given input of a trained LSTM network, specifically the values for the cell, the input gate, the output gate and the forget gate. According to this Keras issue and this Stackoverflow question I'm able to get some activation values with the following code:

(basically I'm trying to classify 1-dimensional timeseries using one label per timeseries, but that doesn't really matter for this general question)

import random
from pprint import pprint

import keras.backend as K
import numpy as np
from keras.layers import Dense
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.utils import to_categorical

def getOutputLayer(layerNumber, model, X):
    """Run a forward pass on `X` and return the output of one layer.

    Builds a Keras backend function that maps the model's input tensor
    to the output tensor of layer `layerNumber`, then evaluates it on X.
    Returns a list with one array of shape (samples, units).
    """
    input_tensor = model.layers[0].input
    output_tensor = model.layers[layerNumber].output
    activation_fn = K.function([input_tensor], [output_tensor])
    return activation_fn([X])

# Stateful LSTM: batch_input_shape=(1, 1, 1) means one sample per batch,
# one timestep per call, one feature; state carries across fit() calls
# until reset_states() is invoked.
model = Sequential()
model.add(LSTM(10, batch_input_shape=(1, 1, 1), stateful=True))
model.add(Dense(2, activation='softmax'))
model.compile(
    loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# generate some test data
for i in range(10):
    # generate a random timeseries of 10 numbers, reshaped to
    # (timesteps, batch, features) = (10, 1, 1) for the stateful LSTM
    X = np.random.rand(10)
    X = X.reshape(10, 1, 1)

    # generate a random label (0 or 1), repeated for every timestep of
    # the series and one-hot encoded to match the softmax output
    y = to_categorical([random.randint(0, 1)] * 10, num_classes=2)

    # train the lstm for this one timeseries, then clear the carried
    # hidden/cell state before the next independent series
    model.fit(X, y, epochs=1, batch_size=1, verbose=0)
    model.reset_states()

# to keep the output simple use only 5 steps for the input of the timeseries
X_test = np.random.rand(5)
X_test = X_test.reshape(5, 1, 1)

# get the activations for the output lstm layer (layer index 0);
# prints one row of 10 hidden-state values per input timestep
pprint(getOutputLayer(0, model, X_test))

Using that I get the following activation values for the LSTM layer:

[array([[-0.04106992, -0.00327154, -0.01524276,  0.0055838 ,  0.00969929,
        -0.01438944,  0.00211149, -0.04286387, -0.01102304,  0.0113989 ],
       [-0.05771339, -0.00425535, -0.02032563,  0.00751972,  0.01377549,
        -0.02027745,  0.00268653, -0.06011265, -0.01602218,  0.01571197],
       [-0.03069103, -0.00267129, -0.01183739,  0.00434298,  0.00710012,
        -0.01082268,  0.00175544, -0.0318702 , -0.00820942,  0.00871707],
       [-0.02062054, -0.00209525, -0.00834482,  0.00310852,  0.0045242 ,
        -0.00741894,  0.00141046, -0.02104726, -0.0056723 ,  0.00611038],
       [-0.05246543, -0.0039417 , -0.01877101,  0.00691551,  0.01250046,
        -0.01839472,  0.00250443, -0.05472757, -0.01437504,  0.01434854]],
      dtype=float32)]

So I get 10 values for each input value, because I specified in the Keras model to use an LSTM with 10 neurons. But which one is the cell, which one is the input gate, which one the output gate, and which one the forget gate?

Jason Aller
  • 3,541
  • 28
  • 38
  • 38
Ragadabing
  • 472
  • 2
  • 7
  • 14

1 Answer

0

Well, these are the output values. To get and inspect the value of each gate, look into this issue.

I paste the essential part here

# NOTE(review): this is a fragment pasted from an external Keras issue.
# `epochs`, `cos`, `expected_output`, `batch_size`, `get_LSTM_output`,
# and `plt` are defined in that original example, not here.
for i in range(epochs):
    print('Epoch', i, '/', epochs)
    # NOTE(review): `nb_epoch` is the Keras 1 argument name; Keras 2
    # renamed it to `epochs` — confirm against the Keras version in use.
    model.fit(cos,
              expected_output,
              batch_size=batch_size,
              verbose=1,
              nb_epoch=1,
              shuffle=False)

    for layer in model.layers:
        if 'LSTM' in str(layer):
            # states[0] is the hidden state h, states[1] the cell state c
            print('states[0] = {}'.format(K.get_value(layer.states[0])))
            print('states[1] = {}'.format(K.get_value(layer.states[1])))

            # NOTE(review): the per-gate attributes b_i/W_i/U_i etc. are
            # Keras 1 API. In Keras 2 the gate weights are concatenated
            # into kernel / recurrent_kernel / bias (slices of size
            # `units` in i, f, c, o order) — verify before running.
            print('Input')
            print('b_i = {}'.format(K.get_value(layer.b_i)))
            print('W_i = {}'.format(K.get_value(layer.W_i)))
            print('U_i = {}'.format(K.get_value(layer.U_i)))

            print('Forget')
            print('b_f = {}'.format(K.get_value(layer.b_f)))
            print('W_f = {}'.format(K.get_value(layer.W_f)))
            print('U_f = {}'.format(K.get_value(layer.U_f)))

            print('Cell')
            print('b_c = {}'.format(K.get_value(layer.b_c)))
            print('W_c = {}'.format(K.get_value(layer.W_c)))
            print('U_c = {}'.format(K.get_value(layer.U_c)))

            print('Output')
            print('b_o = {}'.format(K.get_value(layer.b_o)))
            print('W_o = {}'.format(K.get_value(layer.W_o)))
            print('U_o = {}'.format(K.get_value(layer.U_o)))

    # output of the first batch value of the batch after the first fit().
    first_batch_element = np.expand_dims(cos[0], axis=1)  # (1, 1) to (1, 1, 1)
    print('output = {}'.format(get_LSTM_output([first_batch_element])[0].flatten()))

    # clear carried state between epochs (stateful training)
    model.reset_states()

print('Predicting')
predicted_output = model.predict(cos, batch_size=batch_size)

print('Ploting Results')
plt.subplot(2, 1, 1)
plt.plot(expected_output)
plt.title('Expected')
plt.subplot(2, 1, 2)
plt.plot(predicted_output)
plt.title('Predicted')
plt.show()
Areza
  • 5,623
  • 7
  • 48
  • 79