I have built this LSTM class:
import tensorflow as tf
import Constants
class LSTM():
def __init__(self,
inputShape,
outputShape,
numLayers=Constants.numLayers,
numHidden=Constants.numHidden,
learningRate=Constants.learningRate,
forgetBias=Constants.forgetBias):
self.inputs = tf.placeholder(tf.float32, [None] + inputShape)
self.labels = tf.placeholder(tf.float32, [None] + outputShape)
self.inputTensors = tf.unstack(self.inputs, axis=1)
self.weights = tf.Variable(tf.random_normal([numHidden] + outputShape))
self.bias = tf.Variable(tf.random_normal(outputShape))
layers = [tf.contrib.rnn.LSTMCell(numHidden, forget_bias=forgetBias, state_is_tuple=True)] * numLayers
self.cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
self.optimiser = tf.train.GradientDescentOptimizer(learningRate)
self.forgetBias = forgetBias
self.batchDict = None
self.outputs = None
self.finalStates = None
self.predictions = None
self.loss = None
self.accuracy = None
self.optimise = None
self.session = tf.Session()
self.__buildGraph()
def __buildGraph(self):
outputs, finalStates = tf.nn.static_rnn(self.cell, self.inputTensors, dtype=tf.float32)
predictions = tf.add(tf.matmul(outputs[-1], self.weights), self.bias)
self.predictions = tf.minimum(tf.maximum(predictions, 0), 1)
self.loss = tf.losses.mean_squared_error(predictions=self.predictions, labels=self.labels)
self.accuracy = tf.reduce_mean(1 - tf.abs(self.labels - self.predictions) / 1.0)
self.optimise = self.optimiser.minimize(self.loss)
self.session.run(tf.global_variables_initializer())
def __execute(self, operation):
return self.session.run(operation, self.batchDict)
def setBatch(self, inputs, labels):
self.batchDict = {self.inputs: inputs, self.labels: labels}
def batchLabels(self):
return self.__execute(self.labels)
def batchPredictions(self):
return self.__execute(self.predictions)
def batchLoss(self):
return self.__execute(self.loss)
def batchAccuracy(self):
return self.__execute(self.accuracy)
def processBatch(self):
self.__execute(self.optimise)
def kill(self):
self.session.close()
and I run it like so:
import DataWorker
import Constants
from Model import LSTM
inputShape = [Constants.sequenceLength, DataWorker.numFeatures]
outputShape = [1]
LSTM = LSTM(inputShape, outputShape)
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer)
LSTM.setBatch(batchX, batchY)
LSTM.processBatch()
if batchNum % Constants.printStep == 0 or epochComplete:
print("Batch:\t\t", batchNum)
print("Last Pred:\t", LSTM.batchPredictions()[-1][0])
print("Last Label:\t", LSTM.batchLabels()[-1][0])
print("Loss:\t\t", LSTM.batchLoss())
print("Accuracy:\t", str("%.2f" % (LSTM.batchAccuracy() * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY = DataWorker.generateTestBatch()
LSTM.setBatchDict(testX, testY)
testAccuracy = LSTM.batchAccuracy()
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
LSTM.kill()
This all works well as it should. However, I am using time series data which consists of financial stocks spanning over ranges of timestamps far greater than the number of time steps that my LSTM is unrolled for - Constants.sequenceLength
. Because of this, it takes many sequential batches for a single stock t be processed, and so the state/memory of my LSTM needs to be passed between batches. As well as this, after a batch that completes the lifespan of an ID, the next batch would be passing in a new ID from the initial timestamp of my dataset, and so I would want to reset the memory.
There are many questions asking something similar, and all of the answers are adequate, however, none seem to address the issue of using variable batch sizes - batch sizes initialised to None
and then inferred when a batch is passed in. My batches are usually a constant size, but do change under certain circumstances and I cannot change this. How can I have control over passing the state between batches, as well as resetting the state, if I have not specified the batch size?