I am trying to train a LSTM model to reconstruct time series data. I have a data set of ~1800 univariant time-series. Basically I'm trying to solve a problem similar to this one Anomaly detection in ECG plots, but my time series have different lengths.
I used this approach to deal with variant length: How to apply LSTM-autoencoder to variant-length time-series data? and this approach to split the input data based on shape: Keras misinterprets training data shape
When looping over the data and fitting a model for every shape. is the model eventually only based on the last shape it trained on or is it using all the data to train the final model?
How would I train the model on all input data regardless shape of data? I know I can add padding but I am trying to use the data as is at this point. Any suggestions or other approaches to deal with different length on timeseries? (It is not an issue of time sampling it is more of one timeseries started recording on day X and some only on day X+100)
Here is the code I am using for my autoencoder:
import keras.backend as K
from keras.layers import (Input, Dense, TimeDistributed, LSTM, GRU, Dropout, merge,
Flatten, RepeatVector, Bidirectional, SimpleRNN, Lambda)
def encoder(model_input, layer, size, num_layers, drop_frac=0.0, output_size=None,
bidirectional=False):
"""Encoder module of autoencoder architecture"""
if output_size is None:
output_size = size
encode = model_input
for i in range(num_layers):
wrapper = Bidirectional if bidirectional else lambda x: x
encode = wrapper(layer(size, name='encode_{}'.format(i),
return_sequences=(i < num_layers - 1)))(encode)
if drop_frac > 0.0:
encode = Dropout(drop_frac, name='drop_encode_{}'.format(i))(encode)
encode = Dense(output_size, activation='linear', name='encoding')(encode)
return encode
def repeat(x):
stepMatrix = K.ones_like(x[0][:,:,:1]) #matrix with ones, shaped as (batch, steps, 1)
latentMatrix = K.expand_dims(x[1],axis=1) #latent vars, shaped as (batch, 1, latent_dim)
return K.batch_dot(stepMatrix,latentMatrix)
def decoder(encode, layer, size, num_layers, drop_frac=0.0, aux_input=None,
bidirectional=False):
"""Decoder module of autoencoder architecture"""
decode = Lambda(repeat)([inputs,encode])
if aux_input is not None:
decode = merge([aux_input, decode], mode='concat')
for i in range(num_layers):
if drop_frac > 0.0 and i > 0: # skip these for first layer for symmetry
decode = Dropout(drop_frac, name='drop_decode_{}'.format(i))(decode)
wrapper = Bidirectional if bidirectional else lambda x: x
decode = wrapper(layer(size, name='decode_{}'.format(i),
return_sequences=True))(decode)
decode = TimeDistributed(Dense(1, activation='linear'), name='time_dist')(decode)
return decode
inputs = Input(shape=(None, 1))
encoded = encoder(inputs,LSTM,128, 2, drop_frac=0.0, output_size=None, bidirectional=False)
decoded = decoder(encoded, LSTM, 128, 2, drop_frac=0.0, aux_input=None,
bidirectional=False,)
sequence_autoencoder = Model(inputs, decoded)
sequence_autoencoder.compile(optimizer='adam', loss='mae')
trainByShape = {}
for item in train_data:
if item.shape in trainByShape:
trainByShape[item.shape].append(item)
else:
trainByShape[item.shape] = [item]
for shape in trainByShape:
modelHistory =sequence_autoencoder.fit(
np.asarray(trainByShape[shape]),
np.asarray(trainByShape[shape]),
epochs=100, batch_size=1, validation_split=0.15)