The purpose of this code is to predict the future values of forex market for the general currencies.
At first, I made a unified data frame, this unified data frame is combination of 11 forex market data sets for the most widely traded currencies plus a group of 900 economic indicators.
After combining these 911 data sets in the unified data frame with no problems of any sort after being tested, I begun the LSTM neural network which I also tested with just single data set and it works great.
The problem begins when I combined the unified data frame with the LSTM neural network.
Here is the code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import os
os.chdir("E:\Business\Stocks")
path = os.listdir("E:\Business\Stocks")
for file in path:
name, ext = os.path.splitext(str(file))
column_names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
df1 = pd.read_csv(file, names=column_names, parse_dates={'DateTime': ['Date', 'Time']}, index_col=[0])
df1 = df1.rename(columns={'Open': name + ' ' + 'Open', 'High': name + ' ' + 'High',
'Low': name + ' ' + 'Low', 'Close': name + ' ' + 'Close',
'Volume': name + ' ' + 'Volume'})
os.chdir("E:\Business\Economic Indicators")
path = os.listdir("E:\Business\Economic Indicators")
for file in path:
df2 = pd.read_csv(file, index_col=[0], parse_dates=[0])
name, ext1 = os.path.splitext(file)
df2 = df2.rename(columns={'Actual': name + ' ' + 'Actual', 'Consensus': name + ' ' + 'Consensus',
'Previous': name + ' ' + 'Previous', 'Revised': name + ' ' + 'Revised'})
dfs = [df1 ,df2]
df = pd.concat(dfs, axis=1, join='inner').sort_index(ascending=False)
df.fillna(method='ffill', inplace=True)
sequence_length = 120
n_features = len(df.columns)
val_ratio = 0.1
n_epochs = 3000
batch_size = 500
data = df.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)
val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]
print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))
train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape
train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))
preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)
train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))
X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))
model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=100, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("relu"))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'accuracy'])
history = model.fit(
X_train,
y_train,
batch_size=batch_size,
epochs=n_epochs,
verbose=2)
preds_val = model.predict(X_val)
diff = []
for i in range(len(y_val)):
pred = preds_val[i][0]
diff.append(y_val[i] - pred)
real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[:1])
print(preprocessor.data_max_[:1])
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min
plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()
print(model.summary())
Here is the error:
Using TensorFlow backend.
Traceback (most recent call last):
File "E:/Tutorial/new.py", line 47, in train = data_processed[: int(val_split), :]
IndexError: too many indices for array