I would like to denormalize my predictions. When I normalize the data with sklearn MinMaxscaler
a train set that has 6 columns it won't let me use inverse_transform()
for the data because my prediction only has one column. Anyone know how to inverse transform it or maybe normalize the data in a better way so it will be easier to denormalize the predictions?
api_key = '.......'
train_pct = 0.7
val_pct = 0.1
test_pct = 0.2
sequence_length = 20
target_prediction = 1
stock = 'msft'
def get_df(stock):
ts = TimeSeries(key=api_key, output_format='pandas')
df, meta_data = ts.get_daily_adjusted(symbol=stock, outputsize='full')
#adjusting for stock split
df['Open'] = (1 / df.loc[:, ('8. split coefficient')]).replace(np.inf, 1).cumprod() * df.loc[:, ('1. open')]
df['High'] = (1 / df.loc[:, ('8. split coefficient')]).replace(np.inf, 1).cumprod() * df.loc[:, ('2. high')]
df['Low'] = (1 / df.loc[:, ('8. split coefficient')]).replace(np.inf, 1).cumprod() * df.loc[:, ('3. low')]
df['Close'] = (1 / df.loc[:, ('8. split coefficient')]).replace(np.inf, 1).cumprod() * df.loc[:, ('4. close')]
df = df[['Open', 'High', 'Low', 'Close', '6. volume', '7. dividend amount']]
#reverses dataframe
df = df.iloc[::-1]
#Transform dates(index) into number
df.set_index(mdates.date2num(df.index), inplace=True)
return df
def data_split(df, train_pct=train_pct, test_pct=test_pct, val_pct=val_pct):
#spliting data into training and testing
start_index = df.head().index.values[0]
n = df.tail().index.values[4] - start_index
train_df = df[start_index : int(train_pct* n) + start_index - 1]
val_df = df[(int(n* train_pct) + start_index) :(int(n*(train_pct + val_pct)) + start_index) - 1]
test_df = df[(int(n*(train_pct + val_pct)) + start_index) : (start_index + n)]
#den ene verdien blir sett to ganger når aksjen er msft, kan være desimal feil.(avrunding 735470.0)
data_normaliser = preprocessing.StandardScaler()
train_np = data_normaliser.fit_transform(train_df[['Open', 'High', 'Low', 'Close', '6. volume', '7. dividend amount']])
test_np = data_normaliser.transform(test_df[['Open', 'High', 'Low', 'Close', '6. volume', '7. dividend amount']])
val_np = data_normaliser.transform(val_df[['Open', 'High', 'Low', 'Close', '6. volume', '7. dividend amount']])
#turn numpy into dataframe again
train = pd.DataFrame(train_np, columns=['Open', 'High', 'Low', 'Close', 'Volum', 'Dividend'])
test = pd.DataFrame(test_np, columns=['Open', 'High', 'Low', 'Close', 'Volum', 'Dividend'])
val = pd.DataFrame(val_np, columns=['Open', 'High', 'Low', 'Close', 'Volum', 'Dividend'])
return train, val, test
def create_df(df, sequence_length=sequence_length, target_prediction=target_prediction):
x = []
y = []
index1 = len(df)
for i in range(sequence_length, index1):
x.append(df.iloc[i-sequence_length:i,:])
y.append(df.iloc[i,0])
x, y = np.array(x), np.array(y)
return x, y
################################# CODE ITSELF START HERE #################################
df = get_df('msft')
train, val, test = data_split(df)
x_train, y_train = create_df(train)
x_val, y_val = create_df(val)
x_test, y_test = create_df(test)
### MODEL ###
model = Sequential()
model.add(LSTM(units = 50, activation = 'relu', return_sequences=True, input_shape = (x_train.shape[1], 6)))
model.add(Dropout(0.2))
model.add(LSTM(units = 60, activation = 'relu', return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(units = 80, activation = 'relu', return_sequences=True))
model.add(Dropout(0.4))
model.add(LSTM(units = 120, activation = 'relu'))
model.add(Dense(units = 1))
model.add(Dropout(0.5))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(x_train, y_train, epochs=10, batch_size=64)
### TESTING THE MODEL ###
from sklearn import preprocessing
import matplotlib.pyplot as plt
y_pred = model.predict(x_train)
y_pred = data_normalizer.inverse_transform(y_pred)
### VISUALIZATION ###
plt.figure(figsize=(20,7))
plt.plot(y_train, color = 'red', label = 'Real stock price')
plt.plot(y_pred, color = 'blue', label = 'Predicted stock price')
plt.title('Stock price prediction')
plt.ylabel('Price')
plt.legend()
plt.show()
ValueError Traceback (most recent call last)
<ipython-input-161-16162cc08160> in <module>()
4
5 y_pred = model.predict(x_train)
----> 6 y_pred = data_normalizer.inverse_transform(y_pred)
7
8 ### VISUALIZATION ###
/usr/local/lib/python3.6/dist-packages/sklearn/preprocessing/_data.py in inverse_transform(self, X)
434 force_all_finite="allow-nan")
435
--> 436 X -= self.min_
437 X /= self.scale_
438 return X
ValueError: non-broadcastable output operand with shape (3715,1) doesn't match the broadcast shape (3715,6)