I am trying to backtest my code by providing some data, but its showing "ValueError: No objects to concatenate"
This is my code:
import yfinance as yf
import numpy as np
import pandas as pd
sp_500 = yf.Ticker("MSFT")
sp_500 = sp_500.history(period="max")
# sp_500.plot.line(y="Close", use_index=True)
del sp_500["Dividends"]
del sp_500["Stock Splits"]
sp_500["Tomorrow"] = sp_500["Close"].shift(-1)
sp_500["Target"] = (sp_500["Tomorrow"] > sp_500["Close"]).astype(int) # for boolean value instead of TRUE AND FALSE
sp_500 = sp_500.loc["2022-01-01":].copy()
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
model = RandomForestClassifier(n_estimators=100, min_samples_split=100, random_state=1)
train = sp_500.iloc[:-100]
test = sp_500.iloc[-100:]
predictors = ["Close", "Volume", "Open", "High", "Low"]
model.fit(train[predictors], train["Target"])
preds = model.predict(test[predictors])
preds = pd.Series(preds, index=test.index)
combined = pd.concat([test["Target"], preds], axis=1)
def predict(train, test, predictors, model):
model.fit(train[predictors], train["Target"])
preds = model.predict(test[predictors])
preds = pd.Series(preds, index=test.index, name="Predictions")
combined = pd.concat([test["Target"], preds], axis=1)
return combined
def backtest(data, model, predictors, start=2500, step=250):
all_predictions = []
for i in range(start, data.shape[0] - step, step):
train = data.iloc[i:(i + step)]
test = data.iloc[(i + step):(i + 2 * step)]
predictions = predict(train, test, predictors, model)
all_predictions.append(predictions)
return pd.concat(all_predictions)
predictions = backtest(sp_500, model, predictors)
print(predictions["Predictions"].value_counts())
I was expecting an output that will be the count of predicted price increases and decreases during the backtesting process. The number of predicted "1" (price increase) and "0" (price decrease) in the "Predictions" column will be displayed