I implemented a univariate XGBoost time series forecast using the following code from this site:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of lagged values.

    Each output row holds the observations at t-n_in, ..., t-1 followed by
    the observations at t, ..., t+n_out-1. For multi-column input every time
    step contributes all of its columns, in their original order.

    Args:
        data: Observations in time order. Anything ``pd.DataFrame`` accepts:
            a list, a 1-D array/Series, or a 2-D array/DataFrame with one
            column per variable.
        n_in: Number of lagged time steps to use as inputs.
        n_out: Number of time steps, starting at t, to use as outputs.
        dropnan: If true, drop every row that contains a NaN. This removes
            the incomplete windows that shifting creates at both ends of the
            series, and also any row touched by a NaN present in ``data``.

    Returns:
        A 2-D NumPy array with ``(n_in + n_out) * n_columns`` columns.
    """
    # NOTE: no column count is derived from ``data.shape`` here; doing so
    # fails for 1-D arrays and Series, and the value is not needed.
    frame = pd.DataFrame(data)
    # input sequence (t-n, ... t-1)
    shifted = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    # forecast sequence (t, t+1, ... t+n)
    shifted += [frame.shift(-step) for step in range(n_out)]
    # put it all together, one group of columns per time step
    agg = pd.concat(shifted, axis=1)
    if dropnan:
        agg = agg.dropna()
    return agg.values
def train_test_split(data, n_test):
    """Split a 2-D array into leading training rows and trailing test rows.

    Args:
        data: 2-D array indexable as ``data[rows, cols]``; rows are assumed
            to be in time order.
        n_test: Number of trailing rows to hold out as the test set.

    Returns:
        A ``(train, test)`` tuple. ``test`` holds the last ``n_test`` rows
        (all rows if ``n_test`` exceeds the row count) and ``train`` holds
        the rows before them.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative")
    # Slice from an explicit index: ``data[:-n_test]`` is ``data[:0]`` when
    # n_test == 0, which would put every row in the test set instead of none.
    split = max(len(data) - n_test, 0)
    return data[:split, :], data[split:, :]
def xgboost_forecast(train, testX):
    """Fit an XGBoost regressor on ``train`` and predict one step for ``testX``.

    A new model is constructed and fitted on every call.

    Args:
        train: Sequence of equal-length rows (or a 2-D array). The last
            column is used as the target, all preceding columns as features.
        testX: A single feature row to predict for.

    Returns:
        The first element of the model's prediction for ``testX``.
    """
    samples = np.asarray(train)
    # every column but the last is an input; the last one is the target
    features = samples[:, :-1]
    target = samples[:, -1]
    print(features.shape)
    regressor = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
    regressor.fit(features, target)
    # wrap the single row in a list so the model receives a one-row batch
    forecast = regressor.predict(np.asarray([testX]))
    return forecast[0]
def walk_forward_validation(tdata, n_test):
    """Evaluate one-step forecasts over the last ``n_test`` rows of ``tdata``.

    For each test row a model is fitted on all rows seen so far, the row's
    target is predicted, and the true row is then added to the history
    before moving on to the next one.

    Args:
        tdata: 2-D array of supervised rows; the last column is the target.
        n_test: Number of trailing rows to hold out for evaluation.

    Returns:
        A ``(error, actual, predictions)`` tuple: the value returned by
        ``mean_absolute_error``, the test targets, and the list of forecasts.
    """
    train, test = train_test_split(tdata, n_test)
    # history starts as the training rows and grows by one row per step
    history = list(train)
    predictions = []
    for row in test:
        # split the test row into input and output columns
        testX, testy = row[:-1], row[-1]
        yhat = xgboost_forecast(history, testX)
        predictions.append(yhat)
        # the real observation becomes available for the next fit
        history.append(row)
        print('>expected=%.1f, predicted=%.1f' % (testy, yhat))
    actual = test[:, -1]
    error = mean_absolute_error(actual, predictions)
    return error, actual, predictions
# Frame the 'seq' column as supervised rows: 6 lagged inputs plus 1 target.
tdata = series_to_supervised(
    data=list(df['seq'].values),
    n_in=6,
)
# Walk-forward evaluation over the last 20 rows.
mae, y, yhat = walk_forward_validation(tdata=tdata, n_test=20)
How can I modify the program to work for multivariate time series analysis? Is it only the `series_to_supervised` function that needs to be modified, or are changes required elsewhere as well?