multivariate xgboost time series

Question

I implemented a univariate XGBoost time series forecast using the following code from this site:

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning array of shifted columns.

    Each output row holds the observations at t-n_in, ..., t-1 followed by
    the observations at t, ..., t+n_out-1. For multi-column input, every
    shifted block contains all of the input columns, so the multivariate
    case needs no special handling here.

    Args:
        data: Observations in time order; anything ``pd.DataFrame`` accepts
            (a list, a 1-D or 2-D array, a Series or a DataFrame).
        n_in: Number of lagged observations used as inputs.
        n_out: Number of observations to forecast, starting at time t.
        dropnan: When true, drop every row that contains a NaN value.

    Returns:
        A 2-D NumPy array with one row per remaining time step.
    """
    # No column count is needed up front: DataFrame() already normalises
    # lists and 1-D inputs to a single column, and probing ``data.shape[1]``
    # would fail on 1-D arrays and Series.
    df = pd.DataFrame(data)
    # input sequence (t-n, ... t-1)
    lagged = [df.shift(i) for i in range(n_in, 0, -1)]
    # forecast sequence (t, t+1, ... t+n)
    ahead = [df.shift(-i) for i in range(n_out)]
    # put it all together
    agg = pd.concat(lagged + ahead, axis=1)
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg.values

def train_test_split(data, n_test):
    """Split a 2-D array into train and test parts, holding out the last rows.

    Args:
        data: 2-D array supporting ``data[rows, :]`` indexing.
        n_test: Number of trailing rows to hold out as the test set.

    Returns:
        A ``(train, test)`` tuple of row slices of ``data``.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative, got %r" % (n_test,))
    # Slice from an explicit index: ``data[:-n_test]`` is empty when n_test
    # is 0 because ``-0 == 0``, which would swap the two parts. Clamping at 0
    # keeps an oversized n_test putting every row into the test part.
    split = max(len(data) - n_test, 0)
    return data[:split, :], data[split:, :]

def xgboost_forecast(train, testX):
    """Fit an XGBoost regressor on ``train`` and predict one step for ``testX``.

    Args:
        train: Rows of training data; the last column of each row is the
            target and the preceding columns are the inputs.
        testX: Input values of the single row to predict.

    Returns:
        The first (and only) element of the model's prediction for ``testX``.
    """
    # the history arrives as a list of rows; stack it into one 2-D array
    samples = np.asarray(train)
    features = samples[:, :-1]
    target = samples[:, -1]
    print(features.shape)
    # a fresh model is fitted on every call
    regressor = XGBRegressor(objective='reg:squarederror', n_estimators=1000)
    regressor.fit(features, target)
    # wrap the single row so predict() receives a 2-D batch of size one
    query = np.asarray([testX])
    forecast = regressor.predict(query)
    return forecast[0]

def walk_forward_validation(tdata, n_test):
    """Evaluate one-step forecasts over the last ``n_test`` rows of ``tdata``.

    For each test row the model is refitted on all rows seen so far, the row
    is predicted, and the actual row is then added to the history.

    Args:
        tdata: 2-D array whose last column is the target and whose other
            columns are the inputs.
        n_test: Number of trailing rows to use as the test set.

    Returns:
        A tuple ``(error, expected, predictions)``: the mean absolute error,
        the test targets, and the list of forecasts.
    """
    train, test = train_test_split(tdata, n_test)
    # the history starts as the training rows and grows by one row per step
    history = list(train)
    predictions = []
    for row in test:
        # inputs are every column but the last; the last one is the target
        testX, testy = row[:-1], row[-1]
        # refit on everything seen so far, then forecast this step
        yhat = xgboost_forecast(history, testX)
        predictions.append(yhat)
        # the true observation becomes training data for the next step
        history.append(row)
        print('>expected=%.1f, predicted=%.1f' % (testy, yhat))
    expected = test[:, -1]
    error = mean_absolute_error(expected, predictions)
    return error, test[:, -1], predictions

# Frame the 'seq' column as rows of six lagged inputs plus the current value.
tdata = series_to_supervised(data=list(df['seq'].values), n_in=6)

# Walk-forward evaluation over the last 20 rows.
mae, y, yhat = walk_forward_validation(tdata, n_test=20)

How can I modify the program to work for multivariate time series analysis? Is it just the `series_to_supervised` function that needs to be modified, or are changes required elsewhere as well?

I'm assuming you want to do multivariate forecasting. There are different ways to do that with regression algorithms. For example, you can use a separate model for each univariate series (e.g. using the `MultiOutputRegressor` from `scikit-learn`) or write a custom objective function for multi-output regression using `xgboost` (see https://stackoverflow.com/a/47685713/9334962). — mloning, Oct 06 '21 at 21:20

multivariate xgboost time series

0 Answers