When I shift my time series data, I get some NaNs in the dataframe. The only interpolation method that can replace these NaNs with numbers is 'linear'. However, the NaNs are all replaced by the same number, which isn't preferable.
Is there some way to use a different method instead, such as 'cubic' or 'quadratic'?
import numpy as np
import pandas as pd
# Original data: ten daily observations of seeded (repeatable) random noise.
np.random.seed(0)  # fixed seed so the printed example is reproducible
days = pd.date_range(start='2015-01-01', end='2015-01-10', freq='1D')
df = pd.DataFrame({'Date': days, 'col1': np.random.randn(len(days))})
df = df.set_index('Date')

# Add lagged copies of col1; shift(k) moves values down k rows, leaving the
# first k rows of each lag column as NaN.
df['lag1'] = df['col1'].shift(1)
df['lag3'] = df['col1'].shift(3)
print(df)
def interp(dfObj, method='linear', **kwargs):
    """Fill missing values in a pandas Series or DataFrame by interpolation.

    Args:
        dfObj: Series or DataFrame that may contain NaNs.
        method: Interpolation method handed to ``interpolate``. Defaults to
            ``'linear'``, which matches the previous hard-coded behaviour.
            Methods such as ``'quadratic'`` or ``'cubic'`` are delegated by
            pandas to SciPy.
        **kwargs: Extra keyword arguments forwarded to ``interpolate``.
            ``limit_direction`` defaults to ``'both'`` unless given. For the
            SciPy ``interp1d``-backed methods, ``fill_value='extrapolate'``
            can be passed so that NaNs outside the known range (e.g. the
            leading NaNs produced by ``shift``) are extrapolated.

    Returns:
        ``dfObj`` itself when it has no missing values, otherwise a new
        object with the missing values interpolated.
    """
    # `.values.any()` collapses to one boolean for Series and DataFrame alike;
    # `.isna().sum() > 0` is a Series for DataFrame input and cannot be used
    # directly as an `if` condition.
    if not dfObj.isna().values.any():
        return dfObj

    # Fill in both directions by default so leading NaNs are handled too.
    kwargs.setdefault('limit_direction', 'both')
    return dfObj.interpolate(method=method, **kwargs)
# Replace the NaNs in each lag column with interpolated values, then show
# the completed frame.
for lag_col in ('lag1', 'lag3'):
    df[lag_col] = interp(df[lag_col])
print(df)