I want to experiment DL regression model over time-series data by implementing the model using sklearn pipeline()
properly. I formed the following DL model in the form of the class WaveNet
and would like to call it within the pipeline accordingly.
#Model definition
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Conv1D, Activation, Add, Multiply,Lambda, Convolution1D, Dense, Dropout
from keras.initializers import TruncatedNormal
class WaveNet:
def __init__(self, timesteps, dilation_depth=9, n_filters=32):
self.timesteps = timesteps
self.dilation_depth = dilation_depth
self.n_filters = n_filters
self.model = self._build_model()
def _build_model(self):
# Define the input layer
input_layer = Input(shape=(self.timesteps, 1))
# Define the residual blocks
skip_connections = []
x = input_layer
for i in range(self.dilation_depth):
# Define the dilation rate
dilation_rate = 2 ** i
# Define the residual block
tanh_out = Convolution1D(self.n_filters, 3, activation='tanh', padding='causal', dilation_rate=dilation_rate)(x)
sigm_out = Convolution1D(self.n_filters, 3, activation='sigmoid', padding='causal', dilation_rate=dilation_rate)(x)
x = Multiply()([tanh_out, sigm_out])
skip_connections.append(x)
# Define the skip connection layer
summed = Add()(skip_connections)
out = Activation('relu')(summed)
# Define the output layers
out = Convolution1D(1, 1, activation='linear', padding='same')(out)
out = Lambda(lambda x: x[:, -1, :])(out)
out = Dense(1, kernel_initializer=TruncatedNormal(stddev=0.01))(out)
# Define the model and compile it
model = Model(input_layer, out)
model.compile(optimizer='adam', loss='mse')
return model
def summary(self):
self.model.summary()
#build an end-to-end pipeline, and supply the data into a regression model and train within the pipeline.
#Train and fit the WaveNet model into the pipeline chain
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import Pipeline, make_pipeline
WaveNetRegressor = WaveNet(timesteps=5, dilation_depth=9, n_filters=32)
WNet_pipeline = Pipeline(steps=[('scaler', MinMaxScaler()),('WNet', WaveNetRegressor())]).fit(X_train, y_train) #X, y
And get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-26-9c90503b7a90> in <cell line: 94>()
92
93 WaveNetRegressor = WaveNet(timesteps=5, dilation_depth=9, n_filters=32)
---> 94 WNet_pipeline = Pipeline([('scaler', MinMaxScaler()),('WNet', WaveNetRegressor())]).fit(X_train, Y_train) #X, y
95
96 #Displaying a Pipeline with a Preprocessing Step and Regression
TypeError: 'WaveNet' object is not callable
I checked related posts, and they offer wrapper for non-sklearn models to integrate into sklearn pipeline, but I couldn't figure out to implement it so far. I also couldn't figure out how to access important hyperparameters e.g, batch_size
or epochs
within pipeline during training (the model itself has some hyper-parameters as well):
Wrapper for sklearn pipeline:
here they used model function def model
, not class model
by using keras.wrappers.scikit_learn
and no info if we need to manipulate and set hyper-parameters. Maybe there is an elegant way to use the class and access hyper-parameters too:
# wrap the model using the function you created
from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
reg = KerasRegressor(build_fn=reg_model, verbose=0)
# just create the pipeline
pipeline = Pipeline(steps=[('reg',reg)]).fit(X_train, y_train)
Access the learning hyperparameters within the pipeline:
Here they used make_pipeline
not Pipeline()
Edit: WaveNetRegressor
instead of WaveNetRegressor()
....
WNet_pipeline = Pipeline([('scaler', MinMaxScaler()),('WNet', WaveNetRegressor)]).fit(X_train, Y_train) #X, y
...
get as follow:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-2-077fd9956e62> in <cell line: 94>()
92
93 WaveNetRegressor = WaveNet(timesteps=5, dilation_depth=9, n_filters=32)
---> 94 WNet_pipeline = Pipeline([('scaler', MinMaxScaler()),('WNet', WaveNetRegressor)]).fit(X_train, Y_train) #X, y
95
96 #Displaying a Pipeline with a Preprocessing Step and Regression
2 frames
/usr/local/lib/python3.9/dist-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
399 """
400 fit_params_steps = self._check_fit_params(**fit_params)
--> 401 Xt = self._fit(X, y, **fit_params_steps)
402 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):
403 if self._final_estimator != "passthrough":
/usr/local/lib/python3.9/dist-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params_steps)
337 # shallow copy of steps - this should really be steps_
338 self.steps = list(self.steps)
--> 339 self._validate_steps()
340 # Setup the memory
341 memory = check_memory(self.memory)
/usr/local/lib/python3.9/dist-packages/sklearn/pipeline.py in _validate_steps(self)
241 and not hasattr(estimator, "fit")
242 ):
--> 243 raise TypeError(
244 "Last step of Pipeline should implement fit "
245 "or be the string 'passthrough'. "
TypeError: Last step of Pipeline should implement fit or be the string 'passthrough'. '<__main__.WaveNet object at 0x7f458634ea90>' (type <class '__main__.WaveNet'>) doesn't