I'm getting error while using make_column_transformer with LabelEncoder
def train_or_load_model(data,learn=True):
to_categorical = None
to_OH = None
to_drop = None
with open('to_categorical.pickle','rb') as f:
to_categorical=pickle.load(f)
with open('to_OH.pickle','rb') as f:
to_OH=pickle.load(f)
with open('to_drop.pickle','rb') as f:
to_drop=pickle.load(f)
# print(to_drop)
ID = data.drop(['id'],axis=1,inplace=True)
if learn:
target = np.array(data[['target']])
target.reshape((300000,-1))
print(type(target))
to_drop.append('target')
data.drop(to_drop,axis=1,inplace=True)
else:
data.drop(to_drop,axis=1,inplace=True)
if learn:
transformer = make_column_transformer(
(LabelEncoder(),to_categorical),
(OneHotEncoder(),to_OH)
)
if learn:
model = Pipeline(
steps=[('preprocess_data', transformer),
('model',KNeighborsClassifier(2,n_jobs=-1))
]
)
X_train,X_test, y_train, y_test = train_test_split(data,target,test_size=0.2)
model.fit(X_train,y_train)
I'm using data from https://www.kaggle.com/c/cat-in-the-dat/data and I get error like
Traceback (most recent call last):
File "c:\Users\barte\.vscode\extensions\ms-python.python-2019.11.50794\pythonFiles\ptvsd_launcher.py", line 43, in <module>
main(ptvsdArgs)
File "c:\Users\barte\.vscode\extensions\ms-python.python-2019.11.50794\pythonFiles\lib\python\old_ptvsd\ptvsd\__main__.py", line 432, in main
run()
File "c:\Users\barte\.vscode\extensions\ms-python.python-2019.11.50794\pythonFiles\lib\python\old_ptvsd\ptvsd\__main__.py", line 316, in run_file
runpy.run_path(target, run_name='__main__')
File "C:\Users\barte\AppData\Local\Programs\Python\Python36\Lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "C:\Users\barte\AppData\Local\Programs\Python\Python36\Lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "C:\Users\barte\AppData\Local\Programs\Python\Python36\Lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "c:\Users\barte\Desktop\Projects\tf\kaggle categorical feature\main.py", line 102, in <module>
print(train_or_load_model(raw_data))
File "c:\Users\barte\Desktop\Projects\tf\kaggle categorical feature\main.py", line 97, in train_or_load_model
model.fit(X_train,y_train)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\sklearn\pipeline.py", line 352, in fit
Xt, fit_params = self._fit(X, y, **fit_params)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\sklearn\pipeline.py", line 317, in _fit
**fit_params_steps[name])
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\memory.py", line 355, in __call__
return self.func(*args, **kwargs)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\sklearn\pipeline.py", line 716, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\sklearn\compose\_column_transformer.py", line 476, in fit_transform
result = self._fit_transform(X, y, _fit_transform_one)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\sklearn\compose\_column_transformer.py", line 420, in _fit_transform
self._iter(fitted=fitted, replace_strings=True), 1))
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\parallel.py", line 921, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\parallel.py", line 759, in dispatch_one_batch
self._dispatch(tasks)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\parallel.py", line 716, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\_parallel_backends.py", line 182, in apply_async
result = ImmediateResult(func)
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\_parallel_backends.py", line 549, in __init__
self.results = batch()
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\parallel.py", line 225, in __call__
for func, args, kwargs in self.items]
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\joblib\parallel.py", line 225, in <listcomp>
for func, args, kwargs in self.items]
File "C:\Users\barte\Desktop\Projects\tf\env\lib\site-packages\sklearn\pipeline.py", line 716, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
TypeError: fit_transform() takes 2 positional arguments but 3 were given
enter code here
I don't know why this pipeline gives 3 arguments to LabelEncoder while only X_train should go there
I have also tried to make my own class like class MyLabelEncoder(BaseEstimator,TransformerMixin)
but that turned out to have bad shape error when fitting.