I'he realised custom transformer of sklearn, where I porcess a column of text data.
I create a pipeline, where I combine two transofrmers - NameTransformer, OneHotEncoder. But I have got an error.
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
import re
import pandas as pd
def get_title_cat(row):
name = str(row.Name)
return name.split()[0]
class NameTransformer(BaseEstimator, TransformerMixin):
def __init__(self) -> None:
super().__init__()
self.name_col = 'Name'
def fit(self, X, y=None, **fit_params):
return self
def transform(self, X: pd.DataFrame, y=None, **fit_params):
return pd.DataFrame(X.apply(get_title_cat, axis='columns'), columns=['Title_cat'])
def get_feature_names_out(self):
return ['Title_cat']
name_pipe = Pipeline([
('name_cat', NameTransformer()),
('ohe_tr', OneHotEncoder(handle_unknown='infrequent_if_exist', sparse_output=False))
])
data = {'Name': ['Braund, Mr. Owen Harris',
'Cumings, Mrs. John Bradley (Florence Briggs Thayer)',
'Heikkinen, Miss. Laina',
'Byles, Rev. Thomas Roussel Davids']}
df = pd.DataFrame(data)
name_pipe.fit_transform(df)
name_pipe.get_feature_names_out()
How to correctly realise get_feature_names_out to not get this error?
750 if not hasattr(transform, "get_feature_names_out"):
751 raise AttributeError(
752 "Estimator {} does not provide get_feature_names_out. "
753 "Did you mean to call pipeline[:-1].get_feature_names_out"
754 "()?".format(name)
755 )
--> 756 feature_names_out = transform.get_feature_names_out(feature_names_out)
757 return feature_names_out
TypeError: get_feature_names_out() takes 1 positional argument but 2 were given