How can I make the pipeline apply PolynomialFeatures and then StandardScaler to x1, x2, and all of the generated polynomial features?
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler, OneHotEncoder
# Toy dataset: two numeric features (x1, x2), one categorical feature (x3),
# and a binary target (y).
df = pd.DataFrame(
    {
        "x1": [1, 2, 6],
        "x2": [3, 18, 41],
        "x3": ["red", "red", "green"],
        "y": [1, 1, 0],
    }
)
X = df[["x1", "x2", "x3"]]
y = df["y"]

# ColumnTransformer applies each of its transformers independently to the
# ORIGINAL input columns and concatenates the results; one transformer never
# sees another transformer's output. Listing "poly" and "scaler" side by side
# therefore scales only the raw x1/x2 and leaves the polynomial terms unscaled
# (and duplicates x1/x2 in the output).
#
# To scale everything PolynomialFeatures produces, chain the two steps in a
# nested Pipeline and hand that pipeline to the ColumnTransformer as a single
# transformer for the numeric columns. With degree=2 and include_bias=False
# the polynomial step already emits x1 and x2 themselves alongside x1^2,
# x1*x2 and x2^2, so the scaler receives the original features as well.
numeric_features = ["x1", "x2"]
numeric_pipeline = Pipeline(
    steps=[
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("scaler", StandardScaler()),
    ]
)

ct = ColumnTransformer(
    transformers=[
        ("numeric", numeric_pipeline, numeric_features),
        ("ohe", OneHotEncoder(), ["x3"]),
    ]
)

# Full model: preprocessing followed by a class-balanced logistic regression.
pipeline = Pipeline(
    steps=[
        ("transformer", ct),
        ("classifier", LogisticRegression(class_weight="balanced")),
    ]
)
pipeline.fit(X, y)