0

How can I make the pipeline first apply PolynomialFeatures to x1 and x2, and then apply StandardScaler to x1, x2, and all of the generated polynomial features?

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression


# Toy dataset: two numeric features (x1, x2), one categorical feature (x3),
# and a binary target (y).
df = pd.DataFrame(
    {
        "x1": [1, 2, 6],
        "x2": [3, 18, 41],
        "x3": ["red", "red", "green"],
        "y": [1, 1, 0],
    }
)

# Split the frame into the feature matrix and the target vector.
X = df.loc[:, ["x1", "x2", "x3"]]
y = df["y"]

# Preprocessing for the raw feature columns.
#
# The entries of a ColumnTransformer run independently on the columns they
# are given and their outputs are concatenated side by side. Listing the
# polynomial expansion and the scaler as two sibling entries would therefore
# scale only the raw x1/x2 and leave the polynomial terms unscaled (and would
# emit x1/x2 twice). To chain the two steps, nest them in a Pipeline and hand
# that Pipeline to the ColumnTransformer as a single transformer: x1 and x2
# are expanded to [x1, x2, x1^2, x1*x2, x2^2] first, and the scaler then
# standardizes all five resulting columns.
ct = ColumnTransformer(
    transformers=[
        (
            "numeric",
            Pipeline(
                steps=[
                    ("poly", PolynomialFeatures(degree=2, include_bias=False)),
                    ("scaler", StandardScaler()),
                ]
            ),
            ["x1", "x2"],
        ),
        # The categorical column is one-hot encoded and passed through unscaled.
        ("ohe", OneHotEncoder(), ["x3"]),
    ]
)

# Chain the column preprocessing with a logistic regression that uses
# balanced class weights, then train the whole pipeline on the toy data.
pipeline = Pipeline(
    steps=[
        ("transformer", ct),
        ("classifier", LogisticRegression(class_weight="balanced")),
    ]
)

pipeline.fit(X, y)
Anne Maier
  • 299
  • 1
  • 8

0 Answers