Can I ask, when I run this code, it produces an output without error:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.model_selection import cross_val_score, cross_val_predict,cross_validate
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import chi2, f_regression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import SelectKBest
#from xgboost import XGBRegressor
from sklearn.feature_selection import f_regression
from sklearn.feature_selection import mutual_info_regression
from sklearn.feature_selection import mutual_info_classif
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import SelectKBest
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.pipeline import Pipeline
from scipy.stats import spearmanr
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn.metrics import roc_curve, precision_recall_curve, auc, make_scorer, recall_score, accuracy_score, precision_score, confusion_matrix,classification_report
import pickle
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import make_scorer
from sklearn.metrics import precision_score,recall_score
from sklearn.datasets import make_classification
#Generate fake data
X, y = make_classification(n_samples=5000, n_classes=2, n_features=20, n_redundant=0,random_state=0) #fake data
X_train = X[:4500] #.iloc for df
y_train = y[:4500]
X_test = X[4500:]#.reset_index(drop=True,inplace=True)
y_test = y[4500:]
scorers = {
'precision_score': make_scorer(precision_score),
'recall_score': make_scorer(recall_score),
'accuracy_score': make_scorer(accuracy_score)
}
def run_SVC(X_train, y_train, X_test, y_test,output_file,data_name,refit_score='precision_score'):
'''
run SVC algorithm, with CV and hyperparameter tuning.
'''
short_dataname = data_name.strip().split('/')
file_model_name = output_file + '_svc_' + short_dataname[-1]
clf = SVC()
skf = StratifiedKFold(n_splits=2,random_state=42,shuffle=True)
#fs = SelectKBest(score_func = mutual_info_classif)
pipeline = Pipeline(steps=[('svc',clf)]) #,('sel',fs)
print(pipeline.get_params().keys())
search = GridSearchCV(
pipeline,
param_grid={
'svc__C': [0.01, 0.1, 10, 1000], ##Regularization
'svc__gamma': [0.0001, 0.01, 1, 10],
'svc__kernel':['linear','rbf'],
},
return_train_score=True,
verbose=3,
refit=refit_score,
scoring=scorers,
cv=skf,
n_jobs=-1,
)
search.fit(X_train, y_train)
# make the predictions
y_pred = search.predict(X_test)
print('Best params for {}'.format(refit_score))
print(search.best_params_)
print(classification_report(y_test,y_pred)) #labels=['neg','pos']
return
print(run_SVC(X_train,y_train,X_test,y_test,'test.txt','dataset'))
When i comment in the only two lines that are commented out (#fs = SelectKBest(score_func = mutual_info_classif)
) and fs in the line after that, I get the error:
TypeError: All intermediate steps should be transformers and implement fit and transform or be the string 'passthrough' 'SVC()' (type <class 'sklearn.svm._classes.SVC'>) doesn't
I can see that other people have addressed this on SO before, e.g. here, so I tried to follow that person's answer, but my SelectKBest is already before my pipeline - when I move the line with 'fs' to be higher in my code (which I thought was what the answer was saying), I get the same error.
Could someone show me where I'm going wrong here and what I'm meant to change to remove this error?