The correct way to combine scaling with cross-validation is to scale the data
inside the cross-validation loop, after splitting into folds — not before.
If you scale the entire dataset first and then split it into folds, information
from the validation fold leaks into the training procedure, which biases the
evaluation.
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler

# Stratified k-fold keeps the class distribution similar in every fold.
# random_state is fixed so the fold assignment is reproducible.
scv = StratifiedKFold(n_splits=9, shuffle=True, random_state=7)
def cross_validation_score(cv, batch_size=20, epochs=50):
    """Run leak-free cross-validation: fit a fresh scaler and model per fold.

    The StandardScaler is fitted on the training portion of each fold only,
    then applied to the held-out portion, so no validation statistics leak
    into training.

    Parameters
    ----------
    cv : cross-validator (e.g. StratifiedKFold)
        Provides ``split(X, Y)`` yielding (train_indices, test_indices).
    batch_size : int, default 20
        Mini-batch size passed to ``model.fit``.
    epochs : int, default 50
        Number of training epochs per fold.

    Returns
    -------
    list
        One entry per fold with the result of ``model.evaluate`` on that
        fold's held-out data.

    Notes
    -----
    Relies on module-level ``X``, ``Y`` (index-able arrays) and
    ``build_model()`` (returns a fresh, compiled Keras model) being defined
    elsewhere in the script.
    """
    scores_per_fold = []
    for fold_no, (train_idx, test_idx) in enumerate(cv.split(X, Y)):
        print(f"Training for Fold {fold_no+1}")
        # A new scaler per fold: fit on the training split only.
        scaler = StandardScaler()
        x_train, x_test = X[train_idx], X[test_idx]
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)
        # build_model is a function where the neural network is defined;
        # a fresh model per fold avoids weight carry-over between folds.
        model = build_model()
        model.fit(x_train, Y[train_idx],
                  batch_size=batch_size, epochs=epochs, verbose=0)
        scores_per_fold.append(model.evaluate(x_test, Y[test_idx]))
    return scores_per_fold
# Run the cross-validation with the stratified splitter defined above;
# collects one evaluation result per fold.
skcv_scores = cross_validation_score(scv)
You can also use sklearn.pipeline.Pipeline to bundle the scaler and the model
so the fitting happens correctly inside cross-validation. To use a Pipeline
with the code above, however, I would need to wrap the Keras model in a
scikit-learn-compatible wrapper (e.g. KerasClassifier); I left that out here
to keep the example easier to understand.