I want to plot the train AUC and CV AUC against the tree depth for a decision tree model, while also varying min_samples_split as shown in the code. If I fix min_samples_split to a single value (5 or 10), the curve is plotted; but if I use two values, min_samples_split = [5, 10], I get `ValueError: x and y must have same first dimension, but have shapes (5,) and (10,)`. I understand the error, but how can I get this plotted?
# Plot train and CV AUC against tree depth, drawing one pair of curves for
# each min_samples_split value.
depth = [1, 5, 10, 50, 100]
k = [5, 10]

# One AUC series per min_samples_split value. Each series gets exactly one
# entry per element of `depth`, so it always matches the x-axis length.
# (Appending every (depth, k) combination to a single flat list produced
# len(depth) * len(k) points, which is what triggered
# "x and y must have same first dimension".)
train_auc = {p: [] for p in k}
cv_auc = {p: [] for p in k}

for p in k:
    for i in depth:
        clf = DecisionTreeClassifier(
            criterion='gini',
            max_depth=i,
            min_samples_split=p,
            class_weight='balanced',
        )
        clf.fit(X_train, y_train)

        # NOTE(review): roc_auc_score is given hard labels from predict();
        # scores from predict_proba usually give a more informative AUC --
        # confirm which is intended.
        y_train_pred = clf.predict(X_train)
        y_cv_pred = clf.predict(X_cv)

        train_auc[p].append(roc_auc_score(y_train, y_train_pred))
        cv_auc[p].append(roc_auc_score(y_cv, y_cv_pred))

for p in k:
    # marker='o' shows the individual points on the line, so no separate
    # scatter call (and no duplicate legend entry) is needed.
    plt.plot(depth, train_auc[p], marker='o',
             label=f'Train AUC (min_samples_split={p})')
    plt.plot(depth, cv_auc[p], marker='o', linestyle='--',
             label=f'CV AUC (min_samples_split={p})')

plt.legend()
plt.xlabel("depth")
plt.ylabel("AUC")
plt.title("ERROR PLOTS")
plt.grid()
plt.show()