
In "Sklearn Decision Rules for Specific Class in Decision tree", the decision rules are extracted for a single sample, not for a single class. I need the decision rules for a single class. In the example below I would like to get the decision rules for class = versicolor, as in the example at https://scikit-learn.org/stable/modules/tree.html (a rough sketch of the output I mean follows the results below).

[Iris decision tree (graphviz) figure from the scikit-learn docs]

from matplotlib import pyplot as plt
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier 
from sklearn import tree
from sklearn.tree import _tree
import numpy as np


# Prepare the data
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Fit the classifier with max_depth=3
clf = DecisionTreeClassifier(max_depth=3, random_state=1234)
model = clf.fit(X, y)

def get_rules(tree, feature_names, class_names):
    tree_ = tree.tree_
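    # map each node's split-feature index to a readable name; leaf nodes carry the TREE_UNDEFINED sentinel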
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]

    paths = []
    path = []
    
    def recurse(node, path, paths):
        
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
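            # split node: recurse into both children, appending the corresponding decision to each path copy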
            name = feature_name[node]
            threshold = tree_.threshold[node]
            p1, p2 = list(path), list(path)
            p1 += [f"({name} <= {np.round(threshold, 3)})"]
            recurse(tree_.children_left[node], p1, paths)
            p2 += [f"({name} > {np.round(threshold, 3)})"]
            recurse(tree_.children_right[node], p2, paths)
        else:
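            # leaf node: store the class counts and sample count, then record the completed path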
            path += [(tree_.value[node], tree_.n_node_samples[node])]
            paths += [path]
            
    recurse(0, path, paths)

    # sort paths by leaf sample count, largest first
    samples_count = [p[-1][1] for p in paths]
    ii = list(np.argsort(samples_count))
    paths = [paths[i] for i in reversed(ii)]
    
    rules = []
    for path in paths:
        rule = "if "
        
        for p in path[:-1]:
            if rule != "if ":
                rule += " and "
            rule += str(p)
        rule += " then "
        if class_names is None:
            rule += "response: "+str(np.round(path[-1][0][0][0],3))
        else:
            classes = path[-1][0][0]
            l = np.argmax(classes)
            rule += f"class: {class_names[l]} (proba: {np.round(100.0*classes[l]/np.sum(classes),2)}%)"
        rule += f" | based on {path[-1][1]:,} samples"
        rules += [rule]
        
    return rules

rules = get_rules(clf, iris.feature_names, iris.target_names)
for r in rules:
    print(r)

Result:
if (petal length (cm) <= 2.45) then class: setosa (proba: 100.0%) | based on 50 samples
if (petal length (cm) > 2.45) and (petal width (cm) <= 1.75) and (petal length (cm) <= 4.95) then class: versicolor (proba: 97.92%) | based on 48 samples
if (petal length (cm) > 2.45) and (petal width (cm) > 1.75) and (petal length (cm) > 4.85) then class: virginica (proba: 100.0%) | based on 43 samples
if (petal length (cm) > 2.45) and (petal width (cm) <= 1.75) and (petal length (cm) > 4.95) then class: virginica (proba: 66.67%) | based on 6 samples
if (petal length (cm) > 2.45) and (petal width (cm) > 1.75) and (petal length (cm) <= 4.85) then class: virginica (proba: 66.67%) | based on 3 samples
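
For example, a naive post-filter on the rule strings returned by get_rules keeps only the versicolor rules (just a sketch of the output I am after, assuming it is enough to match the predicted class name inside each rule string):

target_class = "versicolor"
# keep only the rules whose majority class is the target class
versicolor_rules = [r for r in rules if f"class: {target_class}" in r]
for r in versicolor_rules:
    print(r)

This string filter is only meant to illustrate the desired output for class = versicolor.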