I have a problem: I would like to explain how my model arrived at its decision, and I would like to use LIME for this. I have found the following tutorial. I have a free-text field and want to identify which case (class) it belongs to; this is to be solved with a CNN. There are several classes, i.e. it is a multi-class problem. However, I don't know how to use LIME with this setup - I tried it but got the error shown below.
How can I use Lime for a CNN with multiple classes?
# Loading data
# data wrangling
# ...
from sklearn.model_selection import train_test_split
# Hold out 25% of the samples as the test set; `stratify=y` asks for a split
# that is stratified on the labels.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.25, stratify=y)
## Fit the tokenizer on the training texts only
tokenizer = Tokenizer(num_words=num_unique_words)
tokenizer.fit_on_texts(list(train_X))

## Turn every text into a sequence of word indices and pad it to `maxlen`
train_X = pad_sequences(tokenizer.texts_to_sequences(train_X), maxlen=maxlen)
test_X = pad_sequences(tokenizer.texts_to_sequences(test_X), maxlen=maxlen)

## Vocabulary bookkeeping used later for the embedding matrix
word_index = tokenizer.word_index
# One more than the number of entries in the word index
# (presumably to leave index 0 free for padding - confirm).
vocab_size = 1 + len(word_index)
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
#label encoding
le = LabelEncoder()
train_y = le.fit_transform(train_y.tolist())
test_y = le.transform(test_y.tolist())
#one hot encoding
train_y = to_categorical(train_y)
test_y = to_categorical(test_y)
%%time
EMBEDDING_FILE = r'./input/glove.42B.300d.txt'


def get_coefs(word, *arr):
    """Return ``(word, vector)``, where ``vector`` is ``arr`` as a float32 array.

    Args:
        word: Returned unchanged as the first element of the tuple.
        *arr: The remaining positional values; converted with ``np.asarray``.
    """
    vector = np.asarray(arr, dtype='float32')
    return word, vector
# Parse the embedding file into {word: vector}; every line is split on single
# spaces and handed to get_coefs. The context manager closes the file handle
# once the dict is built (the bare open() inside the generator never did).
with open(EMBEDDING_FILE, encoding="utf-8") as embedding_file:
    embeddings_index = dict(get_coefs(*line.split(" ")) for line in embedding_file)

# Keep only the word-index entries whose index is at most `max_features`.
tokenizer.word_index = {e: i for e, i in tokenizer.word_index.items() if i <= max_features}

# Embedding matrix that is later passed as weights to the model's Embedding
# layer. Row i holds the pretrained vector of the word with index i; words
# without a pretrained vector keep an all-zero row.
embedding_matrix = np.zeros((vocab_size, 300))
for word, i in tokenizer.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
class CNN_Text:
    """Builder for a multi-branch text CNN on top of fixed pretrained embeddings.

    ``forward`` reads the module-level names ``maxlen`` and ``embedding_matrix``
    at call time.
    """

    def __init__(self, x, y, num_classes=53):
        """Store the constructor arguments.

        Args:
            x: Stored as ``self.x``; not read by ``forward``.
            y: Stored as ``self.y``; not read by ``forward``.
            num_classes: Number of units in the softmax output layer. Defaults
                to 53, the value that was previously hard-coded.
        """
        self.x = x
        self.y = y
        self.num_classes = num_classes

    def forward(self):
        """Build the Keras model, print its summary and return it uncompiled."""
        filter_sizes = [1, 2, 3, 5]
        num_filters = 32
        # Take the embedding width from the matrix itself so the Embedding,
        # Reshape and Conv2D layers cannot disagree with the weights.
        embed_dim = embedding_matrix.shape[1]

        inp = Input(shape=(maxlen,))
        x = Embedding(embedding_matrix.shape[0], embed_dim,
                      weights=[embedding_matrix], trainable=False)(inp)
        x = SpatialDropout1D(0.4)(x)
        # Add a trailing channel axis so the sequence can be fed to Conv2D.
        x = Reshape((maxlen, embed_dim, 1))(x)

        # One convolution branch per filter size; each kernel spans the full
        # embedding width. All convolutions are created before the pooling
        # layers, matching the original layer-creation order.
        conv_outputs = [
            Conv2D(num_filters, kernel_size=(size, embed_dim),
                   kernel_initializer='normal', activation='elu')(x)
            for size in filter_sizes
        ]
        # Pool window of height maxlen - size + 1, i.e. the conv output length
        # under Keras' default 'valid' padding, so each filter yields one value.
        pooled = [
            MaxPool2D(pool_size=(maxlen - size + 1, 1))(conv)
            for size, conv in zip(filter_sizes, conv_outputs)
        ]

        z = Concatenate(axis=1)(pooled)
        z = Flatten()(z)
        z = Dropout(0.3)(z)
        outp = Dense(self.num_classes, activation="softmax")(z)

        model = Model(inputs=inp, outputs=outp)
        model.summary()
        return model
# Build the network.
p1 = CNN_Text(embedding_matrix.shape[0], maxlen)
model = p1.forward()

# Training configuration: categorical cross-entropy loss with Adam.
loss = keras.losses.categorical_crossentropy
optim = keras.optimizers.Adam(learning_rate=0.0009)
metrics = ["accuracy"]
model.compile(optimizer=optim, loss=loss, metrics=metrics)

# Train for 10 epochs; the test split is passed as validation data.
history = model.fit(
    train_X,
    train_y,
    validation_data=(test_X, test_y),
    epochs=10,
    batch_size=32,
)
What I tried
from lime import lime_text
from sklearn.pipeline import make_pipeline
from lime.lime_text import LimeTextExplainer
# Use the encoder's own class order for the explainer. The targets were encoded
# with `le`, so output column k of the model corresponds to le.classes_[k];
# Series.unique() returns labels in order of appearance and would attach the
# wrong names to the probabilities.
# NOTE(review): assumes `y` holds the 'forwarder_name' labels - confirm.
explainer = LimeTextExplainer(class_names=le.classes_)


def predict_proba(texts):
    """Map raw texts to the model's class probabilities.

    A Keras ``Tokenizer`` has no ``fit``/``transform`` methods, so it cannot be
    a step in ``sklearn.pipeline.make_pipeline`` (this is the TypeError raised
    by the original code). LIME only needs a callable that takes a list of
    strings and returns an array of per-class probabilities, so the training
    preprocessing is applied by hand here.

    Args:
        texts: List of raw text strings (LIME passes perturbed copies of the
            instance being explained).

    Returns:
        The output of ``model.predict`` for the tokenised, padded texts.
    """
    sequences = tokenizer.texts_to_sequences(texts)
    padded = pad_sequences(sequences, maxlen=maxlen)
    return model.predict(padded)


# Kept under the original name `c`. Pass it directly as the classifier function:
#   explainer.explain_instance(text, c, num_features=10, top_labels=3)
c = predict_proba
[OUT]
TypeError: All intermediate steps should be transformers and implement fit and transform or be the string 'passthrough' '<keras.preprocessing.text.Tokenizer object at 0x00000163D3E86DC0>' (type <class 'keras.preprocessing.text.Tokenizer'>) doesn't
Example text: "I have been on this birth control for one cycle. After reading some of the reviews on this type and..."
What I want