I am trying to create a few custom NER labels for my use case.
This is a sample of my training data:
[[' webex enable or disable auto join from connected video device advised user to raise a iiq request to get webex access. hence closing this ticket getting error while joing webex since i got a replacement lap top i have not been able to access webex 954 995 0444 9 30am 6pm thurs mon getting error while joing webex.',
{'entities': [[1, 62, 'ISSUE'],
[63, 118, 'RESOLVE_ACTION'],
[147, 178, 'ISSUE'],
[193, 212, 'USER_ACTIVITY']]}],
[' virtual desktop infrastructure vdi performance or latency issues or slow response education vdi user was getting vdi slowness issue while working on it. i have guide to user kindly logoff your vdi machine and login back after 10 minutes. user was getting vdi slowness issue while working on it. vdi slowness.',
{'entities': [[1, 47, 'ISSUE'],
[51, 65, 'ISSUE'],
[69, 96, 'ISSUE'],
[183, 238, 'RESOLVE_ACTION']]}]]
My code is below:
# Fetch the pipeline's NER component and register every entity label that
# appears in the training annotations (third element of each entity entry).
ner = nlp.get_pipe("ner")
for _, annotations in train_data:
    for span in annotations.get('entities'):
        label = span[2]
        ner.add_label(label)

# Names of all pipeline components except the NER component itself.
disable_pipes = [name for name in nlp.pipe_names if name != 'ner']
import random
from spacy.util import minibatch, compounding
from pathlib import Path
# Run the update loop with every pipe listed in `disable_pipes` switched off
# for the duration of the `with` block.
with nlp.disable_pipes(*disable_pipes):
    optimizer = nlp.resume_training()
    for epoch in range(100):
        # Reshuffle the examples and start a fresh loss accumulator each pass.
        random.shuffle(train_data)
        losses = {}
        # Batch sizes come from the compounding(1, 16, 1.001) schedule.
        for batch in minibatch(train_data, size=compounding(1, 16, 1.001)):
            # Split the (text, annotations) pairs into two parallel tuples.
            texts, golds = zip(*batch)
            print(texts, golds)
            nlp.update(texts, golds, drop=0.5, losses=losses, sgd=optimizer)
        # NOTE(review): the original indentation was lost; the loss report is
        # assumed to run once per pass over the data - confirm.
        print("Losses", losses)
# Persist the trained pipeline to disk. Set `output_dir` to None to skip saving.
output_dir = '/<my model path>/models'
if output_dir is not None:
    # Build the Path from the configured value rather than a second
    # hard-coded literal, so the model is always written to `output_dir`.
    output_dir = Path(output_dir)
    # Create missing parent directories too; do nothing if it already exists.
    output_dir.mkdir(parents=True, exist_ok=True)
    nlp.meta['name'] = "new_model"  # rename model
    nlp.to_disk(output_dir)
    print("Saved model to", output_dir)
# Testing model on training data set itself: run `nlp2` over each training
# text, print the predicted (text, label) pairs and render them with displacy.
for text, _ in train_data:
    doc = nlp2(text)
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])
    # Render the Doc that was just produced instead of running the pipeline
    # a second time on the same text.
    displacy.render(doc, style="ent", jupyter=True)
My models are returning the labels for my training data, but they are not returning anything for any other sample data / text.