I'm teaching myself classes and methods (probably my first mistake) by converting a sentiment analysis script to use them.
I thought I had all of the methods in place, but I keep getting

NameError: global name 'get_bigram_word_feats' is not defined

I'm sure I'd be getting the same error for get_word_feats, too, if it ever got that far.
I'm banging my head against this one big-time. I tried removing @staticmethod and adding self, with no luck. What am I doing wrong?
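To illustrate the pattern I mean (the class and method names here are just placeholders, not my real code), this is the kind of call that blows up for me:

class Example:
    @staticmethod
    def helper(x):
        return x * 2

    @staticmethod
    def caller(x):
        # raises NameError: global name 'helper' is not defined
        return helper(x)

print Example.caller(3)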
Here's my full code:
import os
import pickle
import string
import itertools
import time

import nltk
import nltk.classify.util
from nltk.corpus import stopwords, wordnet
from nltk.collocations import BigramCollocationFinder
from nltk.metrics import BigramAssocMeasures
from nltk.classify import NaiveBayesClassifier


def word_feats(words):
    return dict([(word, True) for word in words])
class SentClassifier:

    def __init__(self, name, location):
        self.name = name
        self.location = location
        self.fullpath = location + "/" + name

    def doesexist(self):
        return os.path.isfile(self.fullpath)

    def save_classifier(self):
        rf = open(self.fullpath, 'wb')
        pickle.dump(self.fullpath, rf)
        rf.close()

    def load_classifier(self):
        sf = open(self.fullpath, 'rb')
        sclassifier = pickle.load(sf)
        sf.close()
        return sclassifier
class Training:

    def __init__(self, neg, pos):
        self.neg = neg
        self.pos = pos
        self.negids = open(self.neg, 'rb').read().splitlines(True)
        self.posids = open(self.pos, 'rb').read().splitlines(True)
        self.exclude = set(string.punctuation)
        self.exclude = self.exclude, '...'
        self.swords = stopwords.words('english')

    def tokens(self, words):
        words = [w for w in nltk.word_tokenize(words) if w not in self.exclude and len(w) > 1
                 and w not in self.swords and wordnet.synsets(w)]
        return words

    def idlist(self, words):
        thisidlist = [self.tokens(tf) for tf in words]
        return thisidlist
    @staticmethod
    def get_word_feats(words):
        return dict([(word, True) for word in words])

    @staticmethod
    def get_bigram_word_feats(twords, score_fn=BigramAssocMeasures.chi_sq, tn=200):
        words = [w for w in twords]
        bigram_finder = BigramCollocationFinder.from_words(words)
        bigrams = bigram_finder.nbest(score_fn, tn)
        return dict([(ngram, True) for ngram in itertools.chain(words, bigrams)])

    @staticmethod
    def label_feats(thelist, label):
        return [(get_word_feats(lf), label) for lf in thelist]

    @staticmethod
    def label_grams(thelist, label):
        # this is the line the traceback points at
        return [(get_bigram_word_feats(gf), label) for gf in thelist()]

    @staticmethod
    def combinegrams(grams, feats):
        for g in grams():
            feats.append(g)
        return feats
    def negidlist(self):
        return self.idlist(self.negids)

    def posidlist(self):
        return self.idlist(self.posids)

    def posgrams(self):
        return self.label_grams(self.posidlist, 'pos')

    def neggrams(self):
        return self.label_grams(self.negidlist, 'neg')

    def negwords(self):
        return self.label_feats(self.negidlist, 'neg')

    def poswords(self):
        return self.label_feats(self.posidlist, 'pos')

    def negfeats(self):
        return self.combinegrams(self.neggrams, self.negwords)

    def posfeats(self):
        return self.combinegrams(self.posgrams, self.poswords)
starttime = time.time()

myclassifier = SentClassifier("sentanalyzer.pickle", "classifiers")

if myclassifier.doesexist() is False:
    print "training new classifier"
    trainset = Training('data/neg.txt', 'data/pos.txt')
    negfeats = trainset.negfeats()
    posfeats = trainset.posfeats()
    negcutoff = len(negfeats) * 8 / 10
    poscutoff = len(posfeats) * 8 / 10
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    print 'train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats))
    classifier = NaiveBayesClassifier.train(trainfeats)
    print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
    myclassifier.save_classifier()
else:
    print "using existing classifier"
    classifier = myclassifier.load_classifier()

classifier.show_most_informative_features(20)

mystr = "16 steps to an irresistible sales pitch, via @vladblagi: slidesha.re/1bVV7OS"
myfeat = word_feats(nltk.word_tokenize(mystr))

print classifier.classify(myfeat)
probd = classifier.prob_classify(myfeat)
print probd.prob('neg')
print probd.prob('pos')

donetime = time.time() - starttime
print donetime