I am trying to apply the doc2vec tutorial and instead of testing on a random test corpus document, testing on the entire test corpus
I just modified the following line:
code:
# Pick a random document from the test corpus and infer a vector from the model
#doc_id = random.randint(0, len(test_corpus) - 1)
doc_id = [index for index, text in enumerate(test_corpus)]
inferred_vector = model.infer_vector(test_corpus[doc_id])
sims = model.docvecs.most_similar([inferred_vector], topn=len(model.docvecs))
# Compare and print the most/median/least similar documents from the train corpus
print('Test Document ({}): «{}»\n'.format(doc_id, ' '.join(test_corpus[doc_id])))
print(u'SIMILAR/DISSIMILAR DOCS PER MODEL %s:\n' % model)
for label, index in [('MOST', 0), ('MEDIAN', len(sims)//2), ('LEAST', len(sims) - 1)]:
print(u'%s %s: «%s»\n' % (label, sims[index], ' '.join(train_corpus[sims[index][0]].words)))
Error:
TypeError: list indices must be integers or slices, not list