I'm trying to get my data to output side by side, name and e-mail. Right now it just prints the e-mails and then the names.
Here's my code:
import re
import nltk
from nltk.corpus import stopwords
stop = stopwords.words('english')
inputfile = open('/Users/jchome/Downloads/StockXRF/untitled.txt','r')
string = inputfile.read()
def extract_email_addresses(string):
r = re.compile(r'[\w\.-]+@[\w\.-]+')
return r.findall(string)
def ie_preprocess(document):
document = ' '.join([i for i in document.split() if i not in stop])
sentences = nltk.sent_tokenize(document)
sentences = [nltk.word_tokenize(sent) for sent in sentences]
sentences = [nltk.pos_tag(sent) for sent in sentences]
return sentences
def extract_names(document):
names = []
sentences = ie_preprocess(document)
for tagged_sentence in sentences:
for chunk in nltk.ne_chunk(tagged_sentence):
if type(chunk) == nltk.tree.Tree:
if chunk.label() == 'PERSON':
names.append(' '.join([c[0] for c in chunk]))
return names
if __name__ == '__main__':
emails = extract_email_addresses(string)
names = extract_names(string)
print (emails + names)
output:
['1lawrencenage1l@gmail.com', george@gmail.com, 'Lawrence', 'George']
How can I put the output next to each other and write to a text file?