from bs4 import BeautifulSoup
import urllib2
# Open the NYTimes homepage and read the page.
response = urllib2.urlopen('http://www.nytimes.com').read()
# NOTE: no parser is named here, so BeautifulSoup picks one on its own.
soup = BeautifulSoup(response)
data = []
# Take every heading on the homepage (elements carrying the
# "story-heading" class) and collect them into a list.
for story_heading in soup.find_all(class_="story-heading"):
    # Drop embedded newlines and surrounding whitespace from the heading text.
    story_title = story_heading.text.replace("\n", "").strip()
    new_story_title = story_title.encode('utf-8')
    # Convert the words of each title into a list (split on whitespace).
    words = new_story_title.split()
    data.append(words)
# Print once, after the loop, so the complete list of word lists is shown.
print(data)
# Now I want to remove the numbers from this text. How can I do it?