I wanted to call the function spider which is within a class with the parameters such as url,word and maxPages.
when I try calling it the following way I get an error because spider() gets more than 3 arguments (it gets 4 arguments instead).
Please can someone guide me as to how I can call the function which is within a class correctly.
My code looks like this:
import HTMLParser
from urllib2 import urlopen
from pandas.io.parsers import TextParser
class LinkParser(HTMLParser.HTMLParser):
#other methods
def spider(url,word,maxPages):
pagesTovisit = [url]
numberVisited=0
foundWord = False
maxPages = 0
while numberVisited < maxPages and pagesTovisit != [] and not foundWord:
numberVisited = numberVisited +1
url = pagesTovisit[0]
pagesTovisit = pagesTovisit[1:]
try:
print numberVisited, "Visiting:", url
parser = LinkParser()
data, links = parser.getLinks(url)
if data.find(word)>-1:
foundWord = True
pagesTovisit = pagesTovisit +links
print "Success"
except:
print "failed"
if foundWord:
print "the word",word,"was found at",url
else:
print "word not found"
url = raw_input("enter the url: ")
word = raw_input("enter the word to search for: ")
maxPages = raw_input("the max pages you want to search in for are: ")
lp=LinkParser()
lp.spider(url,word,maxPages)