link_finder has:
from HTMLParser import HTMLParser
from urlparse import urlparse
from urlparse import urljoin
# create a subclass and override the handler methods
class LinkFinder(HTMLParser):
    """HTML parser that collects the target of every ``<a href="...">`` tag.

    Each href is resolved against ``base_url`` with ``urljoin``, so relative
    links are stored as absolute URLs. Feed markup with ``feed()`` and read
    the results with ``page_links()``.
    """

    def __init__(self, base_url, page_url):
        """Set up the parser.

        base_url -- URL that relative hrefs are resolved against.
        page_url -- URL of the page being parsed (stored, not used here).
        """
        # The base initializer must run, otherwise the parser's internal
        # state is never created and feed() fails. An explicit call is used
        # instead of super() so it also works where HTMLParser is an
        # old-style class (Python 2).
        HTMLParser.__init__(self)
        self.base_url = base_url
        self.page_url = page_url
        self.links = set()

    def handle_starttag(self, tag, attrs):
        """Record the resolved href of every anchor start tag."""
        if tag != 'a':
            return
        for attribute, value in attrs:
            if attribute == 'href':
                # Call the imported urljoin function directly; the name
                # ``urlparse`` in this file is a function, not the module.
                self.links.add(urljoin(self.base_url, value))

    def page_links(self):
        """Return the set of URLs collected so far.

        Named ``page_links`` rather than ``page_url``: an instance attribute
        called ``page_url`` is assigned in ``__init__`` and would shadow a
        method of the same name, making it impossible to call.
        """
        return self.links
# LinkFinder requires two arguments: the base URL used to resolve relative
# hrefs, and the URL of the page being parsed. The values below are
# placeholders for this quick test.
finder = LinkFinder('http://example.com/', 'http://example.com/')
# The sample markup opens with <html> so that it matches the closing </html>.
finder.feed('<html><head><title>Test</title></head>'
            '<body><h1>Parse me!</h1></body></html>')
And I got this error:
def handle_starttag(self, tag, attrs):
^
IndentationError: unindent does not match any outer indentation level
I checked the indentation once again, but the interpreter still raises the same error. Any help?
And do I have to import urljoin
when import urlparse
is already there?