0

link_finder has:

from HTMLParser import HTMLParser
from urlparse import urlparse
from urlparse import urljoin

# create a subclass and override the handler methods
class LinkFinder(HTMLParser):

    def __init__(self, base_url, page_url):
        self.base_url = base_url
        self.page_url = page_url
        self.links = set()



    def handle_starttag(self, tag, attrs):
        if tag == 'a':
            for (attribute, value) in attrs:
                if attribute == 'href':
                    url = urlparse.urljoin(self.base_url, value)
                    self.links.add(url)

     def page_url(self):
        return self.links

finder = LinkFinder()
finder.feed('<HTMLParser><head><title>Test</title></head>'
            '<body><h1>Parse me!</h1></body></html>')

And I got this one:

def handle_starttag(self, tag, attrs):
                                         ^
IndentationError: unindent does not match any outer indentation level

I checked the indentation once again, but I still cannot find what is wrong. Any help? Also, do I have to import urljoin when urlparse is already imported?

1 Answer

0

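There's an extra space before the `def page_url(self):` method (it is indented five spaces instead of four); remove that space to fix the IndentationError. As for the second question: `from urlparse import urlparse` imports only the `urlparse` function, not the module, so `urljoin` does have to be imported separately, and it should be called as `urljoin(...)` rather than `urlparse.urljoin(...)`.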

Here's the correctly indented code:

from HTMLParser import HTMLParser
from urlparse import urlparse
from urlparse import urljoin

# create a subclass and override the handler methods
class LinkFinder(HTMLParser):
    """HTML parser that collects the target of every ``<a href="...">`` tag.

    Each ``href`` value seen while feeding markup is resolved against
    ``base_url`` with ``urljoin`` and stored in the ``links`` set, so
    duplicates are collapsed automatically.

    Attributes:
        base_url: URL that relative ``href`` values are resolved against.
        page_url: URL of the page being parsed (stored, not otherwise used
            by this class).
        links: set of absolute URLs collected so far.
    """

    def __init__(self, base_url, page_url):
        """Create a parser for one page.

        Args:
            base_url: URL used as the base when resolving relative links.
            page_url: URL of the page whose markup will be fed in.
        """
        # The base initializer creates the parser state that feed() relies
        # on; without this call feed() fails with AttributeError.  It is
        # called explicitly (not via super()) because Python 2's HTMLParser,
        # which this file imports, is an old-style class.
        HTMLParser.__init__(self)
        self.base_url = base_url
        self.page_url = page_url
        self.links = set()

    def handle_starttag(self, tag, attrs):
        """Record the resolved ``href`` of every opening ``<a>`` tag.

        Args:
            tag: lower-cased tag name supplied by HTMLParser.
            attrs: list of ``(attribute, value)`` pairs for the tag.
        """
        if tag != 'a':
            return
        for attribute, value in attrs:
            if attribute == 'href':
                # ``urlparse`` in this file is the function imported by
                # ``from urlparse import urlparse``; it has no ``urljoin``
                # attribute, so use the directly imported ``urljoin``.
                self.links.add(urljoin(self.base_url, value))

    def page_links(self):
        """Return the set of absolute URLs collected so far."""
        return self.links

    # Kept for backward compatibility with the original method name.  On
    # instances the ``page_url`` attribute assigned in __init__ shadows this,
    # so ``finder.page_url`` is the page's URL; use page_links() to read the
    # collected links.
    page_url = page_links

# LinkFinder's constructor requires both the base URL used to resolve relative
# links and the URL of the page being parsed; calling it with no arguments
# raises TypeError.  The example.com URLs are placeholders for this demo.
finder = LinkFinder('http://example.com/', 'http://example.com/')
# The sample document opens with <html> so that it matches its closing </html>.
finder.feed('<html><head><title>Test</title></head>'
            '<body><h1>Parse me!</h1></body></html>')
Abdul Rehman
  • 5,326
  • 9
  • 77
  • 150