0

I am trying to set up a simple example that traverses requests in priority order. I create a random priority for each request and attach it to the request. The second function, get_link_title, outputs the names so I can check whether the priority order is respected. It is not, and I can't figure out what I am doing wrong:

from tutorial.items import TutorialItem
from scrapy.http import Request
import random

class DmozSpider(BaseSpider):
    """Spider that follows every link on a DMOZ directory page.

    Each listed site becomes a ``TutorialItem`` and a follow-up ``Request``
    carrying a random priority in ``[0, 100]``.  ``get_link_title`` then
    fills in the ``<title>`` of the linked page.

    NOTE(review): ``BaseSpider`` and ``HtmlXPathSelector`` are not imported
    in the visible part of this file -- confirm they are imported from
    scrapy before running.

    NOTE(review): per the Scrapy docs, ``priority`` controls the order in
    which the scheduler hands requests out.  With several requests being
    downloaded concurrently, the callbacks presumably fire in response
    arrival order, so the "Title for" lines need not be sorted by priority
    -- verify against the CONCURRENT_REQUESTS setting in use.
    """

    name = "dmoz"
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
    ]

    def parse(self, response):
        """Build one prioritized follow-up request per directory entry.

        Returns a list of ``Request`` objects; each carries its partially
        filled item in ``meta['item']`` and calls back ``get_link_title``.
        Entries without a link text or href are skipped, because no
        request can be built for them.
        """
        tree = HtmlXPathSelector(response)
        reqs = []

        for site in tree.xpath('//ul[@class="directory-url"]/li'):
            names = site.xpath('a/text()').extract()
            urls = site.xpath('a/@href').extract()
            if not names or not urls:
                # Malformed entry: skip it instead of aborting the page.
                continue

            # Raw string: same pattern as before, without relying on the
            # invalid "\s" escape surviving in a normal string literal.
            descriptions = site.xpath('./text()').re(r'-\s([^\n]*?)\n')

            item = TutorialItem()
            item['name'] = names[0]
            item['url'] = urls[0]
            # A missing description should not discard the whole entry.
            item['description'] = descriptions[0] if descriptions else ''

            pty = random.randint(0, 100)
            # Single formatted argument prints the same on Python 2 and 3.
            print('From first page %s %s' % (item['name'], pty))
            reqs.append(Request(item['url'], meta={'item': item},
                                callback=self.get_link_title, priority=pty))
        return reqs

    def get_link_title(self, response):
        """Attach the linked page's ``<title>`` text to the item and return it.

        The item is read from ``response.meta['item']``.  ``title_link`` is
        set to an empty string when the page has no ``<title>`` text.
        """
        item = response.meta['item']
        print('Title for %s' % item['name'])
        tree = HtmlXPathSelector(response)
        titles = tree.xpath('//title/text()').extract()
        item['title_link'] = titles[0] if titles else ''
        return item
user2016508
  • 418
  • 1
  • 3
  • 12

0 Answers0