I'm trying to crawl viagogo.com. I want to crawl each show listed on the page http://www.viagogo.com/Concert-Tickets/Rock-and-Pop. I'm able to get the shows on the first page, but when I try to move to the next page, it just doesn't crawl! Here is my code:
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from viagogo.items import ViagogoItem
from scrapy.http import Request, FormRequest
class viagogoSpider(CrawlSpider):
    """Crawl the viagogo Rock-and-Pop listing and record each linked page.

    Two rules drive the crawl:

    * a pagination rule whose responses are handled by ``Parse_Page``;
    * a listing-table rule whose responses are handled by
      ``Parse_artists_Tickets``.

    Both rules use ``follow=True``, so the rules are applied again to every
    response they produce.
    """

    name = "viagogo"
    allowed_domains = ['viagogo.com']
    start_urls = ["http://www.viagogo.com/Concert-Tickets/Rock-and-Pop"]

    rules = (
        # Running on pages
        # NOTE(review): this XPath picks the pager link by position (li[7]).
        # If the pager markup differs between pages, or the link has no real
        # href because paging is done by JavaScript, nothing will be
        # extracted -- confirm against the raw HTML (e.g. with `scrapy shell`).
        Rule(
            SgmlLinkExtractor(restrict_xpaths=('//*[@id="clientgridtable"]/div[2]/div[2]/div/ul/li[7]/a')),
            callback='Parse_Page',
            follow=True,
        ),
        # Running on artists in title
        # NOTE(review): browsers insert <tbody> when rendering; verify that
        # the element is really present in the HTML Scrapy downloads.
        Rule(
            SgmlLinkExtractor(restrict_xpaths=('//*[@id="clientgridtable"]/table/tbody')),
            callback='Parse_artists_Tickets',
            follow=True,
        ),
    )
    #all_list = response.xpath('//a[@class="t xs"]').extract()

    def Parse_Page(self, response):
        """Report a pagination page reached through the first rule.

        The rule's ``follow=True`` already re-applies the rules to this
        response, so no extra request is scheduled here. The earlier
        ``Request(url=response.url, ...)`` pointed at the page that had just
        been downloaded; Scrapy's duplicate filter discards such a request
        (and without the filter it would recurse forever), so it has been
        removed together with the item that only travelled in its ``meta``.

        :param response: the downloaded pagination page.
        :returns: an empty list (nothing is scraped from pager pages).
        """
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print('Page!' + response.url)
        return []

    def Parse_artists_Tickets(self, response):
        """Build an item for a page reached through the listing-table rule.

        The item's title list is also appended, one line per page, to the
        ``viagogo_output`` file in the current working directory.

        :param response: the downloaded page.
        :returns: a ``ViagogoItem`` with ``title`` and ``link`` filled in.
        """
        item = ViagogoItem()
        # extract() returns a list of matches, so "title" is a list.
        item["title"] = response.xpath('//title/text()').extract()
        item["link"] = response.url
        print(response.url)
        with open('viagogo_output', 'a') as f:
            f.write(str(item["title"]) + '\n')
        return item
I cannot understand what I'm doing wrong, but the output (inside the file) contains only the shows from the first page.
Thanks!