I am having difficulty to achieve my scraper (I took the initial example code from here[selenium with scrapy for dynamic page from @alecxe, and completed for getting some results, but if the scraper seems to lauch (we can observe the simulation of clicking the next button), it shuts down one second after and doesn't print or get anything in the items.
Here is the code
from scrapy.spider import BaseSpider
from selenium import webdriver
class product_spiderItem(scrapy.Item):
title = scrapy.Field()
price=scrapy.Field()
pass
class ProductSpider(BaseSpider):
name = "product_spider"
allowed_domains = ['ebay.com']
start_urls = ['http://www.ebay.com/sch/i.html?_odkw=books&_osacat=0&_trksid=p2045573.m570.l1313.TR0.TRC0.Xpython&_nkw=python&_sacat=0&_from=R40']
def __init__(self):
self.driver = webdriver.Firefox()
def parse(self, response):
self.driver.get(response.url)
while True:
next = self.driver.find_element_by_xpath('//td[@class="pagn-next"]/a')
try:
next.click()
# get the data and write it to scrapy items
response = TextResponse(url=response.url, body=self.driver.page_source, encoding='utf-8')
print response.url
for prod in response.xpath('//ul[@id="GalleryViewInner"]/li/div/div'):
item = product_spiderItem()
item['title'] = prod.xpath('.//div[@class="gvtitle"]/h3/a/text()').extract()[0]
item['price'] = prid.xpath('.//div[@class="prices"]/span[@class="bold"]/text()').extract()[0]
print item['price']
yield item
except:
break
self.driver.close()
I use scrapy crawl product_scraper -o products.json, to store results.what am i missing?