I have wrote a spider to scrap a few elements from a website but the problem is i am unable to fetch some of the elements and some are working fine. Please help me in right direction.
Here is my spider code:
from scrapy.selector import Selector
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from ScrapyScraper.items import ScrapyscraperItem
class ScrapyscraperSpider(CrawlSpider) :
name = "rs"
allowed_domains = ["mega.pk"]
start_urls = ["http://www.mega.pk/mobiles/"]
rules = (
Rule(SgmlLinkExtractor(allow = ("http://www\.mega\.pk/mobiles_products/[0-9]+\/[a-zA-Z-0-9.]+",)), callback = 'parse_item', follow = True),
)
def parse_item(self, response) :
sel = Selector(response)
item = ScrapyscraperItem()
item['Heading'] = sel.xpath('//*[@id="main1"]/div[1]/div[1]/div/div[2]/div[2]/div/div[1]/h2/span/text()').extract()
item['Content'] = sel.xpath('//*[@id="main1"]/div[1]/div[1]/div/div[2]/div[2]/div/p/text()').extract()
item['Price'] = sel.xpath('//*[@id="main1"]/div[1]/div[1]/div/div[2]/div[2]/div/div[2]/div[1]/div[2]/span/text()').extract()
item['WiFi'] = sel.xpath('//*[@id="laptop_detail"]/tbody/tr/td[contains(. ,"Wireless")]/text()').extract()
return item
Now i am able to get Heading, Content and Price but Wifi returns nothing. The point where i get totally confused is that the same xpath works in chrome and not in python(scrapy).