I am trying to scrape http://www.sueryder.org/Get-involved/Volunteering/All-Roles. As you can see, when you click through to the second page the URL doesn't change; the next page of results is loaded through JavaScript. I've been trying to use the Network tab in "Inspect Element" to work out what is going on, but I am completely lost. I managed to scrape the first page of the website, and here's the code:
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy_demo.items import ScrapyDemoItem
class MySpider(BaseSpider):
    """Spider that extracts table-row cell text from the start URL.

    Only the responses for ``start_urls`` are parsed; ``parse`` does not
    issue any follow-up requests, so content that the site loads later
    (for example further result pages) is not visited by this spider.
    """

    name = "test"
    allowed_domains = ["sueryder.org"]
    start_urls = ["http://www.sueryder.org/Get-involved/Volunteering/All-Roles"]

    def parse(self, response):
        """Build one item per ``<tr>`` element found in ``response``.

        Args:
            response: The downloaded page handed to this callback.

        Returns:
            A list of ``ScrapyDemoItem`` objects. Each item's ``"link"``
            field holds the list of text nodes taken from the direct
            ``<td>`` children of one table row. Rows without ``<td>`` text
            still produce an item, with an empty list.
        """
        hxs = HtmlXPathSelector(response)
        items = []
        # Use a distinct loop name: the original rebound the name of the
        # row collection on every iteration.
        for row in hxs.select('//tr'):
            item = ScrapyDemoItem()
            # Relative XPath: only <td> children of the current row.
            item["link"] = row.select('td/text()').extract()
            items.append(item)
        return items