0
from scrapy.spider import BaseSpider
from project.items import QualificationItem
from scrapy.selector import HtmlXPathSelector
from scrapy.http.request import Request
from urlparse import urljoin

USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64; rv:27.0) Gecko/20100101 Firefox/27.0'

class recursiveSpider(BaseSpider):
    """Spider that scrapes per-country undergraduate entry requirements
    from the University of Bristol's international-countries index page.

    ``parse`` collects every country link from the index page, then
    ``parse_linkpage`` extracts the "Entry requirements for undergraduate
    courses" paragraphs from each country page into a QualificationItem.
    """
    name = 'bristol'
    # NOTE: allowed_domains entries must be bare domain names — the original
    # trailing slash ('bristol.ac.uk/') never matches any request hostname,
    # so the OffsiteMiddleware would silently drop every followed link.
    allowed_domains = ['bristol.ac.uk']
    start_urls = ['http://www.bristol.ac.uk/international/countries/']

    def parse(self, response):
        """Yield one Request per country link found on the index page.

        The link text (the country name) is carried along in request
        ``meta`` so parse_linkpage can attach it to the scraped item.
        """
        hxs = HtmlXPathSelector(response)

        href_xpath = '//*[@id="all-countries"]/li/ul/li/a/@href'
        text_xpath = '//*[@id="all-countries"]/li/ul/li/a/text()'
        for text, link in zip(hxs.select(text_xpath).extract(),
                              hxs.select(href_xpath).extract()):
            # urljoin resolves relative hrefs against the index page URL.
            yield Request(urljoin(response.url, link),
                          meta={'a_of_the_link': text},
                          headers={'User-Agent': USER_AGENT},
                          callback=self.parse_linkpage,
                          dont_filter=True)

    def parse_linkpage(self, response):
        """Extract the entry-requirements paragraphs from a country page.

        Selects every <p> following the target <h2> up to the next,
        different <h2>; the first paragraph is skipped ([1:]) as it is
        not part of the requirements text.
        """
        hxs = HtmlXPathSelector(response)
        item = QualificationItem()
        xpath = """
                //h2[normalize-space(.)="Entry requirements for undergraduate courses"]
                 /following-sibling::p[not(preceding-sibling::h2[normalize-space(.)!="Entry requirements for undergraduate courses"])]
                """
        item['Qualification'] = hxs.select(xpath).extract()[1:]
        # Country name forwarded from parse() via request meta.
        item['Country'] = response.meta['a_of_the_link']
        return item

Here is my code that scrapes the different countries' entry requirements. I can export the results into a CSV file and then manually upload them to MySQL, but I was wondering if there is a way to do this automatically. I already have a MySQL database set up.

Dyl10
  • 161
  • 2
  • 11
  • Err, do you just mean [How do I connect to MySQL database using Python](http://stackoverflow.com/questions/372885/how-do-i-connect-to-a-mysql-database-in-python)? – Two-Bit Alchemist Mar 27 '14 at 21:52
  • Write your own custom item pipeline, like this example: http://doc.scrapy.org/en/latest/topics/item-pipeline.html#write-items-to-a-json-file – Talvalin Mar 28 '14 at 11:32

0 Answers