I created a spider in Scrapy. items.py:
from scrapy.item import Item, Field
class dns_shopItem(Item):
    """Container for the values scraped from a single page.

    Both fields are filled by the spider's ``parse_item`` callback.
    """

    # Populated from the absolute XPath to the page heading.
    id = Field()
    # Populated from the ``//``-anchored variant of the same XPath.
    idd = Field()
dns_shop_spider.py:
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.contrib.loader.processor import TakeFirst
from scrapy.contrib.loader import XPathItemLoader
from scrapy.selector import HtmlXPathSelector
from dns_shop.items import dns_shopItem
class dns_shopLoader(XPathItemLoader):
    """Item loader whose default output processor is ``TakeFirst``."""

    # Applied to every field that does not declare its own output processor.
    default_output_processor = TakeFirst()
class dns_shopSpider(CrawlSpider):
    """Crawl playground.ru file pages and scrape the heading of each one.

    The spider starts from the listing in ``start_urls``, follows every link
    whose URL matches the ``allow`` pattern, and passes each of those pages
    to ``parse_item``.
    """

    name = "dns_shop_spider"
    allowed_domains = ["www.playground.ru"]
    start_urls = ["http://www.playground.ru/files/stalker_clear_sky/"]

    # A single rule both follows the matching links and scrapes them.
    # CrawlSpider hands each extracted link to the FIRST rule that matches
    # it, so two rules with the same extractor (one with follow=True, one
    # with the callback) mean the callback rule never receives a link and
    # no items are produced.
    rules = (
        Rule(
            # URL paths never contain spaces, so the pattern must not either.
            SgmlLinkExtractor(allow=('/files/s_t_a_l_k_e_r_chistoe_nebo',)),
            callback='parse_item',
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Build a ``dns_shopItem`` from one matched page.

        Args:
            response: the downloaded page passed in by the crawl rule.

        Returns:
            The item produced by ``dns_shopLoader.load_item()``.
        """
        selector = HtmlXPathSelector(response)
        loader = dns_shopLoader(dns_shopItem(), selector)
        # NOTE(review): the `tbody` steps look like they were copied from a
        # browser inspector. Browsers insert <tbody> even when the raw HTML
        # has none; if the fields come back empty, check these paths against
        # the actual response body.
        loader.add_xpath(
            'id',
            "/html/body/table[2]/tbody/tr[5]/td[2]/table/tbody/tr/td/div[6]/h1/text()",
        )
        loader.add_xpath(
            'idd',
            "//html/body/table[2]/tbody/tr[5]/td[2]/table/tbody/tr/td/div[6]/h1/text()",
        )
        return loader.load_item()
Run the following command:
scrapy crawl dns_shop_spider -o scarped_data_utf8.csv -t csv
The log shows that Scrapy visits all the necessary URLs, but nothing is written to the specified file when the spider runs. What could be the problem?