I'm working on a crawler and need to save its output to a CSV file.
Here is my code:
import scrapy
class ArticleSpider(scrapy.Spider):
    """Scrape the details of a single article page.

    Every response handled by ``parse`` yields one item (a dict) with the
    keys ``title``, ``quantity``, ``delivery_status``, ``itemattr`` and
    ``itemvalues``.
    """

    name = "article"

    def start_requests(self):
        """Yield one request per start URL, each handled by ``parse``."""
        urls = [
            'https://www.topart-online.com/de/Ahorn-japan.%2C-70cm%2C--36-Blaetter----Herbst/c-KAT282/a-150001HE',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def _save_page(self, response):
        """Write the raw response body to ``article-<last URL segment>.html``.

        This lives under its own name because a class can only hold one
        method called ``parse``: a second definition replaces the first, so
        page saving and item extraction cannot both be named ``parse``.
        ``parse`` does not call this helper; invoke it explicitly when a
        local copy of the page is wanted.
        """
        page = response.url.split("/")[-1]
        filename = 'article-%s.html' % page
        with open(filename, 'wb') as f:
            f.write(response.body)
        self.log('Saved file %s' % filename)

    def parse(self, response):
        """Extract the article fields from ``response`` and yield one item.

        Missing ``quantity`` or ``delivery_status`` nodes produce an empty
        string instead of raising, so a page without them still yields an
        item. ``title`` is ``None`` when its node is missing.
        """
        # The same query feeds both ``quantity`` (first match) and
        # ``itemvalues`` (all matches), so evaluate it once.
        col6_texts = response.xpath('//div[@class="col-6"]/text()')

        # ``get(default='')`` returns the first match, or '' when nothing
        # matched, which keeps the ``strip``/``replace`` calls safe.
        delivery_status = response.xpath(
            '//div[@class="availabilitydeliverytime"]/text()'
        ).get(default='')

        yield {
            'title': response.xpath(
                '//h1[@class="text-center text-md-left mt-0"]/text()'
            ).get(),
            'quantity': col6_texts.get(default='').strip(),
            'delivery_status': delivery_status.replace('/', '').strip(),
            # NOTE: ``itemattr`` and ``itemvalues`` come from two independent
            # queries, and ``itemvalues`` starts with the same text node that
            # ``quantity`` is taken from, so equal positions in the two lists
            # are not guaranteed to belong together.
            'itemattr': response.xpath(
                '//div[@class="productcustomattrdesc word-break col-6"]/text()'
            ).getall(),
            'itemvalues': col6_texts.getall(),
        }
My question is:
How can I output itemattr and itemvalues paired in the correct order, so that each attribute appears next to its own value?
For example: Umkarton (itemattr) 20/20/20 (itemvalues, the dimensions of an Umkarton).