I'm trying to build a for each for products so I want to scrape one by one from an array and I would also like to know where to place my for each.
The array i want to use it's called EAN.
import scrapy
import re
import MySQLdb
class ProductSpider(scrapy.Spider):
db = MySQLdb.connect(host="localhost", # Host name
user="root", # User Name
passwd="", # Passwoord
db="ProductSpider") # Database name
cur = db.cursor()
cur.execute("SELECT EAN FROM product")
name = 'product'
EAN = []
rows = cur.fetchall()
for row in rows:
EAN = (row[0])
# print(row) #activate to see EAN codes.
start_urls = ['https://www.google.nl/search?client=opera&biw=1880&bih=1008&output=search&tbm=shop&q='+EAN+'&oq='+EAN+'&gs_l=products-cc.12...0.0.0.2112.0.0.0.0.0.0.0.0..0.0....0...1ac..64.products-cc..0.0.0....0.Mgj-aNT06E4']
custom_settings = {
'FEED_URI': 'tmp/' + EAN + '.csv'
}
Here is what I've made.
for EAN in range(len(EAN)): #forloop afmaken
EAN.append('EAN')
print(EAN)
def parse(self, response):
urls = response.css('.MCpGKc > a::attr("href")').extract()
for url in urls:
url = response.urljoin(url)
yield scrapy.Request(url, callback=self.parse)
response.selector.remove_namespaces()
all_sellers = response.css(".os-seller-name-primary > a::text").extract()
all_prices = response.css("td.os-total-col::text").re("\d+\,\d{1,2}")
all_urls= response.css(".os-seller-name-primary > a::attr('href')").extract()
for item in zip(all_prices, all_sellers, all_urls):
scrapped_info = {
'price': item[0],
'seller': item[1],
'url' : item[2]
}
yield scrapped_info
next_page_url = response.css('.pag-prev-next-links > a:last-child::attr(href)').extract_first()
if next_page_url:
next_page_url = response.urljoin(next_page_url)
yield scrapy.Request(url=next_page_url, callback=self.parse)