
I'm trying to build a for-each loop for products, so that I can scrape them one by one from an array. I would also like to know where to place my for-each.

The array I want to use is called `EAN`.

import scrapy
import re
import MySQLdb

class ProductSpider(scrapy.Spider):
    db = MySQLdb.connect(host="localhost",  # Host name
                         user="root",  # User Name
                         passwd="",  # Password
                         db="ProductSpider")  # Database name
    cur = db.cursor()
    cur.execute("SELECT EAN FROM product")
    name = 'product'
    EAN = []
    rows = cur.fetchall()
    for row in rows:
        EAN = row[0]  # note: this overwrites EAN on every pass, so only the last code is kept
        # print(row)  # uncomment to see the EAN codes
    start_urls = ['https://www.google.nl/search?client=opera&biw=1880&bih=1008&output=search&tbm=shop&q='+EAN+'&oq='+EAN+'&gs_l=products-cc.12...0.0.0.2112.0.0.0.0.0.0.0.0..0.0....0...1ac..64.products-cc..0.0.0....0.Mgj-aNT06E4']
    custom_settings = {
        'FEED_URI': 'tmp/' + EAN + '.csv'
    }

Here is what I've made.

    for EAN in range(len(EAN)):  # finish the for loop
        EAN.append('EAN')
        print(EAN)

    def parse(self, response):
        urls = response.css('.MCpGKc > a::attr("href")').extract()
        for url in urls:
            url = response.urljoin(url)
            yield scrapy.Request(url, callback=self.parse)
        response.selector.remove_namespaces()
        all_sellers = response.css(".os-seller-name-primary > a::text").extract()
        all_prices = response.css("td.os-total-col::text").re(r"\d+,\d{1,2}")
        all_urls = response.css(".os-seller-name-primary > a::attr('href')").extract()

        for item in zip(all_prices, all_sellers, all_urls):
            scraped_info = {
                'price': item[0],
                'seller': item[1],
                'url': item[2]
            }
            yield scraped_info
        next_page_url = response.css('.pag-prev-next-links > a:last-child::attr(href)').extract_first()
        if next_page_url:
            next_page_url = response.urljoin(next_page_url)
            yield scrapy.Request(url=next_page_url, callback=self.parse)
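
Presumably the intent of the loop under "Here is what I've made" is to collect every EAN code into the list and then walk over it; a minimal corrected version of that snippet (reusing `rows` from the query above) would be:

    EAN = []
    for row in rows:
        EAN.append(row[0])  # collect each EAN code from the query result

    for ean in EAN:  # the for-each: one iteration per code
        print(ean)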
  • What is your question/problem? – parik Apr 18 '18 at 10:33
  • I don't know exactly how to build a for-each in Python or where to place it; I'm new to this – Rousblack Apr 18 '18 at 10:35
  • You need to put your looping that yields each URL to be scraped in a `def start_requests(self)` instead of in your class body (see the sketch after these comments). – Jon Clements Apr 18 '18 at 10:35
  • https://stackoverflow.com/questions/40346498/python-foreach-equivalent – parik Apr 18 '18 at 10:45
  • Does this answer your question? [Is there a 'foreach' function in Python 3?](https://stackoverflow.com/questions/18294534/is-there-a-foreach-function-in-python-3) – AMC Feb 08 '20 at 16:19
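
Following Jon Clements' suggestion, here is a minimal sketch of the `start_requests` approach, assuming the same `product` table and reusing the Google Shopping URL and CSS selectors from the question (whether those selectors still match the live page is untested):

    import MySQLdb
    import scrapy

    class ProductSpider(scrapy.Spider):
        name = 'product'

        def start_requests(self):
            # Runs once when the crawl starts, instead of at class-definition time.
            db = MySQLdb.connect(host="localhost", user="root",
                                 passwd="", db="ProductSpider")
            cur = db.cursor()
            cur.execute("SELECT EAN FROM product")
            eans = [row[0] for row in cur.fetchall()]
            db.close()

            # The for-each: yield one request per EAN code.
            for ean in eans:
                url = ('https://www.google.nl/search?client=opera&output=search'
                       '&tbm=shop&q=' + str(ean) + '&oq=' + str(ean))
                # Carry the EAN along so parse() knows which product it belongs to.
                yield scrapy.Request(url, callback=self.parse, meta={'ean': ean})

        def parse(self, response):
            ean = response.meta['ean']
            all_sellers = response.css(".os-seller-name-primary > a::text").extract()
            all_prices = response.css("td.os-total-col::text").re(r"\d+,\d{1,2}")
            all_urls = response.css(".os-seller-name-primary > a::attr('href')").extract()
            for price, seller, url in zip(all_prices, all_sellers, all_urls):
                yield {'ean': ean, 'price': price, 'seller': seller, 'url': url}

Because `FEED_URI` is fixed once for the whole crawl, a separate CSV per EAN cannot be set from inside the spider this way; tagging each item with its EAN, as above, lets the output be split afterwards (or in a custom item pipeline).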
