I'm creating a web crawler that is capturing data from a website and then inserting it into my database. I'm using scrapy and mysql. I created the following code:
pipelines.py:
class MySQLStorePipeline(object): def __init__(self): self.conn = MySQLdb.connect(host ='localhost', user ='root', passwd ='', db ='imoveis', charset="utf8", use_unicode=True) self.cursor = self.conn.cursor() def process_item(self, item, spider): try: self.cursor.execute("""INSERT INTO imovel (Titulo, Tipo_Negocio, Preco, Localizacao, Tipo_Imovel, Condicao, Numero_Divisoes, Numero_Quartos, Numero_Casas_Banho, Certificado_Energetico, Ano_Construcao, Area_Util, Area_Bruta, Piso) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", (item['Titulo'], item['Tipo_Negocio'], item['Preco'], item['Localizacao'], item['Tipo_Imovel'], item['Condicao'], item['Numero_Divisoes'], item['Numero_Quartos'], item['Numero_Casas_Banho'], item['Certificado_Energetico'], item['Ano_Construcao'], item['Area_Util'], item['Area_Bruta'], item['Piso'])) self.conn.commit() except MySQLdb.Error as e: print('Error %d: %s' % (e.args[0], e.args[1])) sys.exit(1) return item
settings.py:
BOT_NAME = 'novo' SPIDER_MODULES = ['novo.spiders'] NEWSPIDER_MODULE = 'novo.spiders' FEED_EXPORT_ENCODING = 'utf-8' ITEM_PIPELINES = { 'novo.pipelines.MySQLStorePipeline' : 300 } ROBOTSTXT_OBEY = True
crawler.py:
class SapoSpider(scrapy.Spider): name = "imoveis" allowed_domains = ["maisconsultores.pt"] start_urls = ["https://www.maisconsultores.pt/properties?page=%d&s=eedce" % i for i in range(23)] def parse(self,response): subpage_links = [] for i in response.css('div.item.col-sm-4'): youritem = { 'Titulo':i.css('div[class=image] h3::text').extract(), 'Tipo_Negocio':i.css('div.price::text').re('[^\t\n\r\a]+'), } subpage_link = i.css('div[class=image] a::attr(href)').extract_first() full_url = response.urljoin(subpage_link) yield scrapy.Request(full_url, callback=self.parse_subpage, meta={'item':youritem}) def parse_subpage(self,response): youritem = response.meta.get('item') youritem['Tipo_Imovel'] = response.xpath('//ul[@class="amenities"]//li[1]/text()').extract() youritem['Condicao'] = response.xpath('//ul[@class="amenities"]//li[2]/text()').extract() yield youritem
The error that appears when I run scrapy is this:
_mysql_exceptions.OperationalError: (1241, 'Operand should contain 1 column(s)')
I really dont know or cant see what I am missing here. I would really appreciate if u guys could help me out.