I want to scrape data from a website and insert it into a MySQL database (which I view in MySQL Workbench).
Here is my code:
pipelines.py
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
from itemadapter import ItemAdapter
import mysql.connector


class MojulePipeline(object):
    def __init__(self):
        self.create_connection()
        self.create_table()

    def create_connection(self):
        self.conn = mysql.connector.connect(
            host='localhost',
            user='root',
            passwd='asdfasdf',
            database='refq'
        )
        self.curr = self.conn.cursor()

    def create_table(self):
        # self.curr.execute("DROP TABLE IF EXISTS refqt")
        # IF NOT EXISTS avoids an error when the table is already there
        self.curr.execute("""CREATE TABLE IF NOT EXISTS refqt(
            subtitle text,
            question text,
            canswer text,
            wanswer1 text,
            wanswer2 text,
            wanswer3 text
        )""")

    def process_item(self, item, spider):
        self.store_db(item)
        return item

    def store_db(self, item):
        self.curr.execute("""INSERT INTO refqt VALUES (%s, %s, %s, %s, %s, %s)""", (
            item['subtitle'][0],
            item['question'][0],
            item['canswer'][0],
            item['wanswer1'][0],
            item['wanswer2'][0],
            item['wanswer3'][0],
        ))
        self.conn.commit()
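For reference, the connection can also be tested outside Scrapy with this minimal script (same credentials as in the pipeline); if the connection itself is the problem, it should fail with the same error:

# test_connection.py - connection check on its own, outside Scrapy
import mysql.connector

conn = mysql.connector.connect(
    host='localhost',
    user='root',
    passwd='asdfasdf',
    database='refq'
)
print(conn.is_connected())  # True once the handshake succeeds
conn.close()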
And then items.py
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy


class MojuleItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    subtitle = scrapy.Field()
    question = scrapy.Field()
    canswer = scrapy.Field()
    wanswer1 = scrapy.Field()
    wanswer2 = scrapy.Field()
    wanswer3 = scrapy.Field()
my_spider.py
import scrapy

from ..items import MojuleItem


class MojuleSpider(scrapy.Spider):
    name = 'mojule'
    start_urls = ['http://quotes.toscrape.com/']

    def parse(self, response):
        all_div_quotes = response.css('div.group')
        for quotes in all_div_quotes:
            # create a fresh item per block, so earlier results aren't overwritten
            items = MojuleItem()
            subtitle = quotes.css('ol.ques h4::text').extract()
            question = quotes.css('ol.ques li p::text').extract()
            canswer = quotes.css('ol.choice li:nth-child(1)::text').extract()
            wanswer1 = quotes.css('ol.choice li:nth-child(2)::text').extract()
            wanswer2 = quotes.css('ol.choice li:nth-child(3)::text').extract()
            wanswer3 = quotes.css('ol.choice li:nth-child(4)::text').extract()
            items['subtitle'] = subtitle
            items['question'] = question
            items['canswer'] = canswer
            items['wanswer1'] = wanswer1
            items['wanswer2'] = wanswer2
            items['wanswer3'] = wanswer3
            yield items
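The pipeline is enabled in settings.py. I believe this part is fine, since the traceback below shows the pipeline actually being instantiated:

# settings.py (relevant line only; 300 is just the priority value I used)
ITEM_PIPELINES = {
    'mojule.pipelines.MojulePipeline': 300,
}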
I am able to fetch all the data and can see the output in the console. But when I try to insert it into MySQL, I get the error below:
2020-07-07 10:45:06 [scrapy.utils.log] INFO: Scrapy 2.2.0 started (bot: mojule)
2020-07-07 10:45:06 [scrapy.utils.log] INFO: Versions: lxml 4.5.1.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.8.3 (v3.8.3:6f8c8320e9, May 13 2020, 16:29:34) - [Clang 6.0 (clang-600.0.57)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1g 21 Apr 2020), cryptography 2.9.2, Platform macOS-10.15.5-x86_64-i386-64bit
2020-07-07 10:45:06 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2020-07-07 10:45:06 [scrapy.crawler] INFO: Overridden settings:
{'BOT_NAME': 'mojule',
'NEWSPIDER_MODULE': 'mojule.spiders',
'ROBOTSTXT_OBEY': True,
'SPIDER_MODULES': ['mojule.spiders']}
2020-07-07 10:45:06 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
'scrapy.extensions.memusage.MemoryUsage',
'scrapy.extensions.logstats.LogStats']
2020-07-07 10:45:06 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware',
'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
'scrapy.downloadermiddlewares.retry.RetryMiddleware',
'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware',
'scrapy.downloadermiddlewares.redirect.RedirectMiddleware',
'scrapy.downloadermiddlewares.cookies.CookiesMiddleware',
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware',
'scrapy.downloadermiddlewares.stats.DownloaderStats']
2020-07-07 10:45:06 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
'scrapy.spidermiddlewares.referer.RefererMiddleware',
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
'scrapy.spidermiddlewares.depth.DepthMiddleware']
Unhandled error in Deferred:
2020-07-07 10:45:06 [twisted] CRITICAL: Unhandled error in Deferred:
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/crawler.py", line 192, in crawl
return self._crawl(crawler, *args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/crawler.py", line 196, in _crawl
d = crawler.crawl(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/twisted/internet/defer.py", line 1613, in unwindGenerator
return _cancellableInlineCallbacks(gen)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/twisted/internet/defer.py", line 1529, in _cancellableInlineCallbacks
_inlineCallbacks(None, g, status)
--- <exception caught here> ---
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
result = g.send(result)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/crawler.py", line 87, in crawl
self.engine = self._create_engine()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/crawler.py", line 101, in _create_engine
return ExecutionEngine(self, lambda _: self.stop())
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/core/engine.py", line 70, in __init__
self.scraper = Scraper(crawler)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/core/scraper.py", line 71, in __init__
self.itemproc = itemproc_cls.from_crawler(crawler)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/middleware.py", line 53, in from_crawler
return cls.from_settings(crawler.settings, crawler)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/middleware.py", line 35, in from_settings
mw = create_instance(mwcls, settings, crawler)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/scrapy/utils/misc.py", line 156, in create_instance
instance = objcls(*args, **kwargs)
File "/Users/davidM/Dev/scrapie/mojule/mojule/pipelines.py", line 16, in __init__
self.create_connection()
File "/Users/davidM/Dev/scrapie/mojule/mojule/pipelines.py", line 20, in create_connection
self.conn = mysql.connector.connect(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/__init__.py", line 179, in connect
return MySQLConnection(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/connection.py", line 95, in __init__
self.connect(**kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/abstracts.py", line 716, in connect
self._open_connection()
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/connection.py", line 208, in _open_connection
self._do_auth(self._user, self._password,
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/connection.py", line 137, in _do_auth
packet = self._protocol.make_auth(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/protocol.py", line 99, in make_auth
packet += self._auth_response(client_flags, username, password,
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/protocol.py", line 58, in _auth_response
auth = get_auth_plugin(auth_plugin)(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/mysql/connector/authentication.py", line 190, in get_auth_plugin
raise errors.NotSupportedError(
mysql.connector.errors.NotSupportedError: Authentication plugin 'caching_sha2_password' is not supported
I am still in the learning phase and wrote this code by following a YouTube video. It worked in the video, but when I typed it out step by step it does not work for me. Please help; I'm entirely new to Scrapy and all of this. I've also tried changing the Python interpreter, with no luck, and tried several other suggested fixes, but none of them solved it.
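One of the suggestions I came across was to force the older authentication plugin when connecting (I'm not sure whether this is the right approach, or whether I should instead replace the old mysql-connector package with mysql-connector-python):

# same connect() call as in the pipeline, with the auth plugin forced explicitly
import mysql.connector

conn = mysql.connector.connect(
    host='localhost',
    user='root',
    passwd='asdfasdf',
    database='refq',
    auth_plugin='mysql_native_password'  # avoid caching_sha2_password
)

Would this be the correct fix, or is something else wrong with my setup?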