I'm trying to parse a Russian website (in Cyrillic) and insert the data into a MySQL DB. The parsing works fine, but I can't save the data to the DB because of the Cyrillic letters. Python gives me this error:
Traceback (most recent call last):
File "/Users/kr/PycharmProjects/education_py/vape_map.py", line 40, in <module>
print parse_shop_meta()
File "/Users/kr/PycharmProjects/education_py/vape_map.py", line 35, in parse_shop_meta
VALUES (%s, %s, %s, %s)""",(shop_title, shop_address, shop_phone, shop_site, shop_desc))
File "/Library/Python/2.7/site-packages/MySQLdb/cursors.py", line 210, in execute
query = query % args
TypeError: not all arguments converted during string formatting
My code:
# -*- coding: utf-8 -*-
import requests
from lxml.html import fromstring
import csv
import MySQLdb
# Module-level MySQL connection to the "vape_map" database. parse_shop_meta()
# opens its cursor on this object, commits through it and finally closes it.
# charset='utf8' sets the connection character set to UTF-8, which the
# Cyrillic text scraped from the site needs in order to be stored correctly.
# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration or the environment instead.
db = MySQLdb.connect(host="localhost", user="root", passwd="***", db="vape_map", charset='utf8')
def get_shop_urls(pages=1):
    """Collect shop-page links from the vapemap.ru listing.

    The listing URL is filtered by ``city=Москва`` (percent-encoded in the
    query string) and is fetched one page at a time.

    Args:
        pages: Number of listing pages to fetch, starting at page 1. The
            default of 1 fetches only the first page, which is what the
            previously hard-coded loop bound did.

    Returns:
        A list with the ``href`` value of every ``<h3 class="title"><a>``
        link found on the fetched pages, in page order.
    """
    listing_url = "http://vapemap.ru/shop/?city=%D0%9C%D0%BE%D1%81%D0%BA%D0%B2%D0%B0&page={}"
    all_shop_urls = []
    for page_number in range(1, pages + 1):
        response = requests.get(listing_url.format(page_number))
        page_html = fromstring(response.content)
        all_shop_urls.extend(page_html.xpath('//h3[@class="title"]/a/@href'))
    return all_shop_urls
def _first_text(nodes):
    """Return the first XPath result in *nodes*, or an empty string if none."""
    return nodes[0] if nodes else u''


def parse_shop_meta():
    """Scrape every shop page and store its details in MySQL and in a CSV file.

    For each link returned by ``get_shop_urls()`` the shop page is downloaded
    and its title, address, phone, site and description are read via XPath.
    Each shop becomes one row in the ``vape_shops`` table and one line in
    ``vape_shops.csv``. A field that is missing from the page is stored as an
    empty string instead of raising ``IndexError``.

    The module-level ``db`` connection is committed once after all rows have
    been inserted and is then closed, so this function can only be run once
    per connection.

    Returns:
        A list of ``(title, address, phone, site, description)`` tuples, one
        per scraped shop, in the order the shops were processed.
    """
    # One placeholder per column: five columns need five %s markers. With only
    # four, MySQLdb's "query % args" fails with "not all arguments converted
    # during string formatting".
    insert_sql = (
        "INSERT INTO vape_shops (title, address, phone, site, description) "
        "VALUES (%s, %s, %s, %s, %s)"
    )
    shops_meta = []
    csvfile = open('vape_shops.csv', 'wb')
    cursor = db.cursor()
    try:
        writer = csv.writer(csvfile, quotechar='|', quoting=csv.QUOTE_ALL)
        for shop in get_shop_urls():
            # No second positional argument: requests.get() would treat it as
            # ``params`` and append it to the query string.
            response = requests.get("http://vapemap.ru{}".format(shop))
            page_html = fromstring(response.content)
            row = (
                _first_text(page_html.xpath('//h1[@class="title"]/text()')),
                _first_text(page_html.xpath('//div[@class="address"]/text()')),
                _first_text(page_html.xpath('//div[@class="phone"]/a/text()')),
                _first_text(page_html.xpath('//div[@class="site"]/a/text()')),
                _first_text(page_html.xpath('//div[@class="shop-desc"]/text()')),
            )
            # Values are passed separately so the driver escapes them.
            cursor.execute(insert_sql, row)
            # The Python 2 csv module only handles byte strings, so encode
            # the text as UTF-8 before writing.
            writer.writerow([value.encode('utf-8') for value in row])
            shops_meta.append(row)
        db.commit()
    finally:
        # Release every resource even when a request or an insert fails.
        cursor.close()
        csvfile.close()
        db.close()
    return shops_meta
# Run the scraper and show the collected shop rows.
print(parse_shop_meta())