I have some problems with the encoding text, inside a JSON, from a translation with Google Translate API and I'm a neophyte with Python and Google API too.
Below you can find a rudimental script that extract from a CSV the ID of a structure, SELECT the english description from the db and try to write the translated description in another table.
After the translation part:
t = service.translations().list(source='%s' % trans, \
target='%s' % lang, q=[message2t]).execute()
translated = t['translations'][0]['translatedText']
I have my unicode variable translated
with dirty chars in it (I have this problem with language like German or French). I don't know how to obtain the right chars.
Indeed, when I'm trying to write the string into the database, I obtain this error:
UnicodeEncodeError: 'ascii' codec can't encode character u'\xfc' in position 225: ordinal not in range(128)
This is the full rudimental code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from googleapiclient.discovery import build
import _mysql
import time
trans = 'en'
langs = [ 'de', 'dk', 'es', 'fr', 'it', 'nl', 'no', 'pg', 'pl', 'sw' ]
# Google API Environment
key = 'MYKEY'
service = build('translate', 'v2', developerKey=key)
# Open DB connection
db = _mysql.connect(user='MYUSER',
passwd='MYPASSWORD',
host='MYRDS',
port=3306,
db='MYDB')
for lang in langs:
print 'Finding structures w/o description in {} language'.format(lang.upper())
with open('nodesc_%s.csv' % lang, 'r') as structures:
for structure in structures:
id_str = structure.split('\t')[0]
text2t = """SELECT `text` FROM `texts` WHERE
`str_ID`='%s' AND
`type`='description' AND
`lang`='%s';""" % (id_str, trans)
db.query(text2t)
r = db.store_result()
message2t = r.fetch_row()[0][0]
# Check if there is a description for real
if len(message2t) is not 0:
t = service.translations().list(source='%s' % trans, \
target='%s' % lang, q=[message2t]).execute()
translated = t['translations'][0]['translatedText']
now = time.strftime("%Y-%m-%d %H:%M:%S")
texttranslated = """INSERT INTO `descriptions`
(`ID_desc`, `ID_str`, `text`, `lang`, `human_date`, `google_date`)
VALUES (NULL, '%s', '%s', '%s', '0000-00-00 00:00:00', '%s')""" \
% (id_str, translated, lang, now)
db.query(texttranslated)
else:
print 'Structure with id {} have no description in english'.format(id_str)
# Close DB connection
db.close()