import json
import codecs
from urllib.request import urlopen
# Translation endpoint for the hr-en language pair; each word is appended
# directly after the trailing "text=" parameter.
url = "https://translate.yandex.net/api/v1.5/tr.json/translate?key=myapikeyhere&lang=hr-en&text="

# Read the whole input file as UTF-8 and split it on single spaces.
raw_text = codecs.open("inputfile.txt", encoding='utf-8').read()
word_list = raw_text.split(" ")

# Output file: one "<word> <translation>" pair per line.
recnik = codecs.open('translation.txt', encoding='utf-8', mode='w')

for search_term in word_list:
    # The word is concatenated into the URL as-is, with no percent-encoding.
    # This is the line that raises UnicodeEncodeError for non-ASCII words.
    response = urlopen(url+search_term)
    payload = json.loads(response.readall().decode('utf-8'))
    # The first entry of the 'text' list is used as the translation.
    translation = payload['text'][0]
    recnik.write(" ".join((search_term, translation)) + '\n')
I'm trying to translate a few words from a text file using Yandex's translation API. After the first few words are translated, I get the following error:
UnicodeEncodeError: 'ascii' codec can't encode character '\u030c' in position 142: ordinal not in range(128)
It happens when the script reaches the first word in my word list that contains a non-ASCII character. As far as I know I'm doing everything in Unicode, but I am probably missing something obvious, and I would appreciate someone else's eyes on this.
Error occurs on this line:
response = urlopen(url+search_term)
EDIT:
The solution I used, in case it helps somebody in the future (each search term is percent-encoded with `urllib.parse.quote` before it is appended to the URL):
import json
import codecs
from urllib.request import urlopen
from urllib.parse import quote
# Translation endpoint for the hr-en language pair; the percent-encoded word
# is appended after the trailing "text=" parameter.
url = "https://translate.yandex.net/api/v1.5/tr.json/translate?key=myapikey&lang=hr-en&text="

# Split on any run of whitespace so that newlines and repeated spaces do not
# produce empty search terms or terms with embedded line breaks.
with codecs.open("inputfile.txt", encoding='utf-8') as source:
    word_list = source.read().split()

# The context manager flushes and closes the output file even if a request
# fails part-way through the word list.
with codecs.open('translation.txt', encoding='utf-8', mode='w') as recnik:
    for search_term in word_list:
        # A request URL must be ASCII: quote() UTF-8-encodes the term and
        # percent-escapes it, which avoids the UnicodeEncodeError raised
        # when a non-ASCII word is concatenated in unescaped.
        send = url + quote(search_term)
        with urlopen(send) as response:
            # read() instead of readall(): the response object no longer
            # provides readall() on Python 3.5 and later.
            translation = json.loads(response.read().decode('utf-8'))
        # The first entry of the 'text' list is used as the translation.
        translation = translation['text'][0]
        one_string = search_term + " " + translation
        print(one_string)
        recnik.write(one_string + '\n')