I have a very basic python script that pulls from a text file of searches and returns the first URL from Google. I'm receiving an error when the google result contains a foreign character (such as montréal)
Ideally I'd like to include any character pulled regardless of language
import requests
from bs4 import BeautifulSoup
with open("searches.txt") as input: # look at each line in our input file
content = input.readlines()
content = [x.strip() for x in content] # and strip of newline characters
print '---' # some formatting so it looks nice in terminal and our output file
header = '<Query>, <Link>' + '\n' + '---------------' + '\n'
output = open("links.txt", "w") # open file we want to write to
output.write(header)
for x in content: # for each line in our input file
print x
query = x # search google for that query
goog_search = "https://www.google.co.uk/search?sclient=psy-ab&client=ubuntu&hs=k5b&channel=fs&biw=1366&bih=648&noj=1&q=" + query
r = requests.get(goog_search)
soup = BeautifulSoup(r.text, "html.parser") # parse so we just get the link
link = soup.find('cite').text
formatted = query + ', ' + link + '\n' # more output formatting
print query + ', ' + link
output.write(formatted)
output.close()
print '---'
error I'm receiving: UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 53: ordinal not in range(128)