Three ways on Python 2.7. Note that to open the files in Excel that program likes a UTF-8 BOM encoded at the start of the file. I write it manually in the brute force method, but the utf-8-sig
codec will handle it for you otherwise. Skip the BOM signature if you aren't dealing with lame editors (Windows Notepad) or Excel.
import csv
import codecs
import cStringIO
title = u'\u0410\u0434\u043c\u0438\u043d\u0438\u0441\u0442\u0440\u0430\u0442\u043e\u0440 \u0438\u043d\u0442\u0435\u0440\u043d\u0435\u0442-\u043c\u0430\u0433\u0430\u0437\u0438\u043d\u0430'
print(title)
# Brute force
with open('avito.csv','wb') as f:
f.write(u'\ufeff'.encode('utf8')) # writes "byte order mark" UTF-8 signature
writer=csv.writer(f)
writer.writerow([title.encode('utf8')])
# Example from the documentation for csv module
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8-sig", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
with open('avito2.csv','wb') as f:
w = UnicodeWriter(f)
w.writerow([title])
# 3rd party module, install from pip
import unicodecsv
with open('avito3.csv','wb') as f:
w = unicodecsv.writer(f,encoding='utf-8-sig')
w.writerow([title])