Update: While the code in my original answer works, I have since released a small package at https://pypi.python.org/pypi/csv342 that provides a Python 3-like interface for Python 2. So, independent of your Python version, you can simply do:
import csv342 as csv
import io

# newline='' leaves line-ending handling to the csv reader; io.open() accepts
# the encoding and newline arguments on both Python 2 and Python 3.
with io.open('some.csv', 'r', encoding='utf-8', newline='') as csv_file:
    for row in csv.reader(csv_file, delimiter='|'):
        print(row)
Original answer: Here's a solution that even with Python 2 actually decodes the text to Unicode strings and consequently works with encodings other than UTF-8.
The code below defines a function csv_rows() that returns the contents of a file as a sequence of lists. Example usage:
# Each row is a list holding the cells of one CSV record.
for row in csv_rows('some.csv', encoding='iso-8859-15', delimiter='|'):
    print(row)
Here are the two variants of csv_rows(): one for Python 3+ and another for Python 2.6+. At runtime, the proper variant is picked automatically. UTF8Recoder and UnicodeReader are verbatim copies of the examples in the Python 2.7 library documentation.
import csv
import io
import sys
if sys.version_info[0] >= 3:
    # Python 3 variant.
    def csv_rows(csv_path, encoding, **keywords):
        """
        Yield the rows of the CSV file at ``csv_path`` as lists of strings.

        The file is opened in text mode and decoded with ``encoding``; any
        further keyword arguments (for example ``delimiter``) are passed on
        to ``csv.reader()``.
        """
        # newline='' leaves line-ending handling to the csv reader.
        with io.open(csv_path, 'r', newline='', encoding=encoding) as csv_file:
            for row in csv.reader(csv_file, **keywords):
                yield row
else:
    # Python 2 variant.
    import codecs

    class UTF8Recoder:
        """
        Iterator that reads an encoded stream and reencodes the input to UTF-8
        """
        def __init__(self, f, encoding):
            # Wrap the byte stream so that iterating it yields decoded lines.
            self.reader = codecs.getreader(encoding)(f)

        def __iter__(self):
            return self

        def next(self):
            # Hand the next decoded line on as UTF-8 encoded bytes.
            return self.reader.next().encode("utf-8")

    class UnicodeReader:
        """
        A CSV reader which will iterate over lines in the CSV file "f",
        which is encoded in the given encoding.
        """
        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # The csv reader is fed UTF-8 bytes, whatever the source encoding.
            f = UTF8Recoder(f, encoding)
            self.reader = csv.reader(f, dialect=dialect, **kwds)

        def next(self):
            row = self.reader.next()
            # Cells come back as UTF-8 byte strings; decode them to unicode.
            return [unicode(s, "utf-8") for s in row]

        def __iter__(self):
            return self

    def csv_rows(csv_path, encoding, **keywords):
        """
        Yield the rows of the CSV file at ``csv_path`` as lists of unicode
        strings.

        The file is opened in binary mode and decoded from ``encoding`` by
        ``UnicodeReader``; any further keyword arguments (for example
        ``delimiter``) are passed on to ``csv.reader()``.
        """
        with io.open(csv_path, 'rb') as csv_file:
            for row in UnicodeReader(csv_file, encoding=encoding, **keywords):
                yield row