0
df = pd.read_csv('movies_metadata.csv')

I have gotten the following error back:

UnicodeDecodeError                        Traceback (most recent call last)
Input In [27], in <cell line: 1>()
----> 1 df = pd.read_csv('movies_metadata.csv')

File ~\anaconda3\lib\site-packages\pandas\util\_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306     warnings.warn(
307         msg.format(arguments=arguments),
308         FutureWarning,
309         stacklevel=stacklevel,
310     )
--> 311 return func(*args, **kwargs)

File ~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py:680, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
665 kwds_defaults = _refine_defaults_read(
666     dialect,
667     delimiter,
   (...)
676     defaults={"delimiter": ","},
677 )
678 kwds.update(kwds_defaults)
--> 680 return _read(filepath_or_buffer, kwds)

File ~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py:575, in _read(filepath_or_buffer, kwds)
572 _validate_names(kwds.get("names", None))
574 # Create the parser.
--> 575 parser = TextFileReader(filepath_or_buffer, **kwds)
577 if chunksize or iterator:
578     return parser

File ~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py:933, in TextFileReader.__init__(self, f, engine, **kwds)
930     self.options["has_index_names"] = kwds["has_index_names"]
932 self.handles: IOHandles | None = None
--> 933 self._engine = self._make_engine(f, self.engine)

File ~\anaconda3\lib\site-packages\pandas\io\parsers\readers.py:1235, in TextFileReader._make_engine(self, f, engine)
   1232     raise ValueError(msg)
   1234 try:
-> 1235     return mapping[engine](f, **self.options)
   1236 except Exception:
   1237     if self.handles is not None:

File ~\anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:75, in CParserWrapper.__init__(self, src, **kwds)
 72     kwds.pop(key, None)
 74 kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
---> 75 self._reader = parsers.TextReader(src, **kwds)
 77 self.unnamed_cols = self._reader.unnamed_cols
 79 # error: Cannot determine type of 'names'

File ~\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx:544, in pandas._libs.parsers.TextReader.__cinit__()

File ~\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx:633, in pandas._libs.parsers.TextReader._get_header()

File ~\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx:847, in pandas._libs.parsers.TextReader._tokenize_rows()

File ~\anaconda3\lib\site-packages\pandas\_libs\parsers.pyx:1952, in pandas._libs.parsers.raise_parser_error()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe7 in position 2040: invalid continuation byte
Neo
  • 525
  • 3
  • 17
  • Btw, I just started with Python in school and I am therefore kinda clueless about many things. – Thierry Gilgen Oct 04 '22 at 08:24
  • Does this answer your question? [UnicodeDecodeError when reading CSV file in Pandas with Python](https://stackoverflow.com/questions/18171739/unicodedecodeerror-when-reading-csv-file-in-pandas-with-python) – AlexK Oct 07 '22 at 01:00
  • https://stackoverflow.com/questions/18171739/unicodedecodeerror-when-reading-csv-file-in-pandas-with-python – Mehmaam Oct 08 '22 at 12:34

1 Answer

0

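The error indicates that `read_csv` hit a character-decoding error: it tried to decode the file as UTF-8 (the default), but the byte 0xe7 at position 2040 is not valid UTF-8 at that point. An encoding describes how to interpret a stream of bytes as text (ASCII, UTF-8, UTF-16, etc.).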

`read_csv` accepts an `encoding` parameter that overrides the default. Try passing another encoding, for example `df = pd.read_csv('movies_metadata.csv', encoding="latin1")`. This might solve your issue.

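Note that if you know the actual encoding of the file, you should use that instead of latin1. latin1 maps every possible byte to a character, so it never raises a decoding error, but if the file is really in a different encoding, some characters will silently come out wrong.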

Jib
  • 1,334
  • 1
  • 2
  • 12