0

Hi this is my first time using Python to code a machine learning modell

I tried creating an image classification model using pandas For this, I used JupyterNotebook

I wrote the following code:

data = pd.read_csv('mnist.csv')

This is the error which shows:

UnicodeDecodeError                        Traceback (most recent call last)
Cell In[19], line 2
      1 #using pandas to read the database stored in the same folder
----> 2 data = pd.read_csv('mnist.csv')

File ~\anaconda3\Lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
    209     else:
    210         kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)

File ~\anaconda3\Lib\site-packages\pandas\util\_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
    325 if len(args) > num_allow_args:
    326     warnings.warn(
    327         msg.format(arguments=_format_argument_list(allow_args)),
    328         FutureWarning,
    329         stacklevel=find_stack_level(),
    330     )
--> 331 return func(*args, **kwargs)

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    935 kwds_defaults = _refine_defaults_read(
    936     dialect,
    937     delimiter,
   (...)
    946     defaults={"delimiter": ","},
    947 )
    948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:605, in _read(filepath_or_buffer, kwds)
    602 _validate_names(kwds.get("names", None))
    604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
    607 if chunksize or iterator:
    608     return parser

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
   1439     self.options["has_index_names"] = kwds["has_index_names"]
   1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\readers.py:1753, in TextFileReader._make_engine(self, f, engine)
   1750     raise ValueError(msg)
   1752 try:
-> 1753     return mapping[engine](f, **self.options)
   1754 except Exception:
   1755     if self.handles is not None:

File ~\anaconda3\Lib\site-packages\pandas\io\parsers\c_parser_wrapper.py:79, in CParserWrapper.__init__(self, src, **kwds)
     76     kwds.pop(key, None)
     78 kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
---> 79 self._reader = parsers.TextReader(src, **kwds)
     81 self.unnamed_cols = self._reader.unnamed_cols
     83 # error: Cannot determine type of 'names'

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:547, in pandas._libs.parsers.TextReader.__cinit__()

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:636, in pandas._libs.parsers.TextReader._get_header()

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:852, in pandas._libs.parsers.TextReader._tokenize_rows()

File ~\anaconda3\Lib\site-packages\pandas\_libs\parsers.pyx:1965, in pandas._libs.parsers.raise_parser_error()

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x9d in position 10: invalid start byte
Aarush K
  • 3
  • 2
  • Check if the csv is zipped, e.g. add `compression="gz"` as additional keyword. Describing where you got the data would help. – Daraan Jul 26 '23 at 18:00
  • 2
    Search for the error message, literally hundreds of hits. Also, for a better understanding, you should extract a [mcve] to include in your question. As a new user here, please also take the [tour] and read [ask]. – Ulrich Eckhardt Jul 26 '23 at 20:45

0 Answers0