Issue with groupby.mean() - Python

Question

I am trying to find mean sales for each company in my data frame. This is what my data frame looks like:

  Company   Person  Sales
0    GOOG      Sam    200
1    GOOG  Charlie    120
2    MSFT      Amy    340
3    MSFT  Vanessa    124
4      FB     Carl    243
5      FB    Sarah    350

The course I am learning from uses the exact code I present below and does not seem to have an issue. However, the course is a few years old at this point.

The code I'm using is as follows:

data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}

df = pd.DataFrame(data)

byComp = df.groupby('Company')

byComp.mean()

I then get the following error message:

---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1490, in GroupBy._cython_agg_general.<locals>.array_func(values)
   1489 try:
-> 1490     result = self.grouper._cython_operation(
   1491         "aggregate",
   1492         values,
   1493         how,
   1494         axis=data.ndim - 1,
   1495         min_count=min_count,
   1496         **kwargs,
   1497     )
   1498 except NotImplementedError:
   1499     # generally if we have numeric_only=False
   1500     # and non-applicable functions
   1501     # try to python agg
   1502     # TODO: shouldn't min_count matter?

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/ops.py:959, in BaseGrouper._cython_operation(self, kind, values, how, axis, min_count, **kwargs)
    958 ngroups = self.ngroups
--> 959 return cy_op.cython_operation(
    960     values=values,
    961     axis=axis,
    962     min_count=min_count,
    963     comp_ids=ids,
    964     ngroups=ngroups,
    965     **kwargs,
    966 )

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/ops.py:657, in WrappedCythonOp.cython_operation(self, values, axis, min_count, comp_ids, ngroups, **kwargs)
    649     return self._ea_wrap_cython_operation(
    650         values,
    651         min_count=min_count,
   (...)
    654         **kwargs,
    655     )
--> 657 return self._cython_op_ndim_compat(
    658     values,
    659     min_count=min_count,
    660     ngroups=ngroups,
    661     comp_ids=comp_ids,
    662     mask=None,
    663     **kwargs,
    664 )

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/ops.py:497, in WrappedCythonOp._cython_op_ndim_compat(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
    495     return res.T
--> 497 return self._call_cython_op(
    498     values,
    499     min_count=min_count,
    500     ngroups=ngroups,
    501     comp_ids=comp_ids,
    502     mask=mask,
    503     result_mask=result_mask,
    504     **kwargs,
    505 )

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/ops.py:541, in WrappedCythonOp._call_cython_op(self, values, min_count, ngroups, comp_ids, mask, result_mask, **kwargs)
    540 out_shape = self._get_output_shape(ngroups, values)
--> 541 func = self._get_cython_function(self.kind, self.how, values.dtype, is_numeric)
    542 values = self._get_cython_vals(values)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/ops.py:173, in WrappedCythonOp._get_cython_function(cls, kind, how, dtype, is_numeric)
    171 if "object" not in f.__signatures__:
    172     # raise NotImplementedError here rather than TypeError later
--> 173     raise NotImplementedError(
    174         f"function is not implemented for this dtype: "
    175         f"[how->{how},dtype->{dtype_str}]"
    176     )
    177 return f

NotImplementedError: function is not implemented for this dtype: [how->mean,dtype->object]

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/nanops.py:1692, in _ensure_numeric(x)
   1691 try:
-> 1692     x = float(x)
   1693 except (TypeError, ValueError):
   1694     # e.g. "1+1j" or "foo"

ValueError: could not convert string to float: 'CarlSarah'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/nanops.py:1696, in _ensure_numeric(x)
   1695 try:
-> 1696     x = complex(x)
   1697 except ValueError as err:
   1698     # e.g. "foo"

ValueError: complex() arg is a malformed string

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
Cell In[247], line 1
----> 1 byComp.mean()

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1855, in GroupBy.mean(self, numeric_only, engine, engine_kwargs)
   1853     return self._numba_agg_general(sliding_mean, engine_kwargs)
   1854 else:
-> 1855     result = self._cython_agg_general(
   1856         "mean",
   1857         alt=lambda x: Series(x).mean(numeric_only=numeric_only),
   1858         numeric_only=numeric_only,
   1859     )
   1860     return result.__finalize__(self.obj, method="groupby")

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1507, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs)
   1503         result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
   1505     return result
-> 1507 new_mgr = data.grouped_reduce(array_func)
   1508 res = self._wrap_agged_manager(new_mgr)
   1509 out = self._wrap_aggregated_output(res)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py:1503, in BlockManager.grouped_reduce(self, func)
   1499 if blk.is_object:
   1500     # split on object-dtype blocks bc some columns may raise
   1501     #  while others do not.
   1502     for sb in blk._split():
-> 1503         applied = sb.apply(func)
   1504         result_blocks = extend_blocks(applied, result_blocks)
   1505 else:

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py:329, in Block.apply(self, func, **kwargs)
    323 @final
    324 def apply(self, func, **kwargs) -> list[Block]:
    325     """
    326     apply the function to my values; return a block if we are not
    327     one
    328     """
--> 329     result = func(self.values, **kwargs)
    331     return self._split_op_result(result)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1503, in GroupBy._cython_agg_general.<locals>.array_func(values)
   1490     result = self.grouper._cython_operation(
   1491         "aggregate",
   1492         values,
   (...)
   1496         **kwargs,
   1497     )
   1498 except NotImplementedError:
   1499     # generally if we have numeric_only=False
   1500     # and non-applicable functions
   1501     # try to python agg
   1502     # TODO: shouldn't min_count matter?
-> 1503     result = self._agg_py_fallback(values, ndim=data.ndim, alt=alt)
   1505 return result

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1457, in GroupBy._agg_py_fallback(self, values, ndim, alt)
   1452     ser = df.iloc[:, 0]
   1454 # We do not get here with UDFs, so we know that our dtype
   1455 #  should always be preserved by the implemented aggregations
   1456 # TODO: Is this exactly right; see WrappedCythonOp get_result_dtype?
-> 1457 res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
   1459 if isinstance(values, Categorical):
   1460     # Because we only get here with known dtype-preserving
   1461     #  reductions, we cast back to Categorical.
   1462     # TODO: if we ever get "rank" working, exclude it here.
   1463     res_values = type(values)._from_sequence(res_values, dtype=values.dtype)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/ops.py:994, in BaseGrouper.agg_series(self, obj, func, preserve_dtype)
    987 if len(obj) > 0 and not isinstance(obj._values, np.ndarray):
    988     # we can preserve a little bit more aggressively with EA dtype
    989     #  because maybe_cast_pointwise_result will do a try/except
    990     #  with _from_sequence.  NB we are assuming here that _from_sequence
    991     #  is sufficiently strict that it casts appropriately.
    992     preserve_dtype = True
--> 994 result = self._aggregate_series_pure_python(obj, func)
    996 npvalues = lib.maybe_convert_objects(result, try_float=False)
    997 if preserve_dtype:

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/ops.py:1015, in BaseGrouper._aggregate_series_pure_python(self, obj, func)
   1012 splitter = self._get_splitter(obj, axis=0)
   1014 for i, group in enumerate(splitter):
-> 1015     res = func(group)
   1016     res = libreduction.extract_result(res)
   1018     if not initialized:
   1019         # We only do this validation on the first iteration

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1857, in GroupBy.mean.<locals>.<lambda>(x)
   1853     return self._numba_agg_general(sliding_mean, engine_kwargs)
   1854 else:
   1855     result = self._cython_agg_general(
   1856         "mean",
-> 1857         alt=lambda x: Series(x).mean(numeric_only=numeric_only),
   1858         numeric_only=numeric_only,
   1859     )
   1860     return result.__finalize__(self.obj, method="groupby")

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:11556, in NDFrame._add_numeric_operations.<locals>.mean(self, axis, skipna, numeric_only, **kwargs)
  11539 @doc(
  11540     _num_doc,
  11541     desc="Return the mean of the values over the requested axis.",
   (...)
  11554     **kwargs,
  11555 ):
> 11556     return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:11201, in NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
  11194 def mean(
  11195     self,
  11196     axis: Axis | None = 0,
   (...)
  11199     **kwargs,
  11200 ) -> Series | float:
> 11201     return self._stat_function(
  11202         "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs
  11203     )

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:11158, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
  11154     nv.validate_stat_func((), kwargs, fname=name)
  11156 validate_bool_kwarg(skipna, "skipna", none_allowed=False)
> 11158 return self._reduce(
  11159     func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
  11160 )

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/series.py:4670, in Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
   4665     raise TypeError(
   4666         f"Series.{name} does not allow {kwd_name}={numeric_only} "
   4667         "with non-numeric dtypes."
   4668     )
   4669 with np.errstate(all="ignore"):
-> 4670     return op(delegate, skipna=skipna, **kwds)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/nanops.py:96, in disallow.__call__.<locals>._f(*args, **kwargs)
     94 try:
     95     with np.errstate(invalid="ignore"):
---> 96         return f(*args, **kwargs)
     97 except ValueError as e:
     98     # we want to transform an object array
     99     # ValueError message to the more typical TypeError
    100     # e.g. this is normally a disallowed function on
    101     # object arrays that contain strings
    102     if is_object_dtype(args[0]):

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/nanops.py:158, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
    156         result = alt(values, axis=axis, skipna=skipna, **kwds)
    157 else:
--> 158     result = alt(values, axis=axis, skipna=skipna, **kwds)
    160 return result

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/nanops.py:421, in _datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
    418 if datetimelike and mask is None:
    419     mask = isna(values)
--> 421 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
    423 if datetimelike:
    424     result = _wrap_results(result, orig_values.dtype, fill_value=iNaT)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/nanops.py:727, in nanmean(values, axis, skipna, mask)
    724     dtype_count = dtype
    726 count = _get_counts(values.shape, mask, axis, dtype=dtype_count)
--> 727 the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum))
    729 if axis is not None and getattr(the_sum, "ndim", False):
    730     count = cast(np.ndarray, count)

File ~/opt/miniconda3/lib/python3.9/site-packages/pandas/core/nanops.py:1699, in _ensure_numeric(x)
   1696             x = complex(x)
   1697         except ValueError as err:
   1698             # e.g. "foo"
-> 1699             raise TypeError(f"Could not convert {x} to numeric") from err
   1700 return x

TypeError: Could not convert CarlSarah to numeric

score 0 · Answer 1 · answered Aug 31 '23 at 19:33

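The error occurs because, since pandas 2.0, groupby(...).mean() defaults to numeric_only=False, so it also tries to average the non-numeric "Person" column (hence "Could not convert CarlSarah to numeric"). Older pandas versions silently dropped such columns, which is why the course code used to work. One solution is to aggregate the names into a list and take the mean of "Sales" only: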

out = df.groupby("Company").agg({"Person": list, "Sales": "mean"})
print(out)

Prints:

                 Person  Sales
Company                       
FB        [Carl, Sarah]  296.5
GOOG     [Sam, Charlie]  160.0
MSFT     [Amy, Vanessa]  232.0

If you want only Company/Sales:

out = df.groupby("Company")["Sales"].mean()
print(out)

Prints:

Company
FB      296.5
GOOG    160.0
MSFT    232.0
Name: Sales, dtype: float64

Issue with groupby.mean() - Python

1 Answer