I am trying to extract rolling (sliding-window) features using tsfresh 1.19
in Python 3.8.13
locally on a Mac M1, and it does not work: it fails with the error IndexError: cannot do a non-empty take from an empty axes. Even replicating the example from the documentation gives me the same error. Here is the code, which, as far as I understand, is taken directly from the documentation:
from tsfresh.utilities.dataframe_functions import roll_time_series
from tsfresh import extract_features
df = pd.DataFrame({
"id": [1, 1, 1, 1, 2, 2],
"time": [1, 2, 3, 4, 8, 9],
"x": [1, 2, 3, 4, 10, 11],
"y": [5, 6, 7, 8, 12, 13],
})
df_rolled = roll_time_series(df, column_id="id", column_sort="time")
df_features = extract_features(df_rolled, column_id="id", column_sort="time")
The df_rolled
data frame gets created with no issues and looks like this:
id | time | x | y |
---|---|---|---|
(1, 1) | 1 | 1 | 5 |
(1, 2) | 1 | 1 | 5 |
(1, 2) | 2 | 2 | 6 |
(1, 3) | 1 | 1 | 5 |
(1, 3) | 2 | 2 | 6 |
(1, 3) | 3 | 3 | 7 |
...
but the extract_features()
function raises the error IndexError: cannot do a non-empty take from an empty axes.
I read somewhere that there may be dependency issues and library version mismatches, so I tried creating a new environment using only Conda packages, and then another one using both conda and pip, to make sure there are no dependency issues. In both of them, all packages seem to be installed with the correct versions as suggested here, but the problem persists. I also played with the column data types and indexes, but so far nothing has helped. Can anyone with some experience with the library help me get through this, please? :)
It is also worth noting that running the same code on AWS SageMaker with the standard python3.8_tensorflow
kernel works with no error.
Error message:
---------------------------------------------------------------------------
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/.../lib/python3.8/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
File "/Users/.../lib/python3.8/site-packages/tsfresh/utilities/distribution.py", line 43, in _function_with_partly_reduce
results = list(itertools.chain.from_iterable(results))
File "/Users/.../lib/python3.8/site-packages/tsfresh/utilities/distribution.py", line 42, in <genexpr>
results = (map_function(chunk, **kwargs) for chunk in chunk_list)
File "/Users/.../lib/python3.8/site-packages/tsfresh/feature_extraction/extraction.py", line 386, in _do_extraction_on_chunk
return list(_f())
File "/Users/.../lib/python3.8/site-packages/tsfresh/feature_extraction/extraction.py", line 364, in _f
result = func(x, param=parameter_list)
File "/Users/.../lib/python3.8/site-packages/tsfresh/feature_extraction/feature_calculators.py", line 2103, in friedrich_coefficients
calculated[m][r] = _estimate_friedrich_coefficients(x, m, r)
File "/Users/.../lib/python3.8/site-packages/tsfresh/feature_extraction/feature_calculators.py", line 152, in _estimate_friedrich_coefficients
df["quantiles"] = pd.qcut(df.signal, r)
File "/Users/.../lib/python3.8/site-packages/pandas/core/reshape/tile.py", line 377, in qcut
bins = np.quantile(x_np, quantiles)
File "<__array_function__ internals>", line 180, in quantile
File "/Users/.../lib/python3.8/site-packages/numpy/lib/function_base.py", line 4412, in quantile
return _quantile_unchecked(
File "/Users/.../lib/python3.8/site-packages/numpy/lib/function_base.py", line 4424, in _quantile_unchecked
r, k = _ureduce(a,
File "/Users/.../lib/python3.8/site-packages/numpy/lib/function_base.py", line 3725, in _ureduce
r = func(a, **kwargs)
File "/Users/.../lib/python3.8/site-packages/numpy/lib/function_base.py", line 4593, in _quantile_ureduce_func
result = _quantile(arr,
File "/Users/.../lib/python3.8/site-packages/numpy/lib/function_base.py", line 4704, in _quantile
previous = np.take(arr, previous_indexes, axis=DATA_AXIS)
File "<__array_function__ internals>", line 180, in take
File "/Users/.../lib/python3.8/site-packages/numpy/core/fromnumeric.py", line 190, in take
return _wrapfunc(a, 'take', indices, axis=axis, out=out, mode=mode)
File "/Users/.../lib/python3.8/site-packages/numpy/core/fromnumeric.py", line 57, in _wrapfunc
return bound(*args, **kwds)
IndexError: cannot do a non-empty take from an empty axes.
"""
The above exception was the direct cause of the following exception:
IndexError Traceback (most recent call last)
Input In [19], in <cell line: 15>()
12 df_rolled = roll_time_series(df, column_id="id", column_sort="time")
13 df_rolled.reset_index(drop=True, inplace=True)
---> 15 df_features = extract_features(df_rolled, column_id="id", column_sort="time")
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/feature_extraction/extraction.py:164, in extract_features(timeseries_container, default_fc_parameters, kind_to_fc_parameters, column_id, column_sort, column_kind, column_value, chunksize, n_jobs, show_warnings, disable_progressbar, impute_function, profile, profiling_filename, profiling_sorting, distributor, pivot)
161 else:
162 warnings.simplefilter("default")
--> 164 result = _do_extraction(
165 df=timeseries_container,
166 column_id=column_id,
167 column_value=column_value,
168 column_kind=column_kind,
169 column_sort=column_sort,
170 n_jobs=n_jobs,
171 chunk_size=chunksize,
172 disable_progressbar=disable_progressbar,
173 show_warnings=show_warnings,
174 default_fc_parameters=default_fc_parameters,
175 kind_to_fc_parameters=kind_to_fc_parameters,
176 distributor=distributor,
177 pivot=pivot,
178 )
180 # Impute the result if requested
181 if impute_function is not None:
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/feature_extraction/extraction.py:294, in _do_extraction(df, column_id, column_value, column_kind, column_sort, default_fc_parameters, kind_to_fc_parameters, n_jobs, chunk_size, disable_progressbar, show_warnings, distributor, pivot)
286 raise ValueError("the passed distributor is not an DistributorBaseClass object")
288 kwargs = dict(
289 default_fc_parameters=default_fc_parameters,
290 kind_to_fc_parameters=kind_to_fc_parameters,
291 show_warnings=show_warnings,
292 )
--> 294 result = distributor.map_reduce(
295 _do_extraction_on_chunk,
296 data=data,
297 chunk_size=chunk_size,
298 function_kwargs=kwargs,
299 )
301 if not pivot:
302 return result
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/utilities/distribution.py:241, in IterableDistributorBaseClass.map_reduce(self, map_function, data, function_kwargs, chunk_size, data_length)
234 else:
235 result = (
236 self.distribute(
237 _function_with_partly_reduce, chunk_generator, map_kwargs
238 ),
239 )
--> 241 result = list(itertools.chain.from_iterable(result))
243 self.close()
245 return result
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tqdm/std.py:1195, in tqdm.__iter__(self)
1192 time = self._time
1194 try:
-> 1195 for obj in iterable:
1196 yield obj
1197 # Update and possibly print the progressbar.
1198 # Note: does not call self.update(1) for speed optimisation.
File ~/opt/anaconda3/envs/p38/lib/python3.8/multiprocessing/pool.py:868, in IMapIterator.next(self, timeout)
866 if success:
867 return value
--> 868 raise value
File ~/opt/anaconda3/envs/p38/lib/python3.8/multiprocessing/pool.py:125, in worker()
123 job, i, func, args, kwds = task
124 try:
--> 125 result = (True, func(*args, **kwds))
126 except Exception as e:
127 if wrap_exception and func is not _helper_reraises_exception:
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/utilities/distribution.py:43, in _function_with_partly_reduce()
41 kwargs = kwargs or {}
42 results = (map_function(chunk, **kwargs) for chunk in chunk_list)
---> 43 results = list(itertools.chain.from_iterable(results))
44 return results
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/utilities/distribution.py:42, in <genexpr>()
25 """
26 Small helper function to call a function (map_function)
27 on a list of data chunks (chunk_list) and convert the results into
(...)
39 :rtype: list
40 """
41 kwargs = kwargs or {}
---> 42 results = (map_function(chunk, **kwargs) for chunk in chunk_list)
43 results = list(itertools.chain.from_iterable(results))
44 return results
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/feature_extraction/extraction.py:386, in _do_extraction_on_chunk()
383 else:
384 warnings.simplefilter("default")
--> 386 return list(_f())
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/feature_extraction/extraction.py:364, in _f()
361 x = data.values
363 if getattr(func, "fctype", None) == "combiner":
--> 364 result = func(x, param=parameter_list)
365 else:
366 if parameter_list:
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/feature_extraction/feature_calculators.py:2103, in friedrich_coefficients()
2101 # calculate the current friedrich coefficients if they do not exist yet
2102 if m not in calculated or r not in calculated[m]:
-> 2103 calculated[m][r] = _estimate_friedrich_coefficients(x, m, r)
2105 try:
2106 res["coeff_{}__m_{}__r_{}".format(coeff, m, r)] = calculated[m][r][coeff]
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/tsfresh/feature_extraction/feature_calculators.py:152, in _estimate_friedrich_coefficients()
150 df = pd.DataFrame({"signal": x[:-1], "delta": np.diff(x)})
151 try:
--> 152 df["quantiles"] = pd.qcut(df.signal, r)
153 except ValueError:
154 return [np.NaN] * (m + 1)
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/pandas/core/reshape/tile.py:377, in qcut()
375 x_np = np.asarray(x)
376 x_np = x_np[~np.isnan(x_np)]
--> 377 bins = np.quantile(x_np, quantiles)
379 fac, bins = _bins_to_cuts(
380 x,
381 bins,
(...)
386 duplicates=duplicates,
387 )
389 return _postprocess_for_cut(fac, bins, retbins, dtype, original)
File <__array_function__ internals>:180, in quantile()
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/numpy/lib/function_base.py:4412, in quantile()
4410 if not _quantile_is_valid(q):
4411 raise ValueError("Quantiles must be in the range [0, 1]")
-> 4412 return _quantile_unchecked(
4413 a, q, axis, out, overwrite_input, method, keepdims)
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/numpy/lib/function_base.py:4424, in _quantile_unchecked()
4416 def _quantile_unchecked(a,
4417 q,
4418 axis=None,
(...)
4421 method="linear",
4422 keepdims=False):
4423 """Assumes that q is in [0, 1], and is an ndarray"""
-> 4424 r, k = _ureduce(a,
4425 func=_quantile_ureduce_func,
4426 q=q,
4427 axis=axis,
4428 out=out,
4429 overwrite_input=overwrite_input,
4430 method=method)
4431 if keepdims:
4432 return r.reshape(q.shape + k)
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/numpy/lib/function_base.py:3725, in _ureduce()
3722 else:
3723 keepdim = (1,) * a.ndim
-> 3725 r = func(a, **kwargs)
3726 return r, keepdim
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/numpy/lib/function_base.py:4593, in _quantile_ureduce_func()
4591 else:
4592 arr = a.copy()
-> 4593 result = _quantile(arr,
4594 quantiles=q,
4595 axis=axis,
4596 method=method,
4597 out=out)
4598 return result
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/numpy/lib/function_base.py:4704, in _quantile()
4702 slices_having_nans = None
4703 # --- Get values from indexes
-> 4704 previous = np.take(arr, previous_indexes, axis=DATA_AXIS)
4705 next = np.take(arr, next_indexes, axis=DATA_AXIS)
4706 # --- Linear interpolation
File <__array_function__ internals>:180, in take()
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/numpy/core/fromnumeric.py:190, in take()
93 @array_function_dispatch(_take_dispatcher)
94 def take(a, indices, axis=None, out=None, mode='raise'):
95 """
96 Take elements from an array along an axis.
97
(...)
188 [5, 7]])
189 """
--> 190 return _wrapfunc(a, 'take', indices, axis=axis, out=out, mode=mode)
File ~/opt/anaconda3/envs/p38/lib/python3.8/site-packages/numpy/core/fromnumeric.py:57, in _wrapfunc()
54 return _wrapit(obj, method, *args, **kwds)
56 try:
---> 57 return bound(*args, **kwds)
58 except TypeError:
59 # A TypeError occurs if the object does have such a method in its
60 # class, but its signature is not identical to that of NumPy's. This
(...)
64 # Call _wrapit from within the except clause to ensure a potential
65 # exception has a traceback chain.
66 return _wrapit(obj, method, *args, **kwds)
IndexError: cannot do a non-empty take from an empty axes.