Skip to content

Commit 2c6a776

Browse files
authored
BinGrouper: reduce indirection (#10270)
1 parent a5d296e commit 2c6a776

File tree

1 file changed

+13
-10
lines changed

1 file changed

+13
-10
lines changed

xarray/groupers.py

+13 -10
Original file line number | Diff line number | Diff line change
@@ -365,15 +365,12 @@ def _cut(self, data):
365365
retbins=True,
366366
)
367367

368-
def _factorize_lazy(self, group: T_Group) -> DataArray:
369-
def _wrapper(data, **kwargs):
370-
binned, bins = self._cut(data)
371-
if isinstance(self.bins, int):
372-
# we are running eagerly, update self.bins with actual edges instead
373-
self.bins = bins
374-
return binned.codes.reshape(data.shape)
375-
376-
return apply_ufunc(_wrapper, group, dask="parallelized", keep_attrs=True)
368+
def _pandas_cut_wrapper(self, data, **kwargs):
369+
binned, bins = self._cut(data)
370+
if isinstance(self.bins, int):
371+
# we are running eagerly, update self.bins with actual edges instead
372+
self.bins = bins
373+
return binned.codes.reshape(data.shape)
377374

378375
def factorize(self, group: T_Group) -> EncodedGroups:
379376
if isinstance(group, _DummyGroup):
@@ -383,7 +380,13 @@ def factorize(self, group: T_Group) -> EncodedGroups:
383380
raise ValueError(
384381
f"Bin edges must be provided when grouping by chunked arrays. Received {self.bins=!r} instead"
385382
)
386-
codes = self._factorize_lazy(group)
383+
codes = apply_ufunc(
384+
self._pandas_cut_wrapper,
385+
group,
386+
dask="parallelized",
387+
keep_attrs=True,
388+
output_dtypes=[np.int64],
389+
)
387390
if not by_is_chunked and array_all(codes == -1):
388391
raise ValueError(
389392
f"None of the data falls within bins with edges {self.bins!r}"

0 commit comments

Comments
 (0)