@@ -365,15 +365,12 @@ def _cut(self, data):
365
365
retbins = True ,
366
366
)
367
367
368
- def _factorize_lazy (self , group : T_Group ) -> DataArray :
369
- def _wrapper (data , ** kwargs ):
370
- binned , bins = self ._cut (data )
371
- if isinstance (self .bins , int ):
372
- # we are running eagerly, update self.bins with actual edges instead
373
- self .bins = bins
374
- return binned .codes .reshape (data .shape )
375
-
376
- return apply_ufunc (_wrapper , group , dask = "parallelized" , keep_attrs = True )
368
+ def _pandas_cut_wrapper (self , data , ** kwargs ):
369
+ binned , bins = self ._cut (data )
370
+ if isinstance (self .bins , int ):
371
+ # we are running eagerly, update self.bins with actual edges instead
372
+ self .bins = bins
373
+ return binned .codes .reshape (data .shape )
377
374
378
375
def factorize (self , group : T_Group ) -> EncodedGroups :
379
376
if isinstance (group , _DummyGroup ):
@@ -383,7 +380,13 @@ def factorize(self, group: T_Group) -> EncodedGroups:
383
380
raise ValueError (
384
381
f"Bin edges must be provided when grouping by chunked arrays. Received { self .bins = !r} instead"
385
382
)
386
- codes = self ._factorize_lazy (group )
383
+ codes = apply_ufunc (
384
+ self ._pandas_cut_wrapper ,
385
+ group ,
386
+ dask = "parallelized" ,
387
+ keep_attrs = True ,
388
+ output_dtypes = [np .int64 ],
389
+ )
387
390
if not by_is_chunked and array_all (codes == - 1 ):
388
391
raise ValueError (
389
392
f"None of the data falls within bins with edges { self .bins !r} "
0 commit comments