BinGrouper: Support setting labels when provided with IntervalIndex

dcherian · dcherian · commit 06818d61ac94 · 2025-04-26T13:09:23.000-06:00
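Removes a pandas limitation that we don't need: pandas ignores `labels` when `bins` is an IntervalIndex, so BinGrouper now builds the group index directly from the provided labels.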
diff --git a/xarray/groupers.py b/xarray/groupers.py
@@ -319,7 +319,7 @@ class BinGrouper(Grouper):
         the resulting bins. If False, returns only integer indicators of the
         bins. This affects the type of the output container (see below).
         This argument is ignored when `bins` is an IntervalIndex. If True,
-        raises an error. When `ordered=False`, labels must be provided.
+        raises an error.
     retbins : bool, default False
         Whether to return the bins or not. Useful when bins is provided
         as a scalar.
@@ -394,17 +394,19 @@ def factorize(self, group: T_Group) -> EncodedGroups:
 
         # This seems silly, but it lets us have Pandas handle the complexity
         # of `labels`, `precision`, and `include_lowest`, even when group is a chunked array
-        dummy, _ = self._cut(np.array([0]).astype(group.dtype))
-        full_index = dummy.categories
+        if self.labels is None:
+            dummy, _ = self._cut(np.array([0]).astype(group.dtype))
+            full_index = dummy.categories
+        else:
+            full_index = pd.CategoricalIndex(self.labels)
+
         if not by_is_chunked:
             uniques = np.sort(pd.unique(codes.data.ravel()))
             unique_values = full_index[uniques[uniques != -1]]
         else:
             unique_values = full_index
 
-        unique_coord = Variable(
-            dims=new_dim_name, data=unique_values, attrs=group.attrs
-        )
+        unique_coord = Variable(dims=self.name, data=unique_values, attrs=group.attrs)
         return EncodedGroups(
             codes=codes,
             full_index=full_index,
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
@@ -1062,6 +1062,22 @@ def test_groupby_bins_cut_kwargs(use_flox: bool) -> None:
         ).mean()
     assert_identical(expected, actual)
 
+    with xr.set_options(use_flox=use_flox):
+        bins_index = pd.IntervalIndex.from_breaks(x_bins)
+        labels = ["one", "two", "three"]
+        actual = da.groupby(x=BinGrouper(bins=bins_index, labels=labels)).sum()
+        assert actual.xindexes["x_bins"].index.equals(pd.CategoricalIndex(labels))
+
+
+def test_groupby_bins_name_kwarg() -> None:
+    da = xr.DataArray(np.arange(12).reshape(6, 2), dims=("x", "y"))
+    x_bins = (0, 2, 4, 6)
+    actual = da.groupby_bins("x", bins=x_bins, name="foo").sum()  # name= through the groupby_bins shortcut
+    assert "foo" in actual.dims
+
+    actual = da.groupby(x=BinGrouper(bins=x_bins, name="foo")).sum()  # name= through an explicit BinGrouper
+    assert "foo" in actual.dims
+
 
 @pytest.mark.parametrize("indexed_coord", [True, False])
 @pytest.mark.parametrize(