Skip to content

(fix): numeric arrow dtype deep copies #10315

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions xarray/core/extension_array.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import copy
from collections.abc import Callable, Sequence
from dataclasses import dataclass
from typing import Any, Generic, cast
Expand Down Expand Up @@ -148,4 +149,14 @@ def __getattr__(self, attr: str) -> Any:
# Thus, if we didn't have `super().__getattribute__("array")` this method would call `self.array` (i.e., `getattr(self, "array")`) again while looking for `__setstate__`
# (which is apparently the first thing sought in copy.copy from the under-construction copied object),
# which would cause a recursion error since `array` is not present on the object when it is being constructed during `__{deep}copy__`.
# Even though `__copy__` and `__deepcopy__` are now defined explicitly below (needed for `test_extension_array_copy_arrow_type`; root cause unknown),
# we leave this here as it is more robust than `self.array`.
return getattr(super().__getattribute__("array"), attr)

def __copy__(self) -> PandasExtensionArray[T_ExtensionArray]:
    """Return a new wrapper around a shallow copy of the wrapped array.

    The wrapped ``self.array`` is duplicated with ``copy.copy`` and the
    result is placed in a fresh ``PandasExtensionArray``.
    """
    shallow_duplicate = copy.copy(self.array)
    return PandasExtensionArray(shallow_duplicate)

def __deepcopy__(
    self, memo: dict[int, Any] | None = None
) -> PandasExtensionArray[T_ExtensionArray]:
    """Return a new wrapper around a deep copy of the wrapped array.

    Parameters
    ----------
    memo : dict or None, optional
        Memo dictionary handed through unchanged to ``copy.deepcopy``
        when duplicating ``self.array``.

    Returns
    -------
    PandasExtensionArray
        A fresh wrapper holding the deep-copied array.
    """
    deep_duplicate = copy.deepcopy(self.array, memo)
    return PandasExtensionArray(deep_duplicate)
11 changes: 11 additions & 0 deletions xarray/tests/test_duck_array_ops.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import copy
import datetime as dt
import pickle
import warnings
Expand Down Expand Up @@ -200,6 +201,16 @@ def test_extension_array_pyarrow_concatenate(self, arrow1, arrow2):
assert concatenated[2].array[0]["x"] == 3
assert concatenated[3].array[0]["y"]

@requires_pyarrow
def test_extension_array_copy_arrow_type(self):
    """Deep-copying a wrapped ``int64[pyarrow]`` array keeps the array type."""
    # Letting `PandasExtensionArray.__getattr__` drive the recursive deep copy
    # breaks only for `int64[pyarrow]` and similar dtypes, so this test guards
    # that copying keeps working for them.
    arrow_ints = pd.array([pd.NA, 1, 2], dtype="int64[pyarrow]")
    wrapped = PandasExtensionArray(arrow_ints)
    duplicate = copy.deepcopy(wrapped, memo=None)
    assert isinstance(duplicate.array, type(arrow_ints))

def test___getitem__extension_duck_array(self, categorical1):
extension_duck_array = PandasExtensionArray(categorical1)
assert (extension_duck_array[0:2] == categorical1[0:2]).all()
Expand Down
Loading