|
2 | 2 | import re
|
3 | 3 | from collections.abc import Sequence
|
4 | 4 | from dataclasses import dataclass
|
5 |
| -from typing import Any, ClassVar, Self, TypeGuard, cast |
| 5 | +from typing import Any, Self, TypeGuard, cast |
6 | 6 |
|
7 | 7 | import numpy as np
|
8 | 8 |
|
9 | 9 | from zarr.core.common import JSON, ZarrFormat
|
10 | 10 | from zarr.core.dtype.common import (
|
11 | 11 | DataTypeValidationError,
|
12 |
| - HasEndianness, |
13 | 12 | HasItemSize,
|
14 | 13 | HasLength,
|
15 | 14 | v3_unstable_dtype_warning,
|
16 | 15 | )
|
17 | 16 | from zarr.core.dtype.npy.common import (
|
18 |
| - EndiannessNumpy, |
19 | 17 | bytes_from_json,
|
20 | 18 | bytes_to_json,
|
21 | 19 | check_json_str,
|
22 |
| - endianness_from_numpy_str, |
23 |
| - endianness_to_numpy_str, |
24 | 20 | )
|
25 | 21 | from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar, ZDType
|
26 | 22 |
|
27 | 23 |
|
28 |
| -@dataclass(frozen=True, kw_only=True) |
29 |
| -class FixedLengthASCII(ZDType[np.dtypes.BytesDType[int], np.bytes_], HasLength, HasItemSize): |
30 |
| - dtype_cls = np.dtypes.BytesDType |
31 |
| - _zarr_v3_name = "numpy.fixed_length_ascii" |
32 |
| - |
33 |
| - @classmethod |
34 |
| - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: |
35 |
| - return cls(length=dtype.itemsize) |
36 |
| - |
37 |
| - def to_dtype(self) -> np.dtypes.BytesDType[int]: |
38 |
| - return self.dtype_cls(self.length) |
39 |
| - |
40 |
| - @classmethod |
41 |
| - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: |
42 |
| - """ |
43 |
| - Check that the input is a valid JSON representation of a numpy S dtype. |
44 |
| - """ |
45 |
| - if zarr_format == 2: |
46 |
| - # match |S1, |S2, etc |
47 |
| - return isinstance(data, str) and re.match(r"^\|S\d+$", data) is not None |
48 |
| - elif zarr_format == 3: |
49 |
| - return ( |
50 |
| - isinstance(data, dict) |
51 |
| - and set(data.keys()) == {"name", "configuration"} |
52 |
| - and data["name"] == cls._zarr_v3_name |
53 |
| - and isinstance(data["configuration"], dict) |
54 |
| - and "length_bytes" in data["configuration"] |
55 |
| - ) |
56 |
| - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover |
57 |
| - |
58 |
| - def to_json(self, zarr_format: ZarrFormat) -> JSON: |
59 |
| - if zarr_format == 2: |
60 |
| - return self.to_dtype().str |
61 |
| - elif zarr_format == 3: |
62 |
| - return { |
63 |
| - "name": self._zarr_v3_name, |
64 |
| - "configuration": {"length_bytes": self.length}, |
65 |
| - } |
66 |
| - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover |
67 |
| - |
68 |
| - @classmethod |
69 |
| - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: |
70 |
| - if zarr_format == 2: |
71 |
| - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] |
72 |
| - elif zarr_format == 3: |
73 |
| - return cls(length=data["configuration"]["length_bytes"]) # type: ignore[arg-type, index, call-overload] |
74 |
| - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover |
75 |
| - |
76 |
| - def default_value(self) -> np.bytes_: |
77 |
| - return np.bytes_(b"") |
78 |
| - |
79 |
| - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: |
80 |
| - return base64.standard_b64encode(data).decode("ascii") # type: ignore[arg-type] |
81 |
| - |
82 |
| - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.bytes_: |
83 |
| - if check_json_str(data): |
84 |
| - return self.to_dtype().type(base64.standard_b64decode(data.encode("ascii"))) |
85 |
| - raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover |
86 |
| - |
87 |
| - def check_value(self, data: object) -> bool: |
88 |
| - # this is generous for backwards compatibility |
89 |
| - return isinstance(data, np.bytes_ | str | bytes | int) |
90 |
| - |
91 |
| - def _cast_value_unsafe(self, value: object) -> np.bytes_: |
92 |
| - return self.to_dtype().type(value) |
93 |
| - |
94 |
| - @property |
95 |
| - def item_size(self) -> int: |
96 |
| - return self.length |
97 |
| - |
98 |
| - |
99 | 24 | @dataclass(frozen=True, kw_only=True)
|
100 | 25 | class FixedLengthBytes(ZDType[np.dtypes.VoidDType[int], np.void], HasLength, HasItemSize):
|
101 | 26 | # np.dtypes.VoidDType is specified in an odd way in numpy
|
@@ -190,87 +115,6 @@ def item_size(self) -> int:
|
190 | 115 | return self.length
|
191 | 116 |
|
192 | 117 |
|
193 |
| -@dataclass(frozen=True, kw_only=True) |
194 |
| -class FixedLengthUTF32( |
195 |
| - ZDType[np.dtypes.StrDType[int], np.str_], HasEndianness, HasLength, HasItemSize |
196 |
| -): |
197 |
| - dtype_cls = np.dtypes.StrDType |
198 |
| - _zarr_v3_name = "numpy.fixed_length_utf32" |
199 |
| - code_point_bytes: ClassVar[int] = 4 # utf32 is 4 bytes per code point |
200 |
| - |
201 |
| - @classmethod |
202 |
| - def _from_dtype_unsafe(cls, dtype: TBaseDType) -> Self: |
203 |
| - byte_order = cast("EndiannessNumpy", dtype.byteorder) |
204 |
| - return cls( |
205 |
| - length=dtype.itemsize // (cls.code_point_bytes), |
206 |
| - endianness=endianness_from_numpy_str(byte_order), |
207 |
| - ) |
208 |
| - |
209 |
| - def to_dtype(self) -> np.dtypes.StrDType[int]: |
210 |
| - byte_order = endianness_to_numpy_str(self.endianness) |
211 |
| - return self.dtype_cls(self.length).newbyteorder(byte_order) |
212 |
| - |
213 |
| - @classmethod |
214 |
| - def check_json(cls, data: JSON, zarr_format: ZarrFormat) -> TypeGuard[JSON]: |
215 |
| - """ |
216 |
| - Check that the input is a valid JSON representation of a numpy S dtype. |
217 |
| - """ |
218 |
| - if zarr_format == 2: |
219 |
| - # match >U1, <U2, etc |
220 |
| - return isinstance(data, str) and re.match(r"^[><]U\d+$", data) is not None |
221 |
| - elif zarr_format == 3: |
222 |
| - return ( |
223 |
| - isinstance(data, dict) |
224 |
| - and set(data.keys()) == {"name", "configuration"} |
225 |
| - and data["name"] == cls._zarr_v3_name |
226 |
| - and "configuration" in data |
227 |
| - and isinstance(data["configuration"], dict) |
228 |
| - and set(data["configuration"].keys()) == {"length_bytes"} |
229 |
| - and isinstance(data["configuration"]["length_bytes"], int) |
230 |
| - ) |
231 |
| - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover |
232 |
| - |
233 |
| - def to_json(self, zarr_format: ZarrFormat) -> JSON: |
234 |
| - if zarr_format == 2: |
235 |
| - return self.to_dtype().str |
236 |
| - elif zarr_format == 3: |
237 |
| - return { |
238 |
| - "name": self._zarr_v3_name, |
239 |
| - "configuration": {"length_bytes": self.length * self.code_point_bytes}, |
240 |
| - } |
241 |
| - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover |
242 |
| - |
243 |
| - @classmethod |
244 |
| - def _from_json_unsafe(cls, data: JSON, zarr_format: ZarrFormat) -> Self: |
245 |
| - if zarr_format == 2: |
246 |
| - return cls.from_dtype(np.dtype(data)) # type: ignore[arg-type] |
247 |
| - elif zarr_format == 3: |
248 |
| - return cls(length=data["configuration"]["length_bytes"] // cls.code_point_bytes) # type: ignore[arg-type, index, call-overload, operator] |
249 |
| - raise ValueError(f"zarr_format must be 2 or 3, got {zarr_format}") # pragma: no cover |
250 |
| - |
251 |
| - def default_value(self) -> np.str_: |
252 |
| - return np.str_("") |
253 |
| - |
254 |
| - def to_json_value(self, data: object, *, zarr_format: ZarrFormat) -> str: |
255 |
| - return str(data) |
256 |
| - |
257 |
| - def from_json_value(self, data: JSON, *, zarr_format: ZarrFormat) -> np.str_: |
258 |
| - if check_json_str(data): |
259 |
| - return self.to_dtype().type(data) |
260 |
| - raise TypeError(f"Invalid type: {data}. Expected a string.") # pragma: no cover |
261 |
| - |
262 |
| - def check_value(self, data: object) -> bool: |
263 |
| - # this is generous for backwards compatibility |
264 |
| - return isinstance(data, str | np.str_ | bytes | int) |
265 |
| - |
266 |
| - def _cast_value_unsafe(self, data: object) -> np.str_: |
267 |
| - return self.to_dtype().type(data) |
268 |
| - |
269 |
| - @property |
270 |
| - def item_size(self) -> int: |
271 |
| - return self.length * self.code_point_bytes |
272 |
| - |
273 |
| - |
274 | 118 | @dataclass(frozen=True, kw_only=True)
|
275 | 119 | class Structured(ZDType[np.dtypes.VoidDType[int], np.void], HasItemSize):
|
276 | 120 | dtype_cls = np.dtypes.VoidDType # type: ignore[assignment]
|
|
0 commit comments