Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3559.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Create `Bytes`, a new data type for variable-length bytes. This data type is a drop-in replacement for `VariableLengthBytes` that complies with the published [`Bytes`](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/bytes) data type spec.
2 changes: 1 addition & 1 deletion examples/custom_dtype/custom_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
from zarr.core.common import JSON, ZarrFormat
from zarr.core.dtype import ZDType, data_type_registry
from zarr.core.dtype.common import (
DataTypeValidationError,
DTypeConfig_V2,
DTypeJSON,
check_dtype_spec_v2,
)
from zarr.errors import DataTypeValidationError

# This is the int2 array data type
int2_dtype_cls = type(np.dtype("int2"))
Expand Down
47 changes: 42 additions & 5 deletions src/zarr/core/dtype/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
from typing import TYPE_CHECKING, Final, TypeAlias

from zarr.core.dtype.common import (
DataTypeValidationError,
DTypeJSON,
)
from zarr.core.dtype.npy.bool import Bool
from zarr.core.dtype.npy.bytes import (
Bytes,
BytesJSON_V2,
BytesJSON_V3,
NullTerminatedBytes,
NullterminatedBytesJSON_V2,
NullTerminatedBytesJSON_V3,
Expand All @@ -30,6 +32,7 @@
TimeDelta64JSON_V2,
TimeDelta64JSON_V3,
)
from zarr.errors import DataTypeValidationError

if TYPE_CHECKING:
from zarr.core.common import ZarrFormat
Expand All @@ -52,8 +55,12 @@

__all__ = [
"Bool",
"Bytes",
"BytesJSON_V2",
"BytesJSON_V3",
"Complex64",
"Complex128",
"DTypeJSON",
"DataTypeRegistry",
"DataTypeValidationError",
"DateTime64",
Expand Down Expand Up @@ -94,6 +101,8 @@
"VariableLengthUTF8JSON_V2",
"ZDType",
"data_type_registry",
"disable_legacy_bytes_dtype",
"enable_legacy_bytes_dtype",
"parse_data_type",
"parse_dtype",
]
Expand All @@ -115,8 +124,8 @@
TimeDType = DateTime64 | TimeDelta64
TIME_DTYPE: Final = DateTime64, TimeDelta64

BytesDType = RawBytes | NullTerminatedBytes | VariableLengthBytes
BYTES_DTYPE: Final = RawBytes, NullTerminatedBytes, VariableLengthBytes
BytesDType = RawBytes | NullTerminatedBytes | Bytes
BYTES_DTYPE: Final = RawBytes, NullTerminatedBytes, Bytes

AnyDType = (
Bool
Expand All @@ -127,7 +136,6 @@
| BytesDType
| Structured
| TimeDType
| VariableLengthBytes
)
# mypy has trouble inferring the type of variablelengthstring dtype, because its class definition
# depends on the installed numpy version. That's why the type: ignore statement is needed here.
Expand All @@ -140,7 +148,6 @@
*BYTES_DTYPE,
Structured,
*TIME_DTYPE,
VariableLengthBytes,
)

# These are aliases for variable-length UTF-8 strings
Expand Down Expand Up @@ -277,6 +284,36 @@ def parse_dtype(
# If the dtype request is one of the aliases for variable-length UTF-8 strings,
# return that dtype.
return VariableLengthUTF8() # type: ignore[return-value]
if dtype_spec is bytes:
# Treat the bytes type as a request for the Bytes dtype
return Bytes()
Comment on lines +287 to +289
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

flagging this change -- parse_dtype(bytes, zarr_format = 3) will now return an instance of Bytes


# otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case
# we can create a native dtype from it, and do the dtype inference from that
return get_data_type_from_native_dtype(dtype_spec) # type: ignore[arg-type]


def enable_legacy_bytes_dtype() -> None:
    """
    Swap the new ``Bytes`` data type out of the registry and put the legacy
    ``VariableLengthBytes`` data type in its place. Intended for backwards
    compatibility.

    The swap is performed only when ``"bytes"`` is currently registered and
    ``"variable_length_bytes"`` is not. In every other registry state the
    registry is left untouched.
    """
    # Guard: bail out unless "bytes" is present and the legacy key is absent.
    if (
        "bytes" not in data_type_registry.contents
        or "variable_length_bytes" in data_type_registry.contents
    ):
        return

    data_type_registry.unregister("bytes")
    data_type_registry.register("variable_length_bytes", VariableLengthBytes)


def disable_legacy_bytes_dtype() -> None:
    """
    Swap the legacy ``VariableLengthBytes`` data type out of the registry and put
    the new ``Bytes`` data type in its place. Used to reverse the effect of
    ``enable_legacy_bytes_dtype``.

    The swap is performed only when ``"variable_length_bytes"`` is currently
    registered and ``"bytes"`` is not. In every other registry state the
    registry is left untouched.
    """
    # Guard: bail out unless the legacy key is present and "bytes" is absent.
    if (
        "variable_length_bytes" not in data_type_registry.contents
        or "bytes" in data_type_registry.contents
    ):
        return

    data_type_registry.unregister("variable_length_bytes")
    data_type_registry.register("bytes", Bytes)
Comment on lines +296 to +319
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these functions let users effectively disable the changes in this PR

6 changes: 0 additions & 6 deletions src/zarr/core/dtype/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,6 @@ def unpack_dtype_json(data: DTypeSpec_V2 | DTypeSpec_V3) -> DTypeJSON:
return data


class DataTypeValidationError(ValueError):
    """
    Error type for data type validation failures.

    Subclasses ``ValueError``, so handlers written as ``except ValueError``
    also catch it.
    """


class ScalarTypeValidationError(ValueError):
    """
    Error type for scalar type validation failures.

    Subclasses ``ValueError``, so handlers written as ``except ValueError``
    also catch it.
    """


Comment on lines -154 to -159
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these errors were moved to the main errors module

@dataclass(frozen=True, kw_only=True)
class HasLength:
"""
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/core/dtype/npy/bool.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import numpy as np

from zarr.core.dtype.common import (
DataTypeValidationError,
DTypeConfig_V2,
DTypeJSON,
HasItemSize,
check_dtype_spec_v2,
)
from zarr.core.dtype.wrapper import TBaseDType, ZDType
from zarr.errors import DataTypeValidationError

if TYPE_CHECKING:
from zarr.core.common import JSON, ZarrFormat
Expand Down
Loading