Source code for zarr.core.array

from __future__ import annotations

import json
import warnings
from asyncio import gather
from collections.abc import Iterable
from dataclasses import dataclass, field, replace
from itertools import starmap
from logging import getLogger
from typing import (
    TYPE_CHECKING,
    Any,
    Generic,
    Literal,
    TypeAlias,
    TypedDict,
    cast,
    overload,
)
from warnings import warn

import numcodecs
import numcodecs.abc
import numpy as np
import numpy.typing as npt
from typing_extensions import deprecated

from zarr._compat import _deprecate_positional_args
from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec, BytesBytesCodec, Codec
from zarr.abc.store import Store, set_or_delete
from zarr.codecs._v2 import V2Codec
from zarr.core._info import ArrayInfo
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, parse_array_config
from zarr.core.attributes import Attributes
from zarr.core.buffer import (
    BufferPrototype,
    NDArrayLike,
    NDBuffer,
    default_buffer_prototype,
)
from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition, normalize_chunks
from zarr.core.chunk_key_encodings import (
    ChunkKeyEncoding,
    ChunkKeyEncodingLike,
    DefaultChunkKeyEncoding,
    V2ChunkKeyEncoding,
)
from zarr.core.common import (
    JSON,
    ZARR_JSON,
    ZARRAY_JSON,
    ZATTRS_JSON,
    ChunkCoords,
    MemoryOrder,
    ShapeLike,
    ZarrFormat,
    _default_zarr_format,
    _warn_order_kwarg,
    concurrent_map,
    parse_dtype,
    parse_order,
    parse_shapelike,
    product,
)
from zarr.core.config import config as zarr_config
from zarr.core.indexing import (
    BasicIndexer,
    BasicSelection,
    BlockIndex,
    BlockIndexer,
    CoordinateIndexer,
    CoordinateSelection,
    Fields,
    Indexer,
    MaskIndexer,
    MaskSelection,
    OIndex,
    OrthogonalIndexer,
    OrthogonalSelection,
    Selection,
    VIndex,
    _iter_grid,
    ceildiv,
    check_fields,
    check_no_multi_fields,
    is_pure_fancy_indexing,
    is_pure_orthogonal_indexing,
    is_scalar,
    pop_fields,
)
from zarr.core.metadata import (
    ArrayMetadata,
    ArrayMetadataDict,
    ArrayV2Metadata,
    ArrayV2MetadataDict,
    ArrayV3Metadata,
    ArrayV3MetadataDict,
    T_ArrayMetadata,
)
from zarr.core.metadata.v2 import (
    _default_compressor,
    _default_filters,
    parse_compressor,
    parse_filters,
)
from zarr.core.metadata.v3 import DataType, parse_node_type_array
from zarr.core.sync import sync
from zarr.errors import MetadataValidationError
from zarr.registry import (
    _parse_array_array_codec,
    _parse_array_bytes_codec,
    _parse_bytes_bytes_codec,
    get_pipeline_class,
)
from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path

if TYPE_CHECKING:
    from collections.abc import Iterator, Sequence
    from typing import Self

    from zarr.abc.codec import CodecPipeline
    from zarr.codecs.sharding import ShardingCodecIndexLocation
    from zarr.core.group import AsyncGroup
    from zarr.storage import StoreLike


# Array and AsyncArray are defined in the base ``zarr`` namespace
__all__ = ["create_codec_pipeline", "parse_array_metadata"]

logger = getLogger(__name__)


def parse_array_metadata(data: Any) -> ArrayMetadata:
    """
    Coerce ``data`` into an array metadata object.

    Parameters
    ----------
    data : Any
        Either an object that already satisfies ``isinstance(data, ArrayMetadata)``,
        which is returned unchanged, or a ``dict`` whose ``"zarr_format"`` entry
        (2 or 3) selects the metadata class used to parse it.

    Returns
    -------
    ArrayMetadata
        The parsed (or passed-through) metadata object.

    Raises
    ------
    ValueError
        If Zarr format 3 metadata declares one or more storage transformers.
    TypeError
        If ``data`` is neither array metadata nor a ``dict``, or if it is a
        ``dict`` whose ``"zarr_format"`` is not 2 or 3.
    KeyError
        If ``data`` is a ``dict`` without a ``"zarr_format"`` key.
    """
    if isinstance(data, ArrayMetadata):
        return data
    if isinstance(data, dict):
        zarr_format = data["zarr_format"]
        if zarr_format == 3:
            meta_out = ArrayV3Metadata.from_dict(data)
            if len(meta_out.storage_transformers) > 0:
                # Note the trailing space: the two literals are concatenated.
                msg = (
                    f"Array metadata contains storage transformers: {meta_out.storage_transformers}. "
                    "Arrays with storage transformers are not supported in zarr-python at this time."
                )
                raise ValueError(msg)
            return meta_out
        if zarr_format == 2:
            return ArrayV2Metadata.from_dict(data)
        # Exception type kept as TypeError for backward compatibility.
        raise TypeError(f"Invalid zarr_format: {zarr_format!r}. Expected 2 or 3.")
    raise TypeError(
        f"Expected array metadata or a dict describing array metadata. Got {type(data)!r}."
    )


def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline:
    """
    Build the codec pipeline that corresponds to ``metadata``.

    For ``ArrayV3Metadata`` the pipeline is built from ``metadata.codecs``.
    For ``ArrayV2Metadata`` the filters and compressor are wrapped in a single
    ``V2Codec`` and the pipeline is built from that one codec.

    Raises
    ------
    TypeError
        If ``metadata`` is neither ``ArrayV3Metadata`` nor ``ArrayV2Metadata``.
    """
    if isinstance(metadata, ArrayV3Metadata):
        return get_pipeline_class().from_codecs(metadata.codecs)
    if not isinstance(metadata, ArrayV2Metadata):
        raise TypeError
    legacy_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
    return get_pipeline_class().from_codecs([legacy_codec])


async def get_array_metadata(
    store_path: StorePath, zarr_format: ZarrFormat | None = 3
) -> dict[str, JSON]:
    """
    Read the raw metadata document of the array located at ``store_path``.

    Parameters
    ----------
    store_path : StorePath
        Location of the array; the metadata keys are resolved relative to it.
    zarr_format : {2, 3, None}, optional
        Format to look for (default is 3). With ``None`` the documents of both
        formats are fetched and Zarr format 3 wins when both are present
        (a warning is emitted in that case).

    Returns
    -------
    dict[str, JSON]
        The decoded metadata. For Zarr format 2 the decoded attributes document
        (or an empty dict when it is absent) is stored under ``"attributes"``.

    Raises
    ------
    FileNotFoundError
        If the metadata document of the requested format is missing, or, with
        ``zarr_format=None``, if neither format's document is found.
    MetadataValidationError
        If ``zarr_format`` is not 2, 3, or None.
    """
    if zarr_format == 2:
        v2_doc, v2_attrs_doc = await gather(
            (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get()
        )
        if v2_doc is None:
            raise FileNotFoundError(store_path)
    elif zarr_format == 3:
        v3_doc = await (store_path / ZARR_JSON).get()
        if v3_doc is None:
            raise FileNotFoundError(store_path)
    elif zarr_format is None:
        # Format unknown: fetch every candidate document concurrently.
        v3_doc, v2_doc, v2_attrs_doc = await gather(
            (store_path / ZARR_JSON).get(),
            (store_path / ZARRAY_JSON).get(),
            (store_path / ZATTRS_JSON).get(),
        )
        found_v3 = v3_doc is not None
        found_v2 = v2_doc is not None
        if found_v3 and found_v2:
            # Ambiguous location: warn, then prefer v3 below.
            msg = f"Both zarr.json (Zarr format 3) and .zarray (Zarr format 2) metadata objects exist at {store_path}. Zarr v3 will be used."
            warnings.warn(msg, stacklevel=1)
        if not (found_v3 or found_v2):
            raise FileNotFoundError(store_path)
        # From here on the format is whichever document was found.
        zarr_format = 3 if found_v3 else 2
    else:
        raise MetadataValidationError("zarr_format", "2, 3, or None", zarr_format)

    if zarr_format == 2:
        # A v2 array is described by two documents: .zarray plus optional .zattrs.
        assert v2_doc is not None
        v2_metadata: dict[str, JSON] = json.loads(v2_doc.to_bytes())
        v2_metadata["attributes"] = (
            {} if v2_attrs_doc is None else json.loads(v2_attrs_doc.to_bytes())
        )
        return v2_metadata

    # A v3 array is described by a single zarr.json document.
    assert v3_doc is not None
    v3_metadata: dict[str, JSON] = json.loads(v3_doc.to_bytes())
    # NOTE(review): presumably rejects documents whose node_type is not an array.
    parse_node_type_array(v3_metadata.get("node_type"))
    return v3_metadata


[docs] @dataclass(frozen=True) class AsyncArray(Generic[T_ArrayMetadata]): """ An asynchronous array class representing a chunked array stored in a Zarr store. Parameters ---------- metadata : ArrayMetadata The metadata of the array. store_path : StorePath The path to the Zarr store. config : ArrayConfig, optional The runtime configuration of the array, by default None. Attributes ---------- metadata : ArrayMetadata The metadata of the array. store_path : StorePath The path to the Zarr store. codec_pipeline : CodecPipeline The codec pipeline used for encoding and decoding chunks. _config : ArrayConfig The runtime configuration of the array. """ metadata: T_ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) _config: ArrayConfig @overload def __init__( self: AsyncArray[ArrayV2Metadata], metadata: ArrayV2Metadata | ArrayV2MetadataDict, store_path: StorePath, config: ArrayConfig | None = None, ) -> None: ... @overload def __init__( self: AsyncArray[ArrayV3Metadata], metadata: ArrayV3Metadata | ArrayV3MetadataDict, store_path: StorePath, config: ArrayConfig | None = None, ) -> None: ... def __init__( self, metadata: ArrayMetadata | ArrayMetadataDict, store_path: StorePath, config: ArrayConfig | None = None, ) -> None: if isinstance(metadata, dict): zarr_format = metadata["zarr_format"] # TODO: remove this when we extensively type the dict representation of metadata _metadata = cast(dict[str, JSON], metadata) if zarr_format == 2: metadata = ArrayV2Metadata.from_dict(_metadata) elif zarr_format == 3: metadata = ArrayV3Metadata.from_dict(_metadata) else: raise ValueError(f"Invalid zarr_format: {zarr_format}. 
Expected 2 or 3") metadata_parsed = parse_array_metadata(metadata) config = ArrayConfig.from_dict({}) if config is None else config object.__setattr__(self, "metadata", metadata_parsed) object.__setattr__(self, "store_path", store_path) object.__setattr__(self, "_config", config) object.__setattr__(self, "codec_pipeline", create_codec_pipeline(metadata=metadata_parsed)) # this overload defines the function signature when zarr_format is 2 @overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: npt.DTypeLike, zarr_format: Literal[2], fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfig | ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV2Metadata]: ... # this overload defines the function signature when zarr_format is 3 @overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: npt.DTypeLike, zarr_format: Literal[3], fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfig | ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV3Metadata]: ... 
@overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: npt.DTypeLike, zarr_format: Literal[3] = 3, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfig | ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV3Metadata]: ... @overload @classmethod async def create( cls, store: StoreLike, *, # v2 and v3 shape: ShapeLike, dtype: npt.DTypeLike, zarr_format: ZarrFormat, fill_value: Any | None = None, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None ) = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: Iterable[str] | None = None, # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime overwrite: bool = False, data: npt.ArrayLike | None = None, config: ArrayConfig | ArrayConfigLike | None = None, ) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: ...
@classmethod
@deprecated("Use zarr.api.asynchronous.create_array instead.")
@_deprecate_positional_args
async def create(
    cls,
    store: StoreLike,
    *,
    # v2 and v3
    shape: ShapeLike,
    dtype: npt.DTypeLike,
    zarr_format: ZarrFormat = 3,
    fill_value: Any | None = None,
    attributes: dict[str, JSON] | None = None,
    # v3 only
    chunk_shape: ShapeLike | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = None,
    codecs: Iterable[Codec | dict[str, JSON]] | None = None,
    dimension_names: Iterable[str] | None = None,
    # v2 only
    chunks: ShapeLike | None = None,
    dimension_separator: Literal[".", "/"] | None = None,
    order: MemoryOrder | None = None,
    filters: list[dict[str, JSON]] | None = None,
    compressor: dict[str, JSON] | None = None,
    # runtime
    overwrite: bool = False,
    data: npt.ArrayLike | None = None,
    config: ArrayConfig | ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
    """Method to create a new asynchronous array instance.

    .. deprecated:: 3.0.0
        Deprecated in favor of :func:`zarr.api.asynchronous.create_array`.

    Parameters
    ----------
    store : StoreLike
        The store where the array will be created.
    shape : ShapeLike
        The shape of the array.
    dtype : npt.DTypeLike
        The data type of the array.
    zarr_format : ZarrFormat, optional
        The Zarr format version (default is 3).
    fill_value : Any, optional
        The fill value of the array (default is None).
    attributes : dict[str, JSON], optional
        The attributes of the array (default is None).
    chunk_shape : ShapeLike, optional
        The shape of the array's chunks.
        Zarr format 3 only. Zarr format 2 arrays should use ``chunks`` instead.
        If not specified, defaults are guessed based on the shape and dtype.
    chunk_key_encoding : ChunkKeyEncoding, optional
        A specification of how the chunk keys are represented in storage.
        Zarr format 3 only. Zarr format 2 arrays should use ``dimension_separator`` instead.
        Default is ``("default", "/")``.
    codecs : Sequence of Codecs or dicts, optional
        An iterable of Codec or dict serializations of Codecs. The elements of
        this collection specify the transformation from array values to stored bytes.
        Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead.

        If no codecs are provided, default codecs will be used:

        - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
        - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``.
        - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``.

        These defaults can be changed by modifying the value of ``array.v3_default_filters``,
        ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in :mod:`zarr.core.config`.
    dimension_names : Iterable[str], optional
        The names of the dimensions (default is None).
        Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
    chunks : ShapeLike, optional
        The shape of the array's chunks.
        Zarr format 2 only. Zarr format 3 arrays should use ``chunk_shape`` instead.
        If not specified, defaults are guessed based on the shape and dtype.
    dimension_separator : Literal[".", "/"], optional
        The dimension separator (default is ".").
        Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead.
    order : Literal["C", "F"], optional
        The memory order of the array (default is "C").
        If ``zarr_format`` is 2, this parameter sets the memory order of the array.
        If ``zarr_format`` is 3, then this parameter is deprecated, because memory order
        is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory
        order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``.
    filters : list[dict[str, JSON]], optional
        Sequence of filters to use to encode chunk data prior to compression.
        Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``filters``
        are provided, a default set of filters will be used.
        These defaults can be changed by modifying the value of ``array.v2_default_filters`` in :mod:`zarr.core.config`.
    compressor : dict[str, JSON], optional
        The compressor used to compress the data (default is None).
        Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead.

        If no ``compressor`` is provided, a default compressor will be used:

        - For numeric arrays, the default is ``ZstdCodec``.
        - For Unicode strings, the default is ``VLenUTF8Codec``.
        - For bytes or objects, the default is ``VLenBytesCodec``.

        These defaults can be changed by modifying the value of ``array.v2_default_compressor`` in :mod:`zarr.core.config`.
    overwrite : bool, optional
        If True and the store supports deletes, any existing content at the
        target path is deleted before the array is created. Otherwise the
        target path is first checked for an existing node (default is False).
    data : npt.ArrayLike, optional
        The data to be inserted into the array (default is None).
    config : ArrayConfig or ArrayConfigLike, optional
        Runtime configuration for the array.

    Returns
    -------
    AsyncArray
        The created asynchronous array instance.
    """
    # Thin deprecated wrapper: all work is delegated to ``_create``.
    return await cls._create(
        store,
        # v2 and v3
        shape=shape,
        dtype=dtype,
        zarr_format=zarr_format,
        fill_value=fill_value,
        attributes=attributes,
        # v3 only
        chunk_shape=chunk_shape,
        chunk_key_encoding=chunk_key_encoding,
        codecs=codecs,
        dimension_names=dimension_names,
        # v2 only
        chunks=chunks,
        dimension_separator=dimension_separator,
        order=order,
        filters=filters,
        compressor=compressor,
        # runtime
        overwrite=overwrite,
        data=data,
        config=config,
    )
@classmethod
async def _create(
    cls,
    store: StoreLike,
    *,
    # v2 and v3
    shape: ShapeLike,
    dtype: npt.DTypeLike,
    zarr_format: ZarrFormat = 3,
    fill_value: Any | None = None,
    attributes: dict[str, JSON] | None = None,
    # v3 only
    chunk_shape: ShapeLike | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = None,
    codecs: Iterable[Codec | dict[str, JSON]] | None = None,
    dimension_names: Iterable[str] | None = None,
    # v2 only
    chunks: ShapeLike | None = None,
    dimension_separator: Literal[".", "/"] | None = None,
    order: MemoryOrder | None = None,
    filters: list[dict[str, JSON]] | None = None,
    compressor: dict[str, JSON] | None = None,
    # runtime
    overwrite: bool = False,
    data: npt.ArrayLike | None = None,
    config: ArrayConfig | ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
    """Method to create a new asynchronous array instance.
    See :func:`AsyncArray.create` for more details.
    Deprecated in favor of :func:`zarr.api.asynchronous.create_array`.

    Raises
    ------
    ValueError
        If both ``chunks`` and ``chunk_shape`` are given, if an argument that
        belongs to the other Zarr format is supplied, or if ``zarr_format``
        is neither 2 nor 3.
    """
    store_path = await make_store_path(store)

    dtype_parsed = parse_dtype(dtype, zarr_format)
    shape = parse_shapelike(shape)

    if chunks is not None and chunk_shape is not None:
        raise ValueError("Only one of chunk_shape or chunks can be provided.")

    # NOTE(review): this is a truthiness test, so a falsy ``chunks`` value is
    # treated like "not given" and ``chunk_shape`` is used instead - confirm
    # that this is intended before tightening it to ``is not None``.
    if chunks:
        _chunks = normalize_chunks(chunks, shape, dtype_parsed.itemsize)
    else:
        _chunks = normalize_chunks(chunk_shape, shape, dtype_parsed.itemsize)
    config_parsed = parse_array_config(config)

    result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]
    if zarr_format == 3:
        # Reject v2-only arguments.
        if dimension_separator is not None:
            raise ValueError(
                "dimension_separator cannot be used for arrays with zarr_format 3. Use chunk_key_encoding instead."
            )
        if filters is not None:
            raise ValueError(
                "filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
            )
        if compressor is not None:
            raise ValueError(
                "compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
            )

        if order is not None:
            # ``order`` is accepted but not forwarded for v3 arrays.
            _warn_order_kwarg()

        result = await cls._create_v3(
            store_path,
            shape=shape,
            dtype=dtype_parsed,
            chunk_shape=_chunks,
            fill_value=fill_value,
            chunk_key_encoding=chunk_key_encoding,
            codecs=codecs,
            dimension_names=dimension_names,
            attributes=attributes,
            overwrite=overwrite,
            config=config_parsed,
        )
    elif zarr_format == 2:
        # Reject v3-only arguments.
        if codecs is not None:
            raise ValueError(
                "codecs cannot be used for arrays with zarr_format 2. Use filters and compressor instead."
            )
        if chunk_key_encoding is not None:
            raise ValueError(
                "chunk_key_encoding cannot be used for arrays with zarr_format 2. Use dimension_separator instead."
            )
        if dimension_names is not None:
            raise ValueError("dimension_names cannot be used for arrays with zarr_format 2.")

        if order is None:
            order_parsed = parse_order(zarr_config.get("array.order"))
        else:
            order_parsed = order

        result = await cls._create_v2(
            store_path,
            shape=shape,
            dtype=dtype_parsed,
            chunks=_chunks,
            dimension_separator=dimension_separator,
            fill_value=fill_value,
            order=order_parsed,
            config=config_parsed,
            filters=filters,
            compressor=compressor,
            attributes=attributes,
            overwrite=overwrite,
        )
    else:
        raise ValueError(f"Unsupported zarr_format. Got: {zarr_format}")

    if data is not None:
        # insert user-provided data
        await result.setitem(..., data)

    return result

@classmethod
async def _create_v3(
    cls,
    store_path: StorePath,
    *,
    shape: ShapeLike,
    dtype: np.dtype[Any],
    chunk_shape: ChunkCoords,
    config: ArrayConfig,
    fill_value: Any | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = None,
    codecs: Iterable[Codec | dict[str, JSON]] | None = None,
    dimension_names: Iterable[str] | None = None,
    attributes: dict[str, JSON] | None = None,
    overwrite: bool = False,
) -> AsyncArray[ArrayV3Metadata]:
    """
    Create a Zarr format 3 array at ``store_path`` and persist its metadata.

    When ``overwrite`` is true and the store supports deletes, the directory at
    ``store_path`` is deleted first; in every other case the path is checked
    with ``ensure_no_existing_node``. A ``UserWarning`` is emitted for dtypes
    whose ``kind`` is one of ``"U"``, ``"T"`` or ``"S"``.
    """
    if overwrite and store_path.store.supports_deletes:
        await store_path.delete_dir()
    else:
        await ensure_no_existing_node(store_path, zarr_format=3)

    shape = parse_shapelike(shape)
    codecs = list(codecs) if codecs is not None else _get_default_codecs(np.dtype(dtype))

    if chunk_key_encoding is None:
        chunk_key_encoding = ("default", "/")
    if isinstance(chunk_key_encoding, tuple):
        # Expand the ``(name, separator)`` shorthand into an encoding object.
        chunk_key_encoding = (
            V2ChunkKeyEncoding(separator=chunk_key_encoding[1])
            if chunk_key_encoding[0] == "v2"
            else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1])
        )

    if dtype.kind in "UTS":
        warn(
            f"The dtype `{dtype}` is currently not part of the Zarr format 3 specification. It "
            "may not be supported by other zarr implementations and may change in the future.",
            category=UserWarning,
            stacklevel=2,
        )

    metadata = ArrayV3Metadata(
        shape=shape,
        data_type=dtype,
        chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape),
        chunk_key_encoding=chunk_key_encoding,
        fill_value=fill_value,
        codecs=codecs,
        dimension_names=tuple(dimension_names) if dimension_names else None,
        attributes=attributes or {},
    )

    array = cls(metadata=metadata, store_path=store_path, config=config)
    await array._save_metadata(metadata, ensure_parents=True)
    return array

@classmethod
async def _create_v2(
    cls,
    store_path: StorePath,
    *,
    shape: ChunkCoords,
    dtype: np.dtype[Any],
    chunks: ChunkCoords,
    order: MemoryOrder,
    config: ArrayConfig,
    dimension_separator: Literal[".", "/"] | None = None,
    fill_value: float | None = None,
    filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
    compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
    attributes: dict[str, JSON] | None = None,
    overwrite: bool = False,
) -> AsyncArray[ArrayV2Metadata]:
    """
    Create a Zarr format 2 array at ``store_path`` and persist its metadata.

    When ``overwrite`` is true and the store supports deletes, the directory at
    ``store_path`` is deleted first; in every other case the path is checked
    with ``ensure_no_existing_node``. ``dimension_separator`` defaults to
    ``"."``. For string dtypes a ``VLenUTF8`` filter is appended unless one is
    already present, either as a codec instance or as a dict with
    ``"id": "vlen-utf8"``.
    """
    if overwrite and store_path.store.supports_deletes:
        await store_path.delete_dir()
    else:
        await ensure_no_existing_node(store_path, zarr_format=2)

    if dimension_separator is None:
        dimension_separator = "."

    dtype = parse_dtype(dtype, zarr_format=2)

    # inject VLenUTF8 for str dtype if not already present
    if np.issubdtype(dtype, np.str_):
        from numcodecs.vlen import VLenUTF8

        # Materialize once so a one-shot iterable is not exhausted by the scan.
        filters = list(filters) if filters is not None else []
        # Only dict specs are subscripted; codec objects other than VLenUTF8
        # are not subscriptable and simply do not match.
        has_vlen_utf8 = any(
            isinstance(x, VLenUTF8) or (isinstance(x, dict) and x.get("id") == "vlen-utf8")
            for x in filters
        )
        if not has_vlen_utf8:
            filters = [*filters, VLenUTF8()]

    metadata = ArrayV2Metadata(
        shape=shape,
        dtype=np.dtype(dtype),
        chunks=chunks,
        order=order,
        dimension_separator=dimension_separator,
        fill_value=fill_value,
        compressor=compressor,
        filters=filters,
        attributes=attributes,
    )

    array = cls(metadata=metadata, store_path=store_path, config=config)
    await array._save_metadata(metadata, ensure_parents=True)
    return array
@classmethod
def from_dict(
    cls,
    store_path: StorePath,
    data: dict[str, JSON],
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
    """
    Build an array object from a metadata dictionary.

    The dictionary is parsed with ``parse_array_metadata`` and the result is
    handed to the class constructor together with ``store_path``.

    Parameters
    ----------
    store_path : StorePath
        The path within the store that the array object refers to.
    data : dict
        Array metadata in dictionary form. Its ``"zarr_format"`` entry decides
        whether Zarr format 2 or Zarr format 3 metadata is produced.

    Returns
    -------
    AsyncArray[ArrayV3Metadata] or AsyncArray[ArrayV2Metadata]
        An array object whose metadata type follows the format found in ``data``.

    Raises
    ------
    ValueError
        If Zarr format 3 metadata declares storage transformers.
    TypeError
        If ``data`` cannot be interpreted as Zarr format 2 or 3 array metadata.
    """
    return cls(metadata=parse_array_metadata(data), store_path=store_path)
@classmethod
async def open(
    cls,
    store: StoreLike,
    zarr_format: ZarrFormat | None = 3,
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
    """
    Async method to open an existing Zarr array from a given store.

    The store is resolved with ``make_store_path``, the array's metadata is
    loaded with ``get_array_metadata`` and the array object is constructed
    from that metadata.

    Parameters
    ----------
    store : StoreLike
        The store containing the Zarr array.
    zarr_format : ZarrFormat | None, optional
        The Zarr format version (default is 3). It is forwarded unchanged to
        ``get_array_metadata``.

    Returns
    -------
    AsyncArray
        The opened Zarr array.

    Examples
    --------
    >>> async_arr = await AsyncArray.open(store)  # doctest: +SKIP
    """
    location = await make_store_path(store)
    raw_metadata = await get_array_metadata(location, zarr_format=zarr_format)
    # TODO: remove this cast when we have better type hints
    return cls(metadata=cast(ArrayV3MetadataDict, raw_metadata), store_path=location)
@property
def store(self) -> Store:
    """The store object behind ``store_path``."""
    return self.store_path.store

@property
def ndim(self) -> int:
    """Returns the number of dimensions in the Array.

    Returns
    -------
    int
        The number of dimensions in the Array.
    """
    return len(self.metadata.shape)

@property
def shape(self) -> ChunkCoords:
    """Returns the shape of the Array.

    Returns
    -------
    tuple
        The shape of the Array.
    """
    return self.metadata.shape

@property
def chunks(self) -> ChunkCoords:
    """Returns the chunk shape of the Array.
    If sharding is used the inner chunk shape is returned.

    Only defined for arrays using `RegularChunkGrid`.
    If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.

    Returns
    -------
    ChunkCoords:
        The chunk shape of the Array.
    """
    return self.metadata.chunks

@property
def shards(self) -> ChunkCoords | None:
    """Returns the shard shape of the Array.
    Returns None if sharding is not used.

    Only defined for arrays using `RegularChunkGrid`.
    If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.

    Returns
    -------
    ChunkCoords:
        The shard shape of the Array.
    """
    return self.metadata.shards

@property
def size(self) -> int:
    """Returns the total number of elements in the array

    Returns
    -------
    int
        Total number of elements in the array
    """
    # ``np.prod`` of an empty shape is the float 1.0; coerce so that
    # zero-dimensional arrays also report an ``int``.
    return int(np.prod(self.metadata.shape))

@property
def filters(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...]:
    """
    Filters that are applied to each chunk of the array, in order, before serializing that
    chunk to bytes.

    For Zarr format 2 these are the metadata's ``filters`` (an empty tuple when
    none are set); otherwise they are the ``ArrayArrayCodec`` members of the
    metadata's ``inner_codecs``.
    """
    if self.metadata.zarr_format == 2:
        filters = self.metadata.filters
        if filters is None:
            return ()
        return filters

    return tuple(
        codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayArrayCodec)
    )

@property
def serializer(self) -> ArrayBytesCodec | None:
    """
    Array-to-bytes codec to use for serializing the chunks into bytes.

    ``None`` for Zarr format 2 arrays; otherwise the first ``ArrayBytesCodec``
    found in the metadata's ``inner_codecs``.
    """
    if self.metadata.zarr_format == 2:
        return None

    return next(
        codec for codec in self.metadata.inner_codecs if isinstance(codec, ArrayBytesCodec)
    )

@property
@deprecated("Use AsyncArray.compressors instead.")
def compressor(self) -> numcodecs.abc.Codec | None:
    """
    Compressor that is applied to each chunk of the array.

    .. deprecated:: 3.0.0
        `array.compressor` is deprecated and will be removed in a future release.
        Use `array.compressors` instead.

    Raises
    ------
    TypeError
        If the array is not a Zarr format 2 array.
    """
    if self.metadata.zarr_format == 2:
        return self.metadata.compressor
    raise TypeError("`compressor` is not available for Zarr format 3 arrays.")

@property
def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...]:
    """
    Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any
    filters are applied (if any are specified) and the data is serialized into bytes.

    For Zarr format 2 this is a tuple holding the single compressor (empty when
    there is none); otherwise it holds the ``BytesBytesCodec`` members of the
    metadata's ``inner_codecs``.
    """
    if self.metadata.zarr_format == 2:
        if self.metadata.compressor is not None:
            return (self.metadata.compressor,)
        return ()

    return tuple(
        codec for codec in self.metadata.inner_codecs if isinstance(codec, BytesBytesCodec)
    )

@property
def dtype(self) -> np.dtype[Any]:
    """Returns the data type of the array.

    Returns
    -------
    np.dtype
        Data type of the array
    """
    return self.metadata.dtype

@property
def order(self) -> MemoryOrder:
    """Returns the memory order of the array.

    The value is read from the array's runtime configuration.

    Returns
    -------
    MemoryOrder
        Memory order of the array
    """
    return self._config.order

@property
def attrs(self) -> dict[str, JSON]:
    """Returns the attributes of the array.

    Returns
    -------
    dict
        Attributes of the array
    """
    return self.metadata.attributes

@property
def read_only(self) -> bool:
    """Returns True if the array is read-only.

    Returns
    -------
    bool
        True if the array is read-only
    """
    # Backwards compatibility for 2.x
    return self.store_path.read_only

@property
def path(self) -> str:
    """Storage path.

    Returns
    -------
    str
        The path to the array in the Zarr store.
    """
    return self.store_path.path

@property
def name(self) -> str:
    """Array name following h5py convention.

    Returns
    -------
    str
        The name of the array.
    """
    # follow h5py convention: add leading slash
    name = self.path
    if not name.startswith("/"):
        name = "/" + name
    return name

@property
def basename(self) -> str:
    """Final component of name.

    Returns
    -------
    str
        The basename or final component of the array name.
    """
    return self.name.split("/")[-1]

@property
def cdata_shape(self) -> ChunkCoords:
    """
    The shape of the chunk grid for this array.

    Computed per dimension as the ceiling division of the array extent by the
    chunk extent.

    Returns
    -------
    Tuple[int]
        The shape of the chunk grid for this array.
    """
    return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=False)))

@property
def nchunks(self) -> int:
    """
    The number of chunks in the stored representation of this array.

    Returns
    -------
    int
        The total number of chunks in the array.
    """
    return product(self.cdata_shape)
async def nchunks_initialized(self) -> int:
    """
    Count the chunks of this array that have been initialized, i.e. the chunks
    that have been persisted to the storage backend.

    Returns
    -------
    nchunks_initialized : int
        The number of chunks that have been initialized.

    Notes
    -----
    On :class:`AsyncArray` this is an asynchronous method, unlike the (synchronous)
    property :attr:`Array.nchunks_initialized`.

    Examples
    --------
    >>> arr = await zarr.api.asynchronous.create(shape=(10,), chunks=(2,))
    >>> await arr.nchunks_initialized()
    0
    >>> await arr.setitem(slice(5), 1)
    >>> await arr.nchunks_initialized()
    3
    """
    initialized = await chunks_initialized(self)
    return len(initialized)
async def nbytes_stored(self) -> int:
    """
    Return the size that the store reports for this array's path prefix.

    The value comes from ``getsize_prefix`` on the underlying store, called
    with the array's path.

    Returns
    -------
    int
        Presumably the number of bytes stored under the array's prefix;
        verify against the store's ``getsize_prefix`` contract.
    """
    location = self.store_path
    return await location.store.getsize_prefix(location.path)
def _iter_chunk_coords(
    self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
) -> Iterator[ChunkCoords]:
    """
    Create an iterator over the coordinates of chunks in chunk grid space.

    With `origin`, iteration starts at that chunk index instead of the origin of
    the grid. With `selection_shape`, iteration is bounded to the contiguous
    region `[origin, origin + selection_shape]`, where the upper bound is
    exclusive as per python indexing conventions.

    Parameters
    ----------
    origin : Sequence[int] | None, default=None
        The origin of the selection relative to the array's chunk grid.
    selection_shape : Sequence[int] | None, default=None
        The shape of the selection in chunk grid coordinates.

    Yields
    ------
    chunk_coords: ChunkCoords
        The coordinates of each chunk in the selection.
    """
    grid_shape = self.cdata_shape
    return _iter_grid(grid_shape, origin=origin, selection_shape=selection_shape)

def _iter_chunk_keys(
    self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
) -> Iterator[str]:
    """
    Iterate over the storage keys of each chunk, relative to an optional origin,
    and optionally limited to a contiguous region in chunk grid coordinates.

    Parameters
    ----------
    origin : Sequence[int] | None, default=None
        The origin of the selection relative to the array's chunk grid.
    selection_shape : Sequence[int] | None, default=None
        The shape of the selection in chunk grid coordinates.

    Yields
    ------
    key: str
        The storage key of each chunk in the selection.
    """
    # Walk the chunk grid and encode each coordinate into its storage key.
    yield from (
        self.metadata.encode_chunk_key(coords)
        for coords in self._iter_chunk_coords(origin=origin, selection_shape=selection_shape)
    )

def _iter_chunk_regions(
    self, *, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
) -> Iterator[tuple[slice, ...]]:
    """
    Iterate over the regions spanned by each chunk.

    Parameters
    ----------
    origin : Sequence[int] | None, default=None
        The origin of the selection relative to the array's chunk grid.
    selection_shape : Sequence[int] | None, default=None
        The shape of the selection in chunk grid coordinates.

    Yields
    ------
    region: tuple[slice, ...]
        A tuple of slice objects representing the region spanned by each chunk
        in the selection. Each slice has step 1 and covers one full chunk length.
    """
    for grid_position in self._iter_chunk_coords(origin=origin, selection_shape=selection_shape):
        yield tuple(
            slice(index * extent, index * extent + extent, 1)
            for index, extent in zip(grid_position, self.chunks, strict=False)
        )

@property
def nbytes(self) -> int:
    """
    The total number of bytes that can be stored in the chunks of this array.

    Notes
    -----
    The value is the number of elements multiplied by the item size of the
    dtype. For this reason, ``nbytes`` will likely be inaccurate for arrays with
    variable-length dtypes: the size of such an array cannot be determined from
    the shape and dtype alone.
    """
    element_count = self.size
    return element_count * self.dtype.itemsize

async def _get_selection(
    self,
    indexer: Indexer,
    *,
    prototype: BufferPrototype,
    out: NDBuffer | None = None,
    fields: Fields | None = None,
) -> NDArrayLike:
    """
    Read the chunks addressed by ``indexer`` into a buffer and return its contents.

    Parameters
    ----------
    indexer : Indexer
        Yields ``(chunk_coords, chunk_selection, out_selection)`` triples.
    prototype : BufferPrototype
        Used to allocate the output buffer and to build each chunk spec.
    out : NDBuffer, optional
        Existing buffer to fill. Its shape must equal ``indexer.shape``.
    fields : Fields, optional
        Field selection, validated against the array dtype.

    Raises
    ------
    TypeError
        If ``out`` is given but is not an ``NDBuffer``.
    ValueError
        If the shape of ``out`` differs from ``indexer.shape``.
    """
    # validate the requested fields; this also yields the dtype of the result
    out_dtype = check_fields(fields, self.dtype)

    # resolve the buffer that decoded data is written into
    if out is None:
        out_buffer = prototype.nd_buffer.create(
            shape=indexer.shape,
            dtype=out_dtype,
            order=self._config.order,
            fill_value=self.metadata.fill_value,
        )
    else:
        if not isinstance(out, NDBuffer):
            raise TypeError(f"out argument needs to be an NDBuffer. Got {type(out)!r}")
        out_buffer = out
        if out_buffer.shape != indexer.shape:
            raise ValueError(
                f"shape of out argument doesn't match. Expected {indexer.shape}, got {out.shape}"
            )

    # an empty selection needs no chunk reads at all
    if product(indexer.shape) > 0:
        # v2 arrays must be read with the order recorded in their metadata
        _config = self._config
        if self.metadata.zarr_format == 2:
            _config = replace(_config, order=self.metadata.order)

        # one entry per chunk: where it lives, how to decode it, what to copy where
        read_batch = [
            (
                self.store_path / self.metadata.encode_chunk_key(chunk_coords),
                self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype),
                chunk_selection,
                out_selection,
            )
            for chunk_coords, chunk_selection, out_selection in indexer
        ]
        await self.codec_pipeline.read(read_batch, out_buffer, drop_axes=indexer.drop_axes)

    return out_buffer.as_ndarray_like()
async def getitem(
    self,
    selection: BasicSelection,
    *,
    prototype: BufferPrototype | None = None,
) -> NDArrayLike:
    """
    Asynchronously retrieve a subset of the array's data using basic indexing.

    Parameters
    ----------
    selection : BasicSelection
        A selection object specifying the subset of data to retrieve.
    prototype : BufferPrototype, optional
        A buffer prototype to use for the retrieved data. When None (the
        default), the default buffer prototype is used.

    Returns
    -------
    NDArrayLike
        The retrieved subset of the array's data.

    Examples
    --------
    >>> import zarr
    >>> store = zarr.storage.MemoryStore(mode='w')
    >>> async_arr = await zarr.api.asynchronous.create_array(
    ...     store=store,
    ...     shape=(100,100),
    ...     chunks=(10,10),
    ...     dtype='i4',
    ...     fill_value=0)
    <AsyncArray memory://... shape=(100, 100) dtype=int32>
    >>> await async_arr.getitem((0,1)) # doctest: +ELLIPSIS
    array(0, dtype=int32)
    """
    buffer_prototype = default_buffer_prototype() if prototype is None else prototype
    array_metadata = self.metadata
    indexer = BasicIndexer(
        selection,
        shape=array_metadata.shape,
        chunk_grid=array_metadata.chunk_grid,
    )
    return await self._get_selection(indexer, prototype=buffer_prototype)
async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = False) -> None:
    """
    Asynchronously save the array metadata.

    Parameters
    ----------
    metadata : ArrayMetadata
        The metadata to serialize with ``to_buffer_dict`` and write under
        this array's store path.
    ensure_parents : bool, default=False
        If True, also write the metadata of every parent returned by
        ``_build_parents(self)``, using ``set_if_not_exists``.

    Notes
    -----
    All writes are collected first and then awaited together with ``gather``.
    """
    to_save = metadata.to_buffer_dict(default_buffer_prototype())
    awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()]

    if ensure_parents:
        # To enable zarr.create(store, path="a/b/c"), we need to create all the intermediate groups.
        parents = _build_parents(self)

        for parent in parents:
            awaitables.extend(
                [
                    (parent.store_path / key).set_if_not_exists(value)
                    for key, value in parent.metadata.to_buffer_dict(
                        default_buffer_prototype()
                    ).items()
                ]
            )

    await gather(*awaitables)

async def _set_selection(
    self,
    indexer: Indexer,
    value: npt.ArrayLike,
    *,
    prototype: BufferPrototype,
    fields: Fields | None = None,
) -> None:
    """
    Write ``value`` into the chunks addressed by ``indexer``.

    Parameters
    ----------
    indexer : Indexer
        Yields ``(chunk_coords, chunk_selection, out_selection)`` triples.
    value : npt.ArrayLike
        Scalar or array-like data. It is converted to the dtype of the array
        metadata before being wrapped in an NDBuffer.
    prototype : BufferPrototype
        Used to wrap ``value`` and to build each chunk spec.
    fields : Fields, optional
        Field selection, checked with ``check_fields`` and ``check_no_multi_fields``.
    """
    # check fields are sensible
    check_fields(fields, self.dtype)
    fields = check_no_multi_fields(fields)

    # check value shape
    if np.isscalar(value):
        array_like = prototype.buffer.create_zero_length().as_array_like()
        if isinstance(array_like, np._typing._SupportsArrayFunc):
            # TODO: need to handle array types that don't support __array_function__
            # like PyTorch and JAX
            array_like_ = cast(np._typing._SupportsArrayFunc, array_like)
        # NOTE(review): `array_like_` is only bound when the isinstance check above
        # succeeds; see the TODO above for array types where it does not.
        value = np.asanyarray(value, dtype=self.metadata.dtype, like=array_like_)
    else:
        if not hasattr(value, "shape"):
            value = np.asarray(value, self.metadata.dtype)
        # NOTE: the shape check below is disabled; the shape of `value` is not
        # compared against `indexer.shape` in this method.
        # assert (
        #     value.shape == indexer.shape
        # ), f"shape of value doesn't match indexer shape. Expected {indexer.shape}, got {value.shape}"
        if not hasattr(value, "dtype") or value.dtype.name != self.metadata.dtype.name:
            if hasattr(value, "astype"):
                # Handle things that are already NDArrayLike more efficiently
                value = value.astype(dtype=self.metadata.dtype, order="A")
            else:
                value = np.array(value, dtype=self.metadata.dtype, order="A")
    value = cast(NDArrayLike, value)

    # We accept any ndarray like object from the user and convert it
    # to a NDBuffer (or subclass). From this point onwards, we only pass
    # Buffer and NDBuffer between components.
    value_buffer = prototype.nd_buffer.from_ndarray_like(value)

    # need to use the order from the metadata for v2
    _config = self._config
    if self.metadata.zarr_format == 2:
        _config = replace(_config, order=self.metadata.order)

    # merging with existing data and encoding chunks
    await self.codec_pipeline.write(
        [
            (
                self.store_path / self.metadata.encode_chunk_key(chunk_coords),
                self.metadata.get_chunk_spec(chunk_coords, _config, prototype),
                chunk_selection,
                out_selection,
            )
            for chunk_coords, chunk_selection, out_selection in indexer
        ],
        value_buffer,
        drop_axes=indexer.drop_axes,
    )
async def setitem(
    self,
    selection: BasicSelection,
    value: npt.ArrayLike,
    prototype: BufferPrototype | None = None,
) -> None:
    """
    Asynchronously set values in the array using basic indexing.

    Parameters
    ----------
    selection : BasicSelection
        The selection defining the region of the array to set.
    value : numpy.typing.ArrayLike
        The values to be written into the selected region of the array.
    prototype : BufferPrototype or None, optional
        A prototype buffer that defines the structure and properties of the
        array chunks being modified. If None (the default), the default buffer
        prototype is used.

    Returns
    -------
    None
        This method does not return any value.

    Raises
    ------
    IndexError
        If the selection is out of bounds for the array.
    ValueError
        If the values are not compatible with the array's dtype or shape.

    Notes
    -----
    - This method is asynchronous and should be awaited.
    - Supports basic indexing, where the selection is contiguous and does not
      involve advanced indexing.
    """
    buffer_prototype = default_buffer_prototype() if prototype is None else prototype
    array_metadata = self.metadata
    indexer = BasicIndexer(
        selection,
        shape=array_metadata.shape,
        chunk_grid=array_metadata.chunk_grid,
    )
    return await self._set_selection(indexer, value, prototype=buffer_prototype)
async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None:
    """
    Asynchronously resize the array to a new shape.

    The new metadata is written to the store and then replaces the metadata
    of this instance in place.

    Parameters
    ----------
    new_shape : ShapeLike
        The desired new shape of the array. It must have the same number of
        dimensions as the current shape.
    delete_outside_chunks : bool, optional
        If True (default), chunks that fall outside the new shape will be deleted.
        If False, the data in those chunks will be preserved.

    Returns
    -------
    None
        The array is modified in place.

    Raises
    ------
    ValueError
        If ``new_shape`` does not have the same number of dimensions as the array.

    Notes
    -----
    - This method is asynchronous and should be awaited.
    """
    new_shape = parse_shapelike(new_shape)
    old_shape = self.metadata.shape
    # Raise explicitly instead of asserting: asserts are stripped under ``python -O``,
    # which would let a shape of the wrong rank reach the stored metadata.
    if len(new_shape) != len(old_shape):
        raise ValueError(
            "The new shape must have the same number of dimensions as the array "
            f"({len(old_shape)}). Got {len(new_shape)}: {new_shape}."
        )

    new_metadata = self.metadata.update_shape(new_shape)

    if delete_outside_chunks:
        # Remove all chunks outside of the new shape
        chunk_grid = self.metadata.chunk_grid
        old_chunk_coords = set(chunk_grid.all_chunk_coords(old_shape))
        new_chunk_coords = set(chunk_grid.all_chunk_coords(new_shape))

        async def _delete_key(key: str) -> None:
            await (self.store_path / key).delete()

        await concurrent_map(
            [
                (self.metadata.encode_chunk_key(chunk_coords),)
                for chunk_coords in old_chunk_coords.difference(new_chunk_coords)
            ],
            _delete_key,
            zarr_config.get("async.concurrency"),
        )

    # Write new metadata
    await self._save_metadata(new_metadata)

    # Update metadata (in place)
    object.__setattr__(self, "metadata", new_metadata)
async def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
    """Append `data` to `axis`.

    The array is resized along `axis` to make room, then `data` is written
    into the newly added region.

    Parameters
    ----------
    data : array-like
        Data to be appended. It must have the same number of dimensions as
        the array.
    axis : int
        Axis along which to append. Negative values count from the last axis.

    Returns
    -------
    new_shape : tuple
        The shape of the array after the append.

    Raises
    ------
    ValueError
        If `axis` is out of bounds, or if the number of dimensions or the size
        of any dimension other than `axis` differs between the array and `data`.

    Notes
    -----
    The size of all dimensions other than `axis` must match between this
    array and `data`.
    """
    # ensure data is array-like
    if not hasattr(data, "shape"):
        data = np.asanyarray(data)

    # remember old shape
    old_shape = tuple(self.shape)
    data_shape = tuple(data.shape)
    ndim = len(old_shape)

    # Normalize the axis first. Without this, a negative or out-of-range axis
    # never equals any dimension index below, so no dimension would be extended
    # and the existing contents would be overwritten instead of appended to.
    if not -ndim <= axis < ndim:
        raise ValueError(f"axis {axis} is out of bounds for array of dimension {ndim}")
    axis = axis % ndim

    if len(data_shape) != ndim:
        raise ValueError(
            f"data to append must have {ndim} dimension(s) like the array, "
            f"got {len(data_shape)}."
        )

    self_shape_preserved = tuple(s for i, s in enumerate(old_shape) if i != axis)
    data_shape_preserved = tuple(s for i, s in enumerate(data_shape) if i != axis)
    if self_shape_preserved != data_shape_preserved:
        raise ValueError(
            f"shape of data to append is not compatible with the array. "
            f"The shape of the data is ({data_shape_preserved}) "
            f"and the shape of the array is ({self_shape_preserved}). "
            "All dimensions must match except for the dimension being "
            "appended."
        )

    # determine new shape
    new_shape = tuple(
        size + data_shape[i] if i == axis else size for i, size in enumerate(old_shape)
    )

    # resize
    await self.resize(new_shape)

    # store data in the region that the resize just added
    append_selection = tuple(
        slice(old_shape[i], new_shape[i]) if i == axis else slice(None) for i in range(ndim)
    )
    await self.setitem(append_selection, data)

    return new_shape
async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self:
    """
    Asynchronously replace the array's attributes.

    Parameters
    ----------
    new_attributes : dict of str to JSON
        The attributes the array should have afterwards. The keys represent
        attribute names, and the values must be JSON-compatible.

    Returns
    -------
    AsyncArray
        This array, with the updated attributes.

    Notes
    -----
    - This method is asynchronous and should be awaited.
    - The existing attributes are cleared first, so attributes that are not
      present in ``new_attributes`` are removed rather than merged.
    """
    # Snapshot the input first: if the caller passes the array's own attribute
    # dict, clearing it below would otherwise discard every attribute.
    replacement = dict(new_attributes)

    # metadata.attributes is "frozen" so we simply clear and update the dict
    attributes = self.metadata.attributes
    attributes.clear()
    attributes.update(replacement)

    # Write new metadata
    await self._save_metadata(self.metadata)

    return self
def __repr__(self) -> str:
    """Short description showing the store path, shape and dtype."""
    location, shape, dtype = self.store_path, self.shape, self.dtype
    return f"<AsyncArray {location} shape={shape} dtype={dtype}>"

@property
def info(self) -> Any:
    """
    Return the statically known information for an array.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.

    Examples
    --------

    >>> arr = await zarr.api.asynchronous.create(
    ...     path="array", shape=(3, 4, 5), chunks=(2, 2, 2)
    ... )
    >>> arr.info
    Type               : Array
    Zarr format        : 3
    Data type          : DataType.float64
    Shape              : (3, 4, 5)
    Chunk shape        : (2, 2, 2)
    Order              : C
    Read-only          : False
    Store type         : MemoryStore
    Codecs             : [{'endian': <Endian.little: 'little'>}]
    No. bytes          : 480
    """
    static_info = self._info()
    return static_info
async def info_complete(self) -> Any:
    """
    Return all the information for an array, including dynamic information
    like a storage size.

    In addition to the static information, this provides

    - The count of chunks initialized
    - The sum of the bytes written

    Returns
    -------
    ArrayInfo

    See Also
    --------
    AsyncArray.info
        A property giving just the statically known information about an array.
    """
    # The two lookups are awaited one after the other, chunk count first.
    initialized_count = await self.nchunks_initialized()
    location = self.store_path
    stored_bytes = await location.store.getsize_prefix(location.path)
    return self._info(initialized_count, stored_bytes)
def _info(
    self, count_chunks_initialized: int | None = None, count_bytes_stored: int | None = None
) -> Any:
    """
    Build the ``ArrayInfo`` summary for this array.

    Parameters
    ----------
    count_chunks_initialized : int | None, default=None
        Number of initialized chunks to report, if known.
    count_bytes_stored : int | None, default=None
        Number of stored bytes to report, if known.
    """
    array_metadata = self.metadata
    # v2 metadata exposes the data type as ``dtype``; otherwise ``data_type`` is used.
    _data_type: np.dtype[Any] | DataType = (
        array_metadata.dtype
        if isinstance(array_metadata, ArrayV2Metadata)
        else array_metadata.data_type
    )
    return ArrayInfo(
        _zarr_format=self.metadata.zarr_format,
        _data_type=_data_type,
        _shape=self.shape,
        _order=self.order,
        _shard_shape=self.shards,
        _chunk_shape=self.chunks,
        _read_only=self.read_only,
        _compressors=self.compressors,
        _filters=self.filters,
        _serializer=self.serializer,
        _store_type=type(self.store_path.store).__name__,
        _count_bytes=self.nbytes,
        _count_bytes_stored=count_bytes_stored,
        _count_chunks_initialized=count_chunks_initialized,
    )
# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
@dataclass(frozen=False)
class Array:
    """Instantiate an array from an initialized store.

    Synchronous wrapper that holds an :class:`AsyncArray` in ``_async_array``.
    """

    # The wrapped asynchronous array.
    _async_array: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]
@classmethod
@deprecated("Use zarr.create_array instead.")
@_deprecate_positional_args
def create(
    cls,
    store: StoreLike,
    *,
    # v2 and v3
    shape: ChunkCoords,
    dtype: npt.DTypeLike,
    zarr_format: ZarrFormat = 3,
    fill_value: Any | None = None,
    attributes: dict[str, JSON] | None = None,
    # v3 only
    chunk_shape: ChunkCoords | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = None,
    codecs: Iterable[Codec | dict[str, JSON]] | None = None,
    dimension_names: Iterable[str] | None = None,
    # v2 only
    chunks: ChunkCoords | None = None,
    dimension_separator: Literal[".", "/"] | None = None,
    order: MemoryOrder | None = None,
    filters: list[dict[str, JSON]] | None = None,
    compressor: dict[str, JSON] | None = None,
    # runtime
    overwrite: bool = False,
    config: ArrayConfig | ArrayConfigLike | None = None,
) -> Array:
    """Creates a new Array instance from an initialized store.

    .. deprecated:: 3.0.0
        Deprecated in favor of :func:`zarr.create_array`.

    Every argument is forwarded unchanged to ``Array._create``.

    Parameters
    ----------
    store : StoreLike
        The array store that has already been initialized.
    shape : ChunkCoords
        The shape of the array.
    dtype : npt.DTypeLike
        The data type of the array.
    zarr_format : ZarrFormat, optional
        The Zarr format version of the array (default is 3).
    fill_value : Any, optional
        The fill value of the array (default is None).
    attributes : dict[str, JSON], optional
        Attributes to store with the array (default is None).
    chunk_shape : ChunkCoords, optional
        The shape of the Array's chunks.
        Zarr format 3 only. Zarr format 2 arrays should use ``chunks`` instead.
        If not specified, defaults are guessed based on the shape and dtype.
    chunk_key_encoding : ChunkKeyEncoding, optional
        A specification of how the chunk keys are represented in storage.
        Zarr format 3 only. Zarr format 2 arrays should use ``dimension_separator`` instead.
        Default is ``("default", "/")``.
    codecs : Sequence of Codecs or dicts, optional
        An iterable of Codec or dict serializations of Codecs. The elements of
        this collection specify the transformation from array values to stored bytes.
        Zarr format 3 only. Zarr format 2 arrays should use ``filters`` and ``compressor`` instead.

        If no codecs are provided, default codecs will be used:

        - For numeric arrays, the default is ``BytesCodec`` and ``ZstdCodec``.
        - For Unicode strings, the default is ``VLenUTF8Codec`` and ``ZstdCodec``.
        - For bytes or objects, the default is ``VLenBytesCodec`` and ``ZstdCodec``.

        These defaults can be changed by modifying the value of ``array.v3_default_filters``,
        ``array.v3_default_serializer`` and ``array.v3_default_compressors`` in
        :mod:`zarr.core.config`.
    dimension_names : Iterable[str], optional
        The names of the dimensions (default is None).
        Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
    chunks : ChunkCoords, optional
        The shape of the array's chunks.
        Zarr format 2 only. Zarr format 3 arrays should use ``chunk_shape`` instead.
        If not specified, defaults are guessed based on the shape and dtype.
    dimension_separator : Literal[".", "/"], optional
        The dimension separator (default is ".").
        Zarr format 2 only. Zarr format 3 arrays should use ``chunk_key_encoding`` instead.
    order : Literal["C", "F"], optional
        The memory order of the array (default is "C").
        If ``zarr_format`` is 2, this parameter sets the memory order of the array.
        If ``zarr_format`` is 3, then this parameter is deprecated, because memory order
        is a runtime parameter for Zarr 3 arrays. The recommended way to specify the memory
        order for Zarr 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``.
    filters : list[dict[str, JSON]], optional
        Sequence of filters to use to encode chunk data prior to compression.
        Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead. If no ``filters``
        are provided, a default set of filters will be used.
        These defaults can be changed by modifying the value of ``array.v2_default_filters``
        in :mod:`zarr.core.config`.
    compressor : dict[str, JSON], optional
        Primary compressor to compress chunk data.
        Zarr format 2 only. Zarr format 3 arrays should use ``codecs`` instead.

        If no ``compressor`` is provided, a default compressor will be used:

        - For numeric arrays, the default is ``ZstdCodec``.
        - For Unicode strings, the default is ``VLenUTF8Codec``.
        - For bytes or objects, the default is ``VLenBytesCodec``.

        These defaults can be changed by modifying the value of ``array.v2_default_compressor``
        in :mod:`zarr.core.config`.
    overwrite : bool, optional
        If True, existing content in the store may be overwritten instead of
        raising an error (default is False).
    config : ArrayConfig or ArrayConfigLike, optional
        Runtime configuration for the array (default is None).

    Returns
    -------
    Array
        Array created from the store.
    """
    return cls._create(
        store,
        # runtime
        config=config,
        overwrite=overwrite,
        # v2 and v3
        shape=shape,
        dtype=dtype,
        zarr_format=zarr_format,
        fill_value=fill_value,
        attributes=attributes,
        # v3 only
        chunk_shape=chunk_shape,
        chunk_key_encoding=chunk_key_encoding,
        codecs=codecs,
        dimension_names=dimension_names,
        # v2 only
        chunks=chunks,
        dimension_separator=dimension_separator,
        order=order,
        filters=filters,
        compressor=compressor,
    )
@classmethod
def _create(
    cls,
    store: StoreLike,
    *,
    # v2 and v3
    shape: ChunkCoords,
    dtype: npt.DTypeLike,
    zarr_format: ZarrFormat = 3,
    fill_value: Any | None = None,
    attributes: dict[str, JSON] | None = None,
    # v3 only
    chunk_shape: ChunkCoords | None = None,
    chunk_key_encoding: (
        ChunkKeyEncoding
        | tuple[Literal["default"], Literal[".", "/"]]
        | tuple[Literal["v2"], Literal[".", "/"]]
        | None
    ) = None,
    codecs: Iterable[Codec | dict[str, JSON]] | None = None,
    dimension_names: Iterable[str] | None = None,
    # v2 only
    chunks: ChunkCoords | None = None,
    dimension_separator: Literal[".", "/"] | None = None,
    order: MemoryOrder | None = None,
    filters: list[dict[str, JSON]] | None = None,
    compressor: dict[str, JSON] | None = None,
    # runtime
    overwrite: bool = False,
    config: ArrayConfig | ArrayConfigLike | None = None,
) -> Array:
    """Creates a new Array instance from an initialized store.

    Runs ``AsyncArray._create`` to completion through ``sync`` and wraps the
    result. See :func:`Array.create` for the meaning of the parameters.
    Deprecated in favor of :func:`zarr.create_array`.
    """
    creation = AsyncArray._create(
        store=store,
        shape=shape,
        dtype=dtype,
        zarr_format=zarr_format,
        attributes=attributes,
        fill_value=fill_value,
        chunk_shape=chunk_shape,
        chunk_key_encoding=chunk_key_encoding,
        codecs=codecs,
        dimension_names=dimension_names,
        chunks=chunks,
        dimension_separator=dimension_separator,
        order=order,
        filters=filters,
        compressor=compressor,
        overwrite=overwrite,
        config=config,
    )
    return cls(sync(creation))
@classmethod
def from_dict(
    cls,
    store_path: StorePath,
    data: dict[str, JSON],
) -> Array:
    """
    Create a Zarr array from a dictionary.

    The work is delegated to ``AsyncArray.from_dict``; the result is wrapped
    in a synchronous ``Array``.

    Parameters
    ----------
    store_path : StorePath
        The path within the store where the array should be created.
    data : dict
        A dictionary representing the array data. This dictionary should
        include necessary metadata for the array, such as shape, dtype,
        fill value, and attributes.

    Returns
    -------
    Array
        The created Zarr array.

    Raises
    ------
    ValueError
        If the dictionary data is invalid or missing required fields
        for array creation.
    """
    return cls(AsyncArray.from_dict(store_path=store_path, data=data))
@classmethod
def open(
    cls,
    store: StoreLike,
) -> Array:
    """Opens an existing Array from a store.

    Runs ``AsyncArray.open`` to completion through ``sync`` and wraps the result.

    Parameters
    ----------
    store : Store
        Store containing the Array.

    Returns
    -------
    Array
        Array opened from the store.
    """
    opened = sync(AsyncArray.open(store))
    return cls(opened)
@property
def store(self) -> Store:
    """The ``store`` of the wrapped asynchronous array."""
    wrapped = self._async_array
    return wrapped.store

@property
def ndim(self) -> int:
    """Returns the number of dimensions in the array.

    Returns
    -------
    int
        The number of dimensions in the array.
    """
    wrapped = self._async_array
    return wrapped.ndim

@property
def shape(self) -> ChunkCoords:
    """Returns the shape of the array.

    Returns
    -------
    ChunkCoords
        The shape of the array.
    """
    wrapped = self._async_array
    return wrapped.shape

@shape.setter
def shape(self, value: ChunkCoords) -> None:
    """Sets the shape of the array by calling resize."""
    new_shape = value
    self.resize(new_shape)

@property
def chunks(self) -> ChunkCoords:
    """Returns a tuple of integers describing the length of each dimension of a chunk of the array.
    If sharding is used the inner chunk shape is returned.

    Only defined for arrays using `RegularChunkGrid`.
    If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.

    Returns
    -------
    tuple
        A tuple of integers representing the length of each dimension of a chunk.
    """
    wrapped = self._async_array
    return wrapped.chunks

@property
def shards(self) -> ChunkCoords | None:
    """Returns a tuple of integers describing the length of each dimension of a shard of the array.
    Returns None if sharding is not used.

    Only defined for arrays using `RegularChunkGrid`.
    If array doesn't use `RegularChunkGrid`, `NotImplementedError` is raised.

    Returns
    -------
    tuple | None
        A tuple of integers representing the length of each dimension of a shard
        or None if sharding is not used.
    """
    wrapped = self._async_array
    return wrapped.shards

@property
def size(self) -> int:
    """Returns the total number of elements in the array.

    Returns
    -------
    int
        Total number of elements in the array.
    """
    wrapped = self._async_array
    return wrapped.size

@property
def dtype(self) -> np.dtype[Any]:
    """Returns the NumPy data type.

    Returns
    -------
    np.dtype
        The NumPy data type.
    """
    wrapped = self._async_array
    return wrapped.dtype

@property
def attrs(self) -> Attributes:
    """Returns a MutableMapping containing user-defined attributes.

    Returns
    -------
    attrs : MutableMapping
        A MutableMapping object containing user-defined attributes.

    Notes
    -----
    Note that attribute values must be JSON serializable.
    """
    # A new Attributes wrapper around this array is built on every access.
    return Attributes(self)

@property
def path(self) -> str:
    """Storage path."""
    wrapped = self._async_array
    return wrapped.path

@property
def name(self) -> str:
    """Array name following h5py convention."""
    wrapped = self._async_array
    return wrapped.name

@property
def basename(self) -> str:
    """Final component of name."""
    wrapped = self._async_array
    return wrapped.basename

@property
def metadata(self) -> ArrayMetadata:
    """The metadata of the wrapped asynchronous array."""
    wrapped = self._async_array
    return wrapped.metadata

@property
def store_path(self) -> StorePath:
    """The store path of the wrapped asynchronous array."""
    wrapped = self._async_array
    return wrapped.store_path

@property
def order(self) -> MemoryOrder:
    """The memory order of the wrapped asynchronous array."""
    wrapped = self._async_array
    return wrapped.order

@property
def read_only(self) -> bool:
    """Whether the wrapped asynchronous array is read-only."""
    wrapped = self._async_array
    return wrapped.read_only

@property
def fill_value(self) -> Any:
    """The fill value recorded in the array metadata."""
    array_metadata = self.metadata
    return array_metadata.fill_value

@property
def filters(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[ArrayArrayCodec, ...]:
    """
    Filters that are applied to each chunk of the array, in order, before serializing that
    chunk to bytes.
    """
    wrapped = self._async_array
    return wrapped.filters

@property
def serializer(self) -> None | ArrayBytesCodec:
    """
    Array-to-bytes codec to use for serializing the chunks into bytes.
    """
    wrapped = self._async_array
    return wrapped.serializer

@property
@deprecated("Use Array.compressors instead.")
def compressor(self) -> numcodecs.abc.Codec | None:
    """
    Compressor that is applied to each chunk of the array.

    .. deprecated:: 3.0.0
        `array.compressor` is deprecated and will be removed in a future release.
        Use `array.compressors` instead.
    """
    wrapped = self._async_array
    return wrapped.compressor

@property
def compressors(self) -> tuple[numcodecs.abc.Codec, ...] | tuple[BytesBytesCodec, ...]:
    """
    Compressors that are applied to each chunk of the array. Compressors are applied in order, and after any
    filters are applied (if any are specified) and the data is serialized into bytes.
    """
    wrapped = self._async_array
    return wrapped.compressors

@property
def cdata_shape(self) -> ChunkCoords:
    """
    The shape of the chunk grid for this array.
    """
    return tuple(
        ceildiv(length, chunk_length)
        for length, chunk_length in zip(self.shape, self.chunks, strict=False)
    )

@property
def nchunks(self) -> int:
    """
    The number of chunks in the stored representation of this array.
    """
    wrapped = self._async_array
    return wrapped.nchunks

def _iter_chunk_coords(
    self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
) -> Iterator[ChunkCoords]:
    """
    Create an iterator over the coordinates of chunks in chunk grid space.

    With `origin`, iteration starts at that chunk index instead of the origin of
    the grid. With `selection_shape`, iteration is bounded to the contiguous
    region `[origin, origin + selection_shape]`, where the upper bound is
    exclusive as per python indexing conventions.

    Parameters
    ----------
    origin : Sequence[int] | None, default=None
        The origin of the selection relative to the array's chunk grid.
    selection_shape : Sequence[int] | None, default=None
        The shape of the selection in chunk grid coordinates.

    Yields
    ------
    chunk_coords: ChunkCoords
        The coordinates of each chunk in the selection.
    """
    coords_iter = self._async_array._iter_chunk_coords(
        origin=origin, selection_shape=selection_shape
    )
    yield from coords_iter

@property
def nbytes(self) -> int:
    """
    The total number of bytes that can be stored in the chunks of this array.

    Notes
    -----
    The value is the number of elements multiplied by the item size of the
    dtype. For this reason, ``nbytes`` will likely be inaccurate for arrays with
    variable-length dtypes: the size of such an array cannot be determined from
    the shape and dtype alone.
    """
    wrapped = self._async_array
    return wrapped.nbytes

@property
def nchunks_initialized(self) -> int:
    """
    Calculate the number of chunks that have been initialized, i.e. the number of chunks that have
    been persisted to the storage backend.

    Returns
    -------
    nchunks_initialized : int
        The number of chunks that have been initialized.

    Notes
    -----
    On :class:`Array` this is a (synchronous) property, unlike asynchronous function
    :meth:`AsyncArray.nchunks_initialized`.

    Examples
    --------
    >>> arr = zarr.create(shape=(10,), chunks=(2,))
    >>> arr.nchunks_initialized
    0
    >>> arr[:5] = 1
    >>> arr.nchunks_initialized
    3
    """
    pending = self._async_array.nchunks_initialized()
    return sync(pending)
def nbytes_stored(self) -> int:
    """
    Determine the size, in bytes, of the array actually written to the store.

    Runs ``AsyncArray.nbytes_stored`` to completion through ``sync``.

    Returns
    -------
    size : int
    """
    pending = self._async_array.nbytes_stored()
    return sync(pending)
def _iter_chunk_keys(
    self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
) -> Iterator[str]:
    """
    Iterate over the storage keys of each chunk, relative to an optional origin, and optionally
    limited to a contiguous region in chunk grid coordinates.

    Parameters
    ----------
    origin : Sequence[int] | None, default=None
        The origin of the selection relative to the array's chunk grid.
    selection_shape : Sequence[int] | None, default=None
        The shape of the selection in chunk grid coordinates.

    Yields
    ------
    key: str
        The storage key of each chunk in the selection.
    """
    # Pure delegation to the wrapped async array.
    yield from self._async_array._iter_chunk_keys(
        origin=origin, selection_shape=selection_shape
    )

def _iter_chunk_regions(
    self, origin: Sequence[int] | None = None, selection_shape: Sequence[int] | None = None
) -> Iterator[tuple[slice, ...]]:
    """
    Iterate over the regions spanned by each chunk.

    Parameters
    ----------
    origin : Sequence[int] | None, default=None
        The origin of the selection relative to the array's chunk grid.
    selection_shape : Sequence[int] | None, default=None
        The shape of the selection in chunk grid coordinates.

    Yields
    ------
    region: tuple[slice, ...]
        A tuple of slice objects representing the region spanned by each chunk in the selection.
    """
    # Pure delegation to the wrapped async array.
    yield from self._async_array._iter_chunk_regions(
        origin=origin, selection_shape=selection_shape
    )

def __array__(
    self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
) -> NDArrayLike:
    """
    This method is used by numpy when converting zarr.Array into a numpy array.
    For more information, see https://numpy.org/devdocs/user/basics.interoperability.html#the-array-method

    Parameters
    ----------
    dtype : npt.DTypeLike, optional
        If given, the loaded data is converted with ``astype(dtype)``.
    copy : bool, optional
        ``False`` is rejected because the whole array is always read into a new
        in-memory array.

    Returns
    -------
    NDArrayLike
        The full contents of the array (``self[...]``), optionally cast to `dtype`.

    Raises
    ------
    ValueError
        If ``copy`` is ``False``.
    """
    if copy is False:
        msg = "`copy=False` is not supported. This method always creates a copy."
        raise ValueError(msg)

    arr_np = self[...]

    if dtype is not None:
        arr_np = arr_np.astype(dtype)

    return arr_np

def __getitem__(self, selection: Selection) -> NDArrayLike:
    """Retrieve data for an item or region of the array.

    Parameters
    ----------
    selection : tuple
        An integer index or slice or tuple of int/slice objects specifying the
        requested item or region for each dimension of the array.

    Returns
    -------
    NDArrayLike
         An array-like containing the data for the requested region.

    Examples
    --------
    Setup a 1-dimensional array::

        >>> import zarr
        >>> import numpy as np
        >>> data = np.arange(100, dtype="uint16")
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=(10,),
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve a single item::

        >>> z[5]
        5

    Retrieve a region via slicing::

        >>> z[:5]
        array([0, 1, 2, 3, 4])
        >>> z[-5:]
        array([95, 96, 97, 98, 99])
        >>> z[5:10]
        array([5, 6, 7, 8, 9])
        >>> z[5:10:2]
        array([5, 7, 9])
        >>> z[::2]
        array([ 0,  2,  4, ..., 94, 96, 98])

    Load the entire array into memory::

        >>> z[...]
        array([ 0,  1,  2, ..., 97, 98, 99])

    Setup a 2-dimensional array::

        >>> data = np.arange(100, dtype="uint16").reshape(10, 10)
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=(10, 10),
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve an item::

        >>> z[2, 2]
        22

    Retrieve a region via slicing::

        >>> z[1:3, 1:3]
        array([[11, 12],
               [21, 22]])
        >>> z[1:3, :]
        array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
               [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])
        >>> z[0:5:2, 0:5:2]
        array([[ 0,  2,  4],
               [20, 22, 24],
               [40, 42, 44]])
        >>> z[::2, ::2]
        array([[ 0,  2,  4,  6,  8],
               [20, 22, 24, 26, 28],
               [40, 42, 44, 46, 48],
               [60, 62, 64, 66, 68],
               [80, 82, 84, 86, 88]])

    Notes
    -----
    Slices with step > 1 are supported, but slices with negative step are not.

    For arrays with a structured dtype, see Zarr format 2 for examples of how to use
    fields

    Currently the implementation for __getitem__ is provided by
    :func:`vindex` if the indexing is pure fancy indexing (ie a
    broadcast-compatible tuple of integer array indices), by
    :func:`get_orthogonal_selection` if the indexing is pure orthogonal
    indexing, or by :func:`get_basic_selection` otherwise.

    Effectively, this means that the following indexing modes are supported:

       - integer indexing
       - slice indexing
       - mixed slice and integer indexing
       - boolean indexing
       - fancy indexing (vectorized list of integers)

    For specific indexing options including outer indexing, see the
    methods listed under See Also.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
    set_orthogonal_selection, get_block_selection, set_block_selection,
    vindex, oindex, blocks, __setitem__

    """
    # Separate any field names from the positional part of the selection.
    fields, pure_selection = pop_fields(selection)
    if is_pure_fancy_indexing(pure_selection, self.ndim):
        # vindex is handed the original selection (fields still attached);
        # presumably it extracts them itself -- confirm against VIndex.
        return self.vindex[cast(CoordinateSelection | MaskSelection, selection)]
    elif is_pure_orthogonal_indexing(pure_selection, self.ndim):
        return self.get_orthogonal_selection(pure_selection, fields=fields)
    else:
        return self.get_basic_selection(cast(BasicSelection, pure_selection), fields=fields)

def __setitem__(self, selection: Selection, value: npt.ArrayLike) -> None:
    """Modify data for an item or region of the array.

    Parameters
    ----------
    selection : tuple
        An integer index or slice or tuple of int/slice specifying the requested
        region for each dimension of the array.
    value : npt.ArrayLike
        An array-like containing the data to be stored in the selection.

    Examples
    --------
    Setup a 1-dimensional array::

        >>> import zarr
        >>> z = zarr.zeros(
        >>>        shape=(100,),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(5,),
        >>>        dtype="i4",
        >>>       )

    Set all array elements to the same scalar value::

        >>> z[...] = 42
        >>> z[...]
        array([42, 42, 42, ..., 42, 42, 42])

    Set a portion of the array::

        >>> z[:10] = np.arange(10)
        >>> z[-10:] = np.arange(10)[::-1]
        >>> z[...]
        array([ 0, 1, 2, ..., 2, 1, 0])

    Setup a 2-dimensional array::

        >>> z = zarr.zeros(
        >>>        shape=(5, 5),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(5, 5),
        >>>        dtype="i4",
        >>>       )

    Set all array elements to the same scalar value::

        >>> z[...] = 42

    Set a portion of the array::

        >>> z[0, :] = np.arange(z.shape[1])
        >>> z[:, 0] = np.arange(z.shape[0])
        >>> z[...]
        array([[ 0,  1,  2,  3,  4],
               [ 1, 42, 42, 42, 42],
               [ 2, 42, 42, 42, 42],
               [ 3, 42, 42, 42, 42],
               [ 4, 42, 42, 42, 42]])

    Notes
    -----
    Slices with step > 1 are supported, but slices with negative step are not.

    For arrays with a structured dtype, see Zarr format 2 for examples of how to use
    fields

    Currently the implementation for __setitem__ is provided by
    :func:`vindex` if the indexing is pure fancy indexing (ie a
    broadcast-compatible tuple of integer array indices), by
    :func:`set_orthogonal_selection` if the indexing is pure orthogonal
    indexing, or by :func:`set_basic_selection` otherwise.

    Effectively, this means that the following indexing modes are supported:

       - integer indexing
       - slice indexing
       - mixed slice and integer indexing
       - boolean indexing
       - fancy indexing (vectorized list of integers)

    For specific indexing options including outer indexing, see the
    methods listed under See Also.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
    set_orthogonal_selection, get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__

    """
    # Separate any field names from the positional part of the selection.
    fields, pure_selection = pop_fields(selection)
    if is_pure_fancy_indexing(pure_selection, self.ndim):
        # vindex is handed the original selection (fields still attached);
        # presumably it extracts them itself -- confirm against VIndex.
        self.vindex[cast(CoordinateSelection | MaskSelection, selection)] = value
    elif is_pure_orthogonal_indexing(pure_selection, self.ndim):
        self.set_orthogonal_selection(pure_selection, value, fields=fields)
    else:
        self.set_basic_selection(cast(BasicSelection, pure_selection), value, fields=fields)
@_deprecate_positional_args
def get_basic_selection(
    self,
    selection: BasicSelection = Ellipsis,
    *,
    out: NDBuffer | None = None,
    prototype: BufferPrototype | None = None,
    fields: Fields | None = None,
) -> NDArrayLike:
    """Retrieve data for an item or region of the array.

    Parameters
    ----------
    selection : tuple
        A tuple specifying the requested item or region for each dimension of the
        array. May be any combination of int and/or slice or ellipsis for multidimensional arrays.
    out : NDBuffer, optional
        If given, load the selected data directly into this buffer.
    prototype : BufferPrototype, optional
        The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to
        extract data for.

    Returns
    -------
    NDArrayLike
        An array-like containing the data for the requested region.

    Examples
    --------
    Setup a 1-dimensional array::

        >>> import zarr
        >>> import numpy as np
        >>> data = np.arange(100, dtype="uint16")
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=(3,),
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve a single item::

        >>> z.get_basic_selection(5)
        5

    Retrieve a region via slicing::

        >>> z.get_basic_selection(slice(5))
        array([0, 1, 2, 3, 4])
        >>> z.get_basic_selection(slice(-5, None))
        array([95, 96, 97, 98, 99])
        >>> z.get_basic_selection(slice(5, 10))
        array([5, 6, 7, 8, 9])
        >>> z.get_basic_selection(slice(5, 10, 2))
        array([5, 7, 9])
        >>> z.get_basic_selection(slice(None, None, 2))
        array([ 0,  2,  4, ..., 94, 96, 98])

    Setup a 3-dimensional array::

        >>> data = np.arange(1000).reshape(10, 10, 10)
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=(5, 5, 5),
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve an item::

        >>> z.get_basic_selection((1, 2, 3))
        123

    Retrieve a region via slicing and Ellipsis::

        >>> z.get_basic_selection((slice(1, 3), slice(1, 3), 0))
        array([[110, 120],
               [210, 220]])
        >>> z.get_basic_selection((0, slice(1, 3), slice(None)))
        array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
               [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])
        >>> z.get_basic_selection((..., 5))
        array([[  5,  15,  25,  35,  45,  55,  65,  75,  85,  95],
               [105, 115, 125, 135, 145, 155, 165, 175, 185, 195],
               ...,
               [805, 815, 825, 835, 845, 855, 865, 875, 885, 895],
               [905, 915, 925, 935, 945, 955, 965, 975, 985, 995]])

    Notes
    -----
    Slices with step > 1 are supported, but slices with negative step are not.

    For arrays with a structured dtype, see Zarr format 2 for examples of how to use
    the `fields` parameter.

    This method provides the implementation for accessing data via the
    square bracket notation (__getitem__). See :func:`__getitem__` for examples
    using the alternative notation.

    See Also
    --------
    set_basic_selection, get_mask_selection, set_mask_selection,
    get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
    set_orthogonal_selection, get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """

    if prototype is None:
        prototype = default_buffer_prototype()
    # The indexer translates the selection into per-chunk reads against this
    # array's shape and chunk grid.
    indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid)
    return sync(
        self._async_array._get_selection(
            indexer,
            out=out,
            fields=fields,
            prototype=prototype,
        )
    )
@_deprecate_positional_args
def set_basic_selection(
    self,
    selection: BasicSelection,
    value: npt.ArrayLike,
    *,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> None:
    """Write `value` into an item or region of the array using basic indexing.

    Parameters
    ----------
    selection : tuple
        A tuple specifying the requested item or region for each dimension of the
        array. May be any combination of int and/or slice or ellipsis for multidimensional arrays.
    value : npt.ArrayLike
        An array-like containing values to be stored into the array.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to set
        data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer used for setting the data. If not provided, the
        default buffer prototype is used.

    Examples
    --------
    Setup a 1-dimensional array::

        >>> import zarr
        >>> z = zarr.zeros(
        >>>        shape=(100,),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(100,),
        >>>        dtype="i4",
        >>>       )

    Set all array elements to the same scalar value::

        >>> z.set_basic_selection(..., 42)
        >>> z[...]
        array([42, 42, 42, ..., 42, 42, 42])

    Set a portion of the array::

        >>> z.set_basic_selection(slice(10), np.arange(10))
        >>> z.set_basic_selection(slice(-10, None), np.arange(10)[::-1])
        >>> z[...]
        array([ 0, 1, 2, ..., 2, 1, 0])

    Setup a 2-dimensional array::

        >>> z = zarr.zeros(
        >>>        shape=(5, 5),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(5, 5),
        >>>        dtype="i4",
        >>>       )

    Set all array elements to the same scalar value::

        >>> z.set_basic_selection(..., 42)

    Set a portion of the array::

        >>> z.set_basic_selection((0, slice(None)), np.arange(z.shape[1]))
        >>> z.set_basic_selection((slice(None), 0), np.arange(z.shape[0]))
        >>> z[...]
        array([[ 0,  1,  2,  3,  4],
               [ 1, 42, 42, 42, 42],
               [ 2, 42, 42, 42, 42],
               [ 3, 42, 42, 42, 42],
               [ 4, 42, 42, 42, 42]])

    Notes
    -----
    For arrays with a structured dtype, see Zarr format 2 for examples of how to use
    the `fields` parameter.

    This method provides the underlying implementation for modifying data via square
    bracket notation, see :func:`__setitem__` for equivalent examples using the
    alternative notation.

    See Also
    --------
    get_basic_selection, get_mask_selection, set_mask_selection,
    get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
    set_orthogonal_selection, get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """
    buffer_proto = default_buffer_prototype() if prototype is None else prototype
    basic_indexer = BasicIndexer(selection, self.shape, self.metadata.chunk_grid)
    pending_write = self._async_array._set_selection(
        basic_indexer, value, fields=fields, prototype=buffer_proto
    )
    sync(pending_write)
@_deprecate_positional_args
def get_orthogonal_selection(
    self,
    selection: OrthogonalSelection,
    *,
    out: NDBuffer | None = None,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> NDArrayLike:
    """Read data by applying an independent selection to each dimension.

    For a 2-dimensional array this allows picking specific rows and/or
    columns. The selection for each dimension can be either an integer
    (indexing a single item), a slice, an array of integers, or a Boolean
    array where True values indicate a selection.

    Parameters
    ----------
    selection : tuple
        A selection for each dimension of the array. May be any combination of int,
        slice, integer array or Boolean array.
    out : NDBuffer, optional
        If given, load the selected data directly into this buffer.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to
        extract data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.

    Returns
    -------
    NDArrayLike
        An array-like containing the data for the requested selection.

    Examples
    --------
    Setup a 2-dimensional array::

        >>> import zarr
        >>> import numpy as np
        >>> data = np.arange(100).reshape(10, 10)
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=data.shape,
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve rows and columns via any combination of int, slice, integer array and/or
    Boolean array::

        >>> z.get_orthogonal_selection(([1, 4], slice(None)))
        array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
               [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])
        >>> z.get_orthogonal_selection((slice(None), [1, 4]))
        array([[ 1,  4],
               [11, 14],
               [21, 24],
               [31, 34],
               [41, 44],
               [51, 54],
               [61, 64],
               [71, 74],
               [81, 84],
               [91, 94]])
        >>> z.get_orthogonal_selection(([1, 4], [1, 4]))
        array([[11, 14],
               [41, 44]])
        >>> sel = np.zeros(z.shape[0], dtype=bool)
        >>> sel[1] = True
        >>> sel[4] = True
        >>> z.get_orthogonal_selection((sel, sel))
        array([[11, 14],
               [41, 44]])

    For convenience, the orthogonal selection functionality is also available via the
    `oindex` property, e.g.::

        >>> z.oindex[[1, 4], :]
        array([[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
               [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])
        >>> z.oindex[[1, 4], [1, 4]]
        array([[11, 14],
               [41, 44]])
        >>> z.oindex[sel, sel]
        array([[11, 14],
               [41, 44]])

    Notes
    -----
    Orthogonal indexing is also known as outer indexing.

    Slices with step > 1 are supported, but slices with negative step are not.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_coordinate_selection, set_coordinate_selection, set_orthogonal_selection,
    get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """
    buffer_proto = default_buffer_prototype() if prototype is None else prototype
    outer_indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid)
    pending_read = self._async_array._get_selection(
        indexer=outer_indexer, out=out, fields=fields, prototype=buffer_proto
    )
    return sync(pending_read)
@_deprecate_positional_args
def set_orthogonal_selection(
    self,
    selection: OrthogonalSelection,
    value: npt.ArrayLike,
    *,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> None:
    """Modify data via a selection for each dimension of the array.

    Parameters
    ----------
    selection : tuple
        A selection for each dimension of the array. May be any combination of int,
        slice, integer array or Boolean array.
    value : npt.ArrayLike
        An array-like array containing the data to be stored in the array.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to set
        data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer used for setting the data. If not provided, the
        default buffer prototype is used.

    Examples
    --------
    Setup a 2-dimensional array::

        >>> import zarr
        >>> z = zarr.zeros(
        >>>        shape=(5, 5),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(5, 5),
        >>>        dtype="i4",
        >>>       )

    Set data for a selection of rows::

        >>> z.set_orthogonal_selection(([1, 4], slice(None)), 1)
        >>> z[...]
        array([[0, 0, 0, 0, 0],
               [1, 1, 1, 1, 1],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [1, 1, 1, 1, 1]])

    Set data for a selection of columns::

        >>> z.set_orthogonal_selection((slice(None), [1, 4]), 2)
        >>> z[...]
        array([[0, 2, 0, 0, 2],
               [1, 2, 1, 1, 2],
               [0, 2, 0, 0, 2],
               [0, 2, 0, 0, 2],
               [1, 2, 1, 1, 2]])

    Set data for a selection of rows and columns::

        >>> z.set_orthogonal_selection(([1, 4], [1, 4]), 3)
        >>> z[...]
        array([[0, 2, 0, 0, 2],
               [1, 3, 1, 1, 3],
               [0, 2, 0, 0, 2],
               [0, 2, 0, 0, 2],
               [1, 3, 1, 1, 3]])

    Set data from a 2D array::

        >>> values = np.arange(10).reshape(2, 5)
        >>> z.set_orthogonal_selection(([0, 3], ...), values)
        >>> z[...]
        array([[0, 1, 2, 3, 4],
               [1, 3, 1, 1, 3],
               [0, 2, 0, 0, 2],
               [5, 6, 7, 8, 9],
               [1, 3, 1, 1, 3]])

    For convenience, this functionality is also available via the `oindex` property.
    E.g.::

        >>> z.oindex[[1, 4], [1, 4]] = 4
        >>> z[...]
        array([[0, 1, 2, 3, 4],
               [1, 4, 1, 1, 4],
               [0, 2, 0, 0, 2],
               [5, 6, 7, 8, 9],
               [1, 4, 1, 1, 4]])

    Notes
    -----
    Orthogonal indexing is also known as outer indexing.

    Slices with step > 1 are supported, but slices with negative step are not.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_coordinate_selection, set_coordinate_selection, get_orthogonal_selection,
    get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__
    """
    if prototype is None:
        prototype = default_buffer_prototype()
    indexer = OrthogonalIndexer(selection, self.shape, self.metadata.chunk_grid)
    # No `return`: the method is annotated `-> None`, matching the other setters.
    sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype))
@_deprecate_positional_args
def get_mask_selection(
    self,
    mask: MaskSelection,
    *,
    out: NDBuffer | None = None,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> NDArrayLike:
    """Read individual items picked out by a Boolean mask.

    The mask has the same shape as the array against which the selection is
    being made; True values indicate a selected item.

    Parameters
    ----------
    mask : ndarray, bool
        A Boolean array of the same shape as the array against which the selection is
        being made.
    out : NDBuffer, optional
        If given, load the selected data directly into this buffer.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to
        extract data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.

    Returns
    -------
    NDArrayLike
        An array-like containing the data for the requested selection.

    Examples
    --------
    Setup a 2-dimensional array::

        >>> import zarr
        >>> import numpy as np
        >>> data = np.arange(100).reshape(10, 10)
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=data.shape,
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve items by specifying a mask::

        >>> sel = np.zeros_like(z, dtype=bool)
        >>> sel[1, 1] = True
        >>> sel[4, 4] = True
        >>> z.get_mask_selection(sel)
        array([11, 44])

    For convenience, the mask selection functionality is also available via the
    `vindex` property, e.g.::

        >>> z.vindex[sel]
        array([11, 44])

    Notes
    -----
    Mask indexing is a form of vectorized or inner indexing, and is equivalent to
    coordinate indexing. Internally the mask array is converted to coordinate
    arrays by calling `np.nonzero`.

    See Also
    --------
    get_basic_selection, set_basic_selection, set_mask_selection,
    get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
    set_coordinate_selection, get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__
    """
    buffer_proto = default_buffer_prototype() if prototype is None else prototype
    mask_indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid)
    pending_read = self._async_array._get_selection(
        indexer=mask_indexer, out=out, fields=fields, prototype=buffer_proto
    )
    return sync(pending_read)
@_deprecate_positional_args
def set_mask_selection(
    self,
    mask: MaskSelection,
    value: npt.ArrayLike,
    *,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> None:
    """Write to individual items picked out by a Boolean mask.

    The mask has the same shape as the array against which the selection is
    being made; True values indicate a selected item.

    Parameters
    ----------
    mask : ndarray, bool
        A Boolean array of the same shape as the array against which the selection is
        being made.
    value : npt.ArrayLike
        An array-like containing values to be stored into the array.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to set
        data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer used for setting the data. If not provided, the
        default buffer prototype is used.

    Examples
    --------
    Setup a 2-dimensional array::

        >>> import zarr
        >>> z = zarr.zeros(
        >>>        shape=(5, 5),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(5, 5),
        >>>        dtype="i4",
        >>>       )

    Set data for a selection of items::

        >>> sel = np.zeros_like(z, dtype=bool)
        >>> sel[1, 1] = True
        >>> sel[4, 4] = True
        >>> z.set_mask_selection(sel, 1)
        >>> z[...]
        array([[0, 0, 0, 0, 0],
               [0, 1, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 1]])

    For convenience, this functionality is also available via the ``vindex`` property.
    E.g.::

        >>> z.vindex[sel] = 2
        >>> z[...]
        array([[0, 0, 0, 0, 0],
               [0, 2, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 2]])

    Notes
    -----
    Mask indexing is a form of vectorized or inner indexing, and is equivalent to
    coordinate indexing. Internally the mask array is converted to coordinate
    arrays by calling `np.nonzero`.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection,
    get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
    set_coordinate_selection, get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """
    buffer_proto = default_buffer_prototype() if prototype is None else prototype
    mask_indexer = MaskIndexer(mask, self.shape, self.metadata.chunk_grid)
    pending_write = self._async_array._set_selection(
        mask_indexer, value, fields=fields, prototype=buffer_proto
    )
    sync(pending_write)
@_deprecate_positional_args
def get_coordinate_selection(
    self,
    selection: CoordinateSelection,
    *,
    out: NDBuffer | None = None,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> NDArrayLike:
    """Read individual items addressed by their indices (coordinates).

    Parameters
    ----------
    selection : tuple
        An integer (coordinate) array for each dimension of the array.
    out : NDBuffer, optional
        If given, load the selected data directly into this buffer.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to
        extract data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.

    Returns
    -------
    NDArrayLike
        An array-like containing the data for the requested coordinate selection.

    Examples
    --------
    Setup a 2-dimensional array::

        >>> import zarr
        >>> import numpy as np
        >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10))
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=(3, 3),
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve items by specifying their coordinates::

        >>> z.get_coordinate_selection(([1, 4], [1, 4]))
        array([11, 44])

    For convenience, the coordinate selection functionality is also available via the
    `vindex` property, e.g.::

        >>> z.vindex[[1, 4], [1, 4]]
        array([11, 44])

    Notes
    -----
    Coordinate indexing is also known as point selection, and is a form of vectorized
    or inner indexing.

    Slices are not supported. Coordinate arrays must be provided for all dimensions
    of the array.

    Coordinate arrays may be multidimensional, in which case the output array will
    also be multidimensional. Coordinate arrays are broadcast against each other
    before being applied. The shape of the output will be the same as the shape of
    each coordinate array after broadcasting.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_orthogonal_selection, set_orthogonal_selection, set_coordinate_selection,
    get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """
    buffer_proto = default_buffer_prototype() if prototype is None else prototype
    coord_indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid)
    pending_read = self._async_array._get_selection(
        indexer=coord_indexer, out=out, fields=fields, prototype=buffer_proto
    )
    fetched = sync(pending_read)

    # Results without a shape are handed back untouched.
    if not hasattr(fetched, "shape"):
        return fetched
    # Otherwise give the result the selection's own shape.
    return np.array(fetched).reshape(coord_indexer.sel_shape)
@_deprecate_positional_args
def set_coordinate_selection(
    self,
    selection: CoordinateSelection,
    value: npt.ArrayLike,
    *,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> None:
    """Modify a selection of individual items, by providing the indices (coordinates)
    for each item to be modified.

    Parameters
    ----------
    selection : tuple
        An integer (coordinate) array for each dimension of the array.
    value : npt.ArrayLike
        An array-like containing values to be stored into the array.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to set
        data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer used for setting the data. If not provided, the
        default buffer prototype is used.

    Raises
    ------
    ValueError
        If `value` is a non-scalar array-like whose (flattened) shape differs from
        the shape of the coordinate selection.

    Examples
    --------
    Setup a 2-dimensional array::

        >>> import zarr
        >>> z = zarr.zeros(
        >>>        shape=(5, 5),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(5, 5),
        >>>        dtype="i4",
        >>>       )

    Set data for a selection of items::

        >>> z.set_coordinate_selection(([1, 4], [1, 4]), 1)
        >>> z[...]
        array([[0, 0, 0, 0, 0],
               [0, 1, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 1]])

    For convenience, this functionality is also available via the `vindex` property.
    E.g.::

        >>> z.vindex[[1, 4], [1, 4]] = 2
        >>> z[...]
        array([[0, 0, 0, 0, 0],
               [0, 2, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0],
               [0, 0, 0, 0, 2]])

    Notes
    -----
    Coordinate indexing is also known as point selection, and is a form of vectorized
    or inner indexing.

    Slices are not supported. Coordinate arrays must be provided for all dimensions
    of the array.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
    get_block_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """
    if prototype is None:
        prototype = default_buffer_prototype()
    # setup indexer
    indexer = CoordinateIndexer(selection, self.shape, self.metadata.chunk_grid)

    # handle value - need ndarray-like flatten value
    if not is_scalar(value, self.dtype):
        # Imported outside the `try` so that only the conversion itself is guarded.
        from numcodecs.compat import ensure_ndarray_like

        try:
            value = ensure_ndarray_like(value)  # TODO replace with agnostic
        except TypeError:
            # Handle types like `list` or `tuple`
            value = np.array(value)  # TODO replace with agnostic
    if hasattr(value, "shape") and len(value.shape) > 1:
        value = np.array(value).reshape(-1)

    if not is_scalar(value, self.dtype) and (
        isinstance(value, NDArrayLike) and indexer.shape != value.shape
    ):
        # Report the same quantity the check compares (`indexer.shape`);
        # `sel_shape[0]` is only the first dimension of a multi-dimensional
        # coordinate selection and would understate the element count.
        raise ValueError(
            f"Attempting to set a selection of {indexer.shape[0]} "
            f"elements with an array of {value.shape[0]} elements."
        )

    sync(self._async_array._set_selection(indexer, value, fields=fields, prototype=prototype))
@_deprecate_positional_args
def get_block_selection(
    self,
    selection: BasicSelection,
    *,
    out: NDBuffer | None = None,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> NDArrayLike:
    """Retrieve a selection of individual blocks (chunks), by providing the chunk
    indices (coordinates) for each selected block.

    Parameters
    ----------
    selection : int or slice or tuple of int or slice
        An integer (coordinate) or slice for each dimension of the array.
    out : NDBuffer, optional
        If given, load the selected data directly into this buffer.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to
        extract data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer to use for the output data. If not provided, the default buffer prototype is used.

    Returns
    -------
    NDArrayLike
        An array-like containing the data for the requested block selection.

    Examples
    --------
    Setup a 2-dimensional array::

        >>> import zarr
        >>> import numpy as np
        >>> data = np.arange(0, 100, dtype="uint16").reshape((10, 10))
        >>> z = zarr.create_array(
        >>>        StorePath(MemoryStore(mode="w")),
        >>>        shape=data.shape,
        >>>        chunks=(3, 3),
        >>>        dtype=data.dtype,
        >>>        )
        >>> z[:] = data

    Retrieve items by specifying their block coordinates::

        >>> z.get_block_selection((1, slice(None)))
        array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
               [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
               [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])

    Which is equivalent to::

        >>> z[3:6, :]
        array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
               [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
               [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])

    For convenience, the block selection functionality is also available via the
    `blocks` property, e.g.::

        >>> z.blocks[1]
        array([[30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
               [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
               [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]])

    Notes
    -----
    Block indexing is a convenience indexing method to work on individual chunks
    with chunk index slicing. It has the same concept as Dask's `Array.blocks`
    indexing.

    Slices are supported. However, only with a step size of one.

    Block index arrays may be multidimensional to index multidimensional arrays.
    For example::

        >>> z.blocks[0, 1:3]
        array([[ 3,  4,  5,  6,  7,  8],
               [13, 14, 15, 16, 17, 18],
               [23, 24, 25, 26, 27, 28]])

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
    set_coordinate_selection, set_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """
    if prototype is None:
        prototype = default_buffer_prototype()
    # The selection is expressed in chunk-grid coordinates; the indexer maps it
    # onto this array's shape and chunk grid.
    indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid)
    return sync(
        self._async_array._get_selection(
            indexer=indexer, out=out, fields=fields, prototype=prototype
        )
    )
@_deprecate_positional_args
def set_block_selection(
    self,
    selection: BasicSelection,
    value: npt.ArrayLike,
    *,
    fields: Fields | None = None,
    prototype: BufferPrototype | None = None,
) -> None:
    """Write to individual blocks (chunks) addressed by their chunk indices.

    Parameters
    ----------
    selection : tuple
        An integer (coordinate) or slice for each dimension of the array.
    value : npt.ArrayLike
        An array-like containing the data to be stored in the block selection.
    fields : str or sequence of str, optional
        For arrays with a structured dtype, one or more fields can be specified to set
        data for.
    prototype : BufferPrototype, optional
        The prototype of the buffer used for setting the data. If not provided, the
        default buffer prototype is used.

    Examples
    --------
    Set up a 2-dimensional array::

        >>> import zarr
        >>> z = zarr.zeros(
        >>>        shape=(6, 6),
        >>>        store=StorePath(MemoryStore(mode="w")),
        >>>        chunk_shape=(2, 2),
        >>>        dtype="i4",
        >>>       )

    Set data for a selection of items::

        >>> z.set_block_selection((1, 0), 1)
        >>> z[...]
        array([[0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0],
               [1, 1, 0, 0, 0, 0],
               [1, 1, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0]])

    For convenience, this functionality is also available via the `blocks` property.
    E.g.::

        >>> z.blocks[2, 1] = 4
        >>> z[...]
        array([[0, 0, 0, 0, 0, 0],
               [0, 0, 0, 0, 0, 0],
               [1, 1, 0, 0, 0, 0],
               [1, 1, 0, 0, 0, 0],
               [0, 0, 4, 4, 0, 0],
               [0, 0, 4, 4, 0, 0]])

        >>> z.blocks[:, 2] = 7
        >>> z[...]
        array([[0, 0, 0, 0, 7, 7],
               [0, 0, 0, 0, 7, 7],
               [1, 1, 0, 0, 7, 7],
               [1, 1, 0, 0, 7, 7],
               [0, 0, 4, 4, 7, 7],
               [0, 0, 4, 4, 7, 7]])

    Notes
    -----
    Block indexing is a convenience indexing method to work on individual chunks
    with chunk index slicing. It has the same concept as Dask's `Array.blocks`
    indexing.

    Slices are supported. However, only with a step size of one.

    See Also
    --------
    get_basic_selection, set_basic_selection, get_mask_selection, set_mask_selection,
    get_orthogonal_selection, set_orthogonal_selection, get_coordinate_selection,
    set_coordinate_selection, get_block_selection,
    vindex, oindex, blocks, __getitem__, __setitem__

    """
    buffer_proto = default_buffer_prototype() if prototype is None else prototype
    block_indexer = BlockIndexer(selection, self.shape, self.metadata.chunk_grid)
    pending_write = self._async_array._set_selection(
        block_indexer, value, fields=fields, prototype=buffer_proto
    )
    sync(pending_write)
@property
def vindex(self) -> VIndex:
    """Shortcut for vectorized (inner) indexing, see :func:`get_coordinate_selection`,
    :func:`set_coordinate_selection`, :func:`get_mask_selection` and
    :func:`set_mask_selection` for documentation and examples.

    A new ``VIndex`` wrapping this array is created on every access."""
    return VIndex(self)

@property
def oindex(self) -> OIndex:
    """Shortcut for orthogonal (outer) indexing, see :func:`get_orthogonal_selection` and
    :func:`set_orthogonal_selection` for documentation and examples.

    A new ``OIndex`` wrapping this array is created on every access."""
    return OIndex(self)

@property
def blocks(self) -> BlockIndex:
    """Shortcut for blocked chunked indexing, see :func:`get_block_selection` and
    :func:`set_block_selection` for documentation and examples.

    A new ``BlockIndex`` wrapping this array is created on every access."""
    return BlockIndex(self)
def resize(self, new_shape: ShapeLike) -> None:
    """
    Change the shape of the array by growing or shrinking one or more
    dimensions.

    Parameters
    ----------
    new_shape : tuple
        New shape of the array, passed as a single argument.

    Returns
    -------
    None
        Nothing is returned; the result of the underlying asynchronous
        ``resize`` call is discarded.

    Notes
    -----
    If one or more dimensions are shrunk, any chunks falling outside the
    new array shape will be deleted from the underlying store.
    However, it is noteworthy that the chunks partially falling inside the new array
    (i.e. boundary chunks) will remain intact, and therefore,
    the data falling outside the new array but inside the boundary chunks
    would be restored by a subsequent resize operation that grows the array size.

    Examples
    --------
    >>> import zarr
    >>> z = zarr.zeros(shape=(10000, 10000),
    >>>                chunk_shape=(1000, 1000),
    >>>                dtype="i4",)
    >>> z.shape
    (10000, 10000)
    >>> z.resize((20000, 1000))
    >>> z.shape
    (20000, 1000)
    >>> z.resize((50, 50))
    >>> z.shape
    (50, 50)
    """
    # Block until the asynchronous resize has finished.
    sync(self._async_array.resize(new_shape))
def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
    """Append `data` to `axis`.

    Parameters
    ----------
    data : array-like
        Data to be appended.
    axis : int
        Axis along which to append.

    Returns
    -------
    new_shape : tuple
        The value returned by the underlying asynchronous ``append`` call
        (the shape of the array after appending, per the examples below).

    Notes
    -----
    The size of all dimensions other than `axis` must match between this
    array and `data`.

    Examples
    --------
    >>> import numpy as np
    >>> import zarr
    >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000)
    >>> z = zarr.array(a, chunks=(1000, 100))
    >>> z.shape
    (10000, 1000)
    >>> z.append(a)
    (20000, 1000)
    >>> z.append(np.vstack([a, a]), axis=1)
    (20000, 2000)
    >>> z.shape
    (20000, 2000)
    """
    # Delegate to the async implementation and wait for its result.
    return sync(self._async_array.append(data, axis=axis))
def update_attributes(self, new_attributes: dict[str, JSON]) -> Array:
    """
    Update the array's attributes.

    Parameters
    ----------
    new_attributes : dict
        A dictionary of new attributes to update or add to the array. The keys represent
        attribute names, and the values must be JSON-compatible.

    Returns
    -------
    Array
        A new instance of this object's class wrapping the asynchronous array
        returned by the update.

    Raises
    ------
    ValueError
        If the attributes are invalid or incompatible with the array's metadata.

    Notes
    -----
    - The updated attributes will be merged with existing attributes, and any conflicts
      will be overwritten by the new values.
    """
    updated = sync(self._async_array.update_attributes(new_attributes))
    # TODO: remove this cast when type inference improves
    return type(self)(
        cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], updated)
    )
def __repr__(self) -> str:
    # Compact summary built from the store path, shape and dtype.
    return f"<Array {self.store_path} shape={self.shape} dtype={self.dtype}>"

@property
def info(self) -> Any:
    """
    Return the statically known information for an array.

    This simply forwards the ``info`` attribute of the wrapped asynchronous array.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info_complete
        All information about an array, including dynamic information
        like the number of bytes and chunks written.

    Examples
    --------
    >>> arr = zarr.create(shape=(10,), chunks=(2,), dtype="float32")
    >>> arr.info
    Type               : Array
    Zarr format        : 3
    Data type          : DataType.float32
    Shape              : (10,)
    Chunk shape        : (2,)
    Order              : C
    Read-only          : False
    Store type         : MemoryStore
    Codecs             : [BytesCodec(endian=<Endian.little: 'little'>)]
    No. bytes          : 40
    """
    return self._async_array.info
def info_complete(self) -> Any:
    """
    Returns all the information about an array, including information from the Store.

    In addition to the statically known information like ``name`` and ``zarr_format``,
    this includes additional information like the size of the array in bytes and
    the number of chunks written.

    Note that this method will need to read metadata from the store.

    Returns
    -------
    ArrayInfo

    See Also
    --------
    Array.info
        The statically known subset of metadata about an array.
    """
    # Run the asynchronous implementation to completion and return its result.
    return sync(self._async_array.info_complete())
async def chunks_initialized(
    array: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata],
) -> tuple[str, ...]:
    """
    Return the keys of the chunks that have been persisted to the storage backend.

    Parameters
    ----------
    array : AsyncArray
        The array to inspect.

    Returns
    -------
    chunks_initialized : tuple[str, ...]
        The keys of the chunks that have been initialized, in the order produced by
        ``array._iter_chunk_keys()``.

    See Also
    --------
    nchunks_initialized

    """
    # Collect the listed keys into a set so each membership test below is O(1);
    # a list would make the filter quadratic in the number of chunks.
    store_contents = {
        x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path)
    }
    return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents)


def _build_parents(
    node: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup,
) -> list[AsyncGroup]:
    """
    Build in-memory ``AsyncGroup`` objects for every ancestor path of ``node``.

    Returns an empty list when ``node`` sits at the root (empty path). Otherwise the
    list starts with the root group and continues with one group per intermediate
    path component, outermost first. Each group gets fresh ``GroupMetadata`` using the
    node's ``zarr_format``.
    """
    # Imported here rather than at module level (presumably to avoid a circular import).
    from zarr.core.group import AsyncGroup, GroupMetadata

    store = node.store_path.store
    path = node.store_path.path
    if not path:
        return []

    # Every path component except the last one (the node itself) is a parent.
    required_parts = path.split("/")[:-1]
    parents = [
        # the root group
        AsyncGroup(
            metadata=GroupMetadata(zarr_format=node.metadata.zarr_format),
            store_path=StorePath(store=store, path=""),
        )
    ]

    for i in range(len(required_parts)):
        # Cumulative prefix: "a", "a/b", "a/b/c", ...
        p = "/".join(required_parts[: i + 1])
        parents.append(
            AsyncGroup(
                metadata=GroupMetadata(zarr_format=node.metadata.zarr_format),
                store_path=StorePath(store=store, path=p),
            )
        )

    return parents


def _get_default_codecs(
    np_dtype: np.dtype[Any],
) -> tuple[Codec, ...]:
    """
    Return the default Zarr format 3 codecs for ``np_dtype`` as one flat tuple:
    filters first, then the serializer, then the compressors.
    """
    filters, serializer, compressors = _get_default_chunk_encoding_v3(np_dtype)
    return filters + (serializer,) + compressors


# Accepted spellings for the ``filters`` argument of array-creation functions.
FiltersLike: TypeAlias = (
    Iterable[dict[str, JSON] | ArrayArrayCodec | numcodecs.abc.Codec]
    | ArrayArrayCodec
    | Iterable[numcodecs.abc.Codec]
    | numcodecs.abc.Codec
    | Literal["auto"]
    | None
)
# A single compressor (used by the deprecated ``compressor`` argument).
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
# Accepted spellings for the ``compressors`` argument of array-creation functions.
CompressorsLike: TypeAlias = (
    Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
    | dict[str, JSON]
    | BytesBytesCodec
    | numcodecs.abc.Codec
    | Literal["auto"]
    | None
)
# Accepted spellings for the ``serializer`` argument of array-creation functions.
SerializerLike: TypeAlias = dict[str, JSON] | ArrayBytesCodec | Literal["auto"]


class ShardsConfigParam(TypedDict):
    """Dict form of the ``shards`` argument: a shard shape plus an optional index location."""

    # Shape of each shard.
    shape: ChunkCoords
    # Where the shard index is stored; ``None`` leaves the choice to the consumer.
    index_location: ShardingCodecIndexLocation | None


# Accepted spellings for the ``shards`` argument of array-creation functions.
ShardsLike: TypeAlias = ChunkCoords | ShardsConfigParam | Literal["auto"]
async def create_array(
    store: str | StoreLike,
    *,
    name: str | None = None,
    shape: ShapeLike,
    dtype: npt.DTypeLike,
    chunks: ChunkCoords | Literal["auto"] = "auto",
    shards: ShardsLike | None = None,
    filters: FiltersLike = "auto",
    compressors: CompressorsLike = "auto",
    serializer: SerializerLike = "auto",
    fill_value: Any | None = None,
    order: MemoryOrder | None = None,
    zarr_format: ZarrFormat | None = 3,
    attributes: dict[str, JSON] | None = None,
    chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None,
    dimension_names: Iterable[str] | None = None,
    storage_options: dict[str, Any] | None = None,
    overwrite: bool = False,
    config: ArrayConfig | ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
    """Create an array.

    Parameters
    ----------
    store : str or Store
        Store or path to directory in file system or name of zip file.
    name : str or None, optional
        The name of the array within the store. If ``name`` is ``None``, the array will be located
        at the root of the store.
    shape : ChunkCoords
        Shape of the array.
    dtype : npt.DTypeLike
        Data type of the array.
    chunks : ChunkCoords, optional
        Chunk shape of the array.
        If not specified, defaults are guessed based on the shape and dtype.
    shards : ChunkCoords or dict, optional
        Shard shape of the array. The default value of ``None`` results in no sharding at all.
        A dict may be given instead of a plain shape; its optional ``index_location``
        entry selects where the shard index is stored. When that entry is missing or
        ``None``, the index is placed at the end of each shard.
    filters : Iterable[Codec], optional
        Iterable of filters to apply to each chunk of the array, in order, before serializing that
        chunk to bytes.

        For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
        and these values must be instances of ``ArrayArrayCodec``, or dict representations
        of ``ArrayArrayCodec``.
        If no ``filters`` are provided, a default set of filters will be used.
        These defaults can be changed by modifying the value of ``array.v3_default_filters``
        in :mod:`zarr.core.config`.
        Use ``None`` to omit default filters.

        For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that
        the order of your filters is consistent with the behavior of each filter.
        If no ``filters`` are provided, a default set of filters will be used.
        These defaults can be changed by modifying the value of ``array.v2_default_filters``
        in :mod:`zarr.core.config`.
        Use ``None`` to omit default filters.
    compressors : Iterable[Codec], optional
        List of compressors to apply to the array. Compressors are applied in order, and after any
        filters are applied (if any are specified) and the data is serialized into bytes.

        For Zarr format 3, a "compressor" is a codec that takes a bytestream, and
        returns another bytestream. Multiple compressors may be provided for Zarr format 3.
        If no ``compressors`` are provided, a default set of compressors will be used.
        These defaults can be changed by modifying the value of ``array.v3_default_compressors``
        in :mod:`zarr.core.config`.
        Use ``None`` to omit default compressors.

        For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may
        be provided for Zarr format 2.
        If no ``compressor`` is provided, a default compressor will be used.
        This default is configured in :mod:`zarr.core.config`.
        Use ``None`` to omit the default compressor.
    serializer : dict[str, JSON] | ArrayBytesCodec, optional
        Array-to-bytes codec to use for encoding the array data.
        Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion.
        If no ``serializer`` is provided, a default serializer will be used.
        These defaults can be changed by modifying the value of ``array.v3_default_serializer``
        in :mod:`zarr.core.config`.
    fill_value : Any, optional
        Fill value for the array.
    order : {"C", "F"}, optional
        The memory order of the array (default is "C").
        For Zarr format 2, this parameter sets the memory order of the array.
        For Zarr format 3, this parameter is deprecated, because memory order
        is a runtime parameter for Zarr format 3 arrays; this function does not forward
        ``order`` when creating a Zarr format 3 array. The recommended way to specify the
        memory order for Zarr format 3 arrays is via the ``config`` parameter,
        e.g. ``config={'order': 'C'}``.
        If no ``order`` is provided, a default order will be used.
        This default can be changed by modifying the value of ``array.order`` in :mod:`zarr.core.config`.
    zarr_format : {2, 3}, optional
        The zarr format to use when saving. If ``None``, the configured default format is used.
    attributes : dict, optional
        Attributes for the array.
    chunk_key_encoding : ChunkKeyEncoding, optional
        A specification of how the chunk keys are represented in storage.
        For Zarr format 3, the default is ``{"name": "default", "separator": "/"}``.
        For Zarr format 2, the default is ``{"name": "v2", "separator": "."}``.
    dimension_names : Iterable[str], optional
        The names of the dimensions (default is None).
        Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
    storage_options : dict, optional
        If using an fsspec URL to create the store, these will be passed to the backend implementation.
        Ignored otherwise.
    overwrite : bool, default False
        Whether to overwrite an array with the same name in the store, if one exists.
    config : ArrayConfig or ArrayConfigLike, optional
        Runtime configuration for the array.

    Returns
    -------
    AsyncArray
        The array.

    Raises
    ------
    ValueError
        For Zarr format 2, if sharding is requested, if ``serializer`` is anything other
        than ``"auto"``, or if ``dimension_names`` is given.

    Examples
    --------
    >>> import zarr
    >>> store = zarr.storage.MemoryStore(mode='w')
    >>> async_arr = await zarr.api.asynchronous.create_array(
    >>>     store=store,
    >>>     shape=(100,100),
    >>>     chunks=(10,10),
    >>>     dtype='i4',
    >>>     fill_value=0)
    <AsyncArray memory://140349042942400 shape=(100, 100) dtype=int32>
    """

    if zarr_format is None:
        zarr_format = _default_zarr_format()

    from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation

    mode: Literal["a"] = "a"
    dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
    config_parsed = parse_array_config(config)
    shape_parsed = parse_shapelike(shape)
    chunk_key_encoding_parsed = _parse_chunk_key_encoding(
        chunk_key_encoding, zarr_format=zarr_format
    )
    store_path = await make_store_path(store, path=name, mode=mode, storage_options=storage_options)
    shard_shape_parsed, chunk_shape_parsed = _auto_partition(
        array_shape=shape_parsed, shard_shape=shards, chunk_shape=chunks, dtype=dtype_parsed
    )
    chunks_out: tuple[int, ...]
    result: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]

    if zarr_format == 2:
        # Reject every v3-only feature up front before touching the store.
        if shard_shape_parsed is not None:
            msg = (
                "Zarr format 2 arrays can only be created with `shard_shape` set to `None`. "
                f"Got `shard_shape={shards}` instead."
            )

            raise ValueError(msg)

        if serializer != "auto":
            raise ValueError("Zarr format 2 arrays do not support `serializer`.")

        filters_parsed, compressor_parsed = _parse_chunk_encoding_v2(
            compressor=compressors, filters=filters, dtype=np.dtype(dtype)
        )

        if dimension_names is not None:
            raise ValueError("Zarr format 2 arrays do not support dimension names.")
        if order is None:
            order_parsed = zarr_config.get("array.order")
        else:
            order_parsed = order

        result = await AsyncArray._create_v2(
            store_path=store_path,
            shape=shape_parsed,
            dtype=dtype_parsed,
            chunks=chunk_shape_parsed,
            dimension_separator=chunk_key_encoding_parsed.separator,
            fill_value=fill_value,
            order=order_parsed,
            filters=filters_parsed,
            compressor=compressor_parsed,
            attributes=attributes,
            overwrite=overwrite,
            config=config_parsed,
        )
    else:
        array_array, array_bytes, bytes_bytes = _parse_chunk_encoding_v3(
            compressors=compressors,
            filters=filters,
            serializer=serializer,
            dtype=dtype_parsed,
        )
        sub_codecs = cast(tuple[Codec, ...], (*array_array, array_bytes, *bytes_bytes))
        codecs_out: tuple[Codec, ...]
        if shard_shape_parsed is not None:
            index_location = None
            if isinstance(shards, dict):
                # Only convert a value that is actually present: looking up the enum
                # with ``None`` would raise instead of reaching the fallback below.
                index_location_raw = shards.get("index_location")
                if index_location_raw is not None:
                    index_location = ShardingCodecIndexLocation(index_location_raw)
            if index_location is None:
                index_location = ShardingCodecIndexLocation.end
            # With sharding, the user-facing codecs become the inner codecs of a single
            # sharding codec, and the shard shape becomes the array's outer chunk shape.
            sharding_codec = ShardingCodec(
                chunk_shape=chunk_shape_parsed, codecs=sub_codecs, index_location=index_location
            )
            sharding_codec.validate(
                shape=chunk_shape_parsed,
                dtype=dtype_parsed,
                chunk_grid=RegularChunkGrid(chunk_shape=shard_shape_parsed),
            )
            codecs_out = (sharding_codec,)
            chunks_out = shard_shape_parsed
        else:
            chunks_out = chunk_shape_parsed
            codecs_out = sub_codecs

        result = await AsyncArray._create_v3(
            store_path=store_path,
            shape=shape_parsed,
            dtype=dtype_parsed,
            fill_value=fill_value,
            attributes=attributes,
            chunk_shape=chunks_out,
            chunk_key_encoding=chunk_key_encoding_parsed,
            codecs=codecs_out,
            dimension_names=dimension_names,
            overwrite=overwrite,
            config=config_parsed,
        )

    return result
def _parse_chunk_key_encoding(
    data: ChunkKeyEncoding | ChunkKeyEncodingLike | None, zarr_format: ZarrFormat
) -> ChunkKeyEncoding:
    """
    Take an implicit specification of a chunk key encoding and parse it into a ChunkKeyEncoding object.

    ``None`` selects the per-format default (``v2`` with ``"."`` for Zarr format 2,
    ``default`` with ``"/"`` otherwise). An existing ``ChunkKeyEncoding`` is used as-is,
    and anything else is passed to ``ChunkKeyEncoding.from_dict``.

    Raises
    ------
    ValueError
        If ``zarr_format`` is 2 and the resulting encoding's ``name`` is not ``"v2"``.
    """
    if data is None:
        if zarr_format == 2:
            result = ChunkKeyEncoding.from_dict({"name": "v2", "separator": "."})
        else:
            result = ChunkKeyEncoding.from_dict({"name": "default", "separator": "/"})
    elif isinstance(data, ChunkKeyEncoding):
        result = data
    else:
        result = ChunkKeyEncoding.from_dict(data)
    if zarr_format == 2 and result.name != "v2":
        msg = (
            "Invalid chunk key encoding. For Zarr format 2 arrays, the `name` field of the "
            f"chunk key encoding must be 'v2'. Got `name` = {result.name} instead."
        )
        raise ValueError(msg)
    return result


def _get_default_chunk_encoding_v3(
    np_dtype: np.dtype[Any],
) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]:
    """
    Get the default ArrayArrayCodecs, ArrayBytesCodec, and BytesBytesCodec for a given dtype.

    The dtype is bucketed into ``"string"``, ``"bytes"`` or ``"numeric"`` and that key is
    used to look up ``array.v3_default_filters``, ``array.v3_default_serializer`` and
    ``array.v3_default_compressors`` in the zarr config.
    """
    dtype = DataType.from_numpy(np_dtype)
    if dtype == DataType.string:
        dtype_key = "string"
    elif dtype == DataType.bytes:
        dtype_key = "bytes"
    else:
        dtype_key = "numeric"

    default_filters = zarr_config.get("array.v3_default_filters").get(dtype_key)
    default_serializer = zarr_config.get("array.v3_default_serializer").get(dtype_key)
    default_compressors = zarr_config.get("array.v3_default_compressors").get(dtype_key)

    filters = tuple(_parse_array_array_codec(codec_dict) for codec_dict in default_filters)
    serializer = _parse_array_bytes_codec(default_serializer)
    compressors = tuple(_parse_bytes_bytes_codec(codec_dict) for codec_dict in default_compressors)

    return filters, serializer, compressors


def _get_default_chunk_encoding_v2(
    np_dtype: np.dtype[Any],
) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
    """
    Get the default chunk encoding for Zarr format 2 arrays, given a dtype.

    Returns a ``(filters, compressor)`` pair. Either element is ``None`` when the
    corresponding default lookup yields ``None``.
    """

    compressor_dict = _default_compressor(np_dtype)
    filter_dicts = _default_filters(np_dtype)

    compressor = None
    if compressor_dict is not None:
        compressor = numcodecs.get_codec(compressor_dict)

    filters = None
    if filter_dicts is not None:
        filters = tuple(numcodecs.get_codec(f) for f in filter_dicts)

    return filters, compressor


def _parse_chunk_encoding_v2(
    *,
    compressor: CompressorsLike,
    filters: FiltersLike,
    dtype: np.dtype[Any],
) -> tuple[tuple[numcodecs.abc.Codec, ...] | None, numcodecs.abc.Codec | None]:
    """
    Generate chunk encoding classes for Zarr format 2 arrays with optional defaults.

    Parameters
    ----------
    compressor : CompressorsLike
        ``None`` or ``()`` means no compressor, ``"auto"`` selects the default, and a
        one-element tuple or list is unwrapped. Any other non-dict iterable is rejected.
    filters : FiltersLike
        ``None`` means no filters and ``"auto"`` selects the default. Otherwise every
        element of an iterable must be a numcodecs codec.
    dtype : np.dtype
        Data type used to look up the defaults.

    Returns
    -------
    tuple
        ``(filters, compressor)``.

    Raises
    ------
    TypeError
        If ``compressor`` is a non-dict iterable that is not a one-element tuple or list,
        or if an element of ``filters`` is not a numcodecs codec.
    """
    default_filters, default_compressor = _get_default_chunk_encoding_v2(dtype)

    _filters: tuple[numcodecs.abc.Codec, ...] | None
    _compressor: numcodecs.abc.Codec | None

    if compressor is None or compressor == ():
        _compressor = None
    elif compressor == "auto":
        _compressor = default_compressor
    elif isinstance(compressor, tuple | list) and len(compressor) == 1:
        _compressor = parse_compressor(compressor[0])
    else:
        if isinstance(compressor, Iterable) and not isinstance(compressor, dict):
            msg = f"For Zarr format 2 arrays, the `compressor` must be a single codec. Got an iterable with type {type(compressor)} instead."
            raise TypeError(msg)
        _compressor = parse_compressor(compressor)

    if filters is None:
        _filters = None
    elif filters == "auto":
        _filters = default_filters
    else:
        if isinstance(filters, Iterable):
            # Materialize once: validating a one-shot iterator (e.g. a generator) in
            # place would exhaust it, so ``parse_filters`` would see nothing.
            filters = tuple(filters)
            for idx, f in enumerate(filters):
                if not isinstance(f, numcodecs.abc.Codec):
                    msg = (
                        "For Zarr format 2 arrays, all elements of `filters` must be numcodecs codecs. "
                        f"Element at index {idx} has type {type(f)}, which is not a numcodecs codec."
                    )
                    raise TypeError(msg)
        _filters = parse_filters(filters)

    return _filters, _compressor


def _parse_chunk_encoding_v3(
    *,
    compressors: CompressorsLike,
    filters: FiltersLike,
    serializer: SerializerLike,
    dtype: np.dtype[Any],
) -> tuple[tuple[ArrayArrayCodec, ...], ArrayBytesCodec, tuple[BytesBytesCodec, ...]]:
    """
    Generate chunk encoding classes for v3 arrays with optional defaults.

    For ``filters`` and ``compressors``, ``None`` yields an empty tuple, ``"auto"``
    yields the dtype-specific default, and a single dict or ``Codec`` is treated as a
    one-element sequence. ``serializer`` is either ``"auto"`` (default) or parsed directly.

    Returns
    -------
    tuple
        ``(array_array_codecs, array_bytes_codec, bytes_bytes_codecs)``.
    """
    default_array_array, default_array_bytes, default_bytes_bytes = _get_default_chunk_encoding_v3(
        dtype
    )

    if filters is None:
        out_array_array: tuple[ArrayArrayCodec, ...] = ()
    elif filters == "auto":
        out_array_array = default_array_array
    else:
        maybe_array_array: Iterable[Codec | dict[str, JSON]]
        if isinstance(filters, dict | Codec):
            # A lone codec (or its dict form) is shorthand for a one-element sequence.
            maybe_array_array = (filters,)
        else:
            maybe_array_array = cast(Iterable[Codec | dict[str, JSON]], filters)
        out_array_array = tuple(_parse_array_array_codec(c) for c in maybe_array_array)

    if serializer == "auto":
        out_array_bytes = default_array_bytes
    else:
        out_array_bytes = _parse_array_bytes_codec(serializer)

    if compressors is None:
        out_bytes_bytes: tuple[BytesBytesCodec, ...] = ()
    elif compressors == "auto":
        out_bytes_bytes = default_bytes_bytes
    else:
        maybe_bytes_bytes: Iterable[Codec | dict[str, JSON]]
        if isinstance(compressors, dict | Codec):
            # A lone codec (or its dict form) is shorthand for a one-element sequence.
            maybe_bytes_bytes = (compressors,)
        else:
            maybe_bytes_bytes = cast(Iterable[Codec | dict[str, JSON]], compressors)

        out_bytes_bytes = tuple(_parse_bytes_bytes_codec(c) for c in maybe_bytes_bytes)

    return out_array_array, out_array_bytes, out_bytes_bytes


def _parse_deprecated_compressor(
    compressor: CompressorLike | None, compressors: CompressorsLike, zarr_format: int = 3
) -> CompressorsLike | None:
    """
    Reconcile the deprecated ``compressor`` argument with ``compressors``.

    ``"auto"`` is treated as "not specified" for both arguments. When ``compressor`` is
    specified it is converted to the ``compressors`` form (``None`` becomes ``()``, any
    other value becomes a one-element tuple), and a ``UserWarning`` is emitted for Zarr
    format 3. When neither argument is specified and ``zarr_format`` is 2, a blosc
    compressor config is returned. Otherwise ``compressors`` is returned unchanged.

    Raises
    ------
    ValueError
        If both ``compressor`` and ``compressors`` are specified.
    """
    if compressor != "auto":
        if compressors != "auto":
            raise ValueError("Cannot specify both `compressor` and `compressors`.")
        if zarr_format == 3:
            warn(
                "The `compressor` argument is deprecated. Use `compressors` instead.",
                category=UserWarning,
                stacklevel=2,
            )
        if compressor is None:
            # "no compression"
            compressors = ()
        else:
            compressors = (compressor,)
    elif zarr_format == 2 and compressor == compressors == "auto":
        compressors = ({"id": "blosc"},)
    return compressors