`zarr.testing.strategies`

Strategies¶

zarr.testing.strategies ¶

TrueOrFalse `module-attribute` ¶

# Type alias for a flag that must be one of the two literal booleans.
TrueOrFalse = Literal[True, False]

array_names `module-attribute` ¶

# Array names are drawn from the same strategy as generic node names.
array_names = node_names

array_shapes `module-attribute` ¶

# Union of two shape strategies, both limited to at most 4 dimensions:
# the first keeps every side between 3 and 5, the second allows sides of
# length 0 (and sets no explicit max_side).
array_shapes = npst.array_shapes(
    max_dims=4, min_side=3, max_side=5
) | npst.array_shapes(max_dims=4, min_side=0)

attrs `module-attribute` ¶

# Either no attributes at all (None) or a dictionary whose keys and values
# come from the _attr_keys / _attr_values strategies.
attrs: SearchStrategy[Mapping[str, JSON] | None] = (
    st.none() | st.dictionaries(_attr_keys, _attr_values)
)

compressors `module-attribute` ¶

# Samples either None or the string 'default'.
compressors = st.sampled_from([None, 'default'])

node_names `module-attribute` ¶

# Non-empty text over zarr_key_chars, excluding names that are rejected by
# the filters below: ".", "..", anything starting with "__", and any
# case-variant of "zarr.json".
node_names = (
    st.text(zarr_key_chars, min_size=1)
    .filter(
        lambda t: (
            t not in (".", "..")
            and not t.startswith("__")
        )
    )
    .filter(lambda name: name.lower() != "zarr.json")
)

sharding_inner_codecs `module-attribute` ¶

# Two fixed inner codec pipelines to sample from: bytes only, or bytes
# followed by Zstd.
sharding_inner_codecs: SearchStrategy[
    list[BytesCodec | ZstdCodec]
] = st.sampled_from(
    [[BytesCodec()], [BytesCodec(), ZstdCodec()]]
)

short_node_names `module-attribute` ¶

# Same character set and filters as node_names, but limited to 1-3
# characters.
short_node_names = (
    st.text(zarr_key_chars, max_size=3, min_size=1)
    .filter(
        lambda t: (
            t not in (".", "..")
            and not t.startswith("__")
        )
    )
    .filter(lambda name: name.lower() != "zarr.json")
)

stores `module-attribute` ¶

# Builds a MemoryStore from a dict supplied by st.just({}) and passes it
# through clear_store before use.
# NOTE(review): st.just presumably hands out the same dict object on every
# draw, which would make clear_store the step that isolates examples from
# each other — confirm before removing it.
stores = st.builds(MemoryStore, st.just({})).map(
    clear_store
)

subchunk_write_orders `module-attribute` ¶

# Samples one of the values listed in SUBCHUNK_WRITE_ORDER.
subchunk_write_orders: SearchStrategy[
    SubchunkWriteOrder
] = st.sampled_from(SUBCHUNK_WRITE_ORDER)

zarr_formats `module-attribute` ¶

# Samples the Zarr format version; 3 is listed first, 2 second.
zarr_formats: SearchStrategy[ZarrFormat] = st.sampled_from(
    [3, 2]
)

zarr_key_chars `module-attribute` ¶

# Single characters allowed in generated keys: ".", "-", "_", the ASCII
# digits and the ASCII letters in both cases.
zarr_key_chars = st.sampled_from(
    ".-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
)

array_metadata ¶

array_metadata(
    draw: DrawFn,
    *,
    array_shapes: Callable[
        ..., SearchStrategy[tuple[int, ...]]
    ] = array_shapes,
    zarr_formats: SearchStrategy[ZarrFormat] = zarr_formats,
    attributes: SearchStrategy[
        Mapping[str, JSON] | None
    ] = attrs,
) -> ArrayV2Metadata | ArrayV3Metadata

Source code in zarr/testing/strategies.py

@st.composite
def array_metadata(
    draw: st.DrawFn,
    *,
    array_shapes: Callable[..., st.SearchStrategy[tuple[int, ...]]] = npst.array_shapes,
    zarr_formats: st.SearchStrategy[ZarrFormat] = zarr_formats,
    attributes: SearchStrategy[Mapping[str, JSON] | None] = attrs,
) -> ArrayV2Metadata | ArrayV3Metadata:
    """Draw an array metadata document for Zarr format 2 or 3.

    Parameters
    ----------
    array_shapes
        Callable returning a shape strategy. It is called without arguments
        for the array shape and, for format 2, again with ``min_dims``,
        ``max_dims`` and ``min_side`` to draw a chunk shape of matching rank.
    zarr_formats
        Strategy for the format version; the value ``2`` selects the v2
        branch, anything else the v3 branch.
    attributes
        Strategy for the user attributes (or ``None``).

    Returns
    -------
    ArrayV2Metadata | ArrayV3Metadata
        v2 metadata has no filters and no compressor; v3 metadata uses a
        single ``BytesCodec`` and no storage transformers.
    """
    version = draw(zarr_formats)
    shape = draw(array_shapes())
    rank = len(shape)
    native_dtype = draw(dtypes())
    zarr_dtype = get_data_type_from_native_dtype(native_dtype)
    fill = draw(npst.from_dtype(native_dtype))

    if version != 2:
        grid = draw(chunk_grids(shape=shape))
        v3_attributes = draw(attributes)
        names = draw(dimension_names(ndim=rank))
        return ArrayV3Metadata(
            shape=shape,
            data_type=zarr_dtype,
            chunk_grid=grid,
            fill_value=fill,
            attributes=v3_attributes,  # type: ignore[arg-type]
            dimension_names=names,
            # FIXME: the separator is hard-coded rather than drawn.
            chunk_key_encoding=DefaultChunkKeyEncoding(separator="/"),
            codecs=[BytesCodec()],
            storage_transformers=(),
        )

    # Format 2: the chunk shape has the same rank as the array and sides >= 1.
    v2_chunks = draw(array_shapes(min_dims=rank, max_dims=rank, min_side=1))
    memory_order = draw(st.sampled_from(["C", "F"]))
    v2_attributes = draw(attributes)
    separator = draw(st.sampled_from([".", "/"]))
    return ArrayV2Metadata(
        shape=shape,
        chunks=v2_chunks,
        dtype=zarr_dtype,
        fill_value=fill,
        order=memory_order,
        attributes=v2_attributes,  # type: ignore[arg-type]
        dimension_separator=separator,
        filters=None,
        compressor=None,
    )

arrays ¶

arrays(
    draw: DrawFn,
    *,
    shapes: SearchStrategy[tuple[int, ...]] = array_shapes,
    compressors: SearchStrategy = compressors,
    stores: SearchStrategy[StoreLike] = stores,
    paths: SearchStrategy[str] = paths(),
    array_names: SearchStrategy = array_names,
    arrays: SearchStrategy | None = None,
    attrs: SearchStrategy = attrs,
    zarr_formats: SearchStrategy = zarr_formats,
    subchunk_write_orders: SearchStrategy[
        SubchunkWriteOrder
    ] = subchunk_write_orders,
    open_mode: AccessModeLiteral = "w",
) -> AnyArray

Source code in zarr/testing/strategies.py

@st.composite
def arrays(
    draw: st.DrawFn,
    *,
    shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes,
    compressors: st.SearchStrategy = compressors,
    stores: st.SearchStrategy[StoreLike] = stores,
    paths: st.SearchStrategy[str] = paths(),  # noqa: B008
    array_names: st.SearchStrategy = array_names,
    arrays: st.SearchStrategy | None = None,
    attrs: st.SearchStrategy = attrs,
    zarr_formats: st.SearchStrategy = zarr_formats,
    subchunk_write_orders: SearchStrategy[SubchunkWriteOrder] = subchunk_write_orders,
    open_mode: AccessModeLiteral = "w",
) -> AnyArray:
    """Create a Zarr array inside a drawn store, fill it with drawn data, and return it.

    Parameters
    ----------
    shapes
        Shape strategy used for the numpy data when ``arrays`` is not given.
    compressors
        Accepted for interface compatibility but not drawn from: the array is
        created with ``compressors="auto"``, or ``None`` when a sharding
        serializer is drawn (see the FIXME at the ``create_array`` call).
    stores, paths, array_names, attrs, zarr_formats
        Strategies for the store, the parent path, the array name, the user
        attributes and the Zarr format of the root group.
    arrays
        Strategy for the numpy data; defaults to ``numpy_arrays(shapes=shapes)``.
    subchunk_write_orders
        Strategy for the ``subchunk_write_order`` of a drawn ``ShardingCodec``.
    open_mode
        Mode passed to ``zarr.open_group`` for the root group.

    Returns
    -------
    AnyArray
        The created array, after its metadata has been checked with
        assertions and the drawn numpy data has been written to it.
    """
    store = draw(stores, label="store")
    path = draw(paths, label="array parent")
    name = draw(array_names, label="array name")
    attributes = draw(attrs, label="attributes")
    zarr_format = draw(zarr_formats, label="zarr format")
    if arrays is None:
        arrays = numpy_arrays(shapes=shapes)
    nparray = draw(arrays, label="array data")
    dim_names: None | list[str | None] = None
    serializer: SerializerLike = "auto"
    compressors_unsearched: CompressorsLike = "auto"

    # For v3 arrays, optionally use RectilinearChunkGridMetadata
    chunk_grid_meta: RegularChunkGridMetadata | RectilinearChunkGridMetadata | None = None

    # test that None works too.
    fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)]))

    expected_attrs = {} if attributes is None else attributes

    array_path = _join_paths([path, name])
    root = zarr.open_group(store, mode=open_mode, zarr_format=zarr_format)

    # Convert chunk grid metadata to a form create_array accepts:
    # - RegularChunkGridMetadata -> flat tuple of ints
    # - RectilinearChunkGridMetadata -> nested list of ints (triggers rectilinear path)
    # - v2 -> flat tuple of ints
    chunks_param: tuple[int, ...] | list[list[int]]
    shard_shape = None
    if zarr_format == 3:
        chunk_grid_meta = draw(st.none() | chunk_grids(shape=nparray.shape), label="chunk grid")
        dim_names = draw(dimension_names(ndim=nparray.ndim), label="dimension names")
        if isinstance(chunk_grid_meta, RectilinearChunkGridMetadata):
            chunks_param = [
                list(dim) if isinstance(dim, tuple) else [dim]
                for dim in chunk_grid_meta.chunk_shapes
            ]
        elif isinstance(chunk_grid_meta, RegularChunkGridMetadata):
            chunks_param = chunk_grid_meta.chunk_shape
        else:
            # No explicit grid was drawn: draw a plain chunk shape and, where
            # every axis has more than one chunk of size > 1, optionally shard.
            chunks_param = draw(chunk_shapes(shape=nparray.shape), label="chunk shape")

            if all(s > c and c > 1 for s, c in zip(nparray.shape, chunks_param, strict=True)):
                shard_shape = draw(
                    st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks_param),
                    label="shard shape",
                )
                if shard_shape is not None:
                    subchunk_write_order = draw(subchunk_write_orders)
                    inner_codecs = draw(sharding_inner_codecs, label="sharding inner codecs")
                    serializer = ShardingCodec(
                        subchunk_write_order=subchunk_write_order,
                        codecs=inner_codecs,
                        index_codecs=[BytesCodec(), Crc32cCodec()],
                        chunk_shape=chunks_param,
                    )
                    compressors_unsearched = None
    else:
        chunks_param = draw(chunk_shapes(shape=nparray.shape), label="chunk shape")
    a = root.create_array(
        array_path,
        shape=nparray.shape,
        chunks=chunks_param,
        shards=shard_shape,
        dtype=nparray.dtype,
        attributes=attributes,
        compressors=compressors_unsearched,  # FIXME
        fill_value=fill_value,
        dimension_names=dim_names,
        serializer=serializer,
    )

    assert isinstance(a, Array)
    if a.metadata.zarr_format == 3:
        assert a.fill_value is not None
    assert a.name is not None
    assert a.path == normalize_path(array_path)
    assert a.name == f"/{a.path}"
    assert isinstance(root[array_path], Array)
    assert nparray.shape == a.shape

    # Verify chunks — for rectilinear grids, .chunks raises
    if zarr_format == 3:
        # Checked once here; it holds for both grid kinds.
        assert shard_shape == a.shards
        if isinstance(a.metadata.chunk_grid, RegularChunkGridMetadata):
            assert a.metadata.chunk_grid.chunk_shape == (
                a.shards if shard_shape is not None else a.chunks
            )
        else:
            assert isinstance(a.metadata.chunk_grid, RectilinearChunkGridMetadata)
            assert shard_shape is None

    assert a.basename == name, (a.basename, name)
    assert dict(a.attrs) == expected_attrs

    a[:] = nparray

    return a

basic_indices ¶

basic_indices(
    draw: DrawFn,
    *,
    shape: tuple[int, ...],
    min_dims: int = 0,
    max_dims: int | None = None,
    allow_newaxis: TrueOrFalse = False,
    allow_ellipsis: TrueOrFalse = True,
) -> Any

Basic indices without unsupported negative slices.

Source code in zarr/testing/strategies.py

@st.composite
def basic_indices(
    draw: st.DrawFn,
    *,
    shape: tuple[int, ...],
    min_dims: int = 0,
    max_dims: int | None = None,
    allow_newaxis: TrueOrFalse = False,
    allow_ellipsis: TrueOrFalse = True,
) -> Any:
    """Draw a basic index for ``shape`` that contains no negative-step slice.

    Wraps ``npst.basic_indices`` (all keyword arguments are forwarded) and
    rejects any indexer that is, or contains, a slice with a negative step.
    When ``math.prod(shape) >= 3`` the ``end_slices`` strategy is offered as
    an additional alternative.
    """

    def has_no_negative_step(idxr: Any) -> bool:
        # A bare slice is checked directly, a tuple element by element.
        if is_negative_slice(idxr):
            return False
        if isinstance(idxr, tuple):
            return not any(is_negative_slice(idx) for idx in idxr)
        return True

    candidates = npst.basic_indices(
        shape=shape,
        min_dims=min_dims,
        max_dims=max_dims,
        allow_newaxis=allow_newaxis,
        allow_ellipsis=allow_ellipsis,
    )
    strategy = candidates.filter(has_no_negative_step)
    if math.prod(shape) >= 3:
        strategy = end_slices(shape=shape) | strategy
    return draw(strategy)

block_indices ¶

block_indices(
    draw: DrawFn,
    *,
    chunk_sizes: tuple[tuple[int, ...], ...],
) -> tuple[tuple[int | slice, ...], tuple[slice, ...]]

Strategy for block-selection indexers over a chunk grid.

Block indexing is basic indexing applied to the block grid (the grid of chunks), so each axis is drawn with basic_indices over that axis's chunk count, mirroring how orthogonal_indices reuses basic_indices per axis. chunk_sizes gives the per-chunk data sizes of the array's outer (block) grid for every axis — i.e. Array.write_chunk_sizes, the grid that Array.blocks addresses (the shard grid when sharding is used). For example (3, 3, 3, 1) for a length-10 axis with a regular chunk size of 3, or the explicit edges of a rectilinear axis; nchunks for an axis is len(chunk_sizes[axis]).

The array-space translation uses the cumulative sum of those sizes, matching BlockIndexer's use of dim_grid.chunk_offset. Because the sizes are clipped to the array extent, the final offset equals the extent and the translation is exact for regular (uniform), rectilinear, and sharded grids alike.

Block indexing only supports integers and step-1 slices whose start references an existing chunk, so strided slices and slices starting at the grid edge are filtered out.

Returns:

block_indexer –

A per-axis tuple of ints / step-1 slices addressing whole chunks, suitable for Array.blocks / get_block_selection / set_block_selection.
array_indexer –

The equivalent array-space selection (a tuple of slices) for indexing the corresponding numpy array, used as the comparison oracle.

Source code in zarr/testing/strategies.py

@st.composite
def block_indices(
    draw: st.DrawFn, *, chunk_sizes: tuple[tuple[int, ...], ...]
) -> tuple[tuple[int | slice, ...], tuple[slice, ...]]:
    """
    Draw a block-selection indexer together with its array-space equivalent.

    One selector is drawn per axis. ``chunk_sizes[axis]`` lists the data size
    of every chunk of the array's *outer* (block) grid along that axis — i.e.
    ``Array.write_chunk_sizes``, the grid ``Array.blocks`` addresses (the
    shard grid when sharding is used) — so ``len(chunk_sizes[axis])`` is the
    number of chunks. For example ``(3, 3, 3, 1)`` describes a length-10 axis
    with a regular chunk size of 3; a rectilinear axis lists its explicit
    edges.

    Each selector comes either from ``basic_indices`` over the chunk count
    (the same per-axis reuse as in ``orthogonal_indices``) or from an explicit
    integer in ``[0, nchunks - 1]``. Block indexing only supports integers and
    step-1 slices whose start references an existing chunk, so strided slices
    and slices starting at the grid edge are filtered out.

    The array-space slice is obtained from the running sum of the chunk
    sizes, matching ``BlockIndexer``'s use of ``dim_grid.chunk_offset``.
    Because the sizes are clipped to the array extent, the last offset equals
    the extent and the translation is exact for regular, rectilinear and
    sharded grids alike.

    Returns
    -------
    block_indexer
        A per-axis tuple of ints / step-1 slices addressing whole chunks,
        suitable for ``Array.blocks`` / ``get_block_selection`` / ``set_block_selection``.
    array_indexer
        The equivalent array-space selection (a tuple of slices) for indexing
        the corresponding numpy array, used as the comparison oracle.
    """

    def as_tuple(selection: Any) -> tuple[Any, ...]:
        # Bare ints / slices become a 1-tuple; tuples pass through unchanged.
        return selection if isinstance(selection, tuple) else (selection,)

    def starts_inside_grid(nchunks: int) -> Callable[[tuple[Any, ...]], bool]:
        # Built per axis so the predicate is bound to that axis's chunk count.
        def accept(candidate: tuple[Any, ...]) -> bool:
            selector = candidate[0]
            if not isinstance(selector, slice):
                return True
            if selector.step not in (None, 1):
                return False
            # A slice starting at nchunks raises in block indexing, unlike numpy.
            begin = selector.start or 0
            if begin < 0:
                begin = begin + nchunks
            return 0 <= begin < nchunks

        return accept

    block_indexer: list[int | slice] = []
    array_indexer: list[slice] = []
    for sizes in chunk_sizes:
        nchunks = len(sizes)
        # offsets[i] is the array-space start of chunk i; length nchunks + 1.
        offsets = list(itertools.accumulate(sizes, initial=0))
        from_basic = (
            basic_indices(min_dims=1, shape=(nchunks,), allow_ellipsis=False)
            .map(as_tuple)
            # skip the empty tuple
            .filter(bool)
            .filter(starts_inside_grid(nchunks))
        )
        # basic_indices draws slices far more often than bare integers, so an
        # explicit integer alternative keeps the single-block branch below
        # reliably exercised, also under the derandomized ``ci`` profile.
        single_block = st.integers(min_value=0, max_value=nchunks - 1).map(lambda i: (i,))
        (selector,) = draw(from_basic | single_block)
        block_indexer.append(selector)

        if isinstance(selector, slice):
            first, last, _ = selector.indices(nchunks)
            lower, upper = offsets[first], offsets[last]
        else:
            # Negative block numbers wrap around the grid.
            block = selector % nchunks
            lower, upper = offsets[block], offsets[block + 1]
        array_indexer.append(slice(lower, upper))
    return tuple(block_indexer), tuple(array_indexer)

block_test_arrays ¶

block_test_arrays(
    draw: DrawFn,
) -> tuple[Array[Any], ndarray[Any, Any]]

Draw an array for block-indexing property tests, with its source contents.

Two arms, selected with equal probability:

regular: a regular chunk grid, optionally wrapped in sharding.
rectilinear: a variable (rectilinear) chunk grid, always unsharded.

Returns (zarray, nparray). The per-axis block sizes the oracle needs are zarray.write_chunk_sizes — the array's outer (block / shard) grid, which is exactly the grid Array.blocks addresses; the caller reads it directly.

Source code in zarr/testing/strategies.py

@st.composite
def block_test_arrays(
    draw: st.DrawFn,
) -> tuple[Array[Any], np.ndarray[Any, Any]]:
    """Create an array for block-indexing property tests and return it with its data.

    A drawn boolean picks one of two arms:

    - **regular**: a regular chunk grid, optionally wrapped in sharding.
    - **rectilinear**: a variable (rectilinear) chunk grid, always unsharded.

    Returns ``(zarray, nparray)``. The per-axis block sizes the oracle needs are
    ``zarray.write_chunk_sizes`` — the array's *outer* (block / shard) grid, which
    is exactly the grid ``Array.blocks`` addresses; the caller reads it directly.
    """
    chunks: tuple[int, ...] | list[list[int]]
    use_regular_grid = draw(st.booleans())
    if not use_regular_grid:
        # rectilinear arm, always unsharded
        event("block rectilinear")
        shape = draw(_rectilinear_shapes)
        chunks = draw(rectilinear_chunks(shape=shape))
        drawn_dtype = draw(dtypes())
        nparray = draw(numpy_arrays(shapes=st.just(shape), dtype=drawn_dtype))
        shards = None
        rectilinear = True
    else:
        # regular arm, optionally sharded
        source_arrays = numpy_arrays(shapes=npst.array_shapes(max_dims=4, min_side=1))
        nparray, chunks = draw(np_array_and_chunks(arrays=source_arrays))
        # min_side=1 chunking guarantees shape // chunk >= 1 on every axis, which
        # shard_shapes requires.
        shards = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunks))
        if shards is not None:
            event("block regular sharded")
        else:
            event("block regular unsharded")
        rectilinear = False

    store = draw(stores)
    # The rectilinear config flag is only set while the array is created.
    with zarr.config.set({"array.rectilinear_chunks": rectilinear}):
        zarray = zarr.create_array(
            store=store,
            shape=nparray.shape,
            chunks=chunks,
            shards=shards,
            dtype=nparray.dtype,
        )
    zarray[...] = nparray
    return zarray, nparray

chunk_grids ¶

chunk_grids(
    draw: DrawFn, *, shape: tuple[int, ...]
) -> (
    RegularChunkGridMetadata | RectilinearChunkGridMetadata
)

Generate either a RegularChunkGridMetadata or RectilinearChunkGridMetadata.

This strategy depends on global config state, namely whether rectilinear chunk grids are enabled. It is therefore a possible source of a hypothesis FlakyStrategy error due to its dependence on global state. However, in practice this seems unlikely to happen.

This allows property tests to exercise both chunk grid types.

Source code in zarr/testing/strategies.py

@st.composite
def chunk_grids(
    draw: st.DrawFn, *, shape: tuple[int, ...]
) -> RegularChunkGridMetadata | RectilinearChunkGridMetadata:
    """Draw chunk grid metadata, regular or rectilinear, for ``shape``.

    A rectilinear grid is only produced when no dimension is zero-sized, the
    ``array.rectilinear_chunks`` config value is truthy, and a drawn boolean
    is true; every other case yields a regular grid.

    Because the result depends on global config state, this strategy is a
    possible source of a hypothesis FlakyStrategy error due to its dependence
    on that state. In practice this seems unlikely to happen.

    This allows property tests to exercise both chunk grid types.
    """
    # RectilinearChunkGridMetadata doesn't support zero-sized dimensions,
    # so those shapes always get a RegularChunkGridMetadata.
    if 0 in shape:
        event("using RegularChunkGridMetadata (zero-sized dimensions)")
        regular_shape = draw(chunk_shapes(shape=shape))
        return RegularChunkGridMetadata(chunk_shape=regular_shape)

    # The boolean is only drawn when the config flag is enabled.
    if zarr.config.get("array.rectilinear_chunks") and draw(st.booleans()):
        edges = draw(rectilinear_chunks(shape=shape))
        event("using RectilinearChunkGridMetadata")
        per_dim = tuple(tuple(dim) for dim in edges)
        return RectilinearChunkGridMetadata(chunk_shapes=per_dim)

    event("using RegularChunkGridMetadata")
    regular_shape = draw(chunk_shapes(shape=shape))
    return RegularChunkGridMetadata(chunk_shape=regular_shape)

chunk_paths ¶

chunk_paths(
    draw: DrawFn,
    ndim: int,
    numblocks: tuple[int, ...],
    subset: bool = True,
) -> str

Source code in zarr/testing/strategies.py

@st.composite
def chunk_paths(draw: st.DrawFn, ndim: int, numblocks: tuple[int, ...], subset: bool = True) -> str:
    """Draw a ``/``-separated chunk path for a grid with ``numblocks`` blocks per axis.

    One block index is drawn per axis from ``[0, max(0, b - 1)]``. When
    ``subset`` is true only a leading prefix of those indices is kept; the
    prefix length is drawn from ``[0, ndim]``.
    """
    per_axis = [st.integers(min_value=0, max_value=max(0, b - 1)) for b in numblocks]
    blockidx = draw(st.tuples(*per_axis))
    if subset:
        prefix_length = draw(st.integers(min_value=0, max_value=ndim))
        blockidx = blockidx[:prefix_length]
    return "/".join(str(index) for index in blockidx)

chunk_shapes ¶

chunk_shapes(
    draw: DrawFn, *, shape: tuple[int, ...]
) -> tuple[int, ...]

Source code in zarr/testing/strategies.py

@st.composite
def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]:
    """Draw a chunk shape (one size >= 1 per axis) for an array of ``shape``.

    The number of chunks per axis is drawn first and the chunk size is
    derived from it, so that shrinking moves towards fewer chunks.
    """
    # st.integers() shrinks towards smaller values, so draw the chunk *count*.
    count_strategies = []
    for size in shape:
        fewest = 0 if size == 0 else 1
        count_strategies.append(st.integers(min_value=fewest, max_value=max(size, 1)))
    numchunks = draw(st.tuples(*count_strategies))

    # Chunk sizes must be >= 1 per spec; for zero-extent dimensions use 1.
    derived = []
    for size, nchunks in zip(shape, numchunks, strict=True):
        if nchunks > 0:
            derived.append(max(1, size // nchunks))
        else:
            derived.append(1)
    chunks = tuple(derived)

    for chunk_size in chunks:
        event("chunk size", chunk_size)

    # Record when some axis is not evenly divided by its chunk size.
    uneven = any(s % c != 0 for s, c in zip(shape, chunks, strict=True) if c != 0)
    if uneven:
        event("smaller last chunk")

    return chunks

clear_store ¶

clear_store(x: Store) -> Store

Source code in zarr/testing/strategies.py

def clear_store(x: Store) -> Store:
    """Run ``x.clear()`` through ``sync`` and return the same store object."""
    sync(x.clear())
    return x

complex_rectilinear_arrays ¶

complex_rectilinear_arrays(
    draw: DrawFn,
    *,
    stores: SearchStrategy[StoreLike] = stores,
    paths: SearchStrategy[str] = paths(),
    array_names: SearchStrategy = array_names,
    attrs: SearchStrategy = attrs,
) -> tuple[NDArray[Any], AnyArray]

Generate a rectilinear array with many small chunks.

The shape is derived from the chunk edges (5-10 chunks per dim, sizes 1-5), exercising higher chunk counts than rectilinear_arrays.

Source code in zarr/testing/strategies.py

@st.composite
def complex_rectilinear_arrays(
    draw: st.DrawFn,
    *,
    stores: st.SearchStrategy[StoreLike] = stores,
    paths: st.SearchStrategy[str] = paths(),  # noqa: B008
    array_names: st.SearchStrategy = array_names,
    attrs: st.SearchStrategy = attrs,
) -> tuple[npt.NDArray[Any], AnyArray]:
    """Create a rectilinear v3 array with many small chunks.

    The array has 1-3 dimensions. A single chunk count between 5 and 10 is
    drawn and shared by all dimensions; every chunk edge is between 1 and 5.
    The shape is the per-dimension sum of those edges, which exercises higher
    chunk counts than ``rectilinear_arrays``.

    Returns
    -------
    tuple
        ``(nparray, array)``: the drawn numpy data and the Zarr array it was
        written to.
    """
    ndim = draw(st.integers(min_value=1, max_value=3))
    chunks_per_dim = draw(st.integers(min_value=5, max_value=10))
    edge_sizes = st.integers(min_value=1, max_value=5)
    one_dimension = st.lists(edge_sizes, min_size=chunks_per_dim, max_size=chunks_per_dim)
    edges = draw(st.lists(one_dimension, min_size=ndim, max_size=ndim))

    # The array extent along each axis is the sum of that axis's chunk edges.
    shape = tuple(sum(dim) for dim in edges)
    nparray = draw(numpy_arrays(shapes=st.just(shape)))
    dim_names = draw(dimension_names(ndim=ndim))
    fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)]))
    attributes = draw(attrs)

    store = draw(stores, label="store")
    parent = draw(paths, label="array parent")
    name = draw(array_names, label="array name")
    array_path = _join_paths([parent, name])

    root = zarr.open_group(store, mode="w", zarr_format=3)
    # Rectilinear chunks are enabled only while the array is created.
    with zarr.config.set({"array.rectilinear_chunks": True}):
        created = root.create_array(
            array_path,
            shape=shape,
            chunks=edges,
            dtype=nparray.dtype,
            fill_value=fill_value,
            dimension_names=dim_names,
            attributes=attributes,
        )
    created[:] = nparray
    return nparray, created

dimension_names ¶

dimension_names(
    draw: DrawFn, *, ndim: int | None = None
) -> list[None | str] | None

Source code in zarr/testing/strategies.py

@st.composite
def dimension_names(draw: st.DrawFn, *, ndim: int | None = None) -> list[None | str] | None:
    """Draw dimension names: ``None``, or a list whose entries are ``None`` or text.

    Parameters
    ----------
    ndim
        Exact length of the drawn list. When ``None`` the list length is
        unconstrained (any length from 0 upwards).

    Returns
    -------
    list[None | str] | None
        ``None`` or a list of names built from ``zarr_key_chars``; a name may
        be the empty string.
    """
    simple_text = st.text(zarr_key_chars, min_size=0)
    # ``st.lists`` needs an integer ``min_size``; only ``max_size`` may be None.
    min_size = 0 if ndim is None else ndim
    return draw(st.none() | st.lists(st.none() | simple_text, min_size=min_size, max_size=ndim))

dtypes ¶

dtypes() -> SearchStrategy[dtype[Any]]

Source code in zarr/testing/strategies.py

def dtypes() -> st.SearchStrategy[np.dtype[Any]]:
    """Return a strategy over the numpy dtypes used by the array strategies.

    Covers boolean, signed and unsigned integer, floating, complex, byte
    string, unicode string, datetime64 and timedelta64 dtypes. Every family
    that takes an ``endianness`` argument is requested with ``"="``.
    """
    families = (
        npst.boolean_dtypes(),
        npst.integer_dtypes(endianness="="),
        npst.unsigned_integer_dtypes(endianness="="),
        npst.floating_dtypes(endianness="="),
        npst.complex_number_dtypes(endianness="="),
        npst.byte_string_dtypes(endianness="="),
        npst.unicode_string_dtypes(endianness="="),
        npst.datetime64_dtypes(endianness="="),
        npst.timedelta64_dtypes(endianness="="),
    )
    return st.one_of(*families)

end_slices ¶

end_slices(draw: DrawFn, *, shape: tuple[int, ...]) -> Any

A strategy that slices ranges that include the last chunk. This is intended to stress-test handling of a possibly smaller last chunk.

Source code in zarr/testing/strategies.py

@st.composite
def end_slices(draw: st.DrawFn, *, shape: tuple[int, ...]) -> Any:
    """
    Draw one slice per axis that lies in the upper half of that axis.

    For an axis of length ``size`` the start is drawn from
    ``[size // 2, size - 1]`` and the length from ``[0, size - start]``, so
    the slice may reach the end of the axis but never goes past it. This is
    intended to stress-test handling of a possibly smaller last chunk.

    A zero-length axis has no valid start, so it always gets ``slice(0, 0)``
    and nothing is drawn for it.

    Returns
    -------
    tuple[slice, ...]
        One ``slice(start, start + length)`` per entry of ``shape``.
    """
    slicers = []
    for size in shape:
        if size == 0:
            # st.integers(min_value=0, max_value=-1) would be an invalid range.
            slicers.append(slice(0, 0))
            continue
        start = draw(st.integers(min_value=size // 2, max_value=size - 1))
        length = draw(st.integers(min_value=0, max_value=size - start))
        slicers.append(slice(start, start + length))
    event("drawing end slice")
    return tuple(slicers)

is_negative_slice ¶

is_negative_slice(idx: Any) -> bool

Source code in zarr/testing/strategies.py

def is_negative_slice(idx: Any) -> bool:
    """Return True when ``idx`` is a slice with an explicit negative step."""
    if not isinstance(idx, slice):
        return False
    step = idx.step
    # A missing step is the implicit step of 1, i.e. not negative.
    return step is not None and step < 0

key_ranges ¶

key_ranges(
    keys: SearchStrategy[str] = node_names,
    max_size: int = maxsize,
) -> SearchStrategy[list[tuple[str, ByteRequest | None]]]

Generates the key_ranges strategy for get_partial_values(). Returns a list strategy of the form::

[(key, byte_request),
 (key, byte_request),...]

where byte_request is None or any of the concrete ByteRequest subtypes. The bounds are drawn independently of each value's length, so the offsets/suffixes routinely exceed the data and exercise the clamping logic in _normalize_byte_range_index.

Source code in zarr/testing/strategies.py

def key_ranges(
    keys: SearchStrategy[str] = node_names, max_size: int = sys.maxsize
) -> SearchStrategy[list[tuple[str, ByteRequest | None]]]:
    """
    Build the key_ranges strategy for get_partial_values().

    The strategy yields lists of 1 to 10 pairs of the form::

        [(key, byte_request),
         (key, byte_request),...]

    where ``byte_request`` is ``None`` or any of the concrete ``ByteRequest``
    subtypes. All offsets, lengths and suffixes are drawn from
    ``[0, max_size]`` independently of each value's length, so they routinely
    exceed the data and exercise the clamping logic in
    ``_normalize_byte_range_index``.
    """
    bound = st.integers(min_value=0, max_value=max_size)

    def clamped_range(start: int, length: int) -> RangeByteRequest:
        # Keep the end of the range at or below max_size.
        end = min(start + length, max_size)
        return RangeByteRequest(start, end=end)

    range_requests = st.builds(clamped_range, start=bound, length=bound)
    offset_requests = st.builds(OffsetByteRequest, offset=bound)
    suffix_requests = st.builds(SuffixByteRequest, suffix=bound)
    byte_ranges: SearchStrategy[ByteRequest | None] = st.one_of(
        st.none(),
        range_requests,
        offset_requests,
        suffix_requests,
    )
    return st.lists(st.tuples(keys, byte_ranges), min_size=1, max_size=10)

keys ¶

keys(
    draw: DrawFn, *, max_num_nodes: int | None = None
) -> str

Source code in zarr/testing/strategies.py

@st.composite
def keys(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> str:
    """Draw a key made of one or more node names joined with ``/``.

    ``max_num_nodes`` caps the number of path segments; ``None`` leaves the
    count unbounded.
    """
    segments = st.lists(node_names, min_size=1, max_size=max_num_nodes)
    joined = segments.map("/".join)
    return draw(joined)

np_array_and_chunks ¶

np_array_and_chunks(
    draw: DrawFn,
    *,
    arrays: SearchStrategy[NDArray[Any]] = numpy_arrays(),
) -> tuple[ndarray[Any, Any], tuple[int, ...]]

A hypothesis strategy to generate small sized random arrays.

Returns: a tuple of the array and a suitable random chunking for it.

Source code in zarr/testing/strategies.py

@st.composite
def np_array_and_chunks(
    draw: st.DrawFn,
    *,
    arrays: st.SearchStrategy[npt.NDArray[Any]] = numpy_arrays(),  # noqa: B008
) -> tuple[np.ndarray[Any, Any], tuple[int, ...]]:
    """Draw a numpy array together with a chunk shape for it.

    Returns
    -------
    tuple
        ``(array, chunks)`` where ``array`` comes from ``arrays`` and
        ``chunks`` is drawn with ``chunk_shapes`` for the array's shape.
    """
    array = draw(arrays)
    chunks = draw(chunk_shapes(shape=array.shape))
    return array, chunks

numpy_arrays ¶

numpy_arrays(
    draw: DrawFn,
    *,
    shapes: SearchStrategy[tuple[int, ...]] = array_shapes,
    dtype: dtype[Any] | None = None,
) -> NDArray[Any]

Generate numpy arrays that can be saved in the provided Zarr format.

Source code in zarr/testing/strategies.py

@st.composite
def numpy_arrays(
    draw: st.DrawFn,
    *,
    shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes,
    dtype: np.dtype[Any] | None = None,
) -> npt.NDArray[Any]:
    """
    Draw a numpy array with a shape from ``shapes``.

    Parameters
    ----------
    shapes
        Strategy for the array shape.
    dtype
        Dtype of the array; drawn from ``dtypes()`` when ``None``.

    Notes
    -----
    For unicode string dtypes the elements come from
    ``safe_unicode_for_dtype``; every other dtype uses the default element
    strategy of ``npst.arrays``.
    """
    if dtype is None:
        dtype = draw(dtypes())
    # ``elements=None`` is the ``npst.arrays`` default, i.e. infer from dtype.
    elements = safe_unicode_for_dtype(dtype) if np.issubdtype(dtype, np.str_) else None
    return draw(npst.arrays(dtype=dtype, shape=shapes, elements=elements))

orthogonal_indices ¶

orthogonal_indices(
    draw: DrawFn, *, shape: tuple[int, ...]
) -> tuple[
    tuple[ndarray[Any, Any], ...],
    tuple[ndarray[Any, Any], ...],
]

Strategy that returns (1) a tuple of per-axis indexers (integer arrays or slices) used for orthogonal indexing of Zarr arrays, and (2) a tuple of integer arrays that can be used for equivalent indexing of numpy arrays.

Source code in zarr/testing/strategies.py

@st.composite
def orthogonal_indices(
    draw: st.DrawFn, *, shape: tuple[int, ...]
) -> tuple[tuple[np.ndarray[Any, Any], ...], tuple[np.ndarray[Any, Any], ...]]:
    """
    Draw an orthogonal indexer and the equivalent numpy indexer.

    Returns
    -------
    zindexer
        One indexer per axis (an integer array or a slice) for orthogonal
        indexing of a Zarr array.
    npindexer
        Integer arrays, broadcast against each other, that select the same
        elements from a numpy array.
    """
    ndim = len(shape)
    zarr_side = []
    numpy_side = []
    for axis, size in enumerate(shape):
        per_axis = basic_indices(min_dims=1, shape=(size,), allow_ellipsis=False)
        if size != 0:
            # Integer-array indices are only offered for non-empty axes.
            result_shapes = npst.array_shapes(min_side=1, max_side=size, max_dims=1)
            per_axis = npst.integer_array_indices(shape=(size,), result_shape=result_shapes) | per_axis

        normalized = (
            per_axis
            # wrap bare ints and slices in a 1-tuple
            .map(lambda x: (x,) if not isinstance(x, tuple) else x)
            # skip empty tuple
            .filter(bool)
        )
        (selector,) = draw(normalized)
        if isinstance(selector, int):
            selector = np.array([selector])
        zarr_side.append(selector)

        # Expand the selector into explicit positions for the numpy side.
        if isinstance(selector, slice):
            positions = np.arange(*selector.indices(size))
        elif isinstance(selector, (tuple, int)):
            positions = np.array(selector)
        else:
            positions = selector
        # Give this axis its own dimension so the per-axis arrays broadcast.
        target_shape = [1] * ndim
        target_shape[axis] = positions.size
        numpy_side.append(positions.reshape(target_shape))

    # casting the output of broadcast_arrays is needed for numpy < 2
    return tuple(zarr_side), tuple(np.broadcast_arrays(*numpy_side))

paths ¶

paths(
    draw: DrawFn, *, max_num_nodes: int | None = None
) -> str

Source code in zarr/testing/strategies.py

@st.composite
def paths(draw: st.DrawFn, *, max_num_nodes: int | None = None) -> str:
    """Draw either the literal path ``"/"`` or a value from ``keys``.

    ``max_num_nodes`` is forwarded unchanged to ``keys``.
    """
    root = st.just("/")
    nested = keys(max_num_nodes=max_num_nodes)
    return draw(root | nested)

rectilinear_arrays ¶

rectilinear_arrays(
    draw: DrawFn,
    *,
    shapes: SearchStrategy[
        tuple[int, ...]
    ] = _rectilinear_shapes,
) -> Any

Generate a zarr v3 array with rectilinear (variable) chunk grid.

Source code in zarr/testing/strategies.py

@st.composite
def rectilinear_arrays(
    draw: st.DrawFn,
    *,
    shapes: st.SearchStrategy[tuple[int, ...]] = _rectilinear_shapes,
) -> Any:
    """Generate a zarr v3 array with rectilinear (variable) chunk grid.

    A shape is drawn from ``shapes``, followed by per-dimension chunk sizes
    from ``rectilinear_chunks``, a dtype, matching numpy data, a fill value
    (``None`` or a value of that dtype) and dimension names. The array is
    created in a fresh ``MemoryStore`` with the ``array.rectilinear_chunks``
    config flag enabled, filled with the drawn data, and returned.
    """
    shape = draw(shapes)
    chunk_shapes = draw(rectilinear_chunks(shape=shape))

    np_dtype = draw(dtypes())
    data_strategy = numpy_arrays(shapes=st.just(shape), dtype=np_dtype)
    nparray = draw(data_strategy)
    fill_strategy = st.one_of(st.none(), npst.from_dtype(np_dtype))
    fill_value = draw(fill_strategy)
    dim_names = draw(dimension_names(ndim=len(shape)))

    creation_kwargs = {
        "store": MemoryStore(),
        "shape": shape,
        "chunks": chunk_shapes,
        "dtype": np_dtype,
        "fill_value": fill_value,
        "dimension_names": dim_names,
    }
    # Both creation and the write happen while the flag is enabled.
    with zarr.config.set({"array.rectilinear_chunks": True}):
        zarray = zarr.create_array(**creation_kwargs)
        zarray[:] = nparray

    return zarray

rectilinear_chunks ¶

rectilinear_chunks(
    draw: DrawFn, *, shape: tuple[int, ...]
) -> list[list[int]]

Generate valid rectilinear chunk shapes for a given array shape.

Uses two modes per dimension: "expanded", where random divider points create arbitrary chunk sizes, and "rle", where uniform chunks with an optional remainder are optionally shuffled.

Keeps max chunks per dimension <= 20 to avoid performance issues in property tests. With higher dimensions, the total chunk count grows multiplicatively.

Source code in zarr/testing/strategies.py

@st.composite
def rectilinear_chunks(draw: st.DrawFn, *, shape: tuple[int, ...]) -> list[list[int]]:
    """Generate valid rectilinear chunk shapes for a given array shape.

    Every dimension must have a positive size. A dimension of size 1 always
    gets the single chunk ``[1]``. Larger dimensions pick one of two modes:

    - "expanded": between 1 and ``min(size - 1, 20)`` chunks, delimited by
      unique, sorted divider points drawn from ``1 .. size - 1``
    - "rle": uniform chunks of a drawn size (at most ``min(size, 20)``) plus
      an optional smaller remainder chunk, optionally permuted

    The bounds keep property tests fast, since the total chunk count grows
    multiplicatively with the number of dimensions.

    NOTE(review): "rle" mode caps the chunk *size* at 20, not the chunk
    count, so for sizes above 20 it can yield more than 20 chunks.

    Returns one list of chunk sizes per dimension; each list sums to the
    size of its dimension.
    """
    per_dim: list[list[int]] = []
    for size in shape:
        assert size > 0
        if size <= 1:
            # Nothing to split.
            per_dim.append([1])
            continue

        if draw(st.sampled_from(["expanded", "rle"])) == "expanded":
            event("rectilinear expanded")
            n_chunks = draw(st.integers(min_value=1, max_value=min(size - 1, 20)))
            n_cuts = n_chunks - 1
            cuts = draw(
                st.lists(
                    st.integers(min_value=1, max_value=size - 1),
                    min_size=n_cuts,
                    max_size=n_cuts,
                    unique=True,
                )
            )
            # Chunk sizes are the gaps between consecutive boundaries.
            edges = [0, *sorted(cuts), size]
            per_dim.append([hi - lo for lo, hi in zip(edges, edges[1:], strict=False)])
            continue

        # RLE mode: uniform chunks with optional remainder
        chunk_size = draw(st.integers(min_value=1, max_value=min(size, 20)))
        n_full, leftover = divmod(size, chunk_size)
        run = [chunk_size] * n_full
        if leftover > 0:
            run.append(leftover)

        # Optionally shuffle to create non-contiguous duplicate patterns
        shuffle = draw(st.booleans())
        if shuffle:
            event("rectilinear rle shuffled")
            run = draw(st.permutations(run))
        else:
            event("rectilinear rle")
        per_dim.append(list(run))
    return per_dim

safe_unicode_for_dtype ¶

safe_unicode_for_dtype(
    dtype: dtype[str_],
) -> SearchStrategy[str]

Generate UTF-8-safe text constrained to max_len of dtype.

Source code in zarr/testing/strategies.py

def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
    """Generate UTF-8-safe text constrained to max_len of dtype.

    The maximum length is ``dtype.itemsize // 4`` characters (4 bytes per
    character, i.e. UTF-32 storage), but never less than 1. Generated strings
    are non-empty, contain no surrogate code points and no code points
    below 32.
    """
    # account for utf-32 encoding (i.e. 4 bytes/character)
    chars_that_fit = dtype.itemsize // 4
    longest = chars_that_fit if chars_that_fit > 1 else 1

    alphabet = st.characters(
        exclude_categories=["Cs"],  # Avoid *technically allowed* surrogates
        min_codepoint=32,
    )
    return st.text(alphabet=alphabet, min_size=1, max_size=longest)

shard_shapes ¶

shard_shapes(
    draw: DrawFn,
    *,
    shape: tuple[int, ...],
    chunk_shape: tuple[int, ...],
) -> tuple[int, ...]

Source code in zarr/testing/strategies.py

@st.composite
def shard_shapes(
    draw: st.DrawFn, *, shape: tuple[int, ...], chunk_shape: tuple[int, ...]
) -> tuple[int, ...]:
    """Draw a shard shape that is an integral multiple of ``chunk_shape``.

    Parameters
    ----------
    shape
        Shape of the array being sharded.
    chunk_shape
        Shape of a single chunk; every entry must be non-zero and the tuple
        must have the same length as ``shape``.

    Returns
    -------
    tuple[int, ...]
        Per-dimension shard lengths, each equal to ``m * chunk`` where ``m``
        is drawn between 1 and the number of whole chunks that fit in that
        dimension (at least 1).
    """
    # We want this strategy to shrink towards arrays with smaller number of shards
    # shards must be an integral number of chunks
    assert all(c != 0 for c in chunk_shape)
    # When a dimension is shorter than its chunk, ``s // c`` is 0 and
    # ``st.integers(min_value=1, max_value=0)`` would be an invalid strategy.
    # Clamp to 1 so the shard holds exactly one chunk along that dimension.
    numchunks = tuple(max(s // c, 1) for s, c in zip(shape, chunk_shape, strict=True))
    multiples = tuple(draw(st.integers(min_value=1, max_value=nc)) for nc in numchunks)
    return tuple(m * c for m, c in zip(multiples, chunk_shape, strict=True))

simple_arrays ¶

simple_arrays(
    draw: DrawFn,
    *,
    shapes: SearchStrategy[tuple[int, ...]] = array_shapes,
) -> Any

Source code in zarr/testing/strategies.py

@st.composite
def simple_arrays(
    draw: st.DrawFn,
    *,
    shapes: st.SearchStrategy[tuple[int, ...]] = array_shapes,
) -> Any:
    """Draw from ``arrays`` with a restricted configuration.

    Shapes come from ``shapes``; paths are drawn with ``max_num_nodes=2``,
    array names from ``short_node_names``, attributes are always ``None`` and
    the compressor is either ``None`` or ``"default"``.
    """
    strategy = arrays(
        shapes=shapes,
        paths=paths(max_num_nodes=2),
        array_names=short_node_names,
        attrs=st.none(),
        compressors=st.sampled_from([None, "default"]),
    )
    return draw(strategy)

v2_dtypes ¶

v2_dtypes() -> SearchStrategy[dtype[Any]]

Source code in zarr/testing/strategies.py

def v2_dtypes() -> st.SearchStrategy[np.dtype[Any]]:
    """Return the dtype strategy built by ``dtypes()``."""
    strategy: st.SearchStrategy[np.dtype[Any]] = dtypes()
    return strategy

v3_dtypes ¶

v3_dtypes() -> SearchStrategy[dtype[Any]]

Source code in zarr/testing/strategies.py

def v3_dtypes() -> st.SearchStrategy[np.dtype[Any]]:
    """Return the dtype strategy built by ``dtypes()``."""
    strategy: st.SearchStrategy[np.dtype[Any]] = dtypes()
    return strategy

zarr.testing.strategies

Strategies¶