Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

top-level functions for reading, creating data #2463

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
10 changes: 10 additions & 0 deletions src/zarr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
copy_all,
copy_store,
create,
create_array,
create_group,
empty,
empty_like,
full,
Expand All @@ -19,6 +21,9 @@
open_consolidated,
open_group,
open_like,
read,
read_array,
read_group,
save,
save_array,
save_group,
Expand Down Expand Up @@ -46,6 +51,9 @@
"copy_all",
"copy_store",
"create",
"create_array",
"create_group",
"read_array",
"empty",
"empty_like",
"full",
Expand All @@ -55,9 +63,11 @@
"ones",
"ones_like",
"open",
"read",
"open_array",
"open_consolidated",
"open_group",
"read_group",
"open_like",
"save",
"save_array",
Expand Down
311 changes: 309 additions & 2 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,8 @@ async def open(
path : str or None, optional
The path within the store to open.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
If the store is backed by an fsspec-based implementation, then this dict will be passed to
the Store constructor for that implementation. Ignored otherwise.
**kwargs
Additional parameters are passed through to :func:`zarr.creation.open_array` or
:func:`zarr.hierarchy.open_group`.
Expand Down Expand Up @@ -313,6 +313,47 @@ async def open(
return await open_group(store=store_path, zarr_format=zarr_format, **kwargs)


async def read(
*,
store: StoreLike | None = None,
zarr_format: ZarrFormat | None = None,
path: str | None = None,
storage_options: dict[str, Any] | None = None,
**kwargs: Any,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup:
"""Convenience function to open a group or array for reading. This function
wraps :func:`zarr.api.asynchronous.open` See the documentation of that function for details.

Parameters
----------
store : Store or str, optional
Store or path to directory in file system or name of zip file.
zarr_format : {2, 3, None}, optional
The zarr format to require. The default value of None will first look for Zarr v3 data,
then Zarr v2 data, then fail if neither format is found.
path : str or None, optional
The path within the store to open.
storage_options : dict, optional
If using an fsspec URL to create the store, this will be passed to
the backend implementation. Ignored otherwise.
**kwargs
Additional parameters are passed through to :func:`zarr.creation.open`.

Returns
-------
z : array or group
Return type depends on what exists in the given store.
"""
return await open(
store=store,
mode="r",
zarr_format=zarr_format,
path=path,
storage_options=storage_options,
**kwargs,
)


async def open_consolidated(
*args: Any, use_consolidated: Literal[True] = True, **kwargs: Any
) -> AsyncGroup:
Expand Down Expand Up @@ -615,6 +656,54 @@ async def group(
)


async def create_group(
*,
store: StoreLike,
path: str | None = None,
overwrite: bool = False,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, Any] | None = None,
storage_options: dict[str, Any] | None = None,
) -> AsyncGroup:
"""Create a group.

Parameters
----------
store : Store or str
Store or path to directory in file system.
path : str, optional
Group path within store.
overwrite : bool, optional
If True, pre-existing data at ``path`` will be deleted before
creating the group.
zarr_format : {2, 3, None}, optional
The zarr format to use when saving.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.

Returns
-------
g : group
The new group.
"""

if zarr_format is None:
zarr_format = _default_zarr_version()

# TODO: fix this when modes make sense. It should be `w` for overwriting, `w-` otherwise
mode: Literal["a"] = "a"

store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

return await AsyncGroup.from_store(
store=store_path,
zarr_format=zarr_format,
exists_ok=overwrite,
attributes=attributes,
)


async def open_group(
store: StoreLike | None = None,
*, # Note: this is a change from v2
Expand Down Expand Up @@ -721,6 +810,188 @@ async def open_group(
)


async def read_group(
store: StoreLike,
*,
path: str | None = None,
zarr_format: ZarrFormat | None = None,
storage_options: dict[str, Any] | None = None,
use_consolidated: bool | str | None = None,
) -> AsyncGroup:
"""Open a group for reading. This function wraps :func:`zarr.api.asynchronous.open_group` See
the documentation of that function for details.

Parameters
----------
store : Store, str, or mapping, optional
Store or path to directory in file system or name of zip file.

Strings are interpreted as paths on the local file system
and used as the ``root`` argument to :class:`zarr.store.LocalStore`.

Dictionaries are used as the ``store_dict`` argument in
:class:`zarr.store.MemoryStore``.
path : str, optional
Group path within store.
zarr_format : {2, 3, None}, optional
The zarr format to require. The default value of None will first look for Zarr v3 data,
then Zarr v2 data, then fail if neither format is found.
storage_options : dict
If the store is backed by an fsspec-based implementation, then this dict will be passed to
the Store constructor for that implementation. Ignored otherwise.
use_consolidated : bool or str, default None
Whether to use consolidated metadata.

By default, consolidated metadata is used if it's present in the
store (in the ``zarr.json`` for Zarr v3 and in the ``.zmetadata`` file
for Zarr v2).

To explicitly require consolidated metadata, set ``use_consolidated=True``,
which will raise an exception if consolidated metadata is not found.

To explicitly *not* use consolidated metadata, set ``use_consolidated=False``,
which will fall back to using the regular, non consolidated metadata.

Zarr v2 allowed configuring the key storing the consolidated metadata
(``.zmetadata`` by default). Specify the custom key as ``use_consolidated``
to load consolidated metadata from a non-default key.

Returns
-------
g : group
The new group.
"""
return await open_group(
store=store,
mode="r",
path=path,
storage_options=storage_options,
zarr_format=zarr_format,
use_consolidated=use_consolidated,
)


async def create_array(
store: str | StoreLike,
*,
shape: ChunkCoords,
chunks: ChunkCoords | None = None, # TODO: v2 allowed chunks=True
dtype: npt.DTypeLike | None = None,
compressor: dict[str, JSON] | None = None, # TODO: default and type change
fill_value: Any | None = 0, # TODO: need type
order: MemoryOrder | None = None,
overwrite: bool = False,
path: PathLike | None = None,
filters: list[dict[str, JSON]] | None = None, # TODO: type has changed
dimension_separator: Literal[".", "/"] | None = None,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
chunk_key_encoding: (
ChunkKeyEncoding
| tuple[Literal["default"], Literal[".", "/"]]
| tuple[Literal["v2"], Literal[".", "/"]]
| None
) = None,
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
**kwargs: Any,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Create an array.

Parameters
----------
shape : int or tuple of ints
Array shape.
chunks : int or tuple of ints, optional
Chunk shape. If True, will be guessed from `shape` and `dtype`. If
False, will be set to `shape`, i.e., single chunk for the whole array.
If an int, the chunk size in each dimension will be given by the value
of `chunks`. Default is True.
dtype : str or dtype, optional
NumPy dtype.
compressor : Codec, optional
Primary compressor.
fill_value : object
Default value to use for uninitialized portions of the array.
order : {'C', 'F'}, optional
Memory layout to be used within each chunk.
Default is set in Zarr's config (`array.order`).
store : Store or str
Store or path to directory in file system or name of zip file.
overwrite : bool, optional
If True, delete all pre-existing data in `store` at `path` before
creating the array.
path : str, optional
Path under which array is stored.
filters : sequence of Codecs, optional
Sequence of filters to use to encode chunk data prior to compression.
dimension_separator : {'.', '/'}, optional
Separator placed between the dimensions of a chunk.
zarr_format : {2, 3, None}, optional
The zarr format to use when saving.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.

Returns
-------
z : array
The array.
"""

if zarr_format is None:
zarr_format = _default_zarr_version()

if zarr_format == 2 and chunks is None:
chunks = shape
elif zarr_format == 3 and chunk_shape is None:
if chunks is not None:
chunk_shape = chunks
chunks = None
else:
chunk_shape = shape

if dimension_separator is not None:
if zarr_format == 3:
raise ValueError(
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
)
else:
warnings.warn(
"dimension_separator is not yet implemented",
RuntimeWarning,
stacklevel=2,
)

# TODO: fix this when modes make sense. It should be `w` for overwriting, `w-` otherwise
mode: Literal["a"] = "a"

store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

return await AsyncArray.create(
store_path,
shape=shape,
chunks=chunks,
dtype=dtype,
compressor=compressor,
fill_value=fill_value,
exists_ok=overwrite,
filters=filters,
dimension_separator=dimension_separator,
zarr_format=zarr_format,
chunk_shape=chunk_shape,
chunk_key_encoding=chunk_key_encoding,
codecs=codecs,
dimension_names=dimension_names,
attributes=attributes,
order=order,
**kwargs,
)


async def create(
shape: ChunkCoords,
*, # Note: this is a change from v2
Expand Down Expand Up @@ -904,6 +1175,41 @@ async def create(
)


async def read_array(
store: StoreLike,
*,
path: str | None = None,
zarr_format: ZarrFormat | None = None,
storage_options: dict[str, Any] | None = None,
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
"""Create an array for reading. Wraps `:func:zarr.api.asynchronous.create`.
See the documentation of that function for details.

Parameters
----------
store : Store or str
Store or path to directory in file system or name of zip file.
path : str, optional
Path under which the array is stored.
zarr_format : {2, 3, None}, optional
The zarr format to require. The default value of ``None`` will first look for Zarr v3 data,
then Zarr v2 data, then fail if neither format is found.
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.

Returns
-------
z : array
The array.
"""
store_path = await make_store_path(store, path=path, mode="r", storage_options=storage_options)
return await AsyncArray.open(
store=store_path,
zarr_format=zarr_format,
)


async def empty(
shape: ChunkCoords, **kwargs: Any
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
Expand Down Expand Up @@ -1081,6 +1387,7 @@ async def open_array(
store=store_path,
zarr_format=zarr_format or _default_zarr_version(),
overwrite=store_path.store.mode.overwrite,
storage_options=storage_options,
**kwargs,
)
raise
Expand Down
Loading