filesystem_spec
filesystem_spec copied to clipboard
Cannot use chained Zip-URL with ReferenceFileSystem
Note: this issue could also be caused by xarray.
I have kerchunked a large number of NetCDF4 files (located in S3) and put the reference JSONs into a single Zip archive. If I open one of these NetCDF files as Zarr with xarray, using a chained refs_url like so:
# ...
refs_url = f"zip://{json_path}::file://{zip_path}"
return xr.open_dataset(
"reference://",
engine="zarr",
backend_kwargs={
"storage_options": {
"fo": refs_url,
"remote_protocol": "s3",
"remote_options": s3_options
},
"consolidated": False
},
decode_cf=False
)
I get the error KeyError: "There is no item named '{{json_path}}/.zmetadata' in the archive" (where {{json_path}} is a placeholder for the actual key path into the Zip archive, just for clarity).
However, the good news is that I can easily work around the issue if I load the JSON via its chained URL using fsspec.open() directly:
with fsspec.open(refs_url) as f:
refs = json.load(f)
and pass refs to the fo kwarg instead of refs_url.
Full stack trace:
../xcube_smos/catalog/index.py:68: in open_dataset
return xr.open_dataset(
../../../miniconda3/envs/xcube/Lib/site-packages/xarray/backends/api.py:570: in open_dataset
backend_ds = backend.open_dataset(
../../../miniconda3/envs/xcube/Lib/site-packages/xarray/backends/zarr.py:965: in open_dataset
store = ZarrStore.open_group(
../../../miniconda3/envs/xcube/Lib/site-packages/xarray/backends/zarr.py:454: in open_group
zarr_group = zarr.open_group(store, **open_kwargs)
../../../miniconda3/envs/xcube/Lib/site-packages/zarr/hierarchy.py:1508: in open_group
store = _normalize_store_arg(
../../../miniconda3/envs/xcube/Lib/site-packages/zarr/hierarchy.py:1350: in _normalize_store_arg
return normalize_store_arg(
../../../miniconda3/envs/xcube/Lib/site-packages/zarr/storage.py:197: in normalize_store_arg
return normalize_store(store, storage_options, mode)
../../../miniconda3/envs/xcube/Lib/site-packages/zarr/storage.py:167: in _normalize_store_arg_v2
return FSStore(store, mode=mode, **(storage_options or {}))
../../../miniconda3/envs/xcube/Lib/site-packages/zarr/storage.py:1377: in __init__
self.map = fsspec.get_mapper(url, **{**mapper_options, **storage_options})
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/mapping.py:245: in get_mapper
fs, urlpath = url_to_fs(url, **kwargs)
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/core.py:388: in url_to_fs
fs = filesystem(protocol, **inkwargs)
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/registry.py:290: in filesystem
return cls(**storage_options)
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/spec.py:79: in __call__
obj = super().__call__(*args, **kwargs)
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/implementations/reference.py:615: in __init__
self.references = LazyReferenceMapper(
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/implementations/reference.py:130: in __init__
self._items[".zmetadata"] = self.fs.cat_file(
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/spec.py:789: in cat_file
with self.open(path, "rb", **kwargs) as f:
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/spec.py:1307: in open
f = self._open(
../../../miniconda3/envs/xcube/Lib/site-packages/fsspec/implementations/zip.py:120: in _open
out = self.zip.open(path, mode.strip("b"))
../../../miniconda3/envs/xcube/Lib/zipfile.py:1547: in open
zinfo = self.getinfo(name)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <zipfile.ZipFile file=<fsspec.implementations.local.LocalFileOpener object at 0x0000028DE1A1B250> mode='r'>
name = 'SMOS/L2SM/MIR_SMUDP2/2021/05/01/SM_REPR_MIR_SMUDP2_20210501T000429_20210501T005742_700_320_1/SM_REPR_MIR_SMUDP2_20210501T000429_20210501T005742_700_320_1.nc.json/.zmetadata'
def getinfo(self, name):
"""Return the instance of ZipInfo given 'name'."""
info = self.NameToInfo.get(name)
if info is None:
> raise KeyError(
'There is no item named %r in the archive' % name)
E KeyError: "There is no item named 'SMOS/L2SM/MIR_SMUDP2/2021/05/01/SM_REPR_MIR_SMUDP2_20210501T000429_20210501T005742_700_320_1/SM_REPR_MIR_SMUDP2_20210501T000429_20210501T005742_700_320_1.nc.json/.zmetadata' in the archive"