Writing r+ backed adata causes error in raw
I read and write r+ backed adata (containing raw) as follows:
adata=sc.read_h5ad(path,backed='r+')
# Add some data to obs
# ...
adata.write(path)
I get the following error due to raw not being in adata (but raw was in the initial data that I saved).
AttributeError Traceback (most recent call last)
<ipython-input-36-45a564f83470> in <module>
1 # Save data
----> 2 adata_full2.write(path_save+'data_integrated_annotated.h5ad')
~/miniconda3/envs/rpy2_3/lib/python3.8/site-packages/anndata/_core/anndata.py in write_h5ad(self, filename, compression, compression_opts, force_dense, as_dense)
1844 filename = self.filename
1845
-> 1846 _write_h5ad(
1847 Path(filename),
1848 self,
~/miniconda3/envs/rpy2_3/lib/python3.8/site-packages/anndata/_io/h5ad.py in write_h5ad(filepath, adata, force_dense, as_dense, dataset_kwargs, **kwargs)
102 )
103 else:
--> 104 write_attribute(f, "raw", adata.raw, dataset_kwargs=dataset_kwargs)
105 write_attribute(f, "obs", adata.obs, dataset_kwargs=dataset_kwargs)
106 write_attribute(f, "var", adata.var, dataset_kwargs=dataset_kwargs)
~/miniconda3/envs/rpy2_3/lib/python3.8/functools.py in wrapper(*args, **kw)
873 '1 positional argument')
874
--> 875 return dispatch(args[0].__class__)(*args, **kw)
876
877 funcname = getattr(func, '__name__', 'singledispatch function')
~/miniconda3/envs/rpy2_3/lib/python3.8/site-packages/anndata/_io/h5ad.py in write_attribute_h5ad(f, key, value, *args, **kwargs)
124 if key in f:
125 del f[key]
--> 126 _write_method(type(value))(f, key, value, *args, **kwargs)
127
128
~/miniconda3/envs/rpy2_3/lib/python3.8/site-packages/anndata/_io/h5ad.py in write_raw(f, key, value, dataset_kwargs)
132 group.attrs["encoding-version"] = EncodingVersions.raw.value
133 group.attrs["shape"] = value.shape
--> 134 write_attribute(f, "raw/X", value.X, dataset_kwargs=dataset_kwargs)
135 write_attribute(f, "raw/var", value.var, dataset_kwargs=dataset_kwargs)
136 write_attribute(f, "raw/varm", value.varm, dataset_kwargs=dataset_kwargs)
~/miniconda3/envs/rpy2_3/lib/python3.8/site-packages/anndata/_core/raw.py in X(self)
56 X = self._adata.file["raw.X"] # Backwards compat
57 else:
---> 58 raise AttributeError(
59 f"Could not find dataset for raw X in file: "
60 f"{self._adata.file.filename}."
AttributeError: Could not find dataset for raw X in file: /storage/groups/ml01/workspace/karin.hrovatin/data/pancreas/scRNA/combined/data_integrated_annotated.h5ad.
scanpy version: 1.4.6; anndata version: '0.7.4'
I'm having some trouble reproducing. Here's what I tried (which worked):
import anndata as ad
from scipy import sparse
a = ad.AnnData(sparse.random(200, 100, format="csr"))
a.raw = a
a.write("from_mem_with_raw.h5ad")
b = ad.read_h5ad("from_mem_with_raw.h5ad", backed="r+")
b.write("from_backed_with_raw.h5ad")
Does this work for you? If it does, could you share the contents of the initial file?
$ h5ls -r data_integrated_annotated.h5ad
Thanks!
Hi,
I have the same issue when trying to save an adata with raw that is in backed mode. I guess the .raw attribute doesn't support storing raw X while the data is on disk. Am I right?
Thanks!
I got this issue as well. It seems that saving an "r+" backed file corrupts or deletes the raw data, so that when you open the file again and try to save it again, you get the error.
import anndata as ad
from scipy import sparse
a = ad.AnnData(sparse.random(200, 100, format="csr"))
a.raw = a
a.write("from_mem_with_raw.h5ad")
b = ad.read_h5ad("from_mem_with_raw.h5ad", backed="r+")
b.write("from_backed_with_raw.h5ad")
c = ad.read_h5ad("from_backed_with_raw.h5ad", backed="r+")
c.write()
Interestingly, in the previous example code, when you load c into memory and then write, there are no errors.
import anndata as ad
from scipy import sparse
a = ad.AnnData(sparse.random(200, 100, format="csr"))
a.raw = a
a.write("from_mem_with_raw.h5ad")
b = ad.read_h5ad("from_mem_with_raw.h5ad", backed="r+")
b.write("from_backed_with_raw.h5ad")
c = ad.read_h5ad("from_backed_with_raw.h5ad")
c.write("from_backed_with_raw.h5ad")
# no error
I'm having the same issue. Here is a reproducible test case, obtained by slightly modifying the previous one:
import anndata as ad
from scipy import sparse
a = ad.AnnData(sparse.random(200, 100, format="csr"))
a.raw = a
a.write("test.h5ad")
b = ad.read_h5ad("test.h5ad", backed="r+") # r+ is used because the typical goal is to add information to the original h5ad file
# the error is raised whether or not any information is added
b.write("test.h5ad")
AttributeError: Could not find dataset for raw X in file: test.h5ad.
anndata version: '0.7.4' or '0.8.0'