scirpy icon indicating copy to clipboard operation
scirpy copied to clipboard

OverflowError while running ir.pp.ir_dist

Open m21camby opened this issue 6 months ago • 4 comments

Report

Dear grst,

I'm getting the error below when running ir.pp.ir_dist.

code

ir.pp.ir_dist(
    mdata,
    metric="tcrdist",
    sequence="aa",
    cutoff=15,
    airr_mod = "airr"
)

error message

---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
Cell In[5], line 1
----> 1 ir.pp.ir_dist(
      2     mdata,
      3     metric="tcrdist",
      4     sequence="aa",
      5     cutoff=15,
      6     airr_mod = "airr"
      7 )

File /lib/python3.10/site-packages/scirpy/ir_dist/__init__.py:261, in _ir_dist(adata, reference, metric, cutoff, sequence, key_added, inplace, n_jobs, airr_mod, airr_key, chain_idx_key, airr_mod_ref, airr_key_ref, chain_idx_key_ref, **kwargs)
    259 for chain_type in ["VJ", "VDJ"]:
    260     logging.info(f"Computing sequence x sequence distance matrix for {chain_type} sequences.")  # type: ignore
--> 261     result[chain_type]["distances"] = dist_calc.calc_dist_mat(
    262         result[chain_type]["seqs"], result[chain_type].get("seqs2", None)
    263     ).tocsr()
    265 # return or store results
    266 if inplace:

File /lib/python3.10/site-packages/scirpy/ir_dist/metrics.py:543, in _MetricDistanceCalculator.calc_dist_mat(self, seqs, seqs2)
    541     row_mins = np.concatenate(block_row_mins)
    542 else:
--> 543     distance_matrix_csr, row_mins = self._calc_dist_mat_block(seqs, seqs2, is_symmetric)
    545 if is_symmetric:
    546     upper_triangular_distance_matrix = distance_matrix_csr

File /lib/python3.10/site-packages/scirpy/ir_dist/metrics.py:506, in _MetricDistanceCalculator._calc_dist_mat_block(self, seqs, seqs2, is_symmetric, start_column)
    503 if len(seqs) == 0 or len(seqs2) == 0:
    504     return csr_matrix((len(seqs), len(seqs2))), np.array([None])
--> 506 data_rows, indices_rows, row_element_counts, row_mins = self._metric_mat(
    507     seqs=seqs,
    508     seqs2=seqs2,
    509     is_symmetric=is_symmetric,
    510     start_column=start_column,
    511 )
    513 indptr = np.zeros(row_element_counts.shape[0] + 1)
    514 indptr[1:] = np.cumsum(row_element_counts)

File /lib/python3.10/site-packages/scirpy/ir_dist/metrics.py:1233, in TCRdistDistanceCalculator._tcrdist_mat(self, seqs, seqs2, is_symmetric, start_column)
   1193 """Computes the pairwise TCRdist distances for sequences in seqs and seqs2.
   1194 
   1195 This function is a wrapper and contains an inner JIT compiled numba function without parameters. The reason for this is
   (...)
   1229     not implemented for the tcrdist calculator yet.
   1230 """
   1231 max_seq_len = max(len(s) for s in (*seqs, *seqs2))
-> 1233 seqs_mat1, seqs_L1 = _seqs2mat(seqs, max_len=max_seq_len)
   1234 seqs_mat2, seqs_L2 = _seqs2mat(seqs2, max_len=max_seq_len)
   1236 cutoff = self.cutoff

File /lib/python3.10/site-packages/scirpy/ir_dist/metrics.py:399, in _seqs2mat(seqs, alphabet, max_len)
    397 L = np.zeros(len(seqs), dtype=np.int8)
    398 for si, s in enumerate(seqs):
--> 399     L[si] = min(len(s), max_len)
    400     for aai in range(max_len):
    401         if aai >= len(s):

OverflowError: Python integer 132 out of bounds for int8

Thank you in advance!

Versions

anndata                   0.11.3                   pypi_0    pypi
python                    3.10.17         hd6af730_0_cpython    conda-forge
scanpy                    1.11.0                   pypi_0    pypi
scikit-learn              1.5.2                    pypi_0    pypi
scipy                     1.15.2          py310h1d65ade_0    conda-forge
scirpy                    0.22.0                   pypi_0    pypi

m21camby avatar Jul 21 '25 12:07 m21camby