bond_order==0 in CHEBI:52729
Describe the bug
The RDKitToolkitWrapper can parse CHEBI:52729 into a Molecule object but cannot convert that Molecule back into an RDMol because there are bonds with a bond type of 0. OpenEyeToolkitWrapper is able to convert that Molecule to an OEMol.
To Reproduce
from openff.toolkit.utils.toolkits import RDKitToolkitWrapper
from urllib.request import urlopen
url = "https://www.ebi.ac.uk/chebi/saveStructure.do?sdf=true&chebiId=52729&imageId=0"
rd = RDKitToolkitWrapper()
mol = rd.from_file_obj(urlopen(url), "sdf")[0]
print(mol) # Okay - this is created by OpenEyeToolkitWrapper
rd.to_rdkit(mol)
Output
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "openff/toolkit/utils/rdkit_wrapper.py", line 1699, in to_rdkit
rdbond.SetBondType(_bondtypes[bond.bond_order])
KeyError: 0
Diagnostics
As the above shows, to_rdkit() doesn't handle bonds with bond_order == 0. RDKit does read this as having bond_order == 0:
>>> from rdkit import Chem
>>> from urllib.request import urlopen
>>> url = "https://www.ebi.ac.uk/chebi/saveStructure.do?sdf=true&chebiId=52729&imageId=0"
>>> mol = next(Chem.ForwardSDMolSupplier(urlopen(url))
... )
>>> mol = next(Chem.ForwardSDMolSupplier(urlopen(url)))
>>> [(b.GetIdx(), b.GetBondType(), b.GetBondTypeAsDouble()) for b in mol.GetBonds() if b.GetBondTypeAsDouble() == 0.0]
[(85, rdkit.Chem.rdchem.BondType.UNSPECIFIED, 0.0), (86, rdkit.Chem.rdchem.BondType.UNSPECIFIED, 0.0), (87, rdkit.Chem.rdchem.BondType.UNSPECIFIED, 0.0), (88, rdkit.Chem.rdchem.BondType.UNSPECIFIED, 0.0), (89, rdkit.Chem.rdchem.BondType.UNSPECIFIED, 0.0), (90, rdkit.Chem.rdchem.BondType.UNSPECIFIED, 0.0)]
The relevant code in from_rdkit() is:
order = rdb.GetBondTypeAsDouble()
# Convert floating-point bond order to integral bond order
order = int(order)
# create a new bond
bond_index = offmol._add_bond(
map_atoms[a1], map_atoms[a2], order, is_aromatic
)
which turns the 0.0 into a 0.
The bond table in the molblock shows bond type 8 for those bonds:
21 78 8 0 0 0 0
7 78 8 0 0 0 0
35 78 8 0 0 0 0
46 78 8 0 0 0 0
72 78 8 0 0 0 0
63 78 8 0 0 0 0
The ctfile spec says those are "Any" bonds.
The likely solution is to have a mapping from 0 to UNSPECIFIED in the _bondtypes dict of rdkit_wrapper's to_rdkit().
In [10]: from openff.toolkit.utils.toolkits import RDKitToolkitWrapper
...: from urllib.request import urlopen
...: url = "https://www.ebi.ac.uk/chebi/saveStructure.do?sdf=true&chebiId=52
...: 729&imageId=0"
...: rd = RDKitToolkitWrapper()
...: mol = rd.from_file_obj(urlopen(url), "sdf")[0]
...: print(mol) # Okay - this is created by OpenEyeToolkitWrapper
...: rd.to_rdkit(mol)
Molecule with name '' and SMILES '[H]c1c(c2c(c(c1[H])[H])N3[N](=C2[H])[Eu]4567(O=C(C(=C(O4)C(F)(F)F)[H])C8=C(C(=C(S8)[H])[H])[H])(O=C(C(=C(O5)C(F)(F)F)[H])C9=C(C(=C(S9)[H])[H])[H])(O=C(C(=C(O6)C(F)(F)F)[H])C1=C(C(=C(S1)[H])[H])[H])[N]1=C(N2[N]7=C(c4c2c(c(c(c4[H])[H])[H])[H])[H])N=C(N=C31)c1c(c(c(c(c1[H])[H])N(C(C([H])([H])[H])([H])[H])C(C([H])([H])[H])([H])[H])[H])[H])[H]'
[08:07:13] Can't kekulize mol. Unkekulized atoms: 42 43 44 46 47
---------------------------------------------------------------------------
KekulizeException Traceback (most recent call last)
Cell In[10], line 7
5 mol = rd.from_file_obj(urlopen(url), "sdf")[0]
6 print(mol) # Okay - this is created by OpenEyeToolkitWrapper
----> 7 rd.to_rdkit(mol)
File ~/micromamba/envs/openff-interchange-env/lib/python3.11/site-packages/openff/toolkit/utils/rdkit_wrapper.py:2659, in RDKitToolkitWrapper.to_rdkit(self, molecule, aromaticity_model)
2653 raise InvalidAromaticityModelError(
2654 f"Given aromaticity model {aromaticity_model} which is not in the set of allowed aromaticity models: "
2655 f"{ALLOWED_AROMATICITY_MODELS}."
2656 )
2658 # Convert the OFF molecule's connectivity table to RDKit, returning a cached rdmol if possible
-> 2659 rdmol = self._connection_table_to_rdkit(
2660 molecule, aromaticity_model=aromaticity_model
2661 )
2662 # In case a cached rdmol was returned, make a copy of it
2663 rdmol = Chem.RWMol(rdmol)
File ~/micromamba/envs/openff-interchange-env/lib/python3.11/site-packages/cachetools/__init__.py:752, in cached.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
750 except KeyError:
751 pass # key not found
--> 752 v = func(*args, **kwargs)
753 try:
754 cache[k] = v
File ~/micromamba/envs/openff-interchange-env/lib/python3.11/site-packages/openff/toolkit/utils/rdkit_wrapper.py:2544, in RDKitToolkitWrapper._connection_table_to_rdkit(self, molecule, aromaticity_model)
2541 rdbond.SetBondType(_bondtypes[bond.bond_order])
2542 rdbond.SetIsAromatic(False)
-> 2544 Chem.SanitizeMol(
2545 rdmol,
2546 Chem.SANITIZE_ALL ^ Chem.SANITIZE_ADJUSTHS ^ Chem.SANITIZE_SETAROMATICITY,
2547 )
2549 if aromaticity_model == "OEAroModel_MDL":
2550 Chem.SetAromaticity(rdmol, Chem.AromaticityModel.AROMATICITY_MDL)
KekulizeException: Can't kekulize mol. Unkekulized atoms: 42 43 44 46 47