Source code for pestifer.charmmff.ligand_paramgen.mol2_writer

# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
Write a protonated RDKit Mol to a Tripos mol2 file via Open Babel.

We stamp PDB-style atom names onto every atom (heavy atoms keep the names
attached by :func:`protonate_ligand`; hydrogens get sequential ``H<n>``
names), emit a PDB block, then shell out to ``obabel`` for format
conversion. Open Babel only sees the already-protonated, already-named
structure, so it never mangles atom names or alters the H count.
"""
from __future__ import annotations

import logging
import os
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING

from ...core.command import Command

if TYPE_CHECKING:
    from rdkit import Chem

logger = logging.getLogger(__name__)


[docs] class Mol2WriteError(RuntimeError): """Raised when mol2 generation fails."""
def _import_rdkit_chem(): """Lazily import ``rdkit.Chem`` (an optional ``ligand-paramgen`` dependency).""" try: from rdkit import Chem except ModuleNotFoundError as exc: raise Mol2WriteError( "RDKit is required for ligand parameterization. Install the " "optional dependencies with `pip install pestifer[ligand-paramgen]`." ) from exc return Chem
[docs] def write_mol2( mol: Chem.Mol, resname: str, outpath: str | os.PathLike, ) -> Path: """ Write ``mol`` to ``outpath`` as a mol2 file via Open Babel. Atom names on the result are: - heavy atoms: the value of the RDKit property ``_pdb_name`` if set, otherwise the atom's element symbol. - hydrogens: sequential ``H1``, ``H2``, ... in RDKit atom-index order. Parameters ---------- mol RDKit Mol with 3D coordinates and explicit Hs (as returned by :func:`pestifer.charmmff.ligand_paramgen.protonate_ligand`). resname Three-letter residue name to stamp on every atom (used by CGenFF and by Open Babel's PDB reader). outpath Destination mol2 path. Returns ------- pathlib.Path The path that was written. """ Chem = _import_rdkit_chem() if mol.GetNumConformers() == 0: raise Mol2WriteError("Mol has no conformer; cannot write 3D mol2.") _stamp_pdb_names(mol, resname) pdb_block = Chem.MolToPDBBlock(mol, flavor=0) outpath = Path(outpath) outpath.parent.mkdir(parents=True, exist_ok=True) with tempfile.NamedTemporaryFile( suffix=".pdb", prefix=f"{resname}_", mode="w", delete=False, ) as tmp: tmp.write(pdb_block) tmp_pdb = tmp.name try: # `obabel <in> -O <out>` infers formats from extensions. `--title` # sets the @<TRIPOS>MOLECULE name; without it obabel writes the # input filename, which CGenFF then picks up as the RESI name — # so the resulting .str registers a residue named like # "/tmp/AP5_xyz.pdb" that pestifer can never match. c = Command(f"obabel {tmp_pdb} -O {outpath} --title {resname}") rc = c.run(quiet=True) if rc != 0 or not outpath.exists(): raise Mol2WriteError( f"obabel failed (rc={rc}) for {resname}: {c.stderr or c.stdout}" ) finally: try: os.unlink(tmp_pdb) except OSError: pass return outpath
def _stamp_pdb_names(mol: Chem.Mol, resname: str) -> None: """Attach AtomPDBResidueInfo to every atom so RDKit's PDB writer emits a stable name for each one.""" Chem = _import_rdkit_chem() resname3 = resname.strip()[:3].ljust(3) h_idx = 0 used_names: set[str] = set() for atom in mol.GetAtoms(): if atom.HasProp("_pdb_name"): name = atom.GetProp("_pdb_name") elif atom.GetSymbol() == "H": h_idx += 1 name = f"H{h_idx}" else: name = atom.GetSymbol() name = name.strip()[:4] # Avoid duplicate atom names within a residue (PDB requires uniqueness). if name in used_names: base, suffix = name, 1 while f"{base}{suffix}" in used_names: suffix += 1 name = f"{base}{suffix}"[:4] used_names.add(name) info = Chem.AtomPDBResidueInfo() info.SetName(name.ljust(4)) info.SetResidueName(resname3) info.SetChainId("A") info.SetResidueNumber(1) info.SetIsHeteroAtom(True) atom.SetMonomerInfo(info)