Source code for pestifer.charmmff.charmmffprm

# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
Parsing and writing CHARMM parameter files (.prm and the parameter sections of .str files).

The main class is :class:`CharmmParamFile`, which can be used to parse, merge, filter,
and write CHARMM parameter files.  The typical workflow for generating a minimal
consolidated parameter file for a simulation system is:

1. Parse all the parameter files that were used in the build.
2. Merge them into a single :class:`CharmmParamFile`.
3. Extract the subset of parameters needed for the atom types present in the PSF.
4. Write the result as a single ``.prm`` file.

Usage example::

    from pestifer.charmmff.charmmffprm import CharmmParamFile

    combined = CharmmParamFile()
    for fname in ['par_all36m_prot.prm', 'toppar_water_ions.str']:
        combined.merge(CharmmParamFile.from_file(fname))

    atom_types = {'C', 'NH1', 'CT1', ...}   # from PSF
    minimal = combined.extract_for_atomtypes(atom_types)
    minimal.write('my_system_minimal.prm')
"""

import logging
import re

from dataclasses import dataclass, field

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Parameter record dataclasses
# ---------------------------------------------------------------------------

[docs] @dataclass class CharmmBondParam: """A CHARMM bond parameter record.""" type1: str type2: str Kb: float b0: float comment: str = ''
[docs] @dataclass class CharmmAngleParam: """A CHARMM angle parameter record (with optional Urey-Bradley terms).""" type1: str type2: str type3: str Ktheta: float Theta0: float Kub: float | None = None S0: float | None = None comment: str = ''
[docs] @dataclass class CharmmDihedralParam: """A CHARMM dihedral parameter record. Note that multiple records with the same four atom types are allowed (different multiplicities). The wildcard atom type ``X`` may appear in position 1 or 4 (or both). """ type1: str type2: str type3: str type4: str Kchi: float n: int delta: float comment: str = ''
[docs] @dataclass class CharmmImproperParam: """A CHARMM improper dihedral parameter record.""" type1: str type2: str type3: str type4: str Kpsi: float n: int psi0: float comment: str = ''
[docs] @dataclass class CharmmNonbondedParam: """A CHARMM Lennard-Jones (nonbonded) parameter record for one atom type.""" atomtype: str ignored: float epsilon: float Rmin_half: float ignored14: float | None = None epsilon14: float | None = None Rmin_half14: float | None = None comment: str = ''
[docs] @dataclass class CharmmNBFixParam: """A CHARMM NBFIX record (pairwise LJ override).""" type1: str type2: str Emin: float Rmin: float comment: str = ''
[docs] @dataclass class CharmmCMAPParam: """A CHARMM CMAP correction record.""" types: list grid_size: int data: list comment: str = ''
# --------------------------------------------------------------------------- # Main class # --------------------------------------------------------------------------- _PARAM_SECTION_KEYWORDS = frozenset([ 'BONDS', 'ANGLES', 'DIHEDRALS', 'IMPROPER', 'NONBONDED', 'NBFIX', 'CMAP', ]) _END_SECTION_KEYWORDS = frozenset(['ATOMS', 'HBOND', 'END'])
[docs] class CharmmParamFile: """ Parses, merges, filters, and writes CHARMM parameter files. Supports both standalone ``.prm`` files and the parameter sections embedded in ``.str`` (stream) files. Parameters ---------- None – use the class-methods :meth:`from_file` or :meth:`from_text` to construct an instance from existing data. """ def __init__(self): self.bonds: list[CharmmBondParam] = [] self.angles: list[CharmmAngleParam] = [] self.dihedrals: list[CharmmDihedralParam] = [] self.impropers: list[CharmmImproperParam] = [] self.nonbonded: dict[str, CharmmNonbondedParam] = {} self.nbfix: list[CharmmNBFixParam] = [] self.cmaps: list[CharmmCMAPParam] = [] # Preserved header line for NONBONDED section (controls cutoffs, etc.) self.nonbonded_header: str = ( 'NONBONDED nbxmod 5 atom cdiel fshift vatom vdistance vfswitch -\n' 'cutnb 14.0 ctofnb 12.0 ctonnb 10.0 eps 1.0 e14fac 1.0 wmin 1.5' ) # ------------------------------------------------------------------ # Construction # ------------------------------------------------------------------
[docs] @classmethod def from_file(cls, filename: str) -> 'CharmmParamFile': """Parse a CHARMM parameter file (or stream file) from disk.""" with open(filename, 'r') as f: text = f.read() return cls.from_text(text)
[docs] @classmethod def from_text(cls, text: str) -> 'CharmmParamFile': """Parse CHARMM parameter content from a string. Works for both ``.prm`` files (bare section headers at top level) and ``.str`` files (sections inside ``read para … END`` blocks). """ obj = cls() sections = obj._extract_param_sections(text) for section_text in sections: obj._parse_param_section(section_text) return obj
# ------------------------------------------------------------------ # Internal parsing helpers # ------------------------------------------------------------------ @staticmethod def _extract_param_sections(text: str) -> list[str]: """Return a list of text blocks that contain parameter data. For ``.prm``-style files the whole text is returned as a single block. For ``.str``-style files each ``read para … END`` block is extracted. """ lines = text.splitlines() # Detect .str-style by looking for 'read para' has_read_para = any(re.match(r'^\s*read\s+para', l, re.IGNORECASE) for l in lines) if not has_read_para: return [text] blocks = [] in_param = False current: list[str] = [] for line in lines: stripped = line.strip() if re.match(r'^read\s+para', stripped, re.IGNORECASE): in_param = True current = [] elif in_param and stripped.upper() == 'END': blocks.append('\n'.join(current)) in_param = False current = [] elif in_param: current.append(line) # If a block was opened but never closed, keep it if in_param and current: blocks.append('\n'.join(current)) return blocks if blocks else [text] def _parse_param_section(self, text: str): """Parse one parameter block and append records to this instance.""" section = None nb_header_lines: list[str] = [] nb_header_complete = False # CMAP reading state cmap_entry: CharmmCMAPParam | None = None cmap_remaining: int = 0 for raw_line in text.splitlines(): # Split comment from data if '!' in raw_line: excl_pos = raw_line.index('!') comment = raw_line[excl_pos + 1:].strip() data_part = raw_line[:excl_pos].strip() else: comment = '' data_part = raw_line.strip() if not data_part: continue upper = data_part.upper() # ---------------------------------------------------------- # Section header detection # ---------------------------------------------------------- if upper.startswith('BOND'): section = 'BONDS' continue elif upper.startswith('ANGLE'): section = 'ANGLES' continue elif upper.startswith('DIHE'): section = 'DIHEDRALS' continue elif upper.startswith('IMPROPER'): section = 'IMPROPER' continue elif upper.startswith('NONBONDED'): section = 'NONBONDED' nb_header_lines = [raw_line] nb_header_complete = False # If the line does NOT end with '-' the header is one line if not data_part.rstrip().endswith('-'): nb_header_complete = True self.nonbonded_header = raw_line continue elif upper.startswith('NBFIX'): section = 'NBFIX' continue elif upper.startswith('CMAP'): section = 'CMAP' continue elif upper.startswith('HBOND') or any(upper.startswith(k) for k in _END_SECTION_KEYWORDS): section = None continue if section is None: continue tokens = data_part.split() # ---------------------------------------------------------- # NONBONDED header continuation (lines after NONBONDED until # we get to actual atom entries) # ---------------------------------------------------------- if section == 'NONBONDED' and not nb_header_complete: # A continuation of the keyword header: either cutnb... or # any line that is not an atom type + 3 floats try: float(tokens[1]) float(tokens[2]) float(tokens[3]) # This looks like a proper atom entry nb_header_complete = True self.nonbonded_header = '\n'.join(nb_header_lines) except (IndexError, ValueError): # Still header continuation nb_header_lines.append(raw_line) if not data_part.rstrip().endswith('-'): nb_header_complete = True self.nonbonded_header = '\n'.join(nb_header_lines) continue # ---------------------------------------------------------- # CMAP data reading # ---------------------------------------------------------- if section == 'CMAP': if cmap_remaining > 0: # Accumulate grid data try: vals = [float(t) for t in tokens] except ValueError: continue # skip any spurious non-numeric line cmap_entry.data.extend(vals) cmap_remaining -= len(vals) if cmap_remaining <= 0: self.cmaps.append(cmap_entry) cmap_entry = None cmap_remaining = 0 elif len(tokens) >= 9: # New CMAP entry header: 8 types + grid_size try: grid_size = int(tokens[8]) except (ValueError, IndexError): continue cmap_entry = CharmmCMAPParam( types=tokens[:8], grid_size=grid_size, data=[], comment=comment, ) cmap_remaining = grid_size * grid_size continue # ---------------------------------------------------------- # Standard section parsing # ---------------------------------------------------------- try: if section == 'BONDS' and len(tokens) >= 4: self.bonds.append(CharmmBondParam( type1=tokens[0], type2=tokens[1], Kb=float(tokens[2]), b0=float(tokens[3]), comment=comment, )) elif section == 'ANGLES' and len(tokens) >= 5: Kub = float(tokens[5]) if len(tokens) >= 7 else None S0 = float(tokens[6]) if len(tokens) >= 7 else None self.angles.append(CharmmAngleParam( type1=tokens[0], type2=tokens[1], type3=tokens[2], Ktheta=float(tokens[3]), Theta0=float(tokens[4]), Kub=Kub, S0=S0, comment=comment, )) elif section == 'DIHEDRALS' and len(tokens) >= 7: self.dihedrals.append(CharmmDihedralParam( type1=tokens[0], type2=tokens[1], type3=tokens[2], type4=tokens[3], Kchi=float(tokens[4]), n=int(tokens[5]), delta=float(tokens[6]), comment=comment, )) elif section == 'IMPROPER' and len(tokens) >= 7: self.impropers.append(CharmmImproperParam( type1=tokens[0], type2=tokens[1], type3=tokens[2], type4=tokens[3], Kpsi=float(tokens[4]), n=int(tokens[5]), psi0=float(tokens[6]), comment=comment, )) elif section == 'NONBONDED' and len(tokens) >= 4: ignored14 = epsilon14 = Rmin_half14 = None if len(tokens) >= 7: ignored14 = float(tokens[4]) epsilon14 = float(tokens[5]) Rmin_half14 = float(tokens[6]) self.nonbonded[tokens[0]] = CharmmNonbondedParam( atomtype=tokens[0], ignored=float(tokens[1]), epsilon=float(tokens[2]), Rmin_half=float(tokens[3]), ignored14=ignored14, epsilon14=epsilon14, Rmin_half14=Rmin_half14, comment=comment, ) elif section == 'NBFIX' and len(tokens) >= 4: self.nbfix.append(CharmmNBFixParam( type1=tokens[0], type2=tokens[1], Emin=float(tokens[2]), Rmin=float(tokens[3]), comment=comment, )) except (ValueError, IndexError) as exc: logger.debug(f'Skipping malformed parameter line: {repr(raw_line)} ({exc})') # ------------------------------------------------------------------ # Canonical keys for deduplication # ------------------------------------------------------------------ @staticmethod def _bond_key(b: 'CharmmBondParam') -> tuple: return tuple(sorted([b.type1, b.type2])) @staticmethod def _angle_key(a: 'CharmmAngleParam') -> tuple: fwd = (a.type1, a.type2, a.type3) rev = (a.type3, a.type2, a.type1) return min(fwd, rev) @staticmethod def _dihedral_key(d: 'CharmmDihedralParam') -> tuple: # Different n values are distinct terms (they are summed), so n is # part of the key. The quartet is canonicalised by reversibility. fwd = (d.type1, d.type2, d.type3, d.type4) rev = (d.type4, d.type3, d.type2, d.type1) return (min(fwd, rev), d.n) @staticmethod def _improper_key(i: 'CharmmImproperParam') -> tuple: return (i.type1, i.type2, i.type3, i.type4) @staticmethod def _nbfix_key(n: 'CharmmNBFixParam') -> tuple: return tuple(sorted([n.type1, n.type2])) # ------------------------------------------------------------------ # Merging # ------------------------------------------------------------------
[docs] def merge(self, other: 'CharmmParamFile') -> None: """Merge *other* into this instance (in-place). Uses last-wins semantics for all sections, matching CHARMM's ``READ PARAM APPEND`` behaviour. For dihedrals, entries with the same atom-type quartet but *different* multiplicity ``n`` are kept as separate terms (they are summed by the force field); only exact duplicates (same quartet *and* same ``n``) are deduplicated. """ def _merge_list(existing, incoming, key_fn): d = {key_fn(x): x for x in existing} d.update({key_fn(x): x for x in incoming}) return list(d.values()) self.bonds = _merge_list(self.bonds, other.bonds, self._bond_key) self.angles = _merge_list(self.angles, other.angles, self._angle_key) self.dihedrals = _merge_list(self.dihedrals, other.dihedrals, self._dihedral_key) self.impropers = _merge_list(self.impropers, other.impropers, self._improper_key) self.nonbonded.update(other.nonbonded) self.nbfix = _merge_list(self.nbfix, other.nbfix, self._nbfix_key) self.cmaps.extend(other.cmaps)
# ------------------------------------------------------------------ # Filtering # ------------------------------------------------------------------
[docs] def extract_for_atomtypes(self, atomtypes: set[str]) -> 'CharmmParamFile': """Return a new :class:`CharmmParamFile` containing only the records relevant to *atomtypes*. A record is included when every non-wildcard (``X``) atom type in that record appears in *atomtypes*. This is a conservative filter: it may include a handful of wildcard entries that are not strictly needed, but it never omits a record that is needed. Parameters ---------- atomtypes : set of str The set of atom types present in the target PSF. Returns ------- CharmmParamFile A new instance containing the filtered records. """ def match(types: list[str]) -> bool: return all(t == 'X' or t in atomtypes for t in types) result = CharmmParamFile() result.nonbonded_header = self.nonbonded_header result.bonds = [b for b in self.bonds if match([b.type1, b.type2])] result.angles = [a for a in self.angles if match([a.type1, a.type2, a.type3])] result.dihedrals = [d for d in self.dihedrals if match([d.type1, d.type2, d.type3, d.type4])] result.impropers = [i for i in self.impropers if match([i.type1, i.type2, i.type3, i.type4])] result.nonbonded = {t: v for t, v in self.nonbonded.items() if t in atomtypes} result.nbfix = [n for n in self.nbfix if match([n.type1, n.type2])] result.cmaps = [c for c in self.cmaps if match(c.types)] return result
# ------------------------------------------------------------------ # Writing # ------------------------------------------------------------------
[docs] def write(self, filename: str, title: str = '') -> None: """Write this parameter set to *filename* in CHARMM format. Parameters ---------- filename : str Output file path. title : str, optional A short description written into the file header. """ lines: list[str] = [] # Header if title: lines.append(f'* {title}') else: lines.append('* Minimal CHARMM parameter file generated by pestifer') lines.append('*') lines.append('') def _comment(c: str) -> str: return f' ! {c}' if c else '' # BONDS if self.bonds: lines.append('BONDS') for b in self.bonds: lines.append( f'{b.type1:<6s} {b.type2:<6s} {b.Kb:10.3f} {b.b0:8.4f}{_comment(b.comment)}' ) lines.append('') # ANGLES if self.angles: lines.append('ANGLES') for a in self.angles: ub = (f' {a.Kub:8.3f} {a.S0:8.4f}' if a.Kub is not None else '') lines.append( f'{a.type1:<6s} {a.type2:<6s} {a.type3:<6s} {a.Ktheta:10.3f} {a.Theta0:8.4f}' f'{ub}{_comment(a.comment)}' ) lines.append('') # DIHEDRALS if self.dihedrals: lines.append('DIHEDRALS') for d in self.dihedrals: lines.append( f'{d.type1:<6s} {d.type2:<6s} {d.type3:<6s} {d.type4:<6s} ' f'{d.Kchi:10.4f} {d.n:1d} {d.delta:8.2f}{_comment(d.comment)}' ) lines.append('') # IMPROPER if self.impropers: lines.append('IMPROPER') for i in self.impropers: lines.append( f'{i.type1:<6s} {i.type2:<6s} {i.type3:<6s} {i.type4:<6s} ' f'{i.Kpsi:10.4f} {i.n:1d} {i.psi0:8.2f}{_comment(i.comment)}' ) lines.append('') # CMAP if self.cmaps: lines.append('CMAP') for c in self.cmaps: lines.append(' '.join(c.types) + f' {c.grid_size}') # Write grid data in lines of 5 values (CHARMM convention) for i in range(0, len(c.data), 5): chunk = c.data[i:i + 5] lines.append(''.join(f'{v:9.2f}' for v in chunk)) lines.append('') # NONBONDED if self.nonbonded: lines.append(self.nonbonded_header) for t, nb in self.nonbonded.items(): nb14 = '' if nb.ignored14 is not None: nb14 = ( f' {nb.ignored14:8.4f} {nb.epsilon14:12.6f} {nb.Rmin_half14:10.6f}' ) lines.append( f'{nb.atomtype:<8s} {nb.ignored:8.4f} {nb.epsilon:12.6f} ' f'{nb.Rmin_half:10.6f}{nb14}{_comment(nb.comment)}' ) lines.append('') # NBFIX if self.nbfix: lines.append('NBFIX') for n in self.nbfix: lines.append( f'{n.type1:<8s} {n.type2:<8s} {n.Emin:12.6f} {n.Rmin:10.6f}{_comment(n.comment)}' ) lines.append('') lines.append('END') lines.append('') with open(filename, 'w') as f: f.write('\n'.join(lines))
# ------------------------------------------------------------------ # Diagnostics # ------------------------------------------------------------------
[docs] def summary(self) -> str: """Return a one-line summary of the parameter counts.""" return ( f'bonds={len(self.bonds)}, angles={len(self.angles)}, ' f'dihedrals={len(self.dihedrals)}, impropers={len(self.impropers)}, ' f'nonbonded={len(self.nonbonded)}, nbfix={len(self.nbfix)}, ' f'cmap={len(self.cmaps)}' )