# Source code for pestifer.objs.seqadv

# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
``SEQADV`` (PDB) and ``seq_dif`` (mmCIF) records declare differences between the actual sequence of the molecule and the sequence
of its database reference.  This is how authors report things like accidental mutations (conflicts),
engineered mutations, and other differences.  These records are residue-specific.
"""
from __future__ import annotations

import logging

from mmcif.api.PdbxContainers import DataContainer
from pydantic import Field
from pidibble.pdbrecord import PDBRecord, PDBRecordDict
from typing import ClassVar, TYPE_CHECKING

from .resid import ResID
from ..core.baseobj import BaseObj, BaseObjList
from ..util.cifutil import CIFdict

if TYPE_CHECKING:
    from ..molecule.residue import Residue, ResidueList

logger = logging.getLogger(__name__)

class Seqadv(BaseObj):
    """
    A class for handling SEQADV/seq_dif records in input structure files
    """

    _required_fields = {'idCode', 'resname', 'chainID', 'resid', 'typekey'}
    """
    Required attributes for a Seqadv object.
    These attributes must be provided when creating a Seqadv object.

    - ``idCode``: The PDB ID code of the structure.
    - ``resname``: The residue name of the sequence difference.
    - ``chainID``: The chain ID of the segment where the sequence difference occurs.
    - ``resid``: The residue ID where the difference occurs, represented by a `ResID` object.
    - ``typekey``: A key indicating the type of sequence difference (e.g., ``conflict``, ``cloning``, ``expression tag``, ``engineered mutation``, ``variant``, ``insertion``, ``deletion``, ``microheterogeneity``, ``chromophore``, ``user``, ``_other_``).
    """

    _optional_fields = {'database', 'dbAccession', 'dbRes', 'dbSeq', 'pdbx_ordinal', 'pdbx_auth_seq_num', 'residue', 'pdbx_stash'}
    """
    Optional attributes for a Seqadv object.
    These attributes may be present but are not required.

    - ``database``: The database name where the sequence difference is recorded.
    - ``dbAccession``: The accession number of the sequence in the database.
    - ``dbRes``: The residue name in the database.
    - ``dbSeq``: The sequence number in the database.
    - ``pdbx_ordinal``: The ordinal number of the sequence difference in mmCIF files.
    - ``pdbx_auth_seq_num``: The author-assigned sequence number in mmCIF files.
    - ``residue``: The corresponding Residue object, if available.
      This attribute is used to link the Seqadv object to a specific Residue object in the structure,
      allowing for easier access to the residue's properties.
    - ``pdbx_stash``: Temporary storage of attributes; :meth:`assign_residue` keeps the original
      ``chainID`` and ``resid`` here before overwriting them from the matched residue.
    """

    _attr_choices = {
        'typekey': {'conflict', 'cloning', 'expression tag', 'engineered mutation', 'variant', 'insertion', 'deletion', 'microheterogeneity', 'chromophore', 'user', '_other_'}
    }
    """
    Attribute choices for Seqadv objects.

    - ``typekey``: The set of valid sequence difference types: ``conflict``, ``cloning``, ``expression tag``,
      ``engineered mutation``, ``variant``, ``insertion``, ``deletion``, ``microheterogeneity``,
      ``chromophore``, ``user``, and ``_other_``.
    """

    idCode: str = Field(..., description="PDB ID code of the structure")
    resname: str = Field(..., description="Residue name of the sequence difference")
    chainID: str = Field(..., description="Chain ID of the segment where the sequence difference occurs")
    resid: ResID = Field(..., description="Residue ID where the difference occurs")
    database: str | None = Field(None, description="Database name where the sequence difference is recorded")
    dbAccession: str | None = Field(None, description="Accession number of the sequence in the database")
    dbRes: str | None = Field(None, description="Residue name in the database")
    dbSeq: int | None = Field(None, description="Sequence number in the database")
    typekey: str | None = Field(None, description="Type of sequence difference (e.g., 'conflict', 'cloning', 'expression tag', 'engineered mutation', 'variant', 'insertion', 'deletion', 'microheterogeneity', 'chromophore', 'user', '_other_')")
    pdbx_ordinal: int | None = Field(None, description="Ordinal number of the sequence difference in mmCIF files")
    pdbx_auth_seq_num: int | None = Field(None, description="Author-assigned sequence number in mmCIF files")
    residue: 'Residue' = Field(None, description="Corresponding Residue object, if available")
    pdbx_stash: dict | None = Field(None, description="Stash for temporary storage of attributes")

    _yaml_header: ClassVar[str] = 'seqadvs'
    """
    YAML header for Seqadv objects.
    This header is used to identify Seqadv objects in YAML files.
    """

    _objcat: ClassVar[str] = 'seq'
    """
    Object category for Seqadv objects.
    This categorization is used to group Seqadv objects in the object manager.
    """

    _PDB_keyword: ClassVar[str] = 'SEQADV'
    """
    PDB keyword for Seqadv objects.
    This keyword is used to identify Seqadv objects in PDB files.
    """

    _CIF_CategoryName: ClassVar[str] = 'struct_ref_seq_dif'
    """
    mmCIF name for Seqadv objects.
    This name is used to identify Seqadv objects in mmCIF files.
    """

    @classmethod
    def _adapt(cls, *args, **kwargs) -> dict:
        """
        Adapts the input to a dictionary format suitable for Seqadv instantiation.

        Parameters
        ----------
        *args : Any
            If the first positional argument is a ``PDBRecord`` or a ``CIFdict``,
            it is converted by :meth:`_from_pdbrecord` or :meth:`_from_cifdict`,
            respectively.  Any other input is delegated to the parent class.
        **kwargs : Any
            Passed through to the parent class when the input is not a
            ``PDBRecord`` or ``CIFdict``.

        Returns
        -------
        dict
            A dictionary of parameters for Seqadv instantiation.
        """
        if args:
            source = args[0]
            if isinstance(source, PDBRecord):
                return Seqadv._from_pdbrecord(source)
            if isinstance(source, CIFdict):
                return Seqadv._from_cifdict(source)
        return super()._adapt(*args, **kwargs)

    @staticmethod
    def _from_pdbrecord(raw: PDBRecord) -> dict:
        """
        Converts a PDBRecord object to a dictionary of parameters for Seqadv.

        Only ``idCode``, ``chainID``, ``resname``, ``resid``, ``dbRes`` and
        ``typekey`` are populated; ``database``, ``dbAccession`` and ``dbSeq``
        are left at their defaults.

        Parameters
        ----------
        raw : PDBRecord
            The parsed SEQADV record.

        Returns
        -------
        dict
            A dictionary of parameters for Seqadv instantiation.
        """
        conflict = raw.conflict.lower()
        if conflict == 'deletion':
            # A deleted residue has no counterpart in the structure, so the
            # record is keyed on the database residue name and numbering.
            return dict(
                idCode=raw.idCode,
                chainID=raw.residue.chainID,
                resname=raw.dbRes,
                resid=ResID(raw.dbSeq, raw.dbIcode),
                dbRes=raw.dbRes,
                typekey='deletion'
            )
        # Any conflict text that is not one of the permitted choices is
        # collapsed to '_other_'.
        return dict(
            idCode=raw.idCode,
            chainID=raw.residue.chainID,
            resname=raw.residue.resName,
            resid=ResID(raw.residue.seqNum, raw.residue.iCode),
            dbRes=raw.dbRes,
            typekey=conflict if conflict in Seqadv._attr_choices['typekey'] else '_other_',
        )

    @staticmethod
    def _from_cifdict(cd: CIFdict) -> dict:
        """
        Converts a CIFdict object to a dictionary of parameters for Seqadv.

        Parameters
        ----------
        cd : CIFdict
            The CIFdict instance containing the attributes of the Seqadv object.

        Returns
        -------
        dict
            A dictionary of parameters for Seqadv instantiation.
        """
        db_seq_num = cd['pdbx_seq_db_seq_num']
        return {
            'idCode': cd['pdbx_pdb_id_code'],
            'resname': cd['mon_id'],
            'chainID': cd['pdbx_pdb_strand_id'],
            'resid': ResID(cd['pdbx_auth_seq_num'], cd.get('pdbx_auth_seq_ins_code', None)),
            'database': cd.get('pdbx_seq_db_name', None),
            'dbAccession': cd.get('pdbx_seq_db_accession_code', None),
            'dbRes': cd.get('db_mon_id', None),
            # An empty database sequence number is stored as None.
            'dbSeq': None if db_seq_num == '' else int(db_seq_num),
            # NOTE(review): unlike the PDB path, the free-text ``details`` value is
            # stored as-is rather than mapped onto ``_attr_choices['typekey']`` --
            # confirm this is intended.
            'typekey': cd.get('details', None),
            'pdbx_ordinal': cd.get('pdbx_ordinal', None),
            'pdbx_auth_seq_num': int(cd['pdbx_auth_seq_num'])
        }

    def seqadv_details_keyword(self, text):
        """
        Returns the typekey for the given text

        Parameters
        ----------
        text : str
            The text to analyze for keywords.

        Returns
        -------
        str
            The typekey corresponding to the text, or ``_other_`` if no keyword is found.
        """
        lowered = text.lower()
        # The permitted choices live in ``_attr_choices``.  They are a set, so
        # iterate in sorted order to make the result deterministic when the
        # text happens to contain more than one keyword.
        for keyword in sorted(type(self)._attr_choices['typekey']):
            if keyword in lowered:
                return keyword
        return '_other_'

    def pdb_line(self):
        """
        Write the seqadv as it would appear in a PDB file

        Optional attributes that are ``None`` (``database``, ``dbAccession``,
        ``dbRes``, ``dbSeq``, ``typekey``) are rendered as blanks of the
        appropriate width rather than raising a formatting error.

        Returns
        -------
        str
            The PDB line for the seqadv record.
        """
        database = '' if self.database is None else self.database
        db_accession = '' if self.dbAccession is None else self.dbAccession
        db_res = '' if self.dbRes is None else self.dbRes
        typekey = '' if self.typekey is None else self.typekey
        # ``:d`` cannot format None, so pre-render the numeric column.
        db_seq = ' ' * 5 if self.dbSeq is None else f'{self.dbSeq:>5d}'
        return f'SEQADV {self.idCode:3s} {self.resname:>3s} {self.chainID:1s} {self.resid.pdbresid:>5s} {database:>4s} {db_accession:9s} {db_res:3s} {db_seq} {typekey:21s} '

    def assign_residue(self, Residues: 'ResidueList'):
        """
        Assigns the residue attribute of the seqadv to the corresponding Residue object

        This method searches through the provided list of Residues to find a match based on the attributes
        of the seqadv object.  If a match is found, the residue attribute is set to the corresponding
        Residue object.  If no match is found, the residue attribute remains None.
        Seqadvs whose ``typekey`` is ``deletion`` are skipped entirely.

        Parameters
        ----------
        Residues : list
            A list of Residue objects to search for the corresponding residue.

        Raises
        ------
        AssertionError
            If a residue has already been assigned.
        """
        assert self.residue is None
        if self.typekey == 'deletion':
            return

        if getattr(self, 'pdbx_auth_seq_num', None) is not None:
            logger.debug(f'Assigning residue for seqadv {self.chainID}:{self.resid}:{self.typekey} from mmCIF record')
            # this was initialized from a mmCIF record
            # these records are weird in that the 'strand' id
            # is the author-assigned chain id but the sequence
            # number is the mmCIF-assigned sequence number
            self.assign_obj_to_attr('residue', Residues, auth_asym_id='chainID', auth_seq_id='pdbx_auth_seq_num')
            if self.residue is None:
                # we'll assume that if this residue is not found, then this
                # seqadv is never used anyway
                logger.debug(f'...seqadv {self.typekey} auth {self.chainID}:{self.resid} cannot be resolved from current set of residues')
                return
            # Remember the record's own identifiers before adopting the
            # matched residue's chainID and resid.
            if self.pdbx_stash is None:
                self.pdbx_stash = {}
            self.pdbx_stash['chainID'] = self.chainID
            self.pdbx_stash['resid'] = self.resid
            self.chainID = self.residue.chainID
            self.resid = self.residue.resid
            logger.debug(f'...seqadv {self.typekey} auth {self.chainID}:{self.resid} (orig. {self.pdbx_stash["chainID"]}:{self.pdbx_stash["resid"].resid}) assigned to residue {self.residue.resid} (pdb {self.residue.auth_asym_id}:{self.residue.auth_seq_id})')
            return

        # normal PDB record
        self.assign_obj_to_attr('residue', Residues, chainID='chainID', resid='resid')
        if self.residue is None:
            # failed to find
            logger.debug(f'...seqadv {self.typekey} auth {self.chainID}:{self.resid} cannot be resolved from current set of residues')

    def update_from_residue(self):
        """
        Updates the chainID attribute of the seqadv from its residue attribute

        This method sets the chainID attribute of the seqadv to the chainID of its residue attribute.
        A residue must already be assigned.

        Raises
        ------
        AssertionError
            If the residue attribute is None.
        """
        assert self.residue is not None
        self.chainID = self.residue.chainID
class SeqadvList(BaseObjList[Seqadv]):
    """
    A list container specialised for :class:`Seqadv` objects.
    """

    def describe(self):
        """
        Return a short, human-readable summary of this list.

        Returns
        -------
        str
            A string reporting how many seqadvs the list holds.
        """
        count = len(self)
        return f'<SeqadvList with {count} seqadvs>'

    @classmethod
    def from_pdb(cls, parsed: PDBRecordDict) -> SeqadvList:
        """
        Build a SeqadvList from the SEQADV entries of a PDBRecordDict.

        Parameters
        ----------
        parsed : PDBRecordDict
            The parsed PDB records, keyed by PDB keyword.

        Returns
        -------
        SeqadvList
            One Seqadv per SEQADV record, or an empty list when the
            keyword is absent from ``parsed``.
        """
        if Seqadv._PDB_keyword in parsed:
            return cls([Seqadv(record) for record in parsed[Seqadv._PDB_keyword]])
        return cls([])

    @classmethod
    def from_cif(cls, parsed: DataContainer) -> SeqadvList:
        """
        Build a SeqadvList from a DataContainer (mmCIF format).

        Parameters
        ----------
        parsed : DataContainer
            The parsed mmCIF data container.

        Returns
        -------
        SeqadvList
            One Seqadv per row of the ``struct_ref_seq_dif`` category, or an
            empty list when the container has no such category.
        """
        category = parsed.getObj(Seqadv._CIF_CategoryName)
        if category is None:
            return cls([])
        return cls([Seqadv(CIFdict(category, row)) for row in range(len(category))])

    def assign_residues(self, Residues: 'ResidueList') -> SeqadvList:
        """
        Assign a residue to every Seqadv in the list and prune the failures.

        Each member's ``assign_residue`` method is called with ``Residues``.
        Members whose ``residue`` attribute is still None afterwards are
        collected into a new SeqadvList, removed from this list, and returned.

        Parameters
        ----------
        Residues : list
            A list of Residue objects to assign to the Seqadv objects.

        Returns
        -------
        SeqadvList
            A new SeqadvList containing the Seqadv objects that could not be assigned a residue.
        """
        for seqadv in self.data:
            seqadv.assign_residue(Residues)
        unresolved = self.__class__([seqadv for seqadv in self.data if seqadv.residue is None])
        for seqadv in unresolved:
            self.remove(seqadv)
        return unresolved

    def update_from_residues(self):
        """
        Refresh the chainID of every Seqadv in the list from its residue.

        Calls ``update_from_residue`` on each member in turn.
        """
        for seqadv in self.data:
            seqadv.update_from_residue()