Source code for pestifer.objs.substitution

# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
A Substitution is a user-specified modification in which a sequence of one or
more contiguous residues are replaced by a new sequence of one or more residues.
"""
import logging
logger = logging.getLogger(__name__)

from pydantic import Field
from typing import ClassVar

from ..core.baseobj import BaseObj, BaseObjList
from .resid import ResID, ResIDList

[docs] class Substitution(BaseObj): """ A class for handling substitutions """ _required_fields = {'chainID','resid1','resid2','subseq'} """ Required attributes for a Substitution object. These attributes must be provided when creating a Substitution object. - ``chainID``: The chain ID of the segment where the substitution occurs. - ``resid1``: The N-terminal residue ID of the substitution. - ``resid2``: The C-terminal residue ID of the substitution. - ``subseq``: The one-letter amino acid sequence to be substituted. """ chainID: str = Field(..., description="Chain ID of the segment where the substitution occurs") resid1: ResID = Field(..., description="N-terminal residue ID of the substitution") resid2: ResID = Field(..., description="C-terminal residue ID of the substitution") subseq: str = Field(..., description="One-letter amino acid sequence to be substituted") _yaml_header: ClassVar[str] ='substitutions' """ YAML header for Substitution objects. This header is used to identify Substitution objects in YAML files. """ _objcat: ClassVar[str] = 'seq' """ Category of the Substitution object. This categorization is used to group Substitution objects in the object manager. """ @classmethod def _adapt(cls, *args, **kwargs) -> dict: """ Adapts the input to a dictionary format suitable for Substitution instantiation. This method is used to convert various input formats into a dictionary that can be used to create a Substitution object. """ if args and len(args) == 1 and isinstance(args[0], str): # C:nnn-ccc,S # where: # C is the chain ID # nnn is the N-terminal residue/insertion of the sequence to be substituted # ccc is the C-terminal residue/insertion of the sequence to be substituted # S is the one-letter amino acid sequence to be substituted raw = args[0] if ':' not in raw or '-' not in raw or ',' not in raw: raise ValueError(f'Invalid substitution shortcode: {raw}') p1 = raw.split(':') chainID = p1[0] p2 = p1[1].split(',') seqrange = p2[0] subseq = p2[1] seq = ResIDList(seqrange) resid1, resid2 = seq input_dict = { 'chainID': chainID, 'resid1': resid1, 'resid2': resid2, 'subseq': subseq } return input_dict return super()._adapt(*args, **kwargs)
[docs] def shortcode(self) -> str: return f"{self.chainID}:{self.resid1.resid}-{self.resid2.resid},{self.subseq}"
[docs] class SubstitutionList(BaseObjList[Substitution]): """ A list of Substitution objects. This class is used to manage a collection of Substitution objects. """
[docs] def describe(self): return f'<SubstitutionList: {len(self)} items>'