Source code for pestifer.objs.link

# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
A Link is a covalent bond between two residues in a protein structure.
"""
from __future__ import annotations

import logging

import numpy as np

from mmcif.api.PdbxContainers import DataContainer
from pidibble.pdbrecord import PDBRecord, PDBRecordDict
from pydantic import Field
from typing import ClassVar, TYPE_CHECKING

from .mutation import MutationList
from .resid import ResID

from ..core.baseobj import BaseObj, BaseObjList
from ..util.coord import measure_dihedral

if TYPE_CHECKING:
    from ..molecule.atom import Atom
    from ..molecule.residue import Residue, ResidueList
    from ..molecule.segment import Segment, SegmentList

from ..psfutil.psfpatch import PSFLinkPatch

from ..util.cifutil import CIFdict

logger = logging.getLogger(__name__)


[docs]
class Link(BaseObj):
    """
    A class for handling covalent bonds between residues where at least one residue is non-protein
    """
    _required_fields = {'chainID1', 'resid1', 'name1',
                        'chainID2', 'resid2', 'name2'}

    _optional_fields = {'altloc1', 'altloc2', 'resname1', 'resname2', 'sym1', 'sym2', 'link_distance', 'segname1', 'segname2', 'residue1', 'residue2', 'atom1', 'atom2', 'empty', 'segtype1', 'segtype2', 'ptnr1_label_asym_id', 'ptnr2_label_asym_id', 'ptnr1_label_seq_id', 'ptnr2_label_seq_id', 'ptnr1_label_comp_id', 'ptnr2_label_comp_id', 'ptnr1_auth_asym_id', 'ptnr2_auth_asym_id', 'ptnr1_auth_seq_id', 'ptnr2_auth_seq_id', 'ptnr1_auth_comp_id', 'ptnr2_auth_comp_id','patchname','patchhead'}

    _attr_choices = {'patchhead': {1, 2}}

    chainID1: str = Field(..., description="Chain ID of the first residue in the link")
    resid1: ResID = Field(..., description="Residue ID of the first residue in the link")
    name1: str = Field(..., description="Name of the first atom in the link")
    chainID2: str = Field(..., description="Chain ID of the second residue in the link")
    resid2: ResID = Field(..., description="Residue ID of the second residue in the link")
    name2: str = Field(..., description="Name of the second atom in the link")
    """
    Required attributes for a Link object.
    These attributes must be provided when creating a Link object.

    - ``chainID1``: The chain ID of the first residue in the link.
    - ``resid1``: The residue ID of the first residue in the link.
    - ``name1``: The name of the first atom in the link.
    - ``chainID2``: The chain ID of the second residue in the link.
    - ``resid2``: The residue ID of the second residue in the link.
    - ``name2``: The name of the second atom in the link.
    """
    altloc1: str | None = Field(None, description="Alternate location identifier for the first atom")
    altloc2: str | None = Field(None, description="Alternate location identifier for the second atom")
    resname1: str | None = Field(None, description="Residue name of the first residue in the link")
    resname2: str | None = Field(None, description="Residue name of the second residue in the link")
    sym1: str | None = Field(None, description="Symmetry operator for the first residue")
    sym2: str | None = Field(None, description="Symmetry operator for the second residue")
    link_distance: float | None = Field(None, description="Distance between the two atoms in the link")
    segname1: str | None = Field(None, description="Segment name of the first residue")
    segname2: str | None = Field(None, description="Segment name of the second residue")
    residue1: "Residue" = Field(None, description="First residue object in the link")
    residue2: "Residue" = Field(None, description="Second residue object in the link")
    atom1: "Atom" = Field(None, description="First atom object in the link")
    atom2: "Atom" = Field(None, description="Second atom object in the link")
    empty: bool | None = Field(None, description="Indicates if the link is empty")
    segtype1: str | None = Field(None, description="Segment type of the first residue")
    segtype2: str | None = Field(None, description="Segment type of the second residue")
    ptnr1_label_asym_id: str | None = Field(None, description="Asym ID of the first partner in the link (mmCIF)")
    ptnr2_label_asym_id: str | None = Field(None, description="Asym ID of the second partner in the link (mmCIF)")
    ptnr1_label_seq_id: str | None = Field(None, description="Sequence ID of the first partner in the link (mmCIF)")
    ptnr2_label_seq_id: str | None = Field(None, description="Sequence ID of the second partner in the link (mmCIF)")
    ptnr1_label_comp_id: str | None = Field(None, description="Component ID of the first partner in the link (mmCIF)")
    ptnr2_label_comp_id: str | None = Field(None, description="Component ID of the second partner in the link (mmCIF)")
    ptnr1_auth_asym_id: str | None = Field(None, description="Author asym ID of the first partner in the link (mmCIF)")
    ptnr2_auth_asym_id: str | None = Field(None, description="Author asym ID of the second partner in the link (mmCIF)")
    ptnr1_auth_seq_id: str | None = Field(None, description="Author sequence ID of the first partner in the link (mmCIF)")
    ptnr2_auth_seq_id: str | None = Field(None, description="Author sequence ID of the second partner in the link (mmCIF)")
    ptnr1_auth_comp_id: str | None = Field(None, description="Author component ID of the first partner in the link (mmCIF)")
    ptnr2_auth_comp_id: str | None = Field(None, description="Author component ID of the second partner in the link (mmCIF)")
    patchname: str | None = Field(None, description="Name of the patch applied to the link")
    patchhead: int | None = Field(None, description="1 = residue1 is the first residue in the link, 2 = residue2 is the first residue in the link")

    """
    Optional attributes for a Link object.
    These attributes can be provided to modify the behavior of the link.
    - ``residue1``: The first residue object in the link.
    - ``residue2``: The second residue object in the link.
    - ``atom1``: The first atom object in the link.
    - ``atom2``: The second atom object in the link.
    - ``empty``: A boolean indicating if the link is empty.
    - ``segtype1``: The segment type of the first residue.
    - ``segtype2``: The segment type of the second residue.
    - ``ptnr1_label_asym_id``: The asym ID of the first partner in the link (mmCIF).
    - ``ptnr2_label_asym_id``: The asym ID of the second partner in the link (mmCIF).
    - ``ptnr1_label_seq_id``: The sequence ID of the first partner in the link (mmCIF).
    - ``ptnr2_label_seq_id``: The sequence ID of the second partner in the link (mmCIF).
    - ``ptnr1_label_comp_id``: The component ID of the first partner in the link (mmCIF).
    - ``ptnr2_label_comp_id``: The component ID of the second partner in the link (mmCIF).
    - ``ptnr1_auth_asym_id``: The author asym ID of the first partner in the link (mmCIF).
    - ``ptnr2_auth_asym_id``: The author asym ID of the second partner in the link (mmCIF).
    - ``ptnr1_auth_seq_id``: The author sequence ID of the first partner in the link (mmCIF).
    - ``ptnr2_auth_seq_id``: The author sequence ID of the second partner in the link (mmCIF).
    - ``ptnr1_auth_comp_id``: The author component ID of the first partner in the link (mmCIF).
    - ``ptnr2_auth_comp_id``: The author component ID of the second partner in the link (mmCIF).
    - ``patchname``: The name of the patch applied to the link.
    - ``patchhead``: 1 = residue1 is the first residue in the link, 2 = residue2 is the first residue in the link.
    """
    
    _yaml_header: ClassVar[str] = 'links'
    """
    YAML header for Link objects.
    This header is used to identify Link objects in YAML files.
    """

    _PDB_keyword: ClassVar[str] = 'LINK'
    """
    PDB keyword for Link objects.
    """

    _CIF_CategoryName: ClassVar[str] = 'struct_conn'
    """
    Name of the CIF category that contains information for Link objects.
    """
    _CIF_CategoryElementTypes: ClassVar[dict[str, set]] = {'conn_type_id': {'covale', 'metalc'}}
    """
    CIF category element types for Link objects; a CIF Category is effectively a list of dictionaries, and _CIF_CategoryElementTypes[keyname] is a set of values that are valid for that key, indicating the element is to be interpreted as a Link.
    """

    _objcat: ClassVar[str] = 'topol'
    """
    Category of the Link object.
    This categorization is used to group Link objects in the object manager.
    """

    _patch_atomnames: ClassVar[dict[str, list[str]]] = {
        'NGLA':['ND2','C1'],
        'NGLB':['ND2','C1'],
        'SGPA':['OG','C1'],
        'SGPB':['OG','C1'],
        '11aa':['O1','C1'],
        '11ab':['O1','C1'],
        '11bb':['O1','C1'],
        '12aa':['O2','C1'],
        '12ab':['O2','C1'],
        '12ba':['O2','C1'],
        '12bb':['O2','C1'],
        '13aa':['O3','C1'],
        '13ab':['O3','C1'],
        '13ba':['O3','C1'],
        '13bb':['O3','C1'],
        '14aa':['O4','C1'],
        '14ab':['O4','C1'],
        '14ba':['O4','C1'],
        '14bb':['O4','C1'],
        '16AT':['O6','C1'],
        '16BT':['O6','C1'],
        'SA26AT':['O6','C2'],
        'SA28AA':['O8','C2'],
        'SA29AT':['O9','C2'],
        'ZNHE':['ZN','NE2'],
        'ZNHD':['ZN','ND2']
    }
    """
    A dictionary mapping atom names to their corresponding CHARMM36 patch names.
    """

    @classmethod
    def _adapt(cls, *args, **kwargs) -> dict:
        """
        Adapts the input to a dictionary format suitable for Link instantiation.
        This method is used to convert various input types into a dictionary of parameters.
        """
        if args and isinstance(args[0], str):
            input_dict = Link._from_shortcode(args[0])
            return input_dict
        elif args and isinstance(args[0], PDBRecord):
            return Link._from_pdbrecord(args[0])
        elif args and isinstance(args[0], CIFdict):
            return Link._from_cifdict(args[0])
        elif args and isinstance(args[0], PSFLinkPatch):
            return Link._from_psflinkpatch(args[0])
        return super()._adapt(*args, **kwargs)
    
    @staticmethod
    def _from_pdbrecord(pdbrecord: PDBRecord) -> dict:
        return {
            'name1': pdbrecord.name1,
            'resname1': pdbrecord.residue1.resName,
            'chainID1': pdbrecord.residue1.chainID,
            'resid1': ResID(pdbrecord.residue1.seqNum, pdbrecord.residue1.iCode),
            'name2': pdbrecord.name2,
            'resname2': pdbrecord.residue2.resName,
            'chainID2': pdbrecord.residue2.chainID,
            'resid2': ResID(pdbrecord.residue2.seqNum, pdbrecord.residue2.iCode),
            'altloc2': pdbrecord.altLoc2,
            'altloc1': pdbrecord.altLoc1,
            'sym1': pdbrecord.sym1,
            'sym2': pdbrecord.sym2,
            'link_distance': pdbrecord.length,
            'segname1': pdbrecord.residue1.chainID,
            'segname2': pdbrecord.residue2.chainID,
            'empty': False
        }

    @staticmethod
    def _from_cifdict(cd: CIFdict) -> dict:
        resseqnum1 = int(cd['ptnr1_label_seq_id']) if cd['ptnr1_label_seq_id'] != '.' else int(cd['ptnr1_auth_seq_id'])
        resseqnum2 = int(cd['ptnr2_label_seq_id']) if cd['ptnr2_label_seq_id'] != '.' else int(cd['ptnr2_auth_seq_id'])
        insertion1 = cd['pdbx_ptnr1_pdb_ins_code'] if cd['pdbx_ptnr1_pdb_ins_code'] != '.' else ''
        insertion2 = cd['pdbx_ptnr2_pdb_ins_code'] if cd['pdbx_ptnr2_pdb_ins_code'] != '.' else ''
        resid1 = ResID(resseqnum1, insertion1)
        resid2 = ResID(resseqnum2, insertion2)
        return {
                'name1': cd['ptnr1_label_atom_id'],
                'altloc1': cd['pdbx_ptnr1_label_alt_id'],
                'resname1': cd['ptnr1_label_comp_id'],
                'chainID1': cd['ptnr1_label_asym_id'],
                'resid1': resid1,
                'name2': cd['ptnr2_label_atom_id'],
                'altloc2': cd['pdbx_ptnr2_label_alt_id'],
                'resname2': cd['ptnr2_label_comp_id'],
                'chainID2': cd['ptnr2_label_asym_id'],
                'resid2': resid2,
                'sym1': cd.get('ptnr1_symmetry', ''),
                'sym2': cd.get('ptnr2_symmetry', ''),
                'link_distance': float(cd.get('pdbx_dist_value', 0.0)),
                'segname1': cd['ptnr1_label_asym_id'],
                'segname2': cd['ptnr2_label_asym_id'],
                'empty': False
            }

    @staticmethod
    def _from_psflinkpatch(L: PSFLinkPatch) -> dict:
        idict = {
            'chainID1': L.seg1,
            'resid1': L.resid1,
            'chainID2': L.seg2,
            'resid2': L.resid2,
            'segname1': L.seg1,
            'segname2': L.seg2,
            'patchname': L.patchname if hasattr(L, 'patchname') else '',
            'patchhead': 1,  # default order
            'name1': Link._patch_atomnames[L.patchname][0],
            'name2': Link._patch_atomnames[L.patchname][1],
            'empty': False,
            }
        return idict

    @staticmethod
    def _from_shortcode(raw: str) -> dict:
        """
        Create a Link.Adapter instance from a string representation of a link.
        The string should be in the format 'C1_R1_A1-C2_R2_A2', where:
        - C1 is the chain ID of the first residue
        - R1 is the residue ID of the first residue
        - A1 is the atom name of the first residue that is part of the link
        - C2 is the chain ID of the second residue
        - R2 is the residue ID of the second residue
        - A2 is the atom name of the second residue that is linked to A1 of R1 of C1
        """
        I, J = raw.split('-')
        s1, ri1, a1 = I.split('_')
        resid1 = ResID(ri1)
        s2, ri2, a2 = J.split('_')
        resid2 = ResID(ri2)
        input_dict = {
            'chainID1': s1,
            'resid1': resid1,
            'name1': a1,
            'chainID2': s2,
            'resid2': resid2,
            'name2': a2,
            'empty': False
        }
        return input_dict


[docs]
    def shortcode(self) -> str:
        """
        Returns a string representation of the link in the format 'C1_R1_A1-C2_R2_A2'.
        """
        return f"{self.chainID1}_{self.resid1.resid}_{self.name1}-{self.chainID2}_{self.resid2.resid}_{self.name2}"



[docs]
    def set_patchname(self, force=False):
        """
        Set the charmmff patch name for this link.
        This method assigns a patch name based on the residues involved in the link.
        If the patch name is already set and ``force`` is False, it will not change the patch name.
        This method does not return any value. It modifies the ``patchname`` attribute of the Link object.
        It checks the residues involved in the link and assigns a patch name based on predefined mappings.
        
        Parameters
        ----------
        force : bool, optional
            If True, forces the patch name to be set even if it is already assigned.
            Default is False.
        """
        if hasattr(self,'patchname') and (self.patchname and len(self.patchname)>0) and not force:
            logger.debug(f'Patchname for {str(self)} already set to {self.patchname}')
            return
        self.patchname=''
        self.patchhead=1
        logger.debug(f'patch assignment for link {str(self)}')
        if not self.residue1 and not self.residue2:
            logger.debug(f'missing residue')
            logger.debug(f'1 {self.residue1}')
            logger.debug(f'2 {self.residue2}')
            return
        logger.debug(f'resname1 {self.resname1} segtype2 {self.segtype2}')
        my_res12 = [self.residue1, self.residue2]
        if self.resname1 == 'ASN' and self.segtype2 == 'glycan':
            # N-linked glycosylation site (toppar_all36_carb_glycopeptide)
            ICmap=[
                {'ICatomnames': ['1CG','1ND2','2C1','2O5'],
                 'mapping': {'NGLA':168.99,'NGLB':-70.91}}
            ]
            self.patchname = ic_reference_closest(my_res12, ICmap)
        elif self.resname1 == 'SER' and self.segtype2 == 'glycan':
            # O-linked to serine (toppar_all36_carb_glycopeptide)
            ICmap=[
                {'ICatomnames':['1CB','1OG','2C1','2O5'],
                 'mapping':{'SGPA':45.37,'SGPB':19.87}}
            ]
            self.patchname = ic_reference_closest(my_res12, ICmap)
        elif self.resname1 == 'THR' and self.segtype2 == 'glycan':
            # O-linked to threonine (toppar_all36_carb_glycopeptide)
            ICmap=[
                {'ICatomnames':['1CB','1OG1','2C1','2O5'],
                 'mapping':{'SGPA':69.9,'SGPB':33.16}}
            ]
            self.patchname = ic_reference_closest(my_res12, ICmap)
        elif self.name2 == 'C1' and self.segtype2 == 'glycan' and self.segtype1 == 'glycan':
            # all taken from 1xyz pres in top_all36_carb.rtf
            # including PHI angles of ICs with atoms in both residues
            if self.name1 == 'O1':  # 1->1 link
                ICmap=[
                    {'ICatomnames':'1O5  1C1  1O1  2C1'.split(),
                     'mapping':{'11aa':103.46,'11ab':121.75,'11bb':-56.58}
                    },
                    {'ICatomnames':'1C1  1O1  2C1  2O5'.split(),
                     'mapping':{'11aa':103.54,'11ab':51.80,'11bb':-79.64}
                    },
                    {'atomnames':'1O1  2C1  2O5  2C5'.split(),
                     'mapping':{'11aa':64.56,'11ab':167.51,'11bb':172.18}}
                ]
                self.patchname = ic_reference_closest(my_res12, ICmap)
            elif self.name1 == 'O2':  # 1->2 link
                ICmap=[
                    {'ICatomnames':'1C1  1C2  1O2  2C1'.split(),
                     'mapping':{'12aa':-132.81,'12ab':115.32,'12ba':-133.78,'12bb':117.14}
                    },
                    {'ICatomnames':'1C2  1O2  2C1  2O5'.split(),
                     'mapping':{'12aa':47.16,'12ab':86.93,'12ba':168.07,'12bb':-168.07}
                    }
                ]
                self.patchname = ic_reference_closest(my_res12, ICmap)
            elif self.name1 == 'O3':  # 1->3 link
                ICmap=[
                    {'ICatomnames':'1C2  1C3  1O3  2C1'.split(),
                     'mapping':{'13aa':113.19,'13ab':-141.32,'13ba':-131.68,'13bb':-141.32}
                    },
                    {'ICatomnames':'1C3  1O3  2C1  2O5'.split(),
                     'mapping':{'13aa':65.46,'13ab':65.46,'13ba':-100.16,'13bb':-130.16}
                    }
                ]
                self.patchname = ic_reference_closest(my_res12, ICmap)
            elif self.name1 == 'O4':  # 1->4 link
                ICmap=[
                    {'ICatomnames':'1C3  1C4  1O4  2C1'.split(),
                     'mapping':{'14aa':-86.29,'14ab':72.71,'14ba':-86.3,'14bb':81.86}},
                    {'ICatomnames':'1C4  1O4  2C1  2O5'.split(),
                     'mapping':{'14aa':133.57,'14ab':48.64,'14ba':-130.97,'14bb':-130.97}}
                ]
                self.patchname = ic_reference_closest(my_res12, ICmap)
            elif self.name1 == 'O6':  # 1->6 link
                ICmap=[
                    {'ICatomnames':'1C6  1O6  2C1  2O5'.split(),
                     'mapping':{'16AT':71.24,'16BT':-63.49}}
                ]
                self.patchname = ic_reference_closest(my_res12, ICmap)
        elif self.name2 == 'C2' and self.segtype2 == 'glycan' and self.segtype1 == 'glycan':
            if self.name1 == 'O6':
                self.patchname = 'SA26AT'
            elif self.name1 == 'O8':
                self.patchname = 'SA28AA'
            elif self.name1 == 'O9':
                self.patchname = 'SA29AT'
        elif self.name1 == 'O6' and self.name2 == 'C2':
            self.patchname = 'SA26AT'
        elif 'ZN' in self.resname1 and 'HIS' in self.resname2:
                if self.name2 == 'NE2':
                    self.patchname = 'ZNHE'
                else:
                    self.patchname = 'ZNHD'
                self.patchhead = 2
        elif 'HIS' in self.resname1 and 'ZN' in self.resname2:
                if self.name1 == 'NE2':
                    self.patchname = 'ZNHE'
                else:
                    self.patchname = 'ZNHD'
        elif 'HIS' in self.resname1 and 'HEM' in self.resname2:
                if self.name1 == 'NE2':
                    self.patchname = 'PHEM'
                else:
                    self.patchname = 'UNFOUND'
        elif 'HEM' in self.resname1 and 'HIS' in self.resname2:
                if self.name2 == 'NE2':
                    self.patchname = 'PHEM'
                else:
                    self.patchname = 'UNFOUND'
                self.patchhead = 2
        else:
            logger.warning(f'Could not identify patch for link {self.resname1}-{self.resname2}')
            self.patchname = 'UNFOUND'



[docs]
    def update_residue(self, idx, **fields):
        """
        Updates the chainID of the residue in the link based on the index provided
        
        Parameters
        ----------
        idx: int
            1 for residue1, 2 for residue2
        fields: dict
            a dictionary of fields to update; currently only 'chainID' is supported
        """
        if idx==1:
            if 'chainID' in fields:
                new_chainID=fields['chainID']
                if self.chainID1!=new_chainID:
                    logger.debug(f'updating link residue1 chainID from {self.chainID1} to {new_chainID}')
                    if self.segname1 is not None and self.segname1 == self.chainID1:
                        self.segname1 = new_chainID
                    self.chainID1=new_chainID
                    self.residue1.set(chainID=new_chainID)
        elif idx==2:
            if 'chainID' in fields:
                new_chainID=fields['chainID']
                if self.chainID2!=new_chainID:
                    logger.debug(f'updating link residue2 chainID from {self.chainID2} to {new_chainID}')
                    if self.segname2 is not None and self.segname2 == self.chainID2:
                        self.segname2 = new_chainID
                    self.chainID2=new_chainID
                    self.residue2.set(chainID=new_chainID)


    def __str__(self):
        # if this a nascent link object that hasn't been processed by assigning residue objects to it,
        # just regurgitate the chainID and residue ID information that was used to create it.
        if not hasattr(self,'residue1') or not self.residue1 or not hasattr(self,'residue2') or not self.residue2:
            return f'{self.chainID1}_{self.resname1}{self.resid1.resid}-{self.chainID2}_{self.resname2}{self.resid2.resid}'
        # otherwise, access the string representation of the residues
        return f'{str(self.residue1)}-{str(self.residue2)}'



[docs]
class LinkList(BaseObjList[Link]):
    """
    A class for handling lists of Links
    """


[docs]
    @classmethod
    def from_pdb(cls, pdb: PDBRecordDict) -> 'LinkList':
        """
        Create a LinkList from a PDBRecordDict.
        """
        if Link._PDB_keyword not in pdb:
            return cls([])
        return cls([Link(x) for x in pdb[Link._PDB_keyword]])



[docs]
    @classmethod
    def from_cif(cls, dc: DataContainer) -> 'LinkList':
        """
        Create a LinkList from a CIF DataContainer.
        
        Parameters
        ----------
        dc : DataContainer
            A CIF DataContainer containing the necessary fields to create Link objects.
        
        Returns
        -------
        LinkList
            An instance of LinkList created from the CIF DataContainer.
        """
        L = []
        cif_category = dc.getObj(Link._CIF_CategoryName)
        if cif_category is None:
            return cls(L)  # Return empty LinkList if category not found
        for i in range(len(cif_category)):
            for key, valset in Link._CIF_CategoryElementTypes.items():
                objTypeid = cif_category.getValue(key, i)
                if objTypeid in valset:
                    this_link = Link(CIFdict(cif_category, i))
                    L.append(this_link)
        return cls(L)



[docs]
    def describe(self) -> str:
        """
        Returns a string description of the LinkList.
        
        Returns
        -------
        str
            A string describing the number of links in the list.
        """
        return f'<LinkList with {len(self)} links>'



[docs]
    def assign_residues(self, Residues: "ResidueList") -> tuple["ResidueList", "LinkList"]:
        """
        Assigns residue and atom pointers to each link; sets up the up and down links of both
        residues so that linked residue objects can reference one another; flags residues from
        list of residues passed in that are not assigned to any links

        Parameters
        ----------
        Residues: ResidueList
            list of residues to assign to links; this is typically a list of all residues in the
            structure, but it can also be a list of residues that are not linked to any other
            residues, such as when reading in a set of links from a pre-built psf file.

        Returns
        -------
        Residues: ResidueList
            list of residues from Residues that are not used for any assignments
        """
        logger.debug(f'Links: Assigning residues from list of {len(Residues)} residues')
        # PSF-loaded links store the psfgen segname as chainID1/chainID2. When atoms carry
        # their biological chainID (not their segname), the primary chainID match would fail.
        # Pre-normalise: if no residue matches chainIDN but one matches by segname, update chainIDN.
        segname_map = {(getattr(r, 'segname', None), r.resid): r.chainID
                       for r in Residues if getattr(r, 'segname', None)}
        for link in self.data:
            if not Residues.get(lambda x, _l=link: x.chainID == _l.chainID1 and x.resid == _l.resid1):
                mapped = segname_map.get((link.chainID1, link.resid1))
                if mapped:
                    link.chainID1 = mapped
            if not Residues.get(lambda x, _l=link: x.chainID == _l.chainID2 and x.resid == _l.resid2):
                mapped = segname_map.get((link.chainID2, link.resid2))
                if mapped:
                    link.chainID2 = mapped
        ignored_by_ptnr1 = self.assign_objs_to_attr('residue1', Residues, chainID='chainID1', resid='resid1')
        ignored_by_ptnr2 = self.assign_objs_to_attr('residue2', Residues, chainID='chainID2', resid='resid2')
        for link in self.data:
            link.segtype1 = link.residue1.segtype
            link.segtype2 = link.residue2.segtype
            # shortcodes don't provide resnames, so set them here
            if link.residue1 is not None and link.resname1 is None:
                link.resname1 = link.residue1.resname
            if link.residue2 is not None and link.resname2 is None:
                link.resname2 = link.residue2.resname
            try:
                link.residue1.link_to(link.residue2, link)
            except:
                raise ValueError(f'Bad residue in link')
            if link.patchname is not None and len(link.patchname) > 0:
            # this link was most likely created when reading in patch records from a set of REMARKS in a pre-built psf file.
            # we need to get the precise atom names for this patch
                continue

            link.atom1 = link.residue1.atoms.get(lambda x: x.name == link.name1 and x.altloc == link.altloc1)
            link.atom2 = link.residue2.atoms.get(lambda x: x.name == link.name2 and x.altloc == link.altloc2)
            link.segtype1 = link.residue1.segtype
            link.segtype2 = link.residue2.segtype
            # shortcodes don't provide resnames, so set them here
            if link.residue1 is not None and link.resname1 is None:
                link.resname1 = link.residue1.resname
            if link.residue2 is not None and link.resname2 is None:
                link.resname2 = link.residue2.resname
            link.set_patchname()
        # do cross-assignment to find true orphan links and dangling links
        orphan_1 = ignored_by_ptnr1.assign_objs_to_attr('residue2', Residues, chainID='chainID2', resid='resid2')
        orphan_2 = ignored_by_ptnr2.assign_objs_to_attr('residue1', Residues, chainID='chainID1', resid='resid1')
        orphans = orphan_1 + orphan_2
        rlist = []
        for link in ignored_by_ptnr1:
            rlist, list = link.residue2.get_down_group()
            rlist.insert(0, link.residue2)
            for r in rlist:
                Residues.remove(r)
        return Residues.__class__(rlist), self.__class__(ignored_by_ptnr1 + ignored_by_ptnr2)



[docs]
    def remove_links_to(self, r) -> 'LinkList':
        """
        Remove all links to a specific residue.

        Parameters
        ----------
        Residue: Residue
            The residue to remove links to.

        Returns
        -------
        LinkList
            A new LinkList containing the removed links.
        """
        removed_links = self.__class__([])
        for link in self.data:
            if link.residue1 == r or link.residue2 == r:
                removed_links.append(link)
        for badlink in removed_links:
            self.remove(badlink)
        logger.debug(f'Removed {len(removed_links)} links to residue {str(r)}')
        return removed_links


    # def prune_mutations(self, Mutations: 'MutationList', Segments: 'SegmentList'):
    #     """
    #     Prune off any links and associated objects as a result of mutations

    #     Parameters
    #     ----------
    #     Mutations: MutationList
    #         list of mutations to prune off links; these are typically the mutations that were applied
    #         to the structure before the links were created, so they are not part of the original
    #         structure, but they are part of the current structure.
    #     Segments: SegmentList
    #         Current list of segments in the structure; it is from this list that segments are removed if they become empty as a result of pruning off links that are pruned off due to mutations.

    #     Returns
    #     -------
    #     pruned: dict
    #         A dictionary containing lists of pruned residues, links, and segments.
        
    #         - ``residues``: list of Residue objects that were pruned
    #         - ``links``: list of Link objects that were pruned
    #         - ``segments``: list of Segment objects that were pruned
    #     """
    #     pruned = {'residues': [], 'links': self.__class__([]), 'segments': []}
    #     for m in Mutations.data:
    #         left = self.get(lambda x: x.chainID1 == m.chainID and x.resid1 == m.resid) # links for which partner 1 is the mutation
    #         right = self.get(lambda x: x.chainID2 == m.chainID and x.resid2 == m.resid) # links for which partner 2 is the mutation
    #         if left:  # this is a link in which this mutation is the partner 1
    #             self.remove(left) # get rid of this link
    #             # we need to remove residue2 and everything downstream
    #             # remove downstream residues!
    #             rlist, llist = left.residue2.get_down_group()
    #             rlist.insert(0, left.residue2)
    #         elif right: # this is a link in which this mutation is the right member (should be very rare)
    #             self.remove(right)
    #             rlist, llist = right.residue2.get_down_group()
    #             rlist.insert(0, right.residue2)
    #         if rlist and llist:
    #             # logger.debug(f'Deleting residues down from and including {str(rlist[0])} due to a mutation')
    #             S = Segments.get_segment_of_residue(rlist[0])
    #             for r in rlist:
    #                 # logger.debug(f'...{str(r)}')
    #                 S.residues.remove(r)
    #                 pruned['residues'].append(r)
    #             if len(S.residues) == 0:
    #                 # logger.debug(f'All residues of {S.psfgen_segname} are deleted; {S.psfgen_segname} is deleted')
    #                 Segments.remove(S)
    #                 pruned['segments'].append(S)
    #             for l in llist:
    #                 self.data.remove(l)
    #                 pruned['links'].append(l)
    #     return pruned


[docs]
    def apply_segtypes(self, map):
        """
        Apply segtype values to each of the two residues using the map
        
        Parameters
        ----------
        map: dict
            map of segtypes for given resnames
        """
        self.map_attr('segtype1','resname1', map)
        self.map_attr('segtype2','resname2', map)



[docs]
    def report(self) -> str:
        """
        Report the string representation of each link in the list.
        """
        return "\n".join([str(l) for l in self])


    

[docs]
def ic_reference_closest(res12: list["Residue"], ICmaps: list[dict]) -> str:
    """
    Given the two Residues in res12 and the maps in ICmaps, 
    return the mapping key to which the given IC values are
    closest in a Euclidean sense.

    This method will identify the four atoms of the IC and reference
    them directly when calling the measure_dihedral function. 
    
    The list of computed dihedral values from the set of atoms is a "point"
    in "IC-space", and each patch has its own "reference point" in this space.

    The reference point to which the point is closest is identified as the 
    desired result.

    Parameters
    ----------
    res12: list
       exactly two Residue objects which must have lists of atoms attributes
    ICMaps : list of dict
        A list of dictionaries, each with the following structure:

        +--------------+-----------------------------------------------------------+
        | Key          | Description                                               |
        +==============+===========================================================+
        | ICatomnames  | List of 4 atom names as they appear in the CHARMM FF IC.  |
        +--------------+-----------------------------------------------------------+
        | mapping      | Dictionary mapping patch names to IC values.              |
        +--------------+-----------------------------------------------------------+
    """
    for ic in ICmaps:
        # logger.debug(f'icmap {ic}')
        ic['atoms'] = []
        for n in ic['ICatomnames']:
            r = int(n[0])-1
            an = n[1:]
            at = res12[r].atoms.get(lambda x: x.name == an)
            ic['atoms'].append(at)
            # logger.debug(f'Assigned atom {at.name} of {at.resname}{at.resseqnum}')
    map_points = {}
    the_point = []
    for ic in ICmaps:
        value = measure_dihedral(*(ic['atoms'])) * 180.0 / np.pi
        # logger.debug(f'{ic["ICatomnames"]} value {value:.2f}')
        the_point.append(value)
        for m, v in ic['mapping'].items():
            if not m in map_points:
                map_points[m] = []
            map_points[m].append(v)
    for k, v in map_points.items():
        map_points[k] = np.array(v)
    the_point = np.array(the_point)
    # logger.debug(f'ic the point: {the_point}')
    # calculate Euclidean distance adhering to the periodicity
    # of dihedral-angle space
    displacements = {k: (the_point - v) for k, v in map_points.items()}
    for n, d in displacements.items():
        for i in range(len(d)):
            if d[i] < 180.0:
                d[i] += 180.0
            if d[i] > 180.0:
                d[i] -= 180.0
    norms = {k: np.linalg.norm(d) for k, d in displacements.items()}
    # logger.debug(f'norms {norms}')
    the_one = [k for (k, v) in sorted(norms.items(), key=lambda x: x[1])][0]
    # logger.debug(f'returning {the_one}')
    return the_one