# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
Definition of the :class:`PsfgenTask` class for handling invocations of psfgen which create a molecule from a base PDB/mmCIF file.
Usage is described in the :ref:`subs_buildtasks_psfgen` documentation.
"""
import logging
import networkx as nx
import os
from copy import deepcopy
from pathlib import Path
from typing import ClassVar
from .basetask import VMDTask
from ..molecule.chainidmanager import ChainIDManager
from ..molecule.molecule import Molecule
from ..core.objmanager import ObjManager
from ..core.artifacts import *
from ..core.errors import PestiferBuildError
from ..objs.graft import GraftList
from ..psfutil.psfatom import PSFAtomList
from ..psfutil.psfcontents import PSFContents
from ..scripters import VMDScripter, PsfgenScripter
from ..util.util import write_residue_map
logger = logging.getLogger(__name__)
[docs]
class PsfgenTask(VMDTask):
"""
A class for handling invocations of psfgen which create a molecule from a base PDB/mmCIF file
or from a PSF file generated previously by psfgen
This class is a descendant of the :class:`BaseTask <pestifer.core.basetask.BaseTask>` class, and as such it has two parameters.
Parameters
----------
config_specs : dict
Configuration specifications for the task.
"""
_yaml_header: ClassVar[str] = 'psfgen'
"""
YAML header for the PsfgenTask, used to identify the task in configuration files as part of a ``tasks`` list.
"""
[docs]
def provision(self, packet: dict = {}):
"""
Provision the task with the standard packet, and initialize molecule dict and base molecule placeholders.
"""
super().provision(packet=packet)
self.molecules = {}
self.base_molecule: Molecule = None
[docs]
def do(self):
"""
Execute the psfgen task.
This method initializes the task, ingests the base molecule, and runs the psfgen process.
It also handles any necessary coormods and declashing of loops and glycans based on the task specifications.
The results of the psfgen process are saved as a PSF/PDB fileset, and the state is updated accordingly.
"""
logger.debug('ingesting molecule(s)')
self.ingest_molecules()
logger.debug(f'base mol num images {self.base_molecule.num_images()}')
logger.debug('Running first psfgen')
self.result = self.psfgen()
if self.result != 0:
return self.result
# we now have a full coordinate set, so we can do coormods
self.coormods()
self.declash()
return self.result
[docs]
def coormods(self):
"""
Perform coordinate modifications based on the specifications provided in the task.
"""
coormods = self.objmanager.get('coord',{})
logger.debug(f'psfgen task has {len(coormods)} coormods:')
logger.debug(';'.join([str(_) for _ in coormods]))
if coormods:
logger.debug(f'performing coormods')
for objtype, objlist in coormods.items():
if len(objlist) > 0:
self.next_basename(objtype)
vm: VMDScripter = self.scripters['vmd']
packages = []
if objtype == 'crotations':
packages.append('PestiferCRot')
vm.newscript(self.basename, packages=packages)
state: StateArtifacts = self.get_current_artifact('state')
vm.load_psf_pdb(state.psf.name, state.pdb.name, new_molid_varname='mCM')
match objtype:
case 'crotations':
for transform in self.base_molecule.active_biological_assembly.transforms.data:
vm.write_crots(objlist, chainIDmap=transform.chainIDmap)
case 'orient':
vm.write_orients(objlist)
case 'rottrans':
vm.write_rottranslist(objlist)
vm.write_pdb(self.basename, 'mCM')
vm.writescript()
vm.runscript()
self.register(dict(
pdb=PDBFileArtifact(self.basename),
psf=state.psf,
xsc=state.xsc), key='state', artifact_type=StateArtifacts)
self.register(self.basename, key='tcl', artifact_type=VMDScriptArtifact)
self.register(self.basename, key='log', artifact_type=VMDLogFileArtifact)
[docs]
def declash(self):
"""
Manages the declashing of protein loops and glycans
"""
self.min_loop_length = self.specs['source'].get('sequence',{}).get('loops',{}).get('min_loop_length',0)
self.declash_counts = self.base_molecule.loop_counts(min_loop_length=self.min_loop_length)
num_images = self.base_molecule.num_images()
for segtype in ['protein','nucleicacid']:
self.declash_counts[segtype] *= num_images
self.declash_counts['glycan'] = self.base_molecule.nglycans() * num_images
for segtype,speckey in zip(['protein','nucleicacid','glycan'],['loops','loops','glycans']):
if self.declash_counts[segtype]>0 and self.specs['source']['sequence'][speckey]['declash']['maxcycles']>0:
logger.debug(f'Declashing {self.declash_counts[segtype]} {segtype} segments')
self.declash_segtype(self.specs['source']['sequence'][speckey]['declash'], segtype=segtype)
[docs]
def resi_topologies(self):
"""
Collect the topology files that are needed for the residues in the base molecule.
"""
resis = set([x.resname for x in self.base_molecule.asymmetric_unit.residues.data])
logger.debug(f'Residue names in base molecule: {resis}')
CC = self.resource_manager.charmmff_content
new_topfiles = set()
for resname in resis:
topfile = CC.get_topfile_of_resname(resname)
if topfile:
new_topfiles.add(topfile)
else:
raise PestiferBuildError(f'No topology file found for residue name {resname}')
return list(new_topfiles)
[docs]
def patch_topologies(self):
"""
Collect the topology files that are needed for the patches and links in the base molecule.
This method retrieves the CHARMMFF topology files associated with the patches and links defined
in the base molecule's object manager.
"""
objmanager = self.base_molecule.objmanager
seqmods = objmanager.get('seq', {})
patches = seqmods.get('patches', [])
CC = self.resource_manager.charmmff_content
new_topfiles = set()
# logger.debug(f'New topologies: {new_topfiles}')
for patch in patches:
topfile = CC.get_topfile_of_resname(patch.patchname)
if topfile:
new_topfiles.add(topfile)
# logger.debug(f'New topologies: {new_topfiles}')
topomods = objmanager.get('topol', {})
links = topomods.get('links', [])
for link in links:
topfile = CC.get_topfile_of_resname(link.patchname)
if topfile:
new_topfiles.add(topfile)
# logger.debug(f'New topologies: {new_topfiles}')
return list(new_topfiles)
[docs]
def psfgen(self):
"""
Run the psfgen process to generate a PSF and PDB fileset from the base molecule.
"""
self.next_basename('build')
pg: PsfgenScripter = self.scripters['psfgen']
required_topology_files = list(set(self.patch_topologies() + self.resi_topologies()))
pg.newscript(self.basename, packages=['PestiferCRot'], additional_topologies=required_topology_files)
pg.set_molecule(self.base_molecule, altcoords=self.specs.get('source', {}).get('altcoords', None))
pg.describe_molecule(self.base_molecule)
pg.writescript(self.basename)
result = pg.runscript(keep_tempfiles=True)
if result != 0:
return result
# register PSF, PDB, log, and all charmmff files in the pipeline context
self.register(self.basename, key='tcl', artifact_type=PsfgenInputScriptArtifact)
self.register(self.basename, key='log', artifact_type=PsfgenLogFileArtifact)
self.register(dict(
pdb=PDBFileArtifact(self.basename, pytestable=True),
psf=PSFFileArtifact(self.basename, pytestable=True)), key='state', artifact_type=StateArtifacts)
self.register([CharmmffTopFileArtifact(x) for x in pg.topologies if x.endswith('.rtf')], key='charmmff_topfiles', artifact_type=CharmmffTopFileArtifacts)
self.register([CharmmffStreamFileArtifact(x) for x in pg.topologies if x.endswith('.str')], key='charmmff_streamfiles', artifact_type=CharmmffStreamFileArtifacts)
self.register([PDBFileArtifact(x) for x in pg.F if x.endswith('.pdb')], key='psfgen_temp_pdbs', artifact_type=PDBFileArtifactList)
self.pipeline.show_artifacts(header='Artifacts after psfgen')
self.strip_remarks()
return 0
[docs]
def declash_segtype(self, specs: dict, segtype='protein'):
"""
Declash loops in the base molecule using the custom ``PestiferDeclash`` TcL package.
This method generates a VMD script to identify and declash loops in the molecular structure.
It uses the ``PestiferDeclash`` package to perform the declashing operation, which involves identifying
and modifying the coordinates of atoms in loop regions.
"""
specs_to_declashers = specs#['source']['sequence']['declash']
if segtype == 'protein':
self.declash_protein_loops(specs_to_declashers)
elif segtype == 'nucleicacid':
self.declash_na_loops(specs_to_declashers)
elif segtype == 'glycan':
self.declash_glycans(specs_to_declashers)
else:
raise PestiferBuildError(f'Unknown segment type {segtype} for declashing')
[docs]
def declash_protein_loops(self, specs: dict):
"""
Declash loops in the base molecule using the custom ``PestiferDeclash`` TcL package.
This method generates a VMD script to identify and declash loops in the molecular structure.
It uses the ``PestiferDeclash`` package to perform the declashing operation, which involves identifying
and modifying the coordinates of atoms in loop regions.
"""
mol = self.base_molecule
cycles = specs['maxcycles']
self.next_basename('declash-loops')
state: StateArtifacts = self.get_current_artifact('state')
psf = state.psf.name
pdb = state.pdb.name
vt: VMDScripter = self.scripters['vmd']
vt.newscript(self.basename, packages=['PestiferDeclash'])
vt.load_psf_pdb(psf, pdb, new_molid_varname='mLL')
vt.write_protein_loop_lines(mol, cycles=cycles, include_c_termini=specs['include_C_termini'])
vt.write_pdb(self.basename, 'mLL')
vt.writescript()
vt.runscript()
self.register(dict(
pdb=PDBFileArtifact(self.basename),
psf=state.psf), key='state', artifact_type=StateArtifacts)
self.register(self.basename, key='tcl', artifact_type=VMDScriptArtifact)
self.register(self.basename, key='log', artifact_type=VMDLogFileArtifact)
[docs]
def declash_na_loops(self,specs):
"""
Declash nucleic acid loops in the base molecule using the custom ``PestiferDeclash`` TcL package.
This method generates a VMD script to identify and declash nucleic acid loops in the molecular structure.
It uses the ``PestiferDeclash`` package to perform the declashing operation, which involves identifying
and modifying the coordinates of atoms in nucleic acid loop regions.
"""
cycles = specs['maxcycles']
clashdist = specs['clashdist']
self.next_basename('declash-na-loops')
vt: VMDScripter = self.scripters['vmd']
state: StateArtifacts = self.get_current_artifact('state')
psf = state.psf.name
pdb = state.pdb.name
outpdb = f'{self.basename}.pdb'
vt.newscript(self.basename, packages=['PestiferDeclash'])
vt.addline(f'mol new {psf}')
vt.addline(f'mol addfile {pdb} waitfor all')
vt.addline(f'set a [atomselect top all]')
vt.addline(f'set molid [molinfo top get id]')
nna = self._write_na_loops(vt)
vt.addline(f'set nna {nna}')
vt.addline(f'vmdcon -info "Declashing $nna nucleic acid loops; clashdist {clashdist}; maxcycles {cycles}"')
vt.addline(r'for {set i 0} {$i<$nna} {incr i} {')
vt.addline(f' declash_pendant $molid $na_idx($i) $rbonds($i) $movers($i) {cycles} {clashdist}')
vt.addline(r'}')
vt.addline(f'$a writepdb {outpdb}')
vt.writescript()
logger.debug(f'Declashing {nna} nucleic acid loops')
vt.runscript(progress_title='declash-nucleic-acid-loops')
self.register(dict(
pdb=PDBFileArtifact(self.basename),
psf=state.psf), key='state', artifact_type=StateArtifacts)
self.register(self.basename, key='tcl', artifact_type=VMDScriptArtifact)
self.register(self.basename, key='log', artifact_type=VMDLogFileArtifact)
def _write_na_loops(self, vt: VMDScripter, **options):
mol = self.base_molecule
au = mol.asymmetric_unit
state: StateArtifacts = self.get_current_artifact('state')
psf = state.psf.name
logger.debug(f'ingesting {psf}')
struct = PSFContents(psf, parse_topology=['bonds'])
logger.debug(f'PSF has {len(struct.atoms)} atoms; extracting nucleic acid atoms...')
na_atoms: PSFAtomList = struct.atoms.get(lambda x: x.segtype == 'nucleicacid')
logger.debug(f'PSF has {len(na_atoms)} nucleic acid atoms')
# my_rep = list(set([(x.chainID, x.resid) for x in na_atoms.data]))
# my_rep.sort(key=lambda x: (x[0], x[1]))
logger.debug(f'Getting loops from {len(na_atoms)} nucleic acid atoms in PSF file {psf}')
# logger.debug(f'{my_rep}')
min_length = self.min_loop_length
include_c_termini = options.get('include_c_termini', False)
i = 0
SL = [S for S in au.segments.data if S.segtype == 'nucleicacid']
logger.debug(f'Asymmetric unit has {len(SL)} nucleic acid segments')
for S in SL:
asymm_segname = S.segname
logger.debug(f'Processing segment {asymm_segname} for nucleic acid loops')
n_subsegs = len(S.subsegments)
for b in S.subsegments.data:
lr_resid = S.residues[b.bounds[0]].resid
rr_resid = S.residues[b.bounds[1]].resid
logger.debug(f'Processing subsegment {b.state} for segname {asymm_segname} with bounds {lr_resid}-{rr_resid}')
is_c_terminus = (S.subsegments.index(b) == (n_subsegs - 1))
is_processible = b.state == 'MISSING' and b.num_items() >= min_length
if is_processible and (not include_c_termini) and is_c_terminus:
logger.debug(f'A.U. C-terminal loop {b.state} declashing is skipped')
is_processible = False
if is_processible:
logger.debug(f'Processing loop {b.state} {b.bounds} for segname {asymm_segname}')
loop_atoms = PSFAtomList([x for x in na_atoms.data if x.segname == asymm_segname and x.resid >= lr_resid and x.resid <= rr_resid])
logger.debug(f'Loop {b.state} has {len(loop_atoms)} atoms from PSFAtomList')
na_graph = loop_atoms.graph()
logger.debug(f'{na_graph}')
G = [na_graph.subgraph(c).copy() for c in nx.connected_components(na_graph)]
assert len(G) == 1, f'NA loop {b.state} has more than one connected component'
logger.debug(f'Loop {b.state} has {len(loop_atoms)} atoms')
g = G[0]
serials = [x.serial for x in g]
for at in g:
lig_ser = [x.serial for x in at.ligands]
for k, ls in enumerate(lig_ser):
if not ls in serials:
at.is_root = True
rp = at.ligands[k]
logger.debug(f'-> Atom {str(at)} is the root, bound to atom {str(rp)}')
indices = ' '.join([str(x.serial-1) for x in g])
vt.addline(f'set na_idx({i}) [list {indices}]')
vt.addline(f'set rbonds({i}) [list]')
vt.addline(f'set movers({i}) [list]')
for bond in nx.bridges(g):
ai, aj = bond
if not (ai.isH() or aj.isH()) and not ai.is_pep(aj):
g.remove_edge(ai, aj)
CC = [g.subgraph(c).copy() for c in nx.connected_components(g)]
assert len(CC) == 2, f'Bond {ai.serial-1}-{aj.serial-1} when cut makes more than 2 components'
for sg in CC:
is_root = any([hasattr(x, 'is_root') for x in sg])
if not is_root:
if ai in sg:
sg.remove_node(ai)
if aj in sg:
sg.remove_node(aj)
if len(sg) > 1 or (len(sg) == 1 and not [x for x in sg.nodes][0].isH()):
mover_serials = [x.serial for x in sg]
mover_indices = " ".join([str(x-1) for x in mover_serials])
logger.debug(f'{str(ai)}--{str(aj)} is a rotatable bridging bond')
vt.addline(f'lappend rbonds({i}) [list {ai.serial-1} {aj.serial-1}]')
logger.debug(f' -> movers: {" ".join([str(x) for x in sg])}')
vt.addline(f'lappend movers({i}) [list {mover_indices}]')
g.add_edge(ai, aj)
i += 1
return i
[docs]
def declash_glycans(self, specs):
"""
Declash glycans in the base molecule using the custom ``PestiferDeclash`` TcL package.
This method generates a VMD script to identify and declash glycans in the molecular structure.
It uses the ``PestiferDeclash`` package to perform the declashing operation,
which involves identifying and modifying the coordinates of atoms in glycan regions.
"""
mol: Molecule = self.base_molecule
cycles: int = specs['maxcycles']
clashdist: float = specs['clashdist']
if not mol.nglycans() or not cycles:
logger.debug(f'Glycan declashing is intentionally not done.')
return
self.next_basename('declash-glycans')
outpdb = f'{self.basename}.pdb'
state: StateArtifacts = self.get_current_artifact('state')
psf = state.psf.name
pdb = state.pdb.name
vt: VMDScripter = self.scripters['vmd']
vt.newscript(self.basename, packages=['PestiferDeclash'])
vt.addline(f'mol new {psf}')
vt.addline(f'mol addfile {pdb} waitfor all')
vt.addline(f'set a [atomselect top all]')
vt.addline(f'set molid [molinfo top get id]')
nglycans = self._write_glycans(vt)
vt.addline(f'vmdcon -info "Declashing $nglycans glycans; clashdist {clashdist}; maxcycles {cycles}"')
vt.addline(r'for {set i 0} {$i<$nglycans} {incr i} {')
vt.addline(f' declash_pendant $molid $glycan_idx($i) $rbonds($i) $movers($i) {cycles} {clashdist}')
vt.addline(r'}')
vt.addline(f'$a writepdb {outpdb}')
vt.writescript()
logger.debug(f'Declashing {nglycans} glycans')
vt.runscript(progress_title='declash-glycans')
self.register(dict(
pdb=PDBFileArtifact(self.basename),
psf=state.psf), key='state', artifact_type=StateArtifacts)
self.register(self.basename, key='tcl', artifact_type=VMDScriptArtifact)
self.register(self.basename, key='log', artifact_type=VMDLogFileArtifact)
def _write_glycans(self, fw: VMDScripter):
state: StateArtifacts = self.get_current_artifact('state')
psf = state.psf.name
logger.debug(f'ingesting {psf}')
struct = PSFContents(psf, parse_topology=['bonds'])
logger.debug(f'Making graph structure of glycan atoms...')
glycanatoms = struct.atoms.get(lambda x: x.segtype == 'glycan')
logger.debug(f'{len(glycanatoms)} total glycan atoms')
glycangraph = glycanatoms.graph()
G = [glycangraph.subgraph(c).copy() for c in nx.connected_components(glycangraph)]
logger.debug(f'Preparing declash input for {len(G)} glycans')
fw.addline(f'set nglycans {len(G)}')
for i, g in enumerate(G):
rooted = False
logger.debug(f'Glycan {i} has {len(g)} atoms')
serials = [x.serial for x in g] # list of atoms in glycan by serial
for j, at in enumerate(g):
lig_ser = [x.serial for x in at.ligands] # list of ligands of this atom by serial
logger.debug(f' {j}: {str(at)} {len(lig_ser)} ligands')
for k, ls in enumerate(lig_ser):
if not ls in serials: # this atom has a ligand that is not in the glycan
at.is_root = True
rooted = True
rp = at.ligands[k]
logger.debug(f' -> {str(at)} ({at.resname} {at.segtype}) is the root, bound to outside atom {rp.serial}')
break
if rooted:
break
assert rooted, f' Could not find a root atom for glycan {i} -- this is a bug'
indices = ' '.join([str(x.serial-1) for x in g])
# logger.debug(f'indices {indices}')
fw.comment(f'Glycan {i}:')
fw.addline(f'set glycan_idx({i}) [list {indices}]')
fw.addline(f'set rbonds({i}) [list]')
fw.addline(f'set movers({i}) [list]')
# for each rotatable bond, define the two atoms in the bond by INDICES
# and identify movers when pendant is rotated around this bond
# a rotatable bond is one that is a bridge, is not a peptide bond, and is not a bond to an H
for bond in nx.bridges(g):
ai, aj = bond
if not (ai.isH() or aj.isH()) and not ai.is_pep(aj):
g.remove_edge(ai, aj)
S = [g.subgraph(c).copy() for c in nx.connected_components(g)]
assert len(S) == 2, f'Bond {str(ai)}-{str(aj)} when cut makes more than 2 components (not a bridge)'
is_root = [any([getattr(x, 'is_root', False) for x in sg]) for sg in S]
assert any(is_root), f'No root atom found in either half split by bond {str(ai)}-{str(aj)}'
unrooted_subgraph = S[is_root.index(False)]
if ai in unrooted_subgraph:
unrooted_subgraph.remove_node(ai)
if aj in unrooted_subgraph:
unrooted_subgraph.remove_node(aj)
if len(unrooted_subgraph) > 1 or (len(unrooted_subgraph) == 1 and not list(unrooted_subgraph.nodes)[0].isH()):
mover_serials = [x.serial for x in unrooted_subgraph]
mover_indices = " ".join([str(x-1) for x in mover_serials])
logger.debug(f'{str(ai)}--{str(aj)} is a rotatable bridging bond')
fw.addline(f'lappend rbonds({i}) [list {ai.serial-1} {aj.serial-1}]')
logger.debug(f' -> movers: {",".join([str(x) for x in unrooted_subgraph])}')
fw.addline(f'lappend movers({i}) [list {mover_indices}]')
g.add_edge(ai, aj)
return len(G)
[docs]
def ingest_molecules(self):
"""
Ingests the base molecule from the specifications provided in the task.
This molecule is initialized from either base coordinates fetched in a prior fetch task,
or a state defined by a previous continuation task.
It also handles any graft sources specified in the sequence modifications and
activates the biological assembly of the base molecule.
"""
specs = self.specs
self.source_specs = specs['source']
assert not 'id' in self.source_specs, f'Version 2.0+ of Pestifer does not support "id" in source specs. Please begin your task list with either a `fetch` or `continuation` task.'
this_source = {}
base_coordinates: Path = self.get_current_artifact_path('base_coordinates')
if not base_coordinates:
state: StateArtifacts = self.get_current_artifact('state')
if not (state.pdb and state.psf):
raise PestiferBuildError(f'No base_coordinates artifact found, and no prebuilt PDB/PSF/XSC files found in the pipeline context. Cannot ingest base molecule.')
this_source['prebuilt'] = {
'pdb': state.pdb.name,
'psf': state.psf.name,
'xsc': state.xsc.name if state.xsc else None
}
this_source['file_format'] = 'PDB'
else:
basename, ext = os.path.splitext(base_coordinates)
this_source['source_id'] = basename
this_source['source_db'] = 'local'
if ext.lower() == '.pdb':
this_source['file_format'] = 'PDB'
elif ext.lower() == '.cif':
this_source['file_format'] = 'mmCIF'
else:
raise PestiferBuildError(f'Unknown file format {ext} for base_coordinates artifact {base_coordinates.name}')
self.source_specs.update(this_source)
logger.debug(f'User-input modspecs:')
my_logger(self.specs["mods"], logger.debug, depth=1)
self.objmanager = ObjManager(self.specs['mods'])
seqmods = self.objmanager.get('seq', {})
logger.debug(f'Ingesting seqmods:')
my_logger(seqmods, logger.debug, depth=1)
if 'grafts' in seqmods:
# logger.debug(f'looking for graft sources to ingest')
Grafts: GraftList = seqmods['grafts']
graft_artifacts = []
for g in Grafts.data:
if not g.source_pdbid in self.molecules:
logger.debug(f'Ingesting graft source {g.source_pdbid}')
this_source = {
'source_db': 'rcsb',
'source_id': g.source_pdbid,
'file_format': 'PDB',
'sequence': {'skip_glycan_renaming': True}
}
# the Molecule call below will fetch coordinates for graft pdbs
self.molecules[g.source_pdbid] = Molecule(source=this_source)
graft_artifacts.append(PDBFileArtifact(g.source_pdbid))
g.activate(deepcopy(self.molecules[g.source_pdbid]))
if len(graft_artifacts) > 0:
self.register(graft_artifacts, key='graft_sources', artifact_type=PDBFileArtifactList)
self.chainIDmanager = ChainIDManager(
format = self.source_specs['file_format'],
transform_reserves = self.source_specs.get('transform_reserves', {}),
remap = self.source_specs.get('remap_chainIDs', {}))
# if 'vmdatomselections' in self.source_specs['exclude']:
# self.apply_vmdexclusions()
self.base_molecule = Molecule(source = self.source_specs,
objmanager = self.objmanager,
chainIDmanager = self.chainIDmanager).activate_biological_assembly(self.source_specs['biological_assembly'])
# register self.base_molecule in the pipeline context
self.register(self.base_molecule, key='base_molecule')
for molid, molecule in self.molecules.items():
logger.debug(f'Molecule "{molid}": {molecule.num_atoms()} atoms in {molecule.num_residues()} residues; {molecule.num_segments()} segments.')
if 'cif' in self.source_specs.get('file_format', '').lower():
if self.source_specs.get('cif_residue_map_file', ''):
write_residue_map(self.base_molecule.asymmetric_unit.residues.cif_residue_map(), self.source_specs['cif_residue_map_file'])
self.register(Path(self.source_specs['cif_residue_map_file']), key='cif_residue_map', artifact_type=CSVDataFileArtifact)
logger.debug(f'Base molecule has {self.base_molecule.num_atoms()} atoms in {self.base_molecule.num_residues()} residues; {self.base_molecule.num_segments()} segments.')
[docs]
def update_molecule(self):
"""
Updates all segments of the base molecule based on the
current coordinate file. All ssbonds and links are
carried forward. No biological assembly beyond the apparent
asymmetric unit is assumed. This should permit generation
of a new psfgen script based on this new molecule to
recreate it or modify it.
"""
# get the key of the base_molecule
logger.debug(f'{self.taskname} has {len(self.molecules)} entries in its molecules dict')
base_key = 'base'
for k, v in self.molecules.items():
if v == self.base_molecule:
base_key = k
# assert base_key!='UNSET',f'Cannot update a non-existent base molecule'
# get the psf, pdb, xsc from the pipeline context
state: StateArtifacts = self.get_current_artifact('state')
pdb: Path = state.pdb
psf: Path = state.psf
xsc: Path | None = state.xsc
if not (pdb and psf):
raise PestiferBuildError(f'No base_coordinates artifact found, and no prebuilt PDB/PSF/XSC files found in the pipeline context. Cannot ingest base molecule.')
source = {}
source['prebuilt'] = {
'pdb': pdb.name,
'psf': psf.name,
'xsc': xsc.name if xsc else None
}
if hasattr(self, 'chainIDmanager') and hasattr(self, 'objmanager'):
updated_molecule = Molecule(source=source, chainIDmanager=self.chainIDmanager, objmanager=self.objmanager).activate_biological_assembly(0)
else:
updated_molecule = Molecule(source=source).activate_biological_assembly(0)
self.molecules[base_key] = updated_molecule
self.base_molecule = updated_molecule
self.register(self.base_molecule, key='base_molecule')