Source code for pestifer.core.examplemanager

# Author: Cameron F. Abrams <cfa22@drexel.edu>

""" 
A module for managing example input files in pestifer.
This module provides the :class:`ExampleManager` class, which allows users to check out example YAML files,
report the list of examples, and manage example resources.

ExampleManager does not directly manage the example documentation, but it relies on the :class:`SphinxExampleManager` 
class to handle the documentation side of things.
"""

import logging
import os
import shutil
import yaml

from pathlib import Path

from .artifacts import *
from .example import Example, ExampleList
from ..sphinxext.sphinx_examplemanager import SphinxExampleManager
from ..util.formatvalidator import FormatValidator

logger = logging.getLogger(__name__)

[docs] class ExampleManager: """ A class for managing example YAML input files in pestifer. This class provides methods to check out example YAML files, report the list of examples, and manage example resources. Each example has a root folder that lives in `examples_path`. Parameters ---------- example_path : str The path to the directory containing example root folders. docs_path : str The path to the directory containing documentation files. This is passed directly to the :class:`SphinxExampleManager` class. If not provided, the documentation management is not enabled. """ def __init__(self, examples_path: str | Path | None = None, docs_source_path: str | Path | None = None): if not examples_path: raise ValueError('You must provide a path to the directory containing example input files') if not os.path.isdir(examples_path): logger.debug(f'Directory "{examples_path}" does not exist; creating it') os.makedirs(examples_path) self.root_folder_format_validator = FormatValidator(Example.folder_name_format, greedy=True, flexible_ws=True) self.path = Path(examples_path).absolute() self.examples: ExampleList = ExampleList([]) for efolder in self.path.iterdir(): if efolder.is_dir() and self.root_folder_format_validator.fullmatch(efolder.name): example_id = self.root_folder_format_validator.extract(efolder.name).get("example_id") inputsfolder = efolder / Example.inputs_subdir yaml_inputs = list(inputsfolder.glob('*.yaml')) if inputsfolder.is_dir() else [] title = 'No title provided' db_id = None shortname = None if len(yaml_inputs) == 0: logger.warning(f'No YAML files found in {inputsfolder}') elif len(yaml_inputs) > 1: logger.warning(f'Multiple YAML files found in {inputsfolder}: {yaml_inputs}') else: main_input_script = yaml_inputs[0] shortname = main_input_script.name.replace('.yaml', '') title, db_id = Example._get_implied_metadata(main_input_script) if shortname: self.examples.append(Example( example_id=example_id, title=title, shortname=shortname, db_id=db_id )) if docs_source_path: # create the SphinxExampleManager instance if docs_source_path is provided self.sphinx_example_manager = SphinxExampleManager(docs_source_path=Path(docs_source_path).absolute()) else: self.sphinx_example_manager = None
[docs] def examplefolderpath(self, example: Example) -> Path: return self.path / example.rootfolderpath
[docs] def scriptpath(self, example: Example) -> Path: return self.path / example.scriptpath
[docs] def inputspath(self, example: Example) -> Path: return self.path / example.inputspath
[docs] def outputspath(self, example: Example) -> Path: return self.path / example.outputspath
[docs] def checkout_example(self, example_id: int) -> Example: """ Copy example YAML file and associated companion files by example ID to the current working directory. Parameters ---------- example_id : int The ID of the example to check out. Returns ------- Example The example that was checked out. Raises ------ IndexError If the index is out of range for the examples list. FileNotFoundError If the example YAML file does not exist in the specified path. """ example = self.examples.get_example_by_example_id(example_id) if not example: raise IndexError(f'Example with ID {example_id} not found') with open(example.scriptname, 'w') as f: f.write(self.scriptpath(example).read_text()) for aux_path in self.inputspath(example).glob('*'): if aux_path.name != example.scriptname and aux_path.is_file(): shutil.copy(aux_path, os.getcwd()) logger.info(f'Checked out example {example_id} from {self.path.name} to current working directory {os.getcwd()}') return example
[docs] def new_example_yaml(self, db_id: str = 'ABCD', build_type: str = 'minimal', outputfilename: str = None, title: str = ''): """ Generate a new example YAML file based on an existing example template. The id can be a 4-letter PDB ID or an Alphafold/UNIPROT ID starting with "P". The build_type can be either 'minimal' or 'full', which determines whether the generated YAML file contains only the psfgen task or all tasks including termination. Parameters ---------- db_id : str, optional The ID for the new example YAML file. It can be a 4-letter PDB ID or an Alphafold/UNIPROT ID starting with "P". Default is 'ABCD'. build_type : str, optional The type of build for the new example YAML file. It can be either 'minimal' or 'full'. Default is 'minimal'. """ if len(db_id) == 4: # assume a PDB id idtype = 'PDB' elif db_id.startswith('P') or db_id.startswith('O'): # assume an alphafold id by uniprot id idtype = 'Alphafold' else: raise ValueError(f'Invalid id {db_id} for new example YAML; must be a 4-letter PDB ID or an Alphafold/UNIPROT ID starting with "P"') example_yaml_path = self.scriptpath(self.examples[0]) with open(example_yaml_path, 'r') as f: try: example_config = yaml.safe_load(f) except yaml.YAMLError as e: raise ValueError(f'Invalid YAML file {example_yaml_path}: {e}') example_config['title'] = title if title else f'New template pestifer config for id {db_id} ({idtype})' if build_type == 'minimal': fetch_task = example_config['tasks'][0] example_config['tasks'] = [fetch_task, example_config['tasks'][1]] # keep only the fetch task if idtype == 'PDB' or idtype == 'Alphafold': example_config['tasks'][0]['fetch']['sourceID'] = db_id if build_type == 'full': example_config['tasks'][-1]['terminate']['basename'] = f'my_{db_id.lower()}' example_config['tasks'][-1]['terminate']['package']['basename'] = f'my_{db_id.lower()}' if outputfilename: output_yaml = outputfilename else: output_yaml = f'{db_id.lower()}.yaml' with open(output_yaml, 'w') as f: yaml.dump(example_config, f, default_flow_style=False, sort_keys=False) logger.info(f'Generated new example YAML file {output_yaml} for id {db_id} ({idtype})')
[docs] def report_examples(self, header=False, formatter=r'{:>7s} {:>8s} {:<30s} {}'): """ Generate a report of the available examples in the examples list. Parameters ---------- header : bool, optional If True, include a header in the report. Default is False. formatter : str, optional A format string for the report. Default is a string that formats the index, name, and title of each example. Default is ``r'{:>7s} {:<30s} {}'``. Returns ------- str A report of the available examples in the examples list. """ if not self.examples: return 'No examples available' if header: report_lines = [formatter.format('ID','DBID','Name','Title')+'\n'] else: report_lines = [] for i, example in enumerate(self.examples): report_lines.append(example.report_line(formatter=formatter)+'\n') return ''.join(report_lines)
[docs] def delete_example(self, example_id: int): """ Delete an example from the examples list by its ID and returns the instance. Parameters ---------- example_id : int The example ID of the example to delete. Returns ------- Example The deleted example instance. Raises ------ IndexError If the index is out of range for the examples list. FileNotFoundError If the example file does not exist in the specified path. """ example = self.examples.get_example_by_example_id(example_id) if not example: raise IndexError(f'Example ID {example_id} is out of range for examples list of length {len(self.examples)}') self.examples.remove(example) # remove the example from the list example_folder_path = os.path.join(self.path, example.rootfolderpath) if os.path.isdir(example_folder_path): logger.debug(f'Deleting example folder "{example_folder_path}"') shutil.rmtree(example_folder_path) else: raise FileNotFoundError(f'Example folder "{example_folder_path}" does not exist') if self.sphinx_example_manager: self.sphinx_example_manager.delete_example(example) logger.info(f'Deleted example {example_id}: {example.shortname}') return example
[docs] def checkin_example(self, example: Example, overwrite: bool = False): """ Check in an Example instance by copying its YAML file and companion files to the appropriate example folder, which is created if it doesn't already exist. This will overwrite the existing files. If any file referenced by the example does not exist in the current working directory, a warning is logged but no action taken. Parameters ---------- example : Example The Example instance defining the destination folder and files to copy. overwrite : bool, optional If True, overwrite existing files in the example folder. Default is False. """ example_folder = self.examplefolderpath(example) example_inputs_subfolder = self.inputspath(example) example_outputs_subfolder = self.outputspath(example) user_yaml_file_path = Path(example.shortname + '.yaml') # correct; the name attribute should never have an extension if not example_folder.is_dir(): # we are checking in a new example logger.debug(f'Creating example folder "{example_folder}"') example_folder.mkdir() example_inputs_subfolder.mkdir() example_outputs_subfolder.mkdir() if not user_yaml_file_path.is_file(): raise FileNotFoundError(f'Example YAML file {user_yaml_file_path.name} does not exist in your current working directory {os.getcwd()}') shutil.copy(user_yaml_file_path, example_inputs_subfolder) for f in example.auxiliary_inputs: if os.path.isfile(f): shutil.copy(f, example_inputs_subfolder) else: logger.warning(f'Declared auxiliary input file {f} does not exist in {os.getcwd()}') for f in example.outputs: if os.path.isfile(f): shutil.copy(f, example_outputs_subfolder) else: logger.warning(f'Declared output file {f} does not exist in {os.getcwd()}') else: existing_yaml_file_path = self.scriptpath(example) if overwrite and user_yaml_file_path.is_file(): shutil.copy(user_yaml_file_path, example_inputs_subfolder) for f in example.auxiliary_inputs: existing_aux_path = example_inputs_subfolder / f if overwrite and existing_aux_path.is_file(): os.remove(existing_aux_path) shutil.copy(f, example_inputs_subfolder) elif not existing_yaml_file_path.exists(): shutil.copy(f, example_inputs_subfolder) for f in example.outputs: existing_output_path = example_outputs_subfolder / f if overwrite and existing_output_path.is_file(): os.remove(existing_output_path) shutil.copy(f, example_outputs_subfolder) elif not existing_output_path.exists(): shutil.copy(f, example_outputs_subfolder)
[docs] def append_example(self, example_id: int, scriptname: str, title: str = '', db_id: str = '', author_name: str = '', author_email: str = '', auxiliary_inputs: list = [], outputs: list = []): """ Create a new Example from keyword arguments and add it to the examples list. Parameters ---------- example_id : int The unique integer ID of the example. scriptname : str The name of the example script file to insert. This should be a valid file path since a new Example is created. title : str A title of the example; if not provided, it is extracted from the script. db_id : str The structure/sequence database ID associated with the example. author_name : str The name of the author of the example. author_email : str The email of the author of the example. auxiliary_inputs: list[str] A list of auxiliary input files associated with the example. outputs: list[str] A list of output files associated with the example. Returns ------- Example: The newly created Example object. Raises ------ FileNotFoundError If the example scriptfile does not exist in the CWD. """ if not os.path.isfile(scriptname): raise FileNotFoundError(f'Example scriptfile {scriptname} does not exist in the CWD.') new_example = Example( example_id=example_id, shortname=os.path.splitext(scriptname)[0], title=title, db_id=db_id, author_name=author_name, author_email=author_email, auxiliary_inputs=auxiliary_inputs, outputs=outputs ) self.checkin_example(new_example) # check in the new example by copying its YAML file and companion files to the appropriate example folder self.examples.append(new_example) if self.sphinx_example_manager: self.sphinx_example_manager.append_example(new_example) logger.info(f'Added new example {new_example.example_id}: {new_example.shortname}') return new_example
[docs] def update_example(self, example_id: int, shortname: str = '', title: str = '', db_id: str = '', author_name: str = '', author_email: str = '', auxiliary_inputs: list = [], outputs: list = [], skip_sphinx: bool = False): """ Update an existing example in the examples list by its unique index. Parameters ---------- example_id : int The unique ID of the example to update. shortname : str The short name of the example to update (without extension). title : str A title of the example; overrides value of 'title' in the <name>.yaml if it exists. pdbID : str The PDB ID (or Alphafold ID) associated with the example; overrides value of 'id' or 'alphafold' in the <name>.yaml if it exists. author_name : str The name of the author of the example; overrides the name in the # Author line of <name>.yaml if it exists. author_email : str The email of the author of the example; overrides the email in the # Author line of <name>.yaml if it exists. companion_files : list, optional A list of companion files associated with the example; defaults to an empty list. skip_sphinx : bool, optional If True, skip updating the Sphinx documentation for this example. Returns ------- Example or None The updated Example object if the update was successful, or None if the example was not found. Raises ------ FileNotFoundError If the example file does not exist at the specified path. """ current_example: Example = self.examples.get_example_by_example_id(example_id) if not current_example: logger.warning(f'No example found with ID {example_id}') return None if shortname: user_script = Path(f'{shortname}.yaml') # in cwd if user_script.is_file(): # user is providing a new script for this example self.scriptpath(current_example).unlink() # remove the old script shutil.copy(user_script, self.inputspath(current_example)) user_implied_title, user_implied_db_id = Example._get_implied_metadata(user_script) current_example.title = user_implied_title current_example.db_id = user_implied_db_id else: # user is not providing a new script, they just want to change the shortname self.scriptpath(current_example).rename(self.inputspath(current_example) / f'{shortname}.yaml') current_example.shortname = shortname if title: current_example.title = title if db_id: current_example.db_id = db_id if author_name: current_example.author_name = author_name if author_email: current_example.author_email = author_email if auxiliary_inputs: if current_example.auxiliary_inputs is not None: for f in current_example.auxiliary_inputs: Path(f).unlink(missing_ok=True) # remove old auxiliary inputs current_example.auxiliary_inputs = auxiliary_inputs for f in auxiliary_inputs: if os.path.isfile(f): shutil.copy(f, self.inputspath(current_example)) if outputs: if current_example.outputs is not None: for f in current_example.outputs: Path(f).unlink(missing_ok=True) current_example.outputs = outputs for f in outputs: if os.path.isfile(f): shutil.copy(f, self.outputspath(current_example)) if self.sphinx_example_manager and not skip_sphinx: self.sphinx_example_manager.update_example(example_id, current_example) logger.info(f'Updated example {example_id}: {current_example.shortname}') return current_example
[docs] def rename_example(self, example_id: int, new_shortname: str): """ Rename an example in the examples list by its ID. Parameters ---------- index : int The ID of the example to rename. new_shortname : str The new short name for the example. Returns ------- Example The renamed Example object. """ return self.update_example(example_id, shortname=new_shortname) # use update_example to handle renaming logic
[docs] def set_example_author(self, example_id: int, author_name: str, author_email: str): """ Set the author information for an example in the examples list by its ID. Parameters ---------- example_id : int The ID of the example to set the author for. author_name : str The name of the author. author_email : str The email address of the author. Returns ------- Example The updated Example object with the new author information. """ return self.update_example(example_id, author_name=author_name, author_email=author_email)
[docs] def clone(self, newpath: str | Path) -> 'ExampleManager': """ Spawn a new ExampleManager instance rooted at a directory different from the current ExampleManager's path (newpath). The new ExampleManager instance will contain copies of all examples in the current ExampleManager's examples list. Parameters ---------- subpath : str or Path The subdirectory path relative to the current ExampleManager's path. Returns ------- ExampleManager A new ExampleManager instance rooted at the specified subdirectory. """ new_manager = ExampleManager(examples_path=newpath, docs_source_path=None) for example in self.examples: new_manager.copy_from(self, example) return new_manager
[docs] def copy_from(self, other: 'ExampleManager', example: Example): """ Copy an example from another ExampleManager instance to this instance. Parameters ---------- other : ExampleManager The other ExampleManager instance to copy the example from. example : Example The Example instance to copy. overwrite : bool, optional If True, overwrite existing files in the example folder. Default is False. """ current_ids = [ex.example_id for ex in self.examples.data] if example.example_id in current_ids: raise ValueError(f'Example ID {example.example_id} already exists in the destination ExampleManager') source_example_folder = other.examplefolderpath(example) dest_example_folder = self.examplefolderpath(example) shutil.copytree(source_example_folder, dest_example_folder, dirs_exist_ok=True) # create the outputs subfolder regardless of whether there are any outputs to copy dest_outputs_path = self.outputspath(example) dest_outputs_path.mkdir(parents=True, exist_ok=True) if example.outputs: source_outputs_path = other.outputspath(example) for output_file in example.outputs: source_file_path = source_outputs_path / output_file if source_file_path.is_file(): shutil.copy(source_file_path, dest_outputs_path) else: logger.warning(f'Output file {output_file} does not exist in source ExampleManager; skipping copy') self.examples.append(example)