Source code for pestifer.logparsers.pdb2pqrlogparser

# Author: Cameron F. Abrams, <cfa22@drexel.edu>

"""
PDB2PQR log parsing utility
"""

import logging
import os
import re

import pandas as pd

from .logparser import LogParser

logger = logging.getLogger(__name__)

class PDB2PQRLogParser(LogParser):
    """
    A class for parsing PDB2PQR log files.

    This class is a subclass of
    :class:`LogParser <pestifer.logparsers.logparser.LogParser>`. It splits
    the accumulated log text into sections delimited by
    :attr:`section_separator` and, for the section containing the prediction
    summary, stores the parsed pKa table in ``metadata['pka_table']``.

    Parameters
    ----------
    basename : str
        The base name for the log parser; stored on the instance as
        ``basename``.
    """

    section_separator = '-'*80  # file is divided into sections
    """
    The separator used to identify sections in the PDB2PQR log file.
    """

    def __init__(self, basename='pdb2pqr-logparser'):
        super().__init__()
        # Start indices of sections already handed to process_section, so a
        # section is processed only once across repeated scans.
        self.processed_separator_idx = []
        self.processed_sections = []
        self.metadata = {}
        self.basename = basename
        self.progress = 0.0

    def _closed_sections(self):
        """
        Locate every section that is terminated by a separator.

        Returns
        -------
        list of tuple
            ``(key, begin, end)`` triples where ``key`` is the index at which
            the section starts (the start of its leading separator, or 0 for
            text preceding the first separator), and ``begin:end`` is the
            slice of ``byte_collector`` holding the section content.
        """
        starts = [
            (m.start(), m.end())
            for m in re.finditer(self.section_separator, self.byte_collector)
        ]
        # Text before the first separator forms a section with no leading
        # separator. Only add it when the log does not itself begin with a
        # separator; otherwise both would share key 0 and the first real
        # section would be skipped as "already processed".
        if not starts or starts[0][0] != 0:
            starts.insert(0, (0, 0))
        return [
            (key, begin, next_key)
            for (key, begin), (next_key, _) in zip(starts, starts[1:])
        ]

    def _process_closed_sections(self):
        """Process each separator-terminated section not yet processed."""
        for key, begin, end in self._closed_sections():
            if key in self.processed_separator_idx:
                continue
            self.processed_separator_idx.append(key)
            self.process_section(self.byte_collector[begin:end])

    def update(self, bytes: str) -> None:
        """
        Update the PDB2PQR log parser with new log text.

        The text is passed to the base-class ``update`` and then every
        section of ``byte_collector`` that is closed by a separator and has
        not been processed before is handed to :meth:`process_section`.

        Parameters
        ----------
        bytes : str
            The new log text to add to the parser.
        """
        super().update(bytes)
        self._process_closed_sections()

    def finalize(self) -> None:
        """
        Process any separator-terminated sections not yet processed.

        This uses the same section boundaries as :meth:`update`, so the
        section that precedes the first separator is passed in full.
        """
        # NOTE(review): text following the last separator is never passed to
        # process_section, and nothing is written to disk here -- confirm
        # whether either is intended.
        self._process_closed_sections()

    def process_section(self, bytes: str) -> None:
        """
        Process a section of the PDB2PQR log file.

        Only sections containing ``SUMMARY OF THIS PREDICTION`` are acted on;
        they are forwarded to :meth:`process_summary`. Other sections are
        ignored.

        Parameters
        ----------
        bytes : str
            The text of a single section.
        """
        if 'SUMMARY OF THIS PREDICTION' in bytes:
            self.process_summary(bytes)

    def process_summary(self, bytes: str) -> None:
        """
        Process the summary section of the PDB2PQR log file.

        Rows are read starting at the fourth line of the section and up to
        the first blank line. If at least one row is parsed, the rows are
        stored as a :class:`pandas.DataFrame` in ``metadata['pka_table']``
        with columns ``resname``, ``resnum``, ``reschain``, ``respka``,
        ``resmodelpka`` and ``resatomtype``.

        Parameters
        ----------
        bytes : str
            The text of the summary section.

        Raises
        ------
        IndexError
            If a non-blank table row has fewer than five whitespace-separated
            fields.
        ValueError
            If the residue number or a pKa field of a row is not numeric.
        """
        # splitlines() handles "\n" and "\r\n" alike; splitting on os.linesep
        # fails to split "\n"-terminated text on platforms where the native
        # separator is "\r\n".
        lines = bytes.splitlines()
        # The first three lines are skipped -- presumably leading text, the
        # summary title and the column header; TODO confirm against a log.
        expected_table = lines[3:]
        if not expected_table:
            logger.debug('process_summary: no table rows found')
            return
        logger.debug('process_summary: expected_table begins with: %s', expected_table[0])
        table_lines = []
        for line in expected_table:
            tokens = line.split()
            if not tokens:
                logger.debug('process_summary: empty line signals end of table')
                break
            table_lines.append(dict(
                resname=tokens[0],
                resnum=int(tokens[1]),
                reschain=tokens[2],
                respka=float(tokens[3]),
                resmodelpka=float(tokens[4]),
                # The sixth field is optional.
                resatomtype=tokens[5] if len(tokens) > 5 else None,
            ))
        if table_lines:
            self.metadata['pka_table'] = pd.DataFrame(table_lines)