# Source code for pestifer.logparsers.pdb2pqrlogparser
# Author: Cameron F. Abrams, <cfa22@drexel.edu>
"""
PDB2PQR log parsing utility
"""
import logging
import os
import re
import pandas as pd
from .logparser import LogParser
logger = logging.getLogger(__name__)
[docs]
class PDB2PQRLogParser(LogParser):
    """
    Incremental parser for PDB2PQR log output.

    This class is a subclass of
    :class:`LogParser <pestifer.logparsers.logparser.LogParser>`. Log text is
    accumulated through :meth:`update`, split into sections on
    :attr:`section_separator`, and each section is handed to
    :meth:`process_section` exactly once. The only section currently
    interpreted is the pKa summary table, which is stored as a
    :class:`pandas.DataFrame` in ``metadata['pka_table']``.

    Parameters
    ----------
    basename : str
        Base name associated with this parser; stored as ``self.basename``.
    """

    section_separator = '-'*80  # file is divided into sections
    """
    The separator used to identify sections in the PDB2PQR log file.
    """

    def __init__(self, basename='pdb2pqr-logparser'):
        super().__init__()
        # Keys of sections already handed to process_section: the start index
        # of the separator that opens the section, or 0 for the preamble.
        self.processed_separator_idx = []
        self.processed_sections = []
        self.metadata = {}
        self.basename = basename
        self.progress = 0.0

    def update(self, bytes: str) -> None:
        """
        Update the PDB2PQR log parser with new log text.

        The text is passed to the parent class's ``update`` and every section
        that is now closed by a following separator is processed. The text
        after the last separator is left pending, because more of it may
        still arrive; :meth:`finalize` handles it.

        Parameters
        ----------
        bytes : str
            The new chunk of log text (the parameter name is historical).
        """
        super().update(bytes)
        self._process_pending_sections(include_tail=False)

    def finalize(self) -> None:
        """
        Finalize parsing by processing every section not yet processed.

        Unlike :meth:`update`, this also processes the text that follows the
        last separator (or the whole log if it contains no separator), since
        no further input is expected. It is intended to be called once, after
        the final :meth:`update`; the trailing section is marked as processed
        and will not be revisited by later calls.
        """
        self._process_pending_sections(include_tail=True)

    def _process_pending_sections(self, include_tail: bool = False) -> None:
        """
        Split the collected text into sections and process the new ones.

        Parameters
        ----------
        include_tail : bool
            If True, the text after the last separator is also treated as a
            complete section.
        """
        # NOTE(review): assumes the parent class keeps the accumulated text
        # as a str in self.byte_collector -- confirm against LogParser.
        text = self.byte_collector
        sep_len = len(self.section_separator)
        sep_starts = [m.start() for m in re.finditer(self.section_separator, text)]
        # Each opening is (key, content_start). The key is what gets recorded
        # in processed_separator_idx; content starts just past the separator.
        openings = [(start, start + sep_len) for start in sep_starts]
        if not sep_starts or sep_starts[0] > 0:
            # Text before the first separator forms a preamble section that
            # has no separator to skip. When the log itself begins with a
            # separator there is no preamble, and index 0 keys the first
            # real section instead of being consumed by an empty one.
            openings.insert(0, (0, 0))
        # A section ends where the next separator begins.
        closings = [key for key, _ in openings[1:]]
        if include_tail:
            closings.append(len(text))
        # zip() stops at the shorter list, so without the tail the last
        # (still open) section is left for a later call.
        for (key, content_start), content_end in zip(openings, closings):
            if key in self.processed_separator_idx:
                continue
            self.processed_separator_idx.append(key)
            self.process_section(text[content_start:content_end])

    def process_section(self, bytes: str) -> None:
        """
        Process one section of the PDB2PQR log file.

        Only sections containing the marker ``SUMMARY OF THIS PREDICTION``
        are interpreted; they are forwarded to :meth:`process_summary`. All
        other sections are ignored.

        Parameters
        ----------
        bytes : str
            The text of the section, without its opening separator.
        """
        if 'SUMMARY OF THIS PREDICTION' in bytes:
            self.process_summary(bytes)

    def process_summary(self, bytes: str) -> None:
        """
        Extract the pKa table from the summary section.

        The first three lines of the section are skipped and the following
        lines are read as whitespace-delimited table rows until a blank line
        or the end of the section. Each row supplies ``resname``, ``resnum``,
        ``reschain``, ``respka``, ``resmodelpka`` and, if a sixth token is
        present, ``resatomtype`` (otherwise ``None``). If at least one row is
        parsed, the rows are stored as a :class:`pandas.DataFrame` in
        ``self.metadata['pka_table']``.

        Parameters
        ----------
        bytes : str
            The text of the summary section.

        Raises
        ------
        IndexError
            If a table row has fewer than five tokens.
        ValueError
            If a numeric column cannot be converted.
        """
        # splitlines() handles "\n" and "\r\n" alike; splitting on os.linesep
        # fails to split "\n"-terminated text on platforms where it is "\r\n".
        lines = bytes.splitlines()
        expected_table = lines[3:]
        if not expected_table:
            # A truncated summary has nothing to parse; bail out rather than
            # indexing into an empty list below.
            logger.debug('process_summary: summary section contains no table lines')
            return
        logger.debug(f'process_summary: expected_table begins with: {expected_table[0]}')
        table_lines = []
        for line in expected_table:
            tokens = line.split()
            if not tokens:
                logger.debug('process_summary: empty line signals end of table')
                break
            table_lines.append(dict(
                resname=tokens[0],
                resnum=int(tokens[1]),
                reschain=tokens[2],
                respka=float(tokens[3]),
                resmodelpka=float(tokens[4]),
                resatomtype=tokens[5] if len(tokens) > 5 else None,
            ))
        if table_lines:
            self.metadata['pka_table'] = pd.DataFrame(table_lines)