Source code for pestifer.util.patch

# Author: ChatGPT 5
"""
A simple unified diff parser and applier.
"""

import re
from typing import List, Tuple

class PatchApplyError(Exception):
    """Raised when a unified diff cannot be parsed or applied to the input."""
_hunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@') def _parse_unified_hunks(patch_text: str) -> List[Tuple[int,int,int,int,List[str]]]: """ Returns a list of hunks as tuples: (old_start, old_count, new_start, new_count, hunk_lines) where hunk_lines include the leading ' ', '+', '-' markers and keep newlines. """ lines = patch_text.splitlines(keepends=True) hunks = [] i = 0 # Skip file header lines ('---', '+++') and any preamble while i < len(lines) and not lines[i].startswith('@@'): i += 1 while i < len(lines): m = _hunk_re.match(lines[i]) if not m: # allow interspersed non-hunk lines (e.g., additional headers) i += 1 continue old_start = int(m.group(1)) old_count = int(m.group(2) or '1') new_start = int(m.group(3)) new_count = int(m.group(4) or '1') i += 1 hunk_lines: List[str] = [] # Collect lines until next '@@' or end. We can't rely only on counts # because context lines are mixed with additions/deletions. while i < len(lines) and not lines[i].startswith('@@'): # Stop when a new file header begins (rare inside a single-file patch) if lines[i].startswith(('--- ', '+++ ')) and hunk_lines: break hunk_lines.append(lines[i]) i += 1 hunks.append((old_start, old_count, new_start, new_count, hunk_lines)) if not hunks: raise PatchApplyError("No unified diff hunks ('@@ ... @@') found. " "Make sure this is a unified diff (use `diff -u`).") return hunks
def apply_unified_diff(content: str, patch_text: str, *, reverse: bool = False) -> str:
    """
    Apply a unified diff to `content` and return the patched string.

    - Set `reverse=True` to apply the patch in reverse (like `patch -R`).
      In that case `content` is taken to be the NEW file, so hunks are
      located by their new-file line numbers and '+'/'-' swap roles.
    - A '\\ No newline at end of file' marker removes the trailing newline
      from the hunk line directly above it before matching or emitting it.
    - Raises PatchApplyError if context does not match, if hunks overlap,
      or if the patch contains no hunks.
    """
    # Normalize to line-wise operations while preserving exact line endings
    src_lines = content.splitlines(keepends=True)
    out_lines: List[str] = []
    src_idx = 0  # 0-based index of the next unconsumed line in src_lines
    swap = {'+': '-', '-': '+'}

    for old_start, old_count, new_start, new_count, hunk_lines in _parse_unified_hunks(patch_text):
        # The input is the old file normally, the new file when reversing.
        if reverse:
            start, count = new_start, new_count
        else:
            start, count = old_start, old_count

        # Line numbers are 1-based.  A zero-length range instead names the
        # line *before* the insertion point (0 for the start of the file),
        # so it already equals the 0-based index to insert at.
        target_idx = start if count == 0 else start - 1

        if target_idx < src_idx:
            raise PatchApplyError(
                f"Hunk starting at old line {old_start} overlaps earlier edits."
            )

        # Copy through all unchanged lines before this hunk
        out_lines.extend(src_lines[src_idx:target_idx])
        src_idx = target_idx

        # Fold the no-newline markers into the lines they describe.
        body: List[str] = []
        for raw in hunk_lines:
            if raw.startswith('\\ No newline at end of file'):
                if body and body[-1].endswith('\n'):
                    body[-1] = body[-1][:-1]
                continue
            body.append(raw)

        # Apply hunk body
        for raw in body:
            tag = raw[:1]
            text = raw[1:]
            if reverse:
                # Swap meanings for reverse apply
                tag = swap.get(tag, tag)
            line = tag + text

            if tag == ' ':
                # Context: must match input exactly
                if src_idx >= len(src_lines) or src_lines[src_idx] != text:
                    got = src_lines[src_idx] if src_idx < len(src_lines) else '<EOF>'
                    raise PatchApplyError(
                        "Context mismatch applying hunk.\n"
                        f"Expected: {text!r}\nGot : {got!r}"
                    )
                out_lines.append(src_lines[src_idx])
                src_idx += 1
            elif tag == '-':
                # Deletion: input must match; do not copy to output
                if src_idx >= len(src_lines) or src_lines[src_idx] != text:
                    got = src_lines[src_idx] if src_idx < len(src_lines) else '<EOF>'
                    raise PatchApplyError(
                        "Deletion mismatch applying hunk.\n"
                        f"Expected to remove: {text!r}\nBut found : {got!r}"
                    )
                src_idx += 1
            elif tag == '+':
                # Addition: copy to output, do not consume input
                out_lines.append(text)
            elif tag == '@':
                # A stray '@' line that is not a valid '@@' hunk header
                raise PatchApplyError(f"Unexpected hunk line: {line!r}")
            else:
                # Unknown prefix (could be stray headers); be strict
                raise PatchApplyError(f"Invalid hunk line (no prefix): {line!r}")

    # Copy any remaining source lines
    out_lines.extend(src_lines[src_idx:])
    return ''.join(out_lines)