# Source code for pestifer.util.patch

# Author: ChatGPT 5
"""
A simple unified diff parser and applier.
"""

import re
from typing import List, Tuple


# [docs]
class PatchApplyError(Exception):
    """Raised when a unified diff cannot be parsed or does not apply to the given content."""


# Matches the start of a unified-diff hunk header such as '@@ -12,5 +12,6 @@'.
# Groups: 1 = old start, 2 = old count (optional), 3 = new start,
# 4 = new count (optional). The pattern is not anchored at the end, so any
# text following the closing '@@' on the same line is accepted and ignored.
_hunk_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@')

def _parse_unified_hunks(patch_text: str) -> List[Tuple[int,int,int,int,List[str]]]:
    """
    Split a unified diff into its hunks.

    Returns a list of hunks as tuples:
      (old_start, old_count, new_start, new_count, hunk_lines)
    where hunk_lines include the leading ' ', '+', '-' markers and keep newlines.
    'No newline at end of file' marker lines (those starting with a backslash)
    are passed through unchanged inside hunk_lines.

    The extent of a hunk body is determined by the line counts in its '@@'
    header (a count omitted from the header means 1):

    - While the counts are not yet satisfied, every line belongs to the hunk,
      so a removed line whose own text starts with '-- ' (seen as '--- ...')
      or an added line starting with '++ ' is not mistaken for a file header.
    - Once both counts are satisfied the hunk ends; file headers of a
      following diff, trailing blank lines, or an e-mail signature after the
      last hunk are skipped instead of being swallowed into the hunk.
    - A completely empty line inside a hunk is treated as an empty context
      line whose leading space was stripped, and is returned as ' ' + line.
    - A hunk that is cut short by the next '@@' line or by the end of the
      text is returned with whatever lines were found.

    Raises PatchApplyError if the text contains no hunk header at all.
    """
    lines = patch_text.splitlines(keepends=True)
    total = len(lines)
    hunks: List[Tuple[int,int,int,int,List[str]]] = []
    i = 0

    while i < total:
        m = _hunk_re.match(lines[i])
        if not m:
            # Preamble, '---'/'+++' file headers, or noise between hunks.
            i += 1
            continue

        old_start = int(m.group(1))
        old_count = int(m.group(2) or '1')
        new_start = int(m.group(3))
        new_count = int(m.group(4) or '1')
        i += 1

        hunk_lines: List[str] = []
        old_seen = 0  # ' ' and '-' lines collected so far
        new_seen = 0  # ' ' and '+' lines collected so far

        while i < total:
            line = lines[i]

            if old_seen >= old_count and new_seen >= new_count:
                # Body complete. The only line that may still belong to it is
                # a no-newline marker qualifying the final body line.
                if line.startswith('\\'):
                    hunk_lines.append(line)
                    i += 1
                break

            if line.startswith('@@'):
                # Next hunk header arrived early; leave it for the outer loop.
                break

            tag = line[:1]
            if tag == '-':
                old_seen += 1
            elif tag == '+':
                new_seen += 1
            elif tag == ' ':
                old_seen += 1
                new_seen += 1
            elif not line.strip('\r\n'):
                # Empty context line that lost its leading space.
                line = ' ' + line
                old_seen += 1
                new_seen += 1
            # Anything else (backslash markers, unrecognised prefixes) is
            # kept without being counted so the caller can interpret or
            # reject it.

            hunk_lines.append(line)
            i += 1

        hunks.append((old_start, old_count, new_start, new_count, hunk_lines))

    if not hunks:
        raise PatchApplyError("No unified diff hunks ('@@ ... @@') found. "
                              "Make sure this is a unified diff (use `diff -u`).")
    return hunks


# [docs]
def apply_unified_diff(content: str, patch_text: str, *, reverse: bool=False) -> str:
    """
    Apply a unified diff to `content` and return the patched string.

    - `content` is the text to patch; it is processed line by line with the
      original line endings preserved.
    - `patch_text` is the unified diff (as produced by `diff -u`).
    - Set `reverse=True` to apply the patch in reverse (like `patch -R`);
      '+' and '-' lines swap roles and hunks are located by their new-file
      line numbers.
    - Hunks whose range on the source side has length zero (pure insertions,
      e.g. '@@ -0,0 +1,3 @@') are inserted after the line named in the header.
    - A 'No newline at end of file' marker (a hunk line starting with a
      backslash) removes the trailing newline from the hunk line before it,
      so text that lacks a final newline is matched and produced faithfully.
    - Raises PatchApplyError if context or removed lines do not match, if a
      hunk overlaps an earlier one, or if a hunk line has an invalid prefix.
    """
    # Normalize to line-wise operations while preserving exact line endings
    src_lines = content.splitlines(keepends=True)
    out_lines: List[str] = []
    src_idx = 0  # 0-based index of the next unconsumed line in src_lines

    hunks = _parse_unified_hunks(patch_text)
    swap = {'+': '-', '-': '+'}

    for (old_start, old_count, new_start, new_count, hunk_lines) in hunks:
        # The side of the header that describes `content` depends on direction.
        if reverse:
            side, start, count = 'new', new_start, new_count
        else:
            side, start, count = 'old', old_start, old_count

        # Header line numbers are 1-based. For an empty range the start names
        # the line *preceding* the insertion point, so it is not decremented.
        target_idx = start - 1 if count > 0 else start

        # Copy through all unchanged lines before this hunk
        if target_idx < src_idx:
            raise PatchApplyError(
                f"Hunk starting at {side} line {start} overlaps earlier edits."
            )
        out_lines.extend(src_lines[src_idx:target_idx])
        src_idx = target_idx

        # Split each hunk line into (tag, text), folding no-newline markers
        # into the line they qualify.
        ops: List[Tuple[str, str]] = []
        for raw in hunk_lines:
            if raw.startswith('\\'):
                if ops:
                    prev_tag, prev_text = ops[-1]
                    if prev_text.endswith('\n'):
                        ops[-1] = (prev_tag, prev_text[:-1])
                continue
            ops.append((raw[:1], raw[1:]))

        # Apply hunk body
        for tag, text in ops:
            if reverse:
                # Swap meanings for reverse apply
                tag = swap.get(tag, tag)

            if tag == ' ':
                # Context: must match input exactly
                if src_idx >= len(src_lines) or src_lines[src_idx] != text:
                    got = src_lines[src_idx] if src_idx < len(src_lines) else '<EOF>'
                    raise PatchApplyError(
                        "Context mismatch applying hunk.\n"
                        f"Expected: {text!r}\nGot     : {got!r}"
                    )
                out_lines.append(src_lines[src_idx])
                src_idx += 1

            elif tag == '-':
                # Deletion: input must match; do not copy to output
                if src_idx >= len(src_lines) or src_lines[src_idx] != text:
                    got = src_lines[src_idx] if src_idx < len(src_lines) else '<EOF>'
                    raise PatchApplyError(
                        "Deletion mismatch applying hunk.\n"
                        f"Expected to remove: {text!r}\nBut found          : {got!r}"
                    )
                src_idx += 1

            elif tag == '+':
                # Addition: copy to output, do not consume input
                out_lines.append(text)

            elif tag == '@':
                # A stray header-like line inside a hunk body
                raise PatchApplyError(f"Unexpected hunk line: {tag + text!r}")

            else:
                # Unknown prefix (could be stray headers); be strict
                raise PatchApplyError(f"Invalid hunk line (no prefix): {tag + text!r}")

    # Copy any remaining source lines
    out_lines.extend(src_lines[src_idx:])
    return ''.join(out_lines)