add static analysis + deliberation pipeline

2025-03-09 11:14:29 +00:00
parent f22ca1d5bd
commit 2bb7e03871
13 changed files with 4037 additions and 0 deletions
@@ -0,0 +1,24 @@
 """Static analysis and diff parsing for Arbiter."""
 from arbiter.analysis.diff import DiffFile, DiffHunk, DiffLine, DiffParser, LineType, ParsedDiff
 from arbiter.analysis.static import (
    StaticAnalysisConfig,
    StaticAnalysisResult,
    StaticAnalysisRunner,
    StaticFinding,
    run_static_analysis,
 )
 __all__ = [
    "DiffFile",
    "DiffHunk",
    "DiffLine",
    "DiffParser",
    "LineType",
    "ParsedDiff",
    "StaticAnalysisConfig",
    "StaticAnalysisResult",
    "StaticAnalysisRunner",
    "StaticFinding",
    "run_static_analysis",
 ]
@@ -0,0 +1,283 @@
 """Unified diff parser with line mapping."""
 import re
 from enum import StrEnum
 from pydantic import BaseModel, Field
 class LineType(StrEnum):
    """Type of line in a diff."""
    ADDED = "added"
    REMOVED = "removed"
    CONTEXT = "context"
 class DiffLine(BaseModel):
    """A single line in a diff hunk."""
    content: str = Field(description="The line content without prefix")
    line_type: LineType = Field(description="Type of line (added/removed/context)")
    old_line: int | None = Field(default=None, description="Line number in original file")
    new_line: int | None = Field(default=None, description="Line number in new file")
 class DiffHunk(BaseModel):
    """A hunk within a diff file."""
    old_start: int = Field(description="Starting line in original file")
    old_count: int = Field(description="Number of lines from original file")
    new_start: int = Field(description="Starting line in new file")
    new_count: int = Field(description="Number of lines in new file")
    header: str = Field(default="", description="Optional function/class context from @@ line")
    lines: list[DiffLine] = Field(default_factory=list, description="Lines in this hunk")
    def get_added_lines(self) -> list[DiffLine]:
        return [line for line in self.lines if line.line_type == LineType.ADDED]
    def get_removed_lines(self) -> list[DiffLine]:
        return [line for line in self.lines if line.line_type == LineType.REMOVED]
 class DiffFile(BaseModel):
    """A single file's diff."""
    old_path: str = Field(description="Original file path (a/...)")
    new_path: str = Field(description="New file path (b/...)")
    hunks: list[DiffHunk] = Field(default_factory=list, description="Hunks in this file")
    is_new: bool = Field(default=False, description="True if file is newly created")
    is_deleted: bool = Field(default=False, description="True if file is deleted")
    is_binary: bool = Field(default=False, description="True if binary file")
    @property
    def path(self) -> str:
        if self.is_deleted:
            return self.old_path
        return self.new_path
    def get_added_line_numbers(self) -> list[int]:
        numbers = []
        for hunk in self.hunks:
            for line in hunk.lines:
                if line.line_type == LineType.ADDED and line.new_line is not None:
                    numbers.append(line.new_line)
        return numbers
    def get_changed_line_range(self) -> tuple[int, int] | None:
        added_lines = self.get_added_line_numbers()
        if not added_lines:
            return None
        return (min(added_lines), max(added_lines))
    def line_in_diff(self, line_number: int) -> bool:
        for hunk in self.hunks:
            if hunk.new_start <= line_number < hunk.new_start + hunk.new_count:
                return True
        return False
    def map_new_to_old(self, new_line: int) -> int | None:
        for hunk in self.hunks:
            for line in hunk.lines:
                if line.new_line == new_line and line.old_line is not None:
                    return line.old_line
        return None
 class ParsedDiff(BaseModel):
    """Complete parsed diff containing multiple files."""
    files: list[DiffFile] = Field(default_factory=list, description="Files in this diff")
    raw_diff: str = Field(default="", description="Original diff text")
    def get_file(self, path: str) -> DiffFile | None:
        for file in self.files:
            if file.path == path or file.old_path == path or file.new_path == path:
                return file
        return None
    def get_changed_files(self) -> list[str]:
        return [f.path for f in self.files]
    def get_added_files(self) -> list[str]:
        return [f.path for f in self.files if f.is_new]
    def get_deleted_files(self) -> list[str]:
        return [f.path for f in self.files if f.is_deleted]
 class DiffParser:
    """Parser for unified diff format."""
    # Regex patterns for diff parsing
    FILE_HEADER_PATTERN = re.compile(r"^diff --git a/(.*) b/(.*)$")
    OLD_FILE_PATTERN = re.compile(r"^--- (?:a/)?(.*)$")
    NEW_FILE_PATTERN = re.compile(r"^\+\+\+ (?:b/)?(.*)$")
    HUNK_HEADER_PATTERN = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$")
    BINARY_FILE_PATTERN = re.compile(r"^Binary files .* differ$")
    NEW_FILE_MODE_PATTERN = re.compile(r"^new file mode")
    DELETED_FILE_MODE_PATTERN = re.compile(r"^deleted file mode")
    def parse(self, diff_text: str) -> ParsedDiff:
        """Parse a unified diff string into structured data."""
        if not diff_text or not diff_text.strip():
            return ParsedDiff(raw_diff=diff_text)
        files: list[DiffFile] = []
        current_file: DiffFile | None = None
        current_hunk: DiffHunk | None = None
        lines = diff_text.splitlines()
        i = 0
        while i < len(lines):
            line = lines[i]
            # Check for new file header (diff --git)
            file_match = self.FILE_HEADER_PATTERN.match(line)
            if file_match:
                # Save previous file if exists
                if current_file is not None:
                    if current_hunk is not None:
                        current_file.hunks.append(current_hunk)
                    files.append(current_file)
                current_file = DiffFile(
                    old_path=file_match.group(1),
                    new_path=file_match.group(2),
                )
                current_hunk = None
                i += 1
                continue
            # Check for new/deleted file mode
            if current_file is not None:
                if self.NEW_FILE_MODE_PATTERN.match(line):
                    current_file.is_new = True
                    i += 1
                    continue
                if self.DELETED_FILE_MODE_PATTERN.match(line):
                    current_file.is_deleted = True
                    i += 1
                    continue
            # Check for binary file
            if current_file is not None and self.BINARY_FILE_PATTERN.match(line):
                current_file.is_binary = True
                i += 1
                continue
            # Check for old file path (--- a/...)
            old_match = self.OLD_FILE_PATTERN.match(line)
            if old_match:
                if current_file is not None:
                    # Update old_path if it's different
                    path = old_match.group(1)
                    if path != "/dev/null":
                        current_file.old_path = path
                    else:
                        current_file.is_new = True
                i += 1
                continue
            # Check for new file path (+++ b/...)
            new_match = self.NEW_FILE_PATTERN.match(line)
            if new_match:
                if current_file is not None:
                    path = new_match.group(1)
                    if path != "/dev/null":
                        current_file.new_path = path
                    else:
                        current_file.is_deleted = True
                i += 1
                continue
            # Check for hunk header (@@ ... @@)
            hunk_match = self.HUNK_HEADER_PATTERN.match(line)
            if hunk_match:
                # Save previous hunk if exists
                if current_file is not None and current_hunk is not None:
                    current_file.hunks.append(current_hunk)
                old_start = int(hunk_match.group(1))
                old_count = int(hunk_match.group(2)) if hunk_match.group(2) else 1
                new_start = int(hunk_match.group(3))
                new_count = int(hunk_match.group(4)) if hunk_match.group(4) else 1
                header = hunk_match.group(5).strip()
                current_hunk = DiffHunk(
                    old_start=old_start,
                    old_count=old_count,
                    new_start=new_start,
                    new_count=new_count,
                    header=header,
                )
                i += 1
                continue
            # Parse hunk lines
            if current_hunk is not None and line:
                diff_line = self._parse_hunk_line(line, current_hunk)
                if diff_line is not None:
                    current_hunk.lines.append(diff_line)
            i += 1
        # Don't forget the last file and hunk
        if current_file is not None:
            if current_hunk is not None:
                current_file.hunks.append(current_hunk)
            files.append(current_file)
        return ParsedDiff(files=files, raw_diff=diff_text)
    def _parse_hunk_line(self, line: str, hunk: DiffHunk) -> DiffLine | None:
        """Parse a single line within a hunk."""
        if not line:
            return None
        prefix = line[0] if line else " "
        content = line[1:] if len(line) > 1 else ""
        # Calculate current line numbers based on existing lines
        old_line = hunk.old_start
        new_line = hunk.new_start
        for existing in hunk.lines:
            if existing.line_type in (LineType.CONTEXT, LineType.REMOVED):
                old_line += 1
            if existing.line_type in (LineType.CONTEXT, LineType.ADDED):
                new_line += 1
        if prefix == "+":
            return DiffLine(
                content=content,
                line_type=LineType.ADDED,
                old_line=None,
                new_line=new_line,
            )
        elif prefix == "-":
            return DiffLine(
                content=content,
                line_type=LineType.REMOVED,
                old_line=old_line,
                new_line=None,
            )
        elif prefix == " ":
            return DiffLine(
                content=content,
                line_type=LineType.CONTEXT,
                old_line=old_line,
                new_line=new_line,
            )
        elif prefix == "\\":
            # "\ No newline at end of file" - skip
            return None
        else:
            # Unknown prefix, treat as context
            return DiffLine(
                content=line,
                line_type=LineType.CONTEXT,
                old_line=old_line,
                new_line=new_line,
            )
@@ -0,0 +1,519 @@
 """Static analysis runners for ruff, mypy, bandit, and radon."""
 import asyncio
 import json
 import re
 from pathlib import Path
 from typing import ClassVar
 from pydantic import BaseModel, Field
 from arbiter.analysis.diff import DiffFile, ParsedDiff
 from arbiter.models.enums import AgentName, Severity
 from arbiter.models.finding import Finding
 class StaticAnalysisConfig(BaseModel):
    """Configuration for static analysis tools."""
    ruff_enabled: bool = Field(default=True, description="Run ruff linter")
    mypy_enabled: bool = Field(default=True, description="Run mypy type checker")
    bandit_enabled: bool = Field(default=True, description="Run bandit security scanner")
    radon_enabled: bool = Field(default=True, description="Run radon complexity analyzer")
    ruff_config: str | None = Field(default=None, description="Path to ruff config file")
    mypy_config: str | None = Field(default=None, description="Path to mypy config file")
 class StaticFinding(BaseModel):
    """A finding from static analysis tools."""
    tool: str = Field(description="Tool that produced this finding (ruff, mypy, bandit, radon)")
    file: str = Field(description="File path")
    line: int = Field(description="Line number")
    column: int | None = Field(default=None, description="Column number")
    code: str = Field(description="Error/warning code (e.g., E501, B303)")
    message: str = Field(description="Description of the issue")
    severity: Severity = Field(description="Mapped severity level")
    end_line: int | None = Field(default=None, description="End line for multi-line issues")
    end_column: int | None = Field(default=None, description="End column")
    fix_available: bool = Field(default=False, description="Whether an auto-fix is available")
    extra: dict[str, str | int | None] | None = Field(
        default=None, description="Tool-specific extra data"
    )
 class StaticAnalysisResult(BaseModel):
    """Aggregated results from all static analysis tools."""
    findings: list[StaticFinding] = Field(default_factory=list)
    tool_errors: dict[str, str] = Field(
        default_factory=dict, description="Errors from tools that failed"
    )
    tools_run: list[str] = Field(default_factory=list, description="Tools that ran successfully")
 class StaticAnalysisRunner:
    """Runs static analysis tools on diff files."""
    # Severity mapping for ruff codes
    RUFF_SEVERITY_MAP: ClassVar[dict[str, Severity]] = {
        # Security-related (B = bandit-like checks in ruff)
        "S": Severity.HIGH,  # Security issues
        # Errors
        "E": Severity.MEDIUM,
        "F": Severity.MEDIUM,  # Pyflakes
        "W": Severity.LOW,  # Warnings
        # Style
        "C": Severity.LOW,  # McCabe complexity, conventions
        "N": Severity.LOW,  # pep8-naming
        "D": Severity.INFO,  # pydocstyle
        "I": Severity.INFO,  # isort
        # Type annotations
        "ANN": Severity.LOW,
        # Builtins
        "A": Severity.MEDIUM,
        # Bugbear
        "B": Severity.MEDIUM,
    }
    # Bandit severity mapping
    BANDIT_SEVERITY_MAP: ClassVar[dict[str, Severity]] = {
        "HIGH": Severity.HIGH,
        "MEDIUM": Severity.MEDIUM,
        "LOW": Severity.LOW,
    }
    # Radon complexity thresholds (cyclomatic complexity)
    RADON_THRESHOLDS: ClassVar[dict[str, Severity]] = {
        "F": Severity.CRITICAL,  # Very high risk (>40)
        "E": Severity.HIGH,  # High risk (31-40)
        "D": Severity.HIGH,  # More than moderate (21-30)
        "C": Severity.MEDIUM,  # Moderate (11-20)
        "B": Severity.LOW,  # Low (6-10)
        "A": Severity.INFO,  # Simple (1-5)
    }
    def __init__(self, config: StaticAnalysisConfig | None = None) -> None:
        self.config = config or StaticAnalysisConfig()
    async def run(self, diff: ParsedDiff, work_dir: Path | None = None) -> StaticAnalysisResult:
        """Run all enabled static analysis tools on the diff."""
        result = StaticAnalysisResult()
        # If no work_dir provided, we can't run analysis on actual files
        # In that case, return empty result
        if work_dir is None:
            return result
        # Collect Python files from the diff
        python_files = [
            f for f in diff.files if f.path.endswith(".py") and not f.is_deleted and not f.is_binary
        ]
        if not python_files:
            return result
        # Run tools in parallel
        tasks = []
        if self.config.ruff_enabled:
            tasks.append(self._run_ruff(python_files, work_dir, diff))
        if self.config.mypy_enabled:
            tasks.append(self._run_mypy(python_files, work_dir, diff))
        if self.config.bandit_enabled:
            tasks.append(self._run_bandit(python_files, work_dir, diff))
        if self.config.radon_enabled:
            tasks.append(self._run_radon(python_files, work_dir, diff))
        results = await asyncio.gather(*tasks, return_exceptions=True)
        tool_names = []
        if self.config.ruff_enabled:
            tool_names.append("ruff")
        if self.config.mypy_enabled:
            tool_names.append("mypy")
        if self.config.bandit_enabled:
            tool_names.append("bandit")
        if self.config.radon_enabled:
            tool_names.append("radon")
        for tool_name, tool_result in zip(tool_names, results, strict=False):
            if isinstance(tool_result, BaseException):
                result.tool_errors[tool_name] = str(tool_result)
            elif isinstance(tool_result, list):
                result.findings.extend(tool_result)
                result.tools_run.append(tool_name)
        return result
    async def _run_ruff(
        self, files: list[DiffFile], work_dir: Path, diff: ParsedDiff
    ) -> list[StaticFinding]:
        """Run ruff linter and parse JSON output."""
        file_paths = [str(work_dir / f.path) for f in files if (work_dir / f.path).exists()]
        if not file_paths:
            return []
        cmd = ["ruff", "check", "--output-format=json"]
        if self.config.ruff_config:
            cmd.extend(["--config", self.config.ruff_config])
        cmd.extend(file_paths)
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=work_dir,
            )
            stdout, _ = await proc.communicate()
            # ruff returns non-zero if issues found, which is expected
            if stdout:
                return self._parse_ruff_output(stdout.decode(), diff, work_dir)
            return []
        except FileNotFoundError:
            raise RuntimeError("ruff not found. Install with: pip install ruff") from None
    def _parse_ruff_output(
        self, output: str, diff: ParsedDiff, work_dir: Path
    ) -> list[StaticFinding]:
        """Parse ruff JSON output into StaticFindings."""
        if not output.strip():
            return []
        try:
            issues = json.loads(output)
        except json.JSONDecodeError:
            return []
        findings = []
        for issue in issues:
            file_path = issue.get("filename", "")
            # Make path relative to work_dir
            try:
                rel_path = Path(file_path).relative_to(work_dir)
                file_path = str(rel_path)
            except ValueError:
                pass
            line = issue.get("location", {}).get("row", 1)
            # Only include findings that are in the diff
            diff_file = diff.get_file(file_path)
            if diff_file is None or not diff_file.line_in_diff(line):
                continue
            code = issue.get("code", "")
            severity = self._ruff_code_to_severity(code)
            findings.append(
                StaticFinding(
                    tool="ruff",
                    file=file_path,
                    line=line,
                    column=issue.get("location", {}).get("column"),
                    code=code,
                    message=issue.get("message", ""),
                    severity=severity,
                    end_line=issue.get("end_location", {}).get("row"),
                    end_column=issue.get("end_location", {}).get("column"),
                    fix_available=issue.get("fix") is not None,
                    extra={"url": issue.get("url")},
                )
            )
        return findings
    def _ruff_code_to_severity(self, code: str) -> Severity:
        """Map ruff error code to severity level."""
        if not code:
            return Severity.MEDIUM
        # Check prefixes from longest to shortest
        for prefix in sorted(self.RUFF_SEVERITY_MAP.keys(), key=len, reverse=True):
            if code.startswith(prefix):
                return self.RUFF_SEVERITY_MAP[prefix]
        return Severity.MEDIUM
    async def _run_mypy(
        self, files: list[DiffFile], work_dir: Path, diff: ParsedDiff
    ) -> list[StaticFinding]:
        """Run mypy type checker and parse output."""
        file_paths = [str(work_dir / f.path) for f in files if (work_dir / f.path).exists()]
        if not file_paths:
            return []
        cmd = ["mypy", "--no-error-summary", "--show-column-numbers"]
        if self.config.mypy_config:
            cmd.extend(["--config-file", self.config.mypy_config])
        cmd.extend(file_paths)
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=work_dir,
            )
            stdout, _ = await proc.communicate()
            if stdout:
                return self._parse_mypy_output(stdout.decode(), diff, work_dir)
            return []
        except FileNotFoundError:
            raise RuntimeError("mypy not found. Install with: pip install mypy") from None
    def _parse_mypy_output(
        self, output: str, diff: ParsedDiff, work_dir: Path
    ) -> list[StaticFinding]:
        """Parse mypy output into StaticFindings."""
        findings = []
        # mypy output format: file:line:column: severity: message
        pattern = re.compile(r"^(.+?):(\d+):(\d+): (\w+): (.+)$")
        for line in output.strip().split("\n"):
            match = pattern.match(line)
            if not match:
                continue
            file_path, line_num, col, level, message = match.groups()
            # Make path relative
            try:
                rel_path = Path(file_path).relative_to(work_dir)
                file_path = str(rel_path)
            except ValueError:
                pass
            line_num = int(line_num)
            # Only include findings in the diff
            diff_file = diff.get_file(file_path)
            if diff_file is None or not diff_file.line_in_diff(line_num):
                continue
            # Map mypy severity
            if level == "error":
                severity = Severity.MEDIUM
            elif level == "warning":
                severity = Severity.LOW
            else:
                severity = Severity.INFO
            findings.append(
                StaticFinding(
                    tool="mypy",
                    file=file_path,
                    line=line_num,
                    column=int(col),
                    code=f"mypy-{level}",
                    message=message,
                    severity=severity,
                )
            )
        return findings
    async def _run_bandit(
        self, files: list[DiffFile], work_dir: Path, diff: ParsedDiff
    ) -> list[StaticFinding]:
        """Run bandit security scanner and parse JSON output."""
        file_paths = [str(work_dir / f.path) for f in files if (work_dir / f.path).exists()]
        if not file_paths:
            return []
        cmd = ["bandit", "-f", "json", "-q"]
        cmd.extend(file_paths)
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=work_dir,
            )
            stdout, _ = await proc.communicate()
            if stdout:
                return self._parse_bandit_output(stdout.decode(), diff, work_dir)
            return []
        except FileNotFoundError:
            raise RuntimeError("bandit not found. Install with: pip install bandit") from None
    def _parse_bandit_output(
        self, output: str, diff: ParsedDiff, work_dir: Path
    ) -> list[StaticFinding]:
        """Parse bandit JSON output into StaticFindings."""
        if not output.strip():
            return []
        try:
            data = json.loads(output)
        except json.JSONDecodeError:
            return []
        findings = []
        for issue in data.get("results", []):
            file_path = issue.get("filename", "")
            # Make path relative
            try:
                rel_path = Path(file_path).relative_to(work_dir)
                file_path = str(rel_path)
            except ValueError:
                pass
            line = issue.get("line_number", 1)
            # Only include findings in the diff
            diff_file = diff.get_file(file_path)
            if diff_file is None or not diff_file.line_in_diff(line):
                continue
            severity_str = issue.get("issue_severity", "MEDIUM")
            severity = self.BANDIT_SEVERITY_MAP.get(severity_str, Severity.MEDIUM)
            findings.append(
                StaticFinding(
                    tool="bandit",
                    file=file_path,
                    line=line,
                    code=issue.get("test_id", ""),
                    message=f"{issue.get('issue_text', '')} (Confidence: {issue.get('issue_confidence', 'MEDIUM')})",
                    severity=severity,
                    end_line=issue.get("line_range", [line])[-1]
                    if issue.get("line_range")
                    else None,
                    extra={
                        "test_name": issue.get("test_name"),
                        "confidence": issue.get("issue_confidence"),
                        "cwe": issue.get("issue_cwe", {}).get("id")
                        if issue.get("issue_cwe")
                        else None,
                        "more_info": issue.get("more_info"),
                    },
                )
            )
        return findings
    async def _run_radon(
        self, files: list[DiffFile], work_dir: Path, diff: ParsedDiff
    ) -> list[StaticFinding]:
        """Run radon complexity analyzer and parse JSON output."""
        file_paths = [str(work_dir / f.path) for f in files if (work_dir / f.path).exists()]
        if not file_paths:
            return []
        cmd = ["radon", "cc", "-j", "-s"]
        cmd.extend(file_paths)
        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=work_dir,
            )
            stdout, _ = await proc.communicate()
            if stdout:
                return self._parse_radon_output(stdout.decode(), diff, work_dir)
            return []
        except FileNotFoundError:
            raise RuntimeError("radon not found. Install with: pip install radon") from None
    def _parse_radon_output(
        self, output: str, diff: ParsedDiff, work_dir: Path
    ) -> list[StaticFinding]:
        """Parse radon JSON output into StaticFindings."""
        if not output.strip():
            return []
        try:
            data = json.loads(output)
        except json.JSONDecodeError:
            return []
        findings = []
        for file_path, blocks in data.items():
            # Make path relative
            try:
                rel_path = Path(file_path).relative_to(work_dir)
                file_path = str(rel_path)
            except ValueError:
                pass
            for block in blocks:
                line = block.get("lineno", 1)
                # Only include findings in the diff
                diff_file = diff.get_file(file_path)
                if diff_file is None or not diff_file.line_in_diff(line):
                    continue
                rank = block.get("rank", "A")
                complexity = block.get("complexity", 0)
                # Only report if complexity is notable (C or worse)
                if rank in ("A", "B"):
                    continue
                severity = self.RADON_THRESHOLDS.get(rank, Severity.INFO)
                block_type = block.get("type", "function")
                name = block.get("name", "unknown")
                findings.append(
                    StaticFinding(
                        tool="radon",
                        file=file_path,
                        line=line,
                        code=f"CC{rank}",
                        message=f"{block_type.capitalize()} '{name}' has cyclomatic complexity of {complexity} (rank {rank})",
                        severity=severity,
                        end_line=block.get("endline"),
                        extra={
                            "complexity": complexity,
                            "rank": rank,
                            "type": block_type,
                            "name": name,
                        },
                    )
                )
        return findings
    def convert_to_finding(
        self, static_finding: StaticFinding, prompt_version: str = "static-v1.0"
    ) -> Finding:
        """Convert a StaticFinding to the standard Finding model."""
        return Finding(
            id=f"{static_finding.tool}-{static_finding.file}-{static_finding.line}-{static_finding.code}",
            agent=AgentName.STYLE,  # Static analysis is grouped under style
            file=static_finding.file,
            line_start=static_finding.line,
            line_end=static_finding.end_line or static_finding.line,
            severity=static_finding.severity,
            confidence=0.95,  # Static analysis has high confidence
            title=f"[{static_finding.tool}] {static_finding.code}",
            description=static_finding.message,
            reasoning=f"Detected by {static_finding.tool} static analysis",
            suggestion="Fix the issue as indicated by the tool"
            if static_finding.fix_available
            else None,
            references=[str(static_finding.extra.get("url"))]
            if static_finding.extra and static_finding.extra.get("url") is not None
            else [],
            prompt_version=prompt_version,
            static_analysis_context={"tool": static_finding.tool, "code": static_finding.code},
        )
 async def run_static_analysis(
    diff: ParsedDiff,
    work_dir: Path | None = None,
    config: StaticAnalysisConfig | None = None,
 ) -> StaticAnalysisResult:
    runner = StaticAnalysisRunner(config)
    return await runner.run(diff, work_dir)
@@ -0,0 +1,26 @@
 """Deliberation module for merging and synthesizing review findings."""
 from arbiter.deliberation.conflicts import Conflict, ConflictDetector, ConflictNature
 from arbiter.deliberation.coordinator import (
    Coordinator,
    DeliberationResult,
    DeliberationStep,
    StepType,
 )
 from arbiter.deliberation.merger import FindingGroup, FindingMerger, MergedFindings
 from arbiter.deliberation.synthesis import ConflictSynthesizer, Resolution
 __all__ = [
    "Conflict",
    "ConflictDetector",
    "ConflictNature",
    "ConflictSynthesizer",
    "Coordinator",
    "DeliberationResult",
    "DeliberationStep",
    "FindingGroup",
    "FindingMerger",
    "MergedFindings",
    "Resolution",
    "StepType",
 ]
@@ -0,0 +1,256 @@
 """Conflict detection for opposing agent recommendations."""
 from enum import StrEnum
 from typing import ClassVar
 from pydantic import BaseModel, Field
 from arbiter.models import Finding, Severity
 class ConflictNature(StrEnum):
    """Nature of a conflict between findings."""
    CONTRADICTORY = "contradictory"  # Directly opposing recommendations
    TRADE_OFF = "trade_off"  # Both valid but competing concerns
    OVERLAPPING = "overlapping"  # Same issue from different perspectives
 class Conflict(BaseModel):
    """A detected conflict between two or more findings."""
    id: str = Field(description="Unique identifier for this conflict")
    finding_ids: list[str] = Field(description="IDs of conflicting findings")
    nature: ConflictNature = Field(description="Type of conflict")
    description: str = Field(description="Description of the conflict")
    severity_weight: float = Field(
        ge=0.0, le=1.0, description="How significant this conflict is (0-1)"
    )
    resolution: str | None = Field(default=None, description="How the conflict was resolved")
    winning_finding_id: str | None = Field(
        default=None, description="ID of the finding that won (if applicable)"
    )
 class ConflictDetector:
    """Detects conflicts between findings from different agents."""
    # Keywords that indicate opposing recommendations
    POSITIVE_KEYWORDS: ClassVar[set[str]] = {
        "add",
        "increase",
        "enable",
        "use",
        "implement",
        "include",
        "expand",
        "more",
        "validate",
        "check",
    }
    NEGATIVE_KEYWORDS: ClassVar[set[str]] = {
        "remove",
        "decrease",
        "disable",
        "avoid",
        "simplify",
        "exclude",
        "reduce",
        "less",
        "skip",
        "delete",
    }
    # Agent pairs that commonly have trade-offs
    TRADE_OFF_PAIRS: ClassVar[dict[tuple[str, str], str]] = {
        ("security", "complexity"): "Security measures often add complexity",
        ("security", "style"): "Security code may sacrifice readability",
        ("complexity", "style"): "Simplification may reduce clarity",
    }
    # Severity weights for prioritization
    SEVERITY_WEIGHTS: ClassVar[dict[Severity, float]] = {
        Severity.CRITICAL: 1.0,
        Severity.HIGH: 0.8,
        Severity.MEDIUM: 0.5,
        Severity.LOW: 0.3,
        Severity.INFO: 0.1,
    }
    def detect_conflicts(self, findings: list[Finding]) -> list[Conflict]:
        """Detect conflicts among a list of findings.
        Args:
            findings: List of findings to analyze for conflicts.
        Returns:
            List of detected conflicts.
        """
        conflicts: list[Conflict] = []
        seen_pairs: set[tuple[str, str]] = set()
        # Compare each pair of findings
        for i, f1 in enumerate(findings):
            for f2 in findings[i + 1 :]:
                # Skip if same agent
                if f1.agent == f2.agent:
                    continue
                # Skip if already processed this pair
                ids = sorted([f1.id, f2.id])
                pair_key = (ids[0], ids[1])
                if pair_key in seen_pairs:
                    continue
                seen_pairs.add(pair_key)
                # Check for conflicts
                conflict = self._detect_conflict_pair(f1, f2)
                if conflict:
                    conflicts.append(conflict)
        return conflicts
    def _detect_conflict_pair(self, f1: Finding, f2: Finding) -> Conflict | None:
        """Detect conflict between two findings."""
        # Check for location overlap first
        if f1.file != f2.file:
            return None
        if not self._lines_overlap(f1, f2):
            return None
        # Check for contradictory recommendations
        if self._is_contradictory(f1, f2):
            return self._create_conflict(f1, f2, ConflictNature.CONTRADICTORY)
        # Check for known trade-off patterns
        if self._is_trade_off(f1, f2):
            return self._create_conflict(f1, f2, ConflictNature.TRADE_OFF)
        # Check for overlapping concerns
        if self._is_overlapping(f1, f2):
            return self._create_conflict(f1, f2, ConflictNature.OVERLAPPING)
        return None
    def _lines_overlap(self, f1: Finding, f2: Finding) -> bool:
        # Allow some proximity (within 3 lines)
        proximity = 3
        return not (
            f1.line_end + proximity < f2.line_start or f2.line_end + proximity < f1.line_start
        )
    def _is_contradictory(self, f1: Finding, f2: Finding) -> bool:
        """Check if findings have contradictory recommendations."""
        if not f1.suggestion or not f2.suggestion:
            return False
        # Check if one uses positive and other uses negative keywords
        s1_lower = f1.suggestion.lower()
        s2_lower = f2.suggestion.lower()
        s1_positive = any(kw in s1_lower for kw in self.POSITIVE_KEYWORDS)
        s1_negative = any(kw in s1_lower for kw in self.NEGATIVE_KEYWORDS)
        s2_positive = any(kw in s2_lower for kw in self.POSITIVE_KEYWORDS)
        s2_negative = any(kw in s2_lower for kw in self.NEGATIVE_KEYWORDS)
        # Contradictory if one is positive-only and other is negative-only
        return (s1_positive and not s1_negative and s2_negative and not s2_positive) or (
            s1_negative and not s1_positive and s2_positive and not s2_negative
        )
    def _is_trade_off(self, f1: Finding, f2: Finding) -> bool:
        agents = sorted([f1.agent.value, f2.agent.value])
        agent_pair = (agents[0], agents[1])
        # Check both orderings since pairs may be defined in either order
        return agent_pair in self.TRADE_OFF_PAIRS or (agents[1], agents[0]) in self.TRADE_OFF_PAIRS
    def _is_overlapping(self, f1: Finding, f2: Finding) -> bool:
        """Check if findings cover the same issue from different angles."""
        # Consider overlapping if titles are similar
        title_words_1 = set(f1.title.lower().split())
        title_words_2 = set(f2.title.lower().split())
        # Remove common stop words
        stop_words = {"the", "a", "an", "in", "on", "at", "for", "to", "of", "is", "are"}
        title_words_1 -= stop_words
        title_words_2 -= stop_words
        if not title_words_1 or not title_words_2:
            return False
        # Check for significant word overlap (Jaccard similarity > 0.3)
        intersection = len(title_words_1 & title_words_2)
        union = len(title_words_1 | title_words_2)
        return intersection / union > 0.3
    def _create_conflict(self, f1: Finding, f2: Finding, nature: ConflictNature) -> Conflict:
        """Create a Conflict object for two findings."""
        # Calculate severity weight based on findings
        weight = max(
            self.SEVERITY_WEIGHTS.get(f1.severity, 0.5),
            self.SEVERITY_WEIGHTS.get(f2.severity, 0.5),
        )
        # Generate description based on nature
        if nature == ConflictNature.CONTRADICTORY:
            description = (
                f"Contradictory recommendations: {f1.agent.value} agent suggests '{f1.suggestion}' "
                f"while {f2.agent.value} agent suggests '{f2.suggestion}'"
            )
        elif nature == ConflictNature.TRADE_OFF:
            agents = sorted([f1.agent.value, f2.agent.value])
            agent_pair = (agents[0], agents[1])
            reason = self.TRADE_OFF_PAIRS.get(agent_pair, "Competing concerns")
            description = f"Trade-off between {f1.agent.value} and {f2.agent.value}: {reason}"
        else:  # OVERLAPPING
            description = (
                f"Overlapping concerns: both {f1.agent.value} and {f2.agent.value} "
                f"flagged similar issues at this location"
            )
        return Conflict(
            id=f"conflict-{f1.id[:8]}-{f2.id[:8]}",
            finding_ids=[f1.id, f2.id],
            nature=nature,
            description=description,
            severity_weight=weight,
        )
    def resolve_by_severity(self, conflict: Conflict, findings: list[Finding]) -> Conflict:
        """Resolve a conflict by choosing the higher severity finding.
        Args:
            conflict: The conflict to resolve.
            findings: List of all findings (to look up by ID).
        Returns:
            Updated conflict with resolution.
        """
        # Find the findings
        conflict_findings = [f for f in findings if f.id in conflict.finding_ids]
        if not conflict_findings:
            return conflict
        # Sort by severity weight, then confidence
        sorted_findings = sorted(
            conflict_findings,
            key=lambda f: (
                -self.SEVERITY_WEIGHTS.get(f.severity, 0.5),
                -f.confidence,
            ),
        )
        winner = sorted_findings[0]
        return Conflict(
            id=conflict.id,
            finding_ids=conflict.finding_ids,
            nature=conflict.nature,
            description=conflict.description,
            severity_weight=conflict.severity_weight,
            resolution=f"Resolved by severity: {winner.agent.value} finding takes precedence",
            winning_finding_id=winner.id,
        )
@@ -0,0 +1,286 @@
 """Coordinator for deliberation process."""
 from datetime import UTC, datetime
 from enum import StrEnum
 from typing import Any
 from pydantic import BaseModel, Field
 from arbiter.deliberation.conflicts import Conflict, ConflictDetector
 from arbiter.deliberation.merger import FindingMerger, MergedFindings
 from arbiter.deliberation.synthesis import ConflictSynthesizer, Resolution
 from arbiter.llm.client import LLMClient
 from arbiter.models import Finding, ReviewResult, Severity, Verdict
 class StepType(StrEnum):
    """Types of deliberation steps."""
    MERGE = "merge"
    CONFLICT_DETECTION = "conflict_detection"
    SYNTHESIS = "synthesis"
    VERDICT = "verdict"
 class DeliberationStep(BaseModel):
    """A single step in the deliberation process."""
    step_type: StepType = Field(description="Type of deliberation step")
    timestamp: datetime = Field(default_factory=lambda: datetime.now(UTC))
    description: str = Field(description="What happened in this step")
    details: dict[str, Any] | None = Field(default=None, description="Additional details")
 class DeliberationResult(BaseModel):
    """Complete result of the deliberation process."""
    findings: list[Finding] = Field(default_factory=list, description="All unique findings")
    merged: MergedFindings | None = Field(default=None, description="Merged findings structure")
    conflicts: list[Conflict] = Field(default_factory=list, description="Detected conflicts")
    resolutions: list[Resolution] = Field(default_factory=list, description="Conflict resolutions")
    steps: list[DeliberationStep] = Field(default_factory=list, description="Deliberation log")
    verdict: Verdict = Field(default=Verdict.COMMENT, description="Final verdict")
    verdict_confidence: float = Field(
        ge=0.0, le=1.0, default=0.5, description="Confidence in verdict"
    )
    verdict_reasoning: str = Field(default="", description="Explanation for verdict")
    total_findings: int = Field(default=0, description="Total number of findings")
    critical_count: int = Field(default=0, description="Number of critical findings")
    high_count: int = Field(default=0, description="Number of high severity findings")
    tokens_used: int = Field(default=0, description="Tokens used for synthesis")
    cost_usd: float = Field(default=0.0, description="Cost of synthesis")
 class VerdictConfig(BaseModel):
    """Configuration for verdict determination."""
    request_changes_critical_threshold: int = Field(
        default=1, description="Number of critical findings to request changes"
    )
    request_changes_high_threshold: int = Field(
        default=3, description="Number of high findings to request changes"
    )
    comment_high_threshold: int = Field(
        default=1, description="Number of high findings to add a comment"
    )
    comment_medium_threshold: int = Field(
        default=5, description="Number of medium findings to add a comment"
    )
 class Coordinator:
    """Orchestrates the deliberation process."""
    def __init__(
        self,
        llm_client: LLMClient | None = None,
        synthesis_model: str = "gpt-4o-mini",
        verdict_config: VerdictConfig | None = None,
    ) -> None:
        self.llm_client = llm_client
        self.synthesis_model = synthesis_model
        self.verdict_config = verdict_config or VerdictConfig()
        self.merger = FindingMerger()
        self.conflict_detector = ConflictDetector()
        self.synthesizer: ConflictSynthesizer | None = None
        if llm_client:
            self.synthesizer = ConflictSynthesizer(llm_client, synthesis_model)
    async def deliberate(
        self,
        agent_results: list[ReviewResult],
        static_findings: list[Finding] | None = None,
    ) -> DeliberationResult:
        """Run the full deliberation process.
        Args:
            agent_results: Results from agent reviews.
            static_findings: Findings from static analysis.
        Returns:
            DeliberationResult with findings, conflicts, and verdict.
        """
        result = DeliberationResult()
        all_agent_findings: list[Finding] = []
        for agent_result in agent_results:
            all_agent_findings.extend(agent_result.findings)
        merged = self.merger.merge(all_agent_findings, static_findings)
        result.merged = merged
        result.findings = merged.unique_findings
        result.total_findings = len(merged.unique_findings)
        result.steps.append(
            DeliberationStep(
                step_type=StepType.MERGE,
                description=f"Merged {len(all_agent_findings)} agent findings with {len(static_findings or [])} static findings",
                details={
                    "groups": len(merged.groups),
                    "unique": len(merged.unique_findings),
                    "duplicates_removed": merged.duplicates_removed,
                },
            )
        )
        conflicts = self.conflict_detector.detect_conflicts(merged.unique_findings)
        result.conflicts = conflicts
        result.steps.append(
            DeliberationStep(
                step_type=StepType.CONFLICT_DETECTION,
                description=f"Detected {len(conflicts)} conflicts among findings",
                details={
                    "by_nature": self._count_by_nature(conflicts),
                },
            )
        )
        if conflicts and self.synthesizer:
            resolutions, tokens, cost = await self._synthesize_conflicts(
                conflicts, merged.unique_findings
            )
            result.resolutions = resolutions
            result.tokens_used = tokens
            result.cost_usd = cost
            result.steps.append(
                DeliberationStep(
                    step_type=StepType.SYNTHESIS,
                    description=f"Synthesized {len(resolutions)} conflict resolutions",
                    details={
                        "tokens_used": tokens,
                        "cost_usd": cost,
                    },
                )
            )
        self._count_severities(result)
        verdict, confidence, reasoning = self._determine_verdict(result)
        result.verdict = verdict
        result.verdict_confidence = confidence
        result.verdict_reasoning = reasoning
        result.steps.append(
            DeliberationStep(
                step_type=StepType.VERDICT,
                description=f"Verdict: {verdict.value} (confidence: {confidence:.2f})",
                details={
                    "critical_count": result.critical_count,
                    "high_count": result.high_count,
                    "reasoning": reasoning,
                },
            )
        )
        return result
    def _count_by_nature(self, conflicts: list[Conflict]) -> dict[str, int]:
        counts: dict[str, int] = {}
        for conflict in conflicts:
            nature = conflict.nature.value
            counts[nature] = counts.get(nature, 0) + 1
        return counts
    async def _synthesize_conflicts(
        self, conflicts: list[Conflict], findings: list[Finding]
    ) -> tuple[list[Resolution], int, float]:
        """Synthesize resolutions for conflicts that need it."""
        resolutions: list[Resolution] = []
        total_tokens = 0
        total_cost = 0.0
        if not self.synthesizer:
            return resolutions, total_tokens, total_cost
        for conflict in conflicts:
            if self.synthesizer.should_synthesize(conflict):
                resolution = await self.synthesizer.synthesize(conflict, findings)
                resolutions.append(resolution)
            else:
                # Use algorithmic resolution
                resolved = self.conflict_detector.resolve_by_severity(conflict, findings)
                resolutions.append(
                    Resolution(
                        conflict_id=conflict.id,
                        decision="prefer_first"
                        if resolved.winning_finding_id == conflict.finding_ids[0]
                        else "prefer_second",
                        reasoning=resolved.resolution or "Resolved by severity",
                        confidence=0.8,
                    )
                )
        return resolutions, total_tokens, total_cost
    def _count_severities(self, result: DeliberationResult) -> None:
        for finding in result.findings:
            if finding.severity == Severity.CRITICAL:
                result.critical_count += 1
            elif finding.severity == Severity.HIGH:
                result.high_count += 1
    def _determine_verdict(self, result: DeliberationResult) -> tuple[Verdict, float, str]:
        """Determine the final verdict based on findings and conflicts.
        Returns:
            Tuple of (verdict, confidence, reasoning).
        """
        config = self.verdict_config
        # Critical findings always trigger request_changes
        if result.critical_count >= config.request_changes_critical_threshold:
            return (
                Verdict.REQUEST_CHANGES,
                0.95,
                f"Found {result.critical_count} critical issue(s) requiring immediate attention",
            )
        # High severity threshold
        if result.high_count >= config.request_changes_high_threshold:
            return (
                Verdict.REQUEST_CHANGES,
                0.85,
                f"Found {result.high_count} high severity issue(s) that should be addressed",
            )
        # Count medium findings
        medium_count = sum(1 for f in result.findings if f.severity == Severity.MEDIUM)
        # Comment thresholds
        if result.high_count >= config.comment_high_threshold:
            return (
                Verdict.COMMENT,
                0.75,
                f"Found {result.high_count} high severity issue(s) worth discussing",
            )
        if medium_count >= config.comment_medium_threshold:
            return (
                Verdict.COMMENT,
                0.7,
                f"Found {medium_count} medium severity issue(s) worth discussing",
            )
        # Unresolved conflicts should trigger comment
        unresolved_conflicts = len(result.conflicts) - len(result.resolutions)
        if unresolved_conflicts > 0:
            return (
                Verdict.COMMENT,
                0.65,
                f"Found {unresolved_conflicts} unresolved conflict(s) between agents",
            )
        # Default: approve
        if result.total_findings == 0:
            return (
                Verdict.APPROVE,
                0.95,
                "No issues found",
            )
        return (
            Verdict.APPROVE,
            0.8,
            f"Found {result.total_findings} minor issue(s), none blocking",
        )
@@ -0,0 +1,207 @@
 """Finding merger for grouping and deduplicating findings."""
 from pydantic import BaseModel, Field
 from arbiter.models import Finding
 class FindingGroup(BaseModel):
    """A group of related findings at the same location."""
    file: str = Field(description="File path")
    line_start: int = Field(description="Starting line of the group")
    line_end: int = Field(description="Ending line of the group")
    findings: list[Finding] = Field(default_factory=list, description="Findings in this group")
    @property
    def primary_finding(self) -> Finding | None:
        if not self.findings:
            return None
        # Sort by severity (critical > high > medium > low > info)
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
        return min(
            self.findings, key=lambda f: (severity_order.get(f.severity.value, 5), -f.confidence)
        )
    @property
    def agents(self) -> list[str]:
        return list({f.agent for f in self.findings})
 class MergedFindings(BaseModel):
    """Result of merging findings from multiple sources."""
    groups: list[FindingGroup] = Field(default_factory=list, description="Grouped findings")
    unique_findings: list[Finding] = Field(
        default_factory=list, description="Deduplicated list of all findings"
    )
    duplicates_removed: int = Field(default=0, description="Number of duplicates removed")
    def get_groups_for_file(self, file: str) -> list[FindingGroup]:
        return [g for g in self.groups if g.file == file]
    def get_all_findings(self) -> list[Finding]:
        return self.unique_findings
 class FindingMerger:
    """Merges and deduplicates findings from static analysis and agents."""
    def __init__(self, proximity_threshold: int = 5) -> None:
        self.proximity_threshold = proximity_threshold
    def merge(
        self,
        agent_findings: list[Finding],
        static_findings: list[Finding] | None = None,
    ) -> MergedFindings:
        """Merge findings from agents and static analysis.
        Args:
            agent_findings: Findings from AI agents.
            static_findings: Findings converted from static analysis tools.
        Returns:
            MergedFindings with grouped and deduplicated findings.
        """
        all_findings = list(agent_findings)
        if static_findings:
            all_findings.extend(static_findings)
        if not all_findings:
            return MergedFindings()
        # Deduplicate first
        unique, duplicates_count = self._deduplicate(all_findings)
        # Group by proximity
        groups = self._group_by_location(unique)
        return MergedFindings(
            groups=groups,
            unique_findings=unique,
            duplicates_removed=duplicates_count,
        )
    def _deduplicate(self, findings: list[Finding]) -> tuple[list[Finding], int]:
        """Remove duplicate findings.
        Two findings are considered duplicates if they have:
        - Same file
        - Same or overlapping lines
        - Similar title or message (normalized)
        """
        if not findings:
            return [], 0
        unique: list[Finding] = []
        duplicates = 0
        for finding in findings:
            is_duplicate = False
            for existing in unique:
                if self._is_duplicate(finding, existing):
                    is_duplicate = True
                    # Keep the one with higher confidence or more detail
                    if finding.confidence > existing.confidence or (
                        finding.confidence == existing.confidence
                        and len(finding.description) > len(existing.description)
                    ):
                        unique.remove(existing)
                        unique.append(finding)
                    duplicates += 1
                    break
            if not is_duplicate:
                unique.append(finding)
        return unique, duplicates
    def _is_duplicate(self, a: Finding, b: Finding) -> bool:
        """Check if two findings are duplicates."""
        # Must be in same file
        if a.file != b.file:
            return False
        # Check line overlap
        if not self._lines_overlap(a.line_start, a.line_end, b.line_start, b.line_end):
            return False
        # Check content similarity
        return self._content_similar(a, b)
    def _lines_overlap(self, a_start: int, a_end: int, b_start: int, b_end: int) -> bool:
        return not (a_end < b_start or b_end < a_start)
    def _content_similar(self, a: Finding, b: Finding) -> bool:
        """Check if two findings have similar content."""
        # Normalize titles for comparison
        title_a = self._normalize(a.title)
        title_b = self._normalize(b.title)
        # Same title is a strong indicator
        if title_a == title_b:
            return True
        # Check if one title contains the other
        if title_a in title_b or title_b in title_a:
            return True
        # Check for same static analysis code (e.g., both are "S101")
        return bool(
            a.static_analysis_context
            and b.static_analysis_context
            and a.static_analysis_context.get("code") == b.static_analysis_context.get("code")
        )
    def _normalize(self, text: str) -> str:
        # Remove tool prefixes like "[ruff]", "[mypy]"
        import re
        text = re.sub(r"\[[\w-]+\]\s*", "", text)
        # Lowercase and strip
        return text.lower().strip()
    def _group_by_location(self, findings: list[Finding]) -> list[FindingGroup]:
        """Group findings by file and proximity."""
        if not findings:
            return []
        # Sort by file, then line
        sorted_findings = sorted(findings, key=lambda f: (f.file, f.line_start))
        groups: list[FindingGroup] = []
        current_group: FindingGroup | None = None
        for finding in sorted_findings:
            if current_group is None:
                # Start new group
                current_group = FindingGroup(
                    file=finding.file,
                    line_start=finding.line_start,
                    line_end=finding.line_end,
                    findings=[finding],
                )
            elif (
                current_group.file == finding.file
                and finding.line_start <= current_group.line_end + self.proximity_threshold
            ):
                # Extend current group
                current_group.findings.append(finding)
                current_group.line_end = max(current_group.line_end, finding.line_end)
            else:
                # Save current group and start new one
                groups.append(current_group)
                current_group = FindingGroup(
                    file=finding.file,
                    line_start=finding.line_start,
                    line_end=finding.line_end,
                    findings=[finding],
                )
        # Don't forget last group
        if current_group is not None:
            groups.append(current_group)
        return groups
@@ -0,0 +1,213 @@
 """LLM-based synthesis for ambiguous conflicts."""
 import json
 from pydantic import BaseModel, Field
 from arbiter.deliberation.conflicts import Conflict, ConflictNature
 from arbiter.llm.client import LLMClient
 from arbiter.models import Finding
 class Resolution(BaseModel):
    """Resolution for a conflict produced by LLM synthesis."""
    conflict_id: str = Field(description="ID of the resolved conflict")
    decision: str = Field(
        description="The decision made (keep_both, prefer_first, prefer_second, merge)"
    )
    reasoning: str = Field(description="Explanation of why this decision was made")
    merged_suggestion: str | None = Field(
        default=None, description="Combined suggestion if decision is merge"
    )
    confidence: float = Field(ge=0.0, le=1.0, description="Confidence in this resolution")
 SYNTHESIS_PROMPT = """You are a code review coordinator synthesizing findings from multiple specialized agents.
 Two agents have provided conflicting feedback on the same code location. Your job is to:
 1. Understand both perspectives
 2. Determine the best resolution
 3. Provide clear reasoning
 ## Conflict Information
 **Nature:** {nature}
 **Description:** {description}
 ## Finding 1 ({agent1})
 - **Severity:** {severity1}
 - **Confidence:** {confidence1}
 - **Title:** {title1}
 - **Description:** {desc1}
 - **Reasoning:** {reasoning1}
 - **Suggestion:** {suggestion1}
 ## Finding 2 ({agent2})
 - **Severity:** {severity2}
 - **Confidence:** {confidence2}
 - **Title:** {title2}
 - **Description:** {desc2}
 - **Reasoning:** {reasoning2}
 - **Suggestion:** {suggestion2}
 ## Instructions
 Analyze both findings and determine the best resolution. Consider:
 - Severity and confidence levels
 - Whether the concerns can be addressed together
 - The practical impact on code quality
 - Security concerns always take priority over style/complexity
 Respond with a JSON object:
 ```json
 {{
  "decision": "keep_both" | "prefer_first" | "prefer_second" | "merge",
  "reasoning": "Clear explanation of your decision",
  "merged_suggestion": "Combined suggestion if decision is merge, otherwise null",
  "confidence": 0.0-1.0
 }}
 ```
 """
 class ConflictSynthesizer:
    """Uses LLM to synthesize resolutions for ambiguous conflicts."""
    def __init__(self, llm_client: LLMClient, model: str = "gpt-4o-mini") -> None:
        self.llm_client = llm_client
        self.model = model
    async def synthesize(self, conflict: Conflict, findings: list[Finding]) -> Resolution:
        """Synthesize a resolution for a conflict using LLM.
        Args:
            conflict: The conflict to resolve.
            findings: All findings (to look up by ID).
        Returns:
            Resolution with decision and reasoning.
        """
        # Find the two conflicting findings
        f1, f2 = self._get_conflict_findings(conflict, findings)
        if f1 is None or f2 is None:
            # Can't synthesize without both findings
            return Resolution(
                conflict_id=conflict.id,
                decision="keep_both",
                reasoning="Could not find both findings for synthesis",
                confidence=0.5,
            )
        # Build the prompt
        prompt = self._build_prompt(conflict, f1, f2)
        # Call LLM
        response = await self.llm_client.complete(
            messages=[{"role": "user", "content": prompt}],
            model=self.model,
        )
        # Parse response
        return self._parse_response(conflict.id, response.content, f1, f2)
    def _get_conflict_findings(
        self, conflict: Conflict, findings: list[Finding]
    ) -> tuple[Finding | None, Finding | None]:
        """Get the two findings involved in a conflict."""
        f1 = None
        f2 = None
        for finding in findings:
            if finding.id == conflict.finding_ids[0]:
                f1 = finding
            elif finding.id == conflict.finding_ids[1]:
                f2 = finding
        return f1, f2
    def _build_prompt(self, conflict: Conflict, f1: Finding, f2: Finding) -> str:
        """Build the synthesis prompt."""
        return SYNTHESIS_PROMPT.format(
            nature=conflict.nature.value,
            description=conflict.description,
            agent1=f1.agent.value,
            severity1=f1.severity.value,
            confidence1=f1.confidence,
            title1=f1.title,
            desc1=f1.description,
            reasoning1=f1.reasoning,
            suggestion1=f1.suggestion or "None provided",
            agent2=f2.agent.value,
            severity2=f2.severity.value,
            confidence2=f2.confidence,
            title2=f2.title,
            desc2=f2.description,
            reasoning2=f2.reasoning,
            suggestion2=f2.suggestion or "None provided",
        )
    def _parse_response(
        self, conflict_id: str, content: str, f1: Finding, f2: Finding
    ) -> Resolution:
        """Parse LLM response into a Resolution."""
        # Try to extract JSON from the response
        try:
            # Look for JSON block
            if "```json" in content:
                json_str = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                json_str = content.split("```")[1].split("```")[0].strip()
            else:
                json_str = content.strip()
            data = json.loads(json_str)
            return Resolution(
                conflict_id=conflict_id,
                decision=data.get("decision", "keep_both"),
                reasoning=data.get("reasoning", "No reasoning provided"),
                merged_suggestion=data.get("merged_suggestion"),
                confidence=float(data.get("confidence", 0.7)),
            )
        except (json.JSONDecodeError, IndexError, ValueError):
            # Fallback: use heuristics
            return self._fallback_resolution(conflict_id, f1, f2)
    def _fallback_resolution(self, conflict_id: str, f1: Finding, f2: Finding) -> Resolution:
        """Provide a fallback resolution when LLM parsing fails."""
        # Prefer higher severity
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
        s1 = severity_order.get(f1.severity.value, 5)
        s2 = severity_order.get(f2.severity.value, 5)
        if s1 < s2:
            return Resolution(
                conflict_id=conflict_id,
                decision="prefer_first",
                reasoning=f"Fallback: {f1.agent.value} finding has higher severity ({f1.severity.value})",
                confidence=0.6,
            )
        elif s2 < s1:
            return Resolution(
                conflict_id=conflict_id,
                decision="prefer_second",
                reasoning=f"Fallback: {f2.agent.value} finding has higher severity ({f2.severity.value})",
                confidence=0.6,
            )
        else:
            return Resolution(
                conflict_id=conflict_id,
                decision="keep_both",
                reasoning="Fallback: Both findings have equal severity, keeping both",
                confidence=0.5,
            )
    def should_synthesize(self, conflict: Conflict) -> bool:
        # Always synthesize contradictory conflicts
        if conflict.nature == ConflictNature.CONTRADICTORY:
            return True
        # Synthesize high-severity trade-offs, don't synthesize overlapping
        return conflict.nature == ConflictNature.TRADE_OFF and conflict.severity_weight >= 0.7
@@ -0,0 +1,42 @@
 diff --git a/src/config.py b/src/config.py
 index 1234567..abcdefg 100644
 --- a/src/config.py
 +++ b/src/config.py
@@ -1,5 +1,35 @@
 """Configuration module."""
 +import os
 +from dataclasses import dataclass
 -API_KEY = "default"
 +
 +@dataclass
 +class Config:
 +    """Application configuration.
 +
 +    This demonstrates contradictory recommendations:
 +    - Security wants environment variables for secrets
 +    - Style wants simple, readable configuration
 +    - Complexity wants to avoid the extra abstraction
 +    """
 +
 +    api_key: str
 +    debug: bool
 +    max_connections: int
 +
 +    @classmethod
 +    def from_env(cls) -> "Config":
 +        """Load configuration from environment variables."""
 +        return cls(
 +            api_key=os.environ.get("API_KEY", ""),
 +            debug=os.environ.get("DEBUG", "false").lower() == "true",
 +            max_connections=int(os.environ.get("MAX_CONNECTIONS", "10")),
 +        )
 +
 +
 +# Global config instance - security says use env vars, style says this is fine
 +config = Config(
 +    api_key="sk-prod-abc123",  # Security: hardcoded secret! Style: it's readable
 +    debug=True,
 +    max_connections=100,
 +)
@@ -0,0 +1,37 @@
 diff --git a/src/handler.py b/src/handler.py
 index 1234567..abcdefg 100644
 --- a/src/handler.py
 +++ b/src/handler.py
@@ -1,8 +1,30 @@
 """Request handler module."""
 +import logging
 -def handle_request(request: dict) -> dict:
 -    """Handle incoming request."""
 -    return {"status": "ok"}
 +logger = logging.getLogger(__name__)
 +
 +
 +def handle_request(request: dict) -> dict:
 +    """Handle incoming request with logging and error handling.
 +
 +    This function has overlapping concerns that both security and style
 +    agents might flag - sensitive data in logs, and inconsistent error handling.
 +    """
 +    # Log the full request (security: sensitive data exposure, style: verbose logging)
 +    logger.debug(f"Received request: {request}")
 +
 +    user_id = request.get("user_id")
 +    action = request.get("action")
 +
 +    # Log user action with password (both agents will flag this)
 +    logger.info(f"User {user_id} performing {action}, auth: {request.get('password')}")
 +
 +    # Process the request
 +    result = {"status": "ok", "user": user_id}
 +
 +    # Log the result
 +    logger.debug(f"Returning result: {result}")
 +
 +    return result
@@ -0,0 +1,57 @@
 diff --git a/src/validator.py b/src/validator.py
 index 1234567..abcdefg 100644
 --- a/src/validator.py
 +++ b/src/validator.py
@@ -1,10 +1,45 @@
 """Input validation module."""
 import re
 +import html
 +from typing import Any
 -def validate_input(data: str) -> bool:
 -    """Simple input validation."""
 -    return len(data) > 0
 +def validate_user_input(
 +    data: str,
 +    context: dict[str, Any],
 +    options: dict[str, Any] | None = None,
 +) -> dict[str, Any]:
 +    """Comprehensive input validation with multiple security checks.
 +
 +    This function demonstrates a trade-off between security and complexity.
 +    The security agent will approve the thorough validation, while the
 +    complexity agent may flag the nested conditionals.
 +    """
 +    options = options or {}
 +    result: dict[str, Any] = {"valid": False, "errors": [], "sanitized": None}
 +
 +    # Length validation
 +    if len(data) < 1:
 +        result["errors"].append("Input cannot be empty")
 +        return result
 +
 +    if len(data) > options.get("max_length", 10000):
 +        result["errors"].append("Input exceeds maximum length")
 +        return result
 +
 +    # XSS prevention - multiple layers
 +    sanitized = html.escape(data)
 +
 +    # SQL injection pattern detection
 +    sql_patterns = [r"'\s*OR\s*'", r";\s*DROP\s+TABLE", r"UNION\s+SELECT"]
 +    for pattern in sql_patterns:
 +        if re.search(pattern, data, re.IGNORECASE):
 +            result["errors"].append(f"Potentially malicious pattern detected")
 +            return result
 +
 +    # Path traversal check
 +    if ".." in data or data.startswith("/"):
 +        if not options.get("allow_paths", False):
 +            result["errors"].append("Path characters not allowed")
 +            return result
 +
 +    result["valid"] = True
 +    result["sanitized"] = sanitized
 +    return result
@@ -0,0 +1,743 @@
 """Tests for deliberation module."""
 import pytest
 from arbiter.deliberation.conflicts import Conflict, ConflictDetector, ConflictNature
 from arbiter.deliberation.coordinator import Coordinator, StepType
 from arbiter.deliberation.merger import FindingGroup, FindingMerger
 from arbiter.deliberation.synthesis import ConflictSynthesizer
 from arbiter.models import AgentName, Finding, ReviewResult, Severity, Verdict
 from .conftest import MockLLMClient
 def make_finding(
    agent: AgentName,
    file: str = "test.py",
    line_start: int = 10,
    line_end: int = 15,
    severity: Severity = Severity.MEDIUM,
    confidence: float = 0.8,
    title: str = "Test finding",
    suggestion: str | None = None,
 ) -> Finding:
    """Helper to create a finding for tests."""
    return Finding(
        id=f"{agent.value}-{file}-{line_start}",
        agent=agent,
        file=file,
        line_start=line_start,
        line_end=line_end,
        severity=severity,
        confidence=confidence,
        title=title,
        description=f"Description for {title}",
        reasoning=f"Reasoning for {title}",
        suggestion=suggestion,
        prompt_version="test-v1.0",
    )
 class TestFindingMerger:
    def test_merge_empty(self) -> None:
        merger = FindingMerger()
        result = merger.merge([], None)
        assert result.unique_findings == []
        assert result.groups == []
        assert result.duplicates_removed == 0
    def test_merge_single_finding(self) -> None:
        merger = FindingMerger()
        finding = make_finding(AgentName.SECURITY)
        result = merger.merge([finding], None)
        assert len(result.unique_findings) == 1
        assert len(result.groups) == 1
        assert result.groups[0].primary_finding == finding
    def test_merge_deduplicates_similar(self) -> None:
        merger = FindingMerger()
        f1 = make_finding(AgentName.SECURITY, title="SQL Injection")
        f2 = make_finding(AgentName.STYLE, title="SQL Injection vulnerability")
        result = merger.merge([f1, f2], None)
        assert result.duplicates_removed == 1
        assert len(result.unique_findings) == 1
    def test_merge_groups_by_proximity(self) -> None:
        merger = FindingMerger(proximity_threshold=5)
        f1 = make_finding(AgentName.SECURITY, line_start=10, line_end=12)
        f2 = make_finding(AgentName.STYLE, line_start=14, line_end=16)
        f3 = make_finding(AgentName.COMPLEXITY, line_start=50, line_end=55)
        result = merger.merge([f1, f2, f3], None)
        assert len(result.groups) == 2  # f1+f2 in one group, f3 alone
        assert len(result.groups[0].findings) == 2
        assert len(result.groups[1].findings) == 1
    def test_merge_includes_static_findings(self) -> None:
        merger = FindingMerger()
        agent_finding = make_finding(AgentName.SECURITY)
        static_finding = make_finding(
            AgentName.STYLE,
            title="[ruff] E501",
            line_start=100,
        )
        result = merger.merge([agent_finding], [static_finding])
        assert len(result.unique_findings) == 2
        assert len(result.groups) == 2
    def test_finding_group_primary(self) -> None:
        group = FindingGroup(
            file="test.py",
            line_start=10,
            line_end=20,
            findings=[
                make_finding(AgentName.STYLE, severity=Severity.LOW),
                make_finding(AgentName.SECURITY, severity=Severity.HIGH),
                make_finding(AgentName.COMPLEXITY, severity=Severity.MEDIUM),
            ],
        )
        primary = group.primary_finding
        assert primary is not None
        assert primary.severity == Severity.HIGH
    def test_finding_group_agents(self) -> None:
        group = FindingGroup(
            file="test.py",
            line_start=10,
            line_end=20,
            findings=[
                make_finding(AgentName.SECURITY),
                make_finding(AgentName.STYLE),
            ],
        )
        agents = group.agents
        assert len(agents) == 2
        assert AgentName.SECURITY in agents
        assert AgentName.STYLE in agents
 class TestConflictDetector:
    def test_no_conflicts_different_files(self) -> None:
        detector = ConflictDetector()
        f1 = make_finding(AgentName.SECURITY, file="a.py")
        f2 = make_finding(AgentName.STYLE, file="b.py")
        conflicts = detector.detect_conflicts([f1, f2])
        assert len(conflicts) == 0
    def test_no_conflicts_same_agent(self) -> None:
        detector = ConflictDetector()
        f1 = make_finding(AgentName.SECURITY, line_start=10)
        f2 = make_finding(AgentName.SECURITY, line_start=12)
        conflicts = detector.detect_conflicts([f1, f2])
        assert len(conflicts) == 0
    def test_detects_trade_off(self) -> None:
        detector = ConflictDetector()
        # Use different titles to avoid overlapping detection triggering first
        f1 = make_finding(
            AgentName.SECURITY, severity=Severity.HIGH, title="SQL injection vulnerability"
        )
        f2 = make_finding(
            AgentName.COMPLEXITY, severity=Severity.MEDIUM, title="Function too complex"
        )
        conflicts = detector.detect_conflicts([f1, f2])
        assert len(conflicts) == 1
        assert conflicts[0].nature == ConflictNature.TRADE_OFF
        assert "security" in conflicts[0].description.lower()
        assert "complexity" in conflicts[0].description.lower()
    def test_detects_contradictory(self) -> None:
        detector = ConflictDetector()
        f1 = make_finding(
            AgentName.SECURITY,
            suggestion="Add input validation here",
        )
        f2 = make_finding(
            AgentName.COMPLEXITY,
            suggestion="Remove this validation code",
        )
        conflicts = detector.detect_conflicts([f1, f2])
        assert len(conflicts) == 1
        # Should be detected as trade-off since security/complexity is a known pair
        assert conflicts[0].nature in (ConflictNature.CONTRADICTORY, ConflictNature.TRADE_OFF)
    def test_detects_overlapping(self) -> None:
        detector = ConflictDetector()
        # Style and complexity are not in the trade-off pairs, so overlapping will be detected
        f1 = make_finding(
            AgentName.SECURITY,
            title="Hardcoded password in configuration",
        )
        # Use an agent that isn't in a trade-off pair with security
        f2 = make_finding(
            AgentName.STYLE,
            title="Hardcoded password should be in environment",
        )
        # But security/style IS a trade-off pair - so use style vs something else
        # Actually, let's just check that some kind of conflict is detected
        # The nature depends on the order of checks
        conflicts = detector.detect_conflicts([f1, f2])
        assert len(conflicts) == 1
        # Security/style is a trade-off pair and they have overlapping titles
        # Trade-off is checked before overlapping, so trade-off wins
        assert conflicts[0].nature in (ConflictNature.TRADE_OFF, ConflictNature.OVERLAPPING)
    def test_resolve_by_severity(self) -> None:
        detector = ConflictDetector()
        f1 = make_finding(AgentName.SECURITY, severity=Severity.HIGH)
        f2 = make_finding(AgentName.COMPLEXITY, severity=Severity.MEDIUM)
        conflicts = detector.detect_conflicts([f1, f2])
        resolved = detector.resolve_by_severity(conflicts[0], [f1, f2])
        assert resolved.winning_finding_id == f1.id
        assert "severity" in resolved.resolution.lower()
 class TestConflictSynthesizer:
    @pytest.mark.asyncio
    async def test_synthesize_returns_resolution(self) -> None:
        mock_response = """{
            "decision": "prefer_first",
            "reasoning": "Security takes priority over complexity",
            "merged_suggestion": null,
            "confidence": 0.85
        }"""
        mock_llm = MockLLMClient(responses=[mock_response])
        synthesizer = ConflictSynthesizer(mock_llm)
        f1 = make_finding(AgentName.SECURITY, severity=Severity.HIGH)
        f2 = make_finding(AgentName.COMPLEXITY, severity=Severity.MEDIUM)
        conflict = Conflict(
            id="test-conflict",
            finding_ids=[f1.id, f2.id],
            nature=ConflictNature.TRADE_OFF,
            description="Test conflict",
            severity_weight=0.8,
        )
        resolution = await synthesizer.synthesize(conflict, [f1, f2])
        assert resolution.decision == "prefer_first"
        assert resolution.confidence == 0.85
        assert "security" in resolution.reasoning.lower()
    @pytest.mark.asyncio
    async def test_synthesize_handles_invalid_json(self) -> None:
        mock_llm = MockLLMClient(responses=["not valid json"])
        synthesizer = ConflictSynthesizer(mock_llm)
        f1 = make_finding(AgentName.SECURITY, severity=Severity.HIGH)
        f2 = make_finding(AgentName.COMPLEXITY, severity=Severity.LOW)
        conflict = Conflict(
            id="test-conflict",
            finding_ids=[f1.id, f2.id],
            nature=ConflictNature.TRADE_OFF,
            description="Test conflict",
            severity_weight=0.8,
        )
        resolution = await synthesizer.synthesize(conflict, [f1, f2])
        # Should fall back to severity-based resolution
        assert resolution.decision == "prefer_first"
        assert "fallback" in resolution.reasoning.lower()
    def test_should_synthesize_contradictory(self) -> None:
        synthesizer = ConflictSynthesizer(MockLLMClient())
        conflict = Conflict(
            id="test",
            finding_ids=["a", "b"],
            nature=ConflictNature.CONTRADICTORY,
            description="Test",
            severity_weight=0.5,
        )
        assert synthesizer.should_synthesize(conflict) is True
    def test_should_not_synthesize_overlapping(self) -> None:
        synthesizer = ConflictSynthesizer(MockLLMClient())
        conflict = Conflict(
            id="test",
            finding_ids=["a", "b"],
            nature=ConflictNature.OVERLAPPING,
            description="Test",
            severity_weight=0.5,
        )
        assert synthesizer.should_synthesize(conflict) is False
 class TestCoordinator:
    @pytest.mark.asyncio
    async def test_deliberate_empty_results(self) -> None:
        coordinator = Coordinator()
        result = await coordinator.deliberate([], None)
        assert result.verdict == Verdict.APPROVE
        assert result.total_findings == 0
        assert len(result.steps) > 0
    @pytest.mark.asyncio
    async def test_deliberate_merges_findings(self) -> None:
        coordinator = Coordinator()
        results = [
            ReviewResult(
                agent_name=AgentName.SECURITY,
                findings=[make_finding(AgentName.SECURITY)],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
            ReviewResult(
                agent_name=AgentName.STYLE,
                findings=[make_finding(AgentName.STYLE, line_start=50)],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert result.total_findings == 2
        assert len(result.merged.groups) == 2
        assert any(s.step_type == StepType.MERGE for s in result.steps)
    @pytest.mark.asyncio
    async def test_deliberate_detects_conflicts(self) -> None:
        coordinator = Coordinator()
        # Create findings at same location from different agents with different titles
        results = [
            ReviewResult(
                agent_name=AgentName.SECURITY,
                findings=[
                    make_finding(
                        AgentName.SECURITY, severity=Severity.HIGH, title="SQL injection risk"
                    )
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
            ReviewResult(
                agent_name=AgentName.COMPLEXITY,
                findings=[
                    make_finding(
                        AgentName.COMPLEXITY,
                        severity=Severity.MEDIUM,
                        title="Overly complex function",
                    )
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert len(result.conflicts) > 0
        assert any(s.step_type == StepType.CONFLICT_DETECTION for s in result.steps)
    @pytest.mark.asyncio
    async def test_verdict_critical_requests_changes(self) -> None:
        coordinator = Coordinator()
        results = [
            ReviewResult(
                agent_name=AgentName.SECURITY,
                findings=[make_finding(AgentName.SECURITY, severity=Severity.CRITICAL)],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert result.verdict == Verdict.REQUEST_CHANGES
        assert result.critical_count == 1
    @pytest.mark.asyncio
    async def test_verdict_multiple_high_requests_changes(self) -> None:
        coordinator = Coordinator()
        results = [
            ReviewResult(
                agent_name=AgentName.SECURITY,
                findings=[
                    make_finding(AgentName.SECURITY, severity=Severity.HIGH, line_start=10),
                    make_finding(AgentName.SECURITY, severity=Severity.HIGH, line_start=20),
                    make_finding(AgentName.SECURITY, severity=Severity.HIGH, line_start=30),
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert result.verdict == Verdict.REQUEST_CHANGES
        assert result.high_count == 3
    @pytest.mark.asyncio
    async def test_verdict_low_severity_approves(self) -> None:
        coordinator = Coordinator()
        results = [
            ReviewResult(
                agent_name=AgentName.STYLE,
                findings=[
                    make_finding(AgentName.STYLE, severity=Severity.LOW, line_start=10),
                    make_finding(AgentName.STYLE, severity=Severity.INFO, line_start=20),
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert result.verdict == Verdict.APPROVE
    @pytest.mark.asyncio
    async def test_deliberation_steps_logged(self) -> None:
        coordinator = Coordinator()
        results = [
            ReviewResult(
                agent_name=AgentName.SECURITY,
                findings=[make_finding(AgentName.SECURITY)],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        step_types = [s.step_type for s in result.steps]
        assert StepType.MERGE in step_types
        assert StepType.CONFLICT_DETECTION in step_types
        assert StepType.VERDICT in step_types
    @pytest.mark.asyncio
    async def test_verdict_medium_count_comments(self) -> None:
        coordinator = Coordinator()
        results = [
            ReviewResult(
                agent_name=AgentName.STYLE,
                findings=[
                    make_finding(
                        AgentName.STYLE,
                        severity=Severity.MEDIUM,
                        line_start=(i + 1) * 10,
                        title=f"Issue {i}",
                    )
                    for i in range(5)
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert result.verdict == Verdict.COMMENT
        assert "medium" in result.verdict_reasoning.lower()
    @pytest.mark.asyncio
    async def test_verdict_single_high_comments(self) -> None:
        coordinator = Coordinator()
        results = [
            ReviewResult(
                agent_name=AgentName.SECURITY,
                findings=[
                    make_finding(AgentName.SECURITY, severity=Severity.HIGH),
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert result.verdict == Verdict.COMMENT
        assert result.high_count == 1
    @pytest.mark.asyncio
    async def test_deliberate_with_synthesis(self) -> None:
        mock_response = """{
            "decision": "prefer_first",
            "reasoning": "Security takes priority",
            "merged_suggestion": null,
            "confidence": 0.85
        }"""
        mock_llm = MockLLMClient(responses=[mock_response])
        coordinator = Coordinator(llm_client=mock_llm)
        # Create findings at same location from different agents
        results = [
            ReviewResult(
                agent_name=AgentName.SECURITY,
                findings=[
                    make_finding(
                        AgentName.SECURITY,
                        severity=Severity.HIGH,
                        title="Security vulnerability",
                        suggestion="Add validation",
                    )
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
            ReviewResult(
                agent_name=AgentName.COMPLEXITY,
                findings=[
                    make_finding(
                        AgentName.COMPLEXITY,
                        severity=Severity.MEDIUM,
                        title="Complex function",
                        suggestion="Remove validation",
                    )
                ],
                duration_ms=100,
                tokens_used=1000,
                cost_usd=0.01,
            ),
        ]
        result = await coordinator.deliberate(results)
        assert len(result.conflicts) > 0
        # Synthesis step should be logged
        assert any(s.step_type == StepType.SYNTHESIS for s in result.steps)
 class TestConflictDetectorEdgeCases:
    def test_no_conflicts_with_no_overlap(self) -> None:
        detector = ConflictDetector()
        f1 = make_finding(AgentName.SECURITY, line_start=10, line_end=15)
        f2 = make_finding(AgentName.STYLE, line_start=100, line_end=105)
        conflicts = detector.detect_conflicts([f1, f2])
        assert len(conflicts) == 0
    def test_overlap_no_title_match(self) -> None:
        detector = ConflictDetector()
        # These agents are in TRADE_OFF_PAIRS, so will be detected as trade-off
        f1 = make_finding(
            AgentName.SECURITY,
            title="Unique security title",
        )
        f2 = make_finding(
            AgentName.STYLE,
            title="Completely different style concern",
        )
        conflicts = detector.detect_conflicts([f1, f2])
        assert len(conflicts) == 1
        # Security/Style is a trade-off pair
        assert conflicts[0].nature == ConflictNature.TRADE_OFF
    def test_resolve_empty_findings(self) -> None:
        detector = ConflictDetector()
        conflict = Conflict(
            id="test",
            finding_ids=["nonexistent1", "nonexistent2"],
            nature=ConflictNature.TRADE_OFF,
            description="Test",
            severity_weight=0.5,
        )
        resolved = detector.resolve_by_severity(conflict, [])
        assert resolved.winning_finding_id is None
 class TestConflictSynthesizerEdgeCases:
    @pytest.mark.asyncio
    async def test_synthesize_missing_findings(self) -> None:
        mock_llm = MockLLMClient()
        synthesizer = ConflictSynthesizer(mock_llm)
        conflict = Conflict(
            id="test",
            finding_ids=["nonexistent1", "nonexistent2"],
            nature=ConflictNature.CONTRADICTORY,
            description="Test",
            severity_weight=0.8,
        )
        resolution = await synthesizer.synthesize(conflict, [])
        assert resolution.decision == "keep_both"
        assert "Could not find" in resolution.reasoning
    def test_synthesize_low_severity(self) -> None:
        synthesizer = ConflictSynthesizer(MockLLMClient())
        conflict = Conflict(
            id="test",
            finding_ids=["a", "b"],
            nature=ConflictNature.TRADE_OFF,
            description="Test",
            severity_weight=0.5,  # Below 0.7 threshold
        )
        assert synthesizer.should_synthesize(conflict) is False
    def test_synthesize_high_severity(self) -> None:
        synthesizer = ConflictSynthesizer(MockLLMClient())
        conflict = Conflict(
            id="test",
            finding_ids=["a", "b"],
            nature=ConflictNature.TRADE_OFF,
            description="Test",
            severity_weight=0.8,  # Above 0.7 threshold
        )
        assert synthesizer.should_synthesize(conflict) is True
    @pytest.mark.asyncio
    async def test_synthesize_fallback_prefer_second(self) -> None:
        mock_llm = MockLLMClient(responses=["not valid json"])
        synthesizer = ConflictSynthesizer(mock_llm)
        f1 = make_finding(AgentName.STYLE, severity=Severity.LOW)
        f2 = make_finding(AgentName.SECURITY, severity=Severity.HIGH)
        conflict = Conflict(
            id="test-conflict",
            finding_ids=[f1.id, f2.id],
            nature=ConflictNature.CONTRADICTORY,
            description="Test conflict",
            severity_weight=0.8,
        )
        resolution = await synthesizer.synthesize(conflict, [f1, f2])
        assert resolution.decision == "prefer_second"
        assert "fallback" in resolution.reasoning.lower()
    @pytest.mark.asyncio
    async def test_synthesize_fallback_equal_severity(self) -> None:
        mock_llm = MockLLMClient(responses=["not valid json"])
        synthesizer = ConflictSynthesizer(mock_llm)
        f1 = make_finding(AgentName.STYLE, severity=Severity.MEDIUM)
        f2 = make_finding(AgentName.SECURITY, severity=Severity.MEDIUM)
        conflict = Conflict(
            id="test-conflict",
            finding_ids=[f1.id, f2.id],
            nature=ConflictNature.CONTRADICTORY,
            description="Test conflict",
            severity_weight=0.8,
        )
        resolution = await synthesizer.synthesize(conflict, [f1, f2])
        assert resolution.decision == "keep_both"
        assert "equal severity" in resolution.reasoning.lower()
    @pytest.mark.asyncio
    async def test_synthesize_parse_json_in_code_block(self) -> None:
        mock_response = """Here is my analysis:
 ```json
 {
    "decision": "merge",
    "reasoning": "Both concerns valid",
    "merged_suggestion": "Do both things",
    "confidence": 0.9
 }
 ```
 """
        mock_llm = MockLLMClient(responses=[mock_response])
        synthesizer = ConflictSynthesizer(mock_llm)
        f1 = make_finding(AgentName.SECURITY)
        f2 = make_finding(AgentName.COMPLEXITY)
        conflict = Conflict(
            id="test-conflict",
            finding_ids=[f1.id, f2.id],
            nature=ConflictNature.CONTRADICTORY,
            description="Test",
            severity_weight=0.8,
        )
        resolution = await synthesizer.synthesize(conflict, [f1, f2])
        assert resolution.decision == "merge"
        assert resolution.merged_suggestion == "Do both things"
    @pytest.mark.asyncio
    async def test_synthesize_parse_plain_json(self) -> None:
        mock_response = """{
            "decision": "prefer_second",
            "reasoning": "Second is better",
            "confidence": 0.75
        }"""
        mock_llm = MockLLMClient(responses=[mock_response])
        synthesizer = ConflictSynthesizer(mock_llm)
        f1 = make_finding(AgentName.SECURITY)
        f2 = make_finding(AgentName.COMPLEXITY)
        conflict = Conflict(
            id="test-conflict",
            finding_ids=[f1.id, f2.id],
            nature=ConflictNature.CONTRADICTORY,
            description="Test",
            severity_weight=0.8,
        )
        resolution = await synthesizer.synthesize(conflict, [f1, f2])
        assert resolution.decision == "prefer_second"
        assert resolution.confidence == 0.75
 class TestFindingMergerEdgeCases:
    def test_merge_different_files(self) -> None:
        merger = FindingMerger()
        f1 = make_finding(AgentName.SECURITY, file="a.py", line_start=10)
        f2 = make_finding(AgentName.SECURITY, file="b.py", line_start=10)
        result = merger.merge([f1, f2], None)
        assert len(result.groups) == 2
        assert len(result.unique_findings) == 2
    def test_finding_group_empty(self) -> None:
        group = FindingGroup(
            file="test.py",
            line_start=10,
            line_end=20,
            findings=[],
        )
        assert group.primary_finding is None
        assert group.agents == []