feat(agents): implement agent framework and CLI

2025-03-08 15:52:29 +00:00
parent 72268ff440
commit f22ca1d5bd
30 changed files with 3466 additions and 0 deletions
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -0,0 +1,561 @@
+"""Tests for CLI commands."""
+
+import json
+from pathlib import Path
+from unittest.mock import AsyncMock, patch
+
+from typer.testing import CliRunner
+
+from arbiter.cli import (
+    _severity_color,
+    _severity_icon,
+    _verdict_color,
+    _verdict_icon,
+    app,
+)
+from arbiter.deliberation import DeliberationResult
+from arbiter.models import AgentName, Finding, ReviewResult, Severity, Verdict
+
+runner = CliRunner()
+
+
+def make_mock_return(
+    findings: list[Finding] | None = None, verdict: Verdict = Verdict.APPROVE
+) -> tuple[list[ReviewResult], DeliberationResult]:
+    """Create a mock return value for _run_review."""
+    agent_results = [
+        ReviewResult(
+            agent_name=AgentName.SECURITY,
+            findings=findings or [],
+            duration_ms=100,
+            tokens_used=100,
+            cost_usd=0.001,
+        )
+    ]
+    deliberation_result = DeliberationResult(
+        findings=findings or [],
+        verdict=verdict,
+        verdict_confidence=0.9,
+        verdict_reasoning="Test reasoning",
+        total_findings=len(findings) if findings else 0,
+    )
+    return agent_results, deliberation_result
+
+
+class TestVersionCommand:
+    def test_version_output(self) -> None:
+        result = runner.invoke(app, ["version"])
+        assert result.exit_code == 0
+        assert "arbiter" in result.output
+        assert "0.3.0" in result.output
+
+
+class TestReviewCommand:
+    def test_file_not_found(self) -> None:
+        result = runner.invoke(app, ["review", "nonexistent.diff"])
+        assert result.exit_code == 1
+        assert "File not found" in result.output
+
+    def test_empty_diff_warning(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "empty.diff"
+        diff_file.write_text("")
+
+        result = runner.invoke(app, ["review", str(diff_file)])
+        assert result.exit_code == 0
+        assert "Empty diff" in result.output
+
+    def test_policy_not_found(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        result = runner.invoke(app, ["review", str(diff_file), "--policy", "nonexistent.yaml"])
+        assert result.exit_code == 1
+        assert "Policy file not found" in result.output
+
+    def test_reads_from_stdin(self) -> None:
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", "-"], input="+ added line\n")
+
+        assert result.exit_code == 0
+
+    def test_json_output_format(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "json"])
+
+        assert result.exit_code == 0
+        assert '"verdict"' in result.output
+        assert '"findings"' in result.output
+
+    def test_markdown_output_format(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
+
+        assert result.exit_code == 0
+        assert "# Arbiter Review" in result.output
+
+    def test_loads_policy_file(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        policy_file = tmp_path / "policy.yaml"
+        policy_file.write_text("""
+version: "1.0"
+agents:
+  security:
+    enabled: true
+  style:
+    enabled: false
+  complexity:
+    enabled: false
+""")
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", str(diff_file), "--policy", str(policy_file)])
+
+        assert result.exit_code == 0
+        # Verify policy was passed to _run_review
+        call_args = mock_run.call_args
+        policy = call_args[0][1]  # Second positional arg is policy
+        assert len(policy.get_enabled_agents()) == 1
+
+    def test_model_override(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", str(diff_file), "--model", "gpt-4o-mini"])
+
+        assert result.exit_code == 0
+        # Verify model was set in policy
+        call_args = mock_run.call_args
+        policy = call_args[0][1]
+        for config in policy.agents.values():
+            assert config.model == "gpt-4o-mini"
+
+    def test_static_analysis_flag(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", str(diff_file), "--no-static-analysis"])
+
+        assert result.exit_code == 0
+        # Verify static_analysis was False
+        call_args = mock_run.call_args
+        assert call_args.kwargs.get("static_analysis") is False
+
+
+class TestNoArgsHelp:
+    def test_no_args_shows_help(self) -> None:
+        result = runner.invoke(app, [])
+        assert result.exit_code == 0
+        assert "A multi-agent code review system" in result.output
+
+
+class TestOutputFormatting:
+    def test_severity_color(self) -> None:
+        assert _severity_color(Severity.CRITICAL) == "red bold"
+        assert _severity_color(Severity.HIGH) == "red"
+        assert _severity_color(Severity.MEDIUM) == "yellow"
+        assert _severity_color(Severity.LOW) == "blue"
+        assert _severity_color(Severity.INFO) == "dim"
+
+    def test_severity_icon(self) -> None:
+        assert _severity_icon(Severity.CRITICAL) == "!!"
+        assert _severity_icon(Severity.HIGH) == "!"
+        assert _severity_icon(Severity.MEDIUM) == "*"
+        assert _severity_icon(Severity.LOW) == "-"
+        assert _severity_icon(Severity.INFO) == "i"
+
+    def test_verdict_color(self) -> None:
+        assert _verdict_color(Verdict.APPROVE) == "green"
+        assert _verdict_color(Verdict.COMMENT) == "yellow"
+        assert _verdict_color(Verdict.REQUEST_CHANGES) == "red"
+
+    def test_verdict_icon(self) -> None:
+        assert _verdict_icon(Verdict.APPROVE) == "[ok]"
+        assert _verdict_icon(Verdict.COMMENT) == "[..]"
+        assert _verdict_icon(Verdict.REQUEST_CHANGES) == "[!!]"
+
+
+class TestRichOutput:
+    def test_rich_format_with_findings(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        finding = Finding(
+            id="test-finding-1",
+            agent=AgentName.SECURITY,
+            file="test.py",
+            line_start=10,
+            line_end=15,
+            severity=Severity.HIGH,
+            confidence=0.9,
+            title="SQL Injection",
+            description="User input in query",
+            reasoning="Direct concatenation",
+            prompt_version="test-v1.0",
+        )
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return(findings=[finding])
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
+
+        assert result.exit_code == 0
+
+    def test_rich_format_no_findings(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
+
+        assert result.exit_code == 0
+        assert "No issues found" in result.output
+
+    def test_rich_format_critical_findings(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        finding = Finding(
+            id="test-finding-1",
+            agent=AgentName.SECURITY,
+            file="test.py",
+            line_start=10,
+            line_end=10,
+            severity=Severity.CRITICAL,
+            confidence=0.95,
+            title="Critical Issue",
+            description="This is critical",
+            reasoning="Very bad",
+            suggestion="Fix it immediately",
+            prompt_version="test-v1.0",
+        )
+
+        deliberation = DeliberationResult(
+            findings=[finding],
+            verdict=Verdict.REQUEST_CHANGES,
+            verdict_confidence=0.95,
+            verdict_reasoning="Critical issue found",
+            total_findings=1,
+            critical_count=1,
+        )
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (
+                [
+                    ReviewResult(
+                        agent_name=AgentName.SECURITY,
+                        findings=[finding],
+                        duration_ms=100,
+                        tokens_used=100,
+                        cost_usd=0.001,
+                    )
+                ],
+                deliberation,
+            )
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
+
+        assert result.exit_code == 0
+
+
+class TestMarkdownOutput:
+    def test_markdown_with_findings(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        finding = Finding(
+            id="test-finding-1",
+            agent=AgentName.SECURITY,
+            file="test.py",
+            line_start=10,
+            line_end=15,
+            severity=Severity.HIGH,
+            confidence=0.9,
+            title="SQL Injection",
+            description="User input in query",
+            reasoning="Direct concatenation",
+            suggestion="Use parameterized queries",
+            prompt_version="test-v1.0",
+        )
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return(findings=[finding])
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
+
+        assert result.exit_code == 0
+        assert "## Findings" in result.output
+        assert "SQL Injection" in result.output
+
+    def test_markdown_verdict_badges(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        for verdict in [Verdict.APPROVE, Verdict.COMMENT, Verdict.REQUEST_CHANGES]:
+            deliberation = DeliberationResult(
+                verdict=verdict,
+                verdict_confidence=0.9,
+                verdict_reasoning="Test",
+            )
+
+            with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+                mock_run.return_value = (
+                    [
+                        ReviewResult(
+                            agent_name=AgentName.SECURITY,
+                            findings=[],
+                            duration_ms=100,
+                            tokens_used=100,
+                            cost_usd=0.001,
+                        )
+                    ],
+                    deliberation,
+                )
+                result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
+
+            assert result.exit_code == 0
+            assert verdict.value.upper() in result.output
+
+
+class TestJsonOutput:
+    def test_json_with_conflicts(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        from arbiter.deliberation.conflicts import Conflict, ConflictNature
+
+        conflict = Conflict(
+            id="test-conflict",
+            finding_ids=["f1", "f2"],
+            nature=ConflictNature.TRADE_OFF,
+            description="Test conflict",
+            severity_weight=0.8,
+        )
+
+        deliberation = DeliberationResult(
+            verdict=Verdict.COMMENT,
+            verdict_confidence=0.7,
+            verdict_reasoning="Conflicts found",
+            conflicts=[conflict],
+        )
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (
+                [
+                    ReviewResult(
+                        agent_name=AgentName.SECURITY,
+                        findings=[],
+                        duration_ms=100,
+                        tokens_used=100,
+                        cost_usd=0.001,
+                    )
+                ],
+                deliberation,
+            )
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "json"])
+
+        assert result.exit_code == 0
+        output = json.loads(result.output)
+        assert "conflicts" in output
+        assert len(output["conflicts"]) == 1
+
+
+class TestWorkDirHandling:
+    def test_work_dir_option(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        work_dir = tmp_path / "src"
+        work_dir.mkdir()
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = make_mock_return()
+            result = runner.invoke(app, ["review", str(diff_file), "--work-dir", str(work_dir)])
+
+        assert result.exit_code == 0
+        call_args = mock_run.call_args
+        assert call_args.kwargs.get("work_dir") == work_dir.resolve()
+
+
+class TestRichOutputWithConflicts:
+    def test_rich_conflicts(self, tmp_path: Path) -> None:
+        from arbiter.deliberation.conflicts import Conflict, ConflictNature
+        from arbiter.deliberation.synthesis import Resolution
+
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        conflict = Conflict(
+            id="test-conflict",
+            finding_ids=["f1", "f2"],
+            nature=ConflictNature.TRADE_OFF,
+            description="Security vs complexity trade-off",
+            severity_weight=0.8,
+        )
+
+        resolution = Resolution(
+            conflict_id="test-conflict",
+            decision="prefer_first",
+            reasoning="Security takes priority",
+            confidence=0.9,
+        )
+
+        deliberation = DeliberationResult(
+            verdict=Verdict.COMMENT,
+            verdict_confidence=0.7,
+            verdict_reasoning="Conflicts found",
+            conflicts=[conflict],
+            resolutions=[resolution],
+        )
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (
+                [
+                    ReviewResult(
+                        agent_name=AgentName.SECURITY,
+                        findings=[],
+                        duration_ms=100,
+                        tokens_used=100,
+                        cost_usd=0.001,
+                    )
+                ],
+                deliberation,
+            )
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
+
+        assert result.exit_code == 0
+
+
+class TestMarkdownOutputWithConflicts:
+    def test_markdown_conflicts(self, tmp_path: Path) -> None:
+        from arbiter.deliberation.conflicts import Conflict, ConflictNature
+        from arbiter.deliberation.synthesis import Resolution
+
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        conflict = Conflict(
+            id="test-conflict",
+            finding_ids=["f1", "f2"],
+            nature=ConflictNature.CONTRADICTORY,
+            description="Contradictory recommendations",
+            severity_weight=0.9,
+        )
+
+        resolution = Resolution(
+            conflict_id="test-conflict",
+            decision="merge",
+            reasoning="Both concerns addressed by combined fix",
+            merged_suggestion="Do both things",
+            confidence=0.85,
+        )
+
+        deliberation = DeliberationResult(
+            verdict=Verdict.COMMENT,
+            verdict_confidence=0.7,
+            verdict_reasoning="Conflicts found",
+            conflicts=[conflict],
+            resolutions=[resolution],
+        )
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (
+                [
+                    ReviewResult(
+                        agent_name=AgentName.SECURITY,
+                        findings=[],
+                        duration_ms=100,
+                        tokens_used=100,
+                        cost_usd=0.001,
+                    )
+                ],
+                deliberation,
+            )
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
+
+        assert result.exit_code == 0
+        assert "## Conflicts" in result.output
+        assert "Contradictory" in result.output
+        assert "Resolution" in result.output
+
+    def test_markdown_findings(self, tmp_path: Path) -> None:
+        diff_file = tmp_path / "test.diff"
+        diff_file.write_text("+ some change")
+
+        findings = [
+            Finding(
+                id="f1",
+                agent=AgentName.SECURITY,
+                file="test.py",
+                line_start=10,
+                line_end=15,
+                severity=Severity.HIGH,
+                confidence=0.9,
+                title="Security Issue",
+                description="Vulnerable code",
+                reasoning="Bad pattern",
+                suggestion="Fix it this way",
+                prompt_version="test-v1.0",
+            ),
+            Finding(
+                id="f2",
+                agent=AgentName.STYLE,
+                file="test.py",
+                line_start=20,
+                line_end=25,
+                severity=Severity.LOW,
+                confidence=0.8,
+                title="Style Issue",
+                description="Could be cleaner",
+                reasoning="Convention",
+                prompt_version="test-v1.0",
+            ),
+        ]
+
+        deliberation = DeliberationResult(
+            findings=findings,
+            verdict=Verdict.COMMENT,
+            verdict_confidence=0.75,
+            verdict_reasoning="Issues found",
+            total_findings=2,
+        )
+
+        with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (
+                [
+                    ReviewResult(
+                        agent_name=AgentName.SECURITY,
+                        findings=[findings[0]],
+                        duration_ms=100,
+                        tokens_used=100,
+                        cost_usd=0.001,
+                    ),
+                    ReviewResult(
+                        agent_name=AgentName.STYLE,
+                        findings=[findings[1]],
+                        duration_ms=100,
+                        tokens_used=100,
+                        cost_usd=0.001,
+                    ),
+                ],
+                deliberation,
+            )
+            result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
+
+        assert result.exit_code == 0
+        assert "Security Issue" in result.output
+        assert "Style Issue" in result.output
+        assert "Fix it this way" in result.output