feat(agents): implement agent framework and CLI

This commit is contained in:
2025-03-08 15:52:29 +00:00
parent 72268ff440
commit f22ca1d5bd
30 changed files with 3466 additions and 0 deletions

561
tests/test_cli.py Normal file
View File

@@ -0,0 +1,561 @@
"""Tests for CLI commands."""
import json
from pathlib import Path
from unittest.mock import AsyncMock, patch
from typer.testing import CliRunner
from arbiter.cli import (
_severity_color,
_severity_icon,
_verdict_color,
_verdict_icon,
app,
)
from arbiter.deliberation import DeliberationResult
from arbiter.models import AgentName, Finding, ReviewResult, Severity, Verdict
runner = CliRunner()
def make_mock_return(
findings: list[Finding] | None = None, verdict: Verdict = Verdict.APPROVE
) -> tuple[list[ReviewResult], DeliberationResult]:
"""Create a mock return value for _run_review."""
agent_results = [
ReviewResult(
agent_name=AgentName.SECURITY,
findings=findings or [],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
)
]
deliberation_result = DeliberationResult(
findings=findings or [],
verdict=verdict,
verdict_confidence=0.9,
verdict_reasoning="Test reasoning",
total_findings=len(findings) if findings else 0,
)
return agent_results, deliberation_result
class TestVersionCommand:
def test_version_output(self) -> None:
result = runner.invoke(app, ["version"])
assert result.exit_code == 0
assert "arbiter" in result.output
assert "0.3.0" in result.output
class TestReviewCommand:
def test_file_not_found(self) -> None:
result = runner.invoke(app, ["review", "nonexistent.diff"])
assert result.exit_code == 1
assert "File not found" in result.output
def test_empty_diff_warning(self, tmp_path: Path) -> None:
diff_file = tmp_path / "empty.diff"
diff_file.write_text("")
result = runner.invoke(app, ["review", str(diff_file)])
assert result.exit_code == 0
assert "Empty diff" in result.output
def test_policy_not_found(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
result = runner.invoke(app, ["review", str(diff_file), "--policy", "nonexistent.yaml"])
assert result.exit_code == 1
assert "Policy file not found" in result.output
def test_reads_from_stdin(self) -> None:
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", "-"], input="+ added line\n")
assert result.exit_code == 0
def test_json_output_format(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", str(diff_file), "--format", "json"])
assert result.exit_code == 0
assert '"verdict"' in result.output
assert '"findings"' in result.output
def test_markdown_output_format(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
assert result.exit_code == 0
assert "# Arbiter Review" in result.output
def test_loads_policy_file(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
policy_file = tmp_path / "policy.yaml"
policy_file.write_text("""
version: "1.0"
agents:
security:
enabled: true
style:
enabled: false
complexity:
enabled: false
""")
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", str(diff_file), "--policy", str(policy_file)])
assert result.exit_code == 0
# Verify policy was passed to _run_review
call_args = mock_run.call_args
policy = call_args[0][1] # Second positional arg is policy
assert len(policy.get_enabled_agents()) == 1
def test_model_override(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", str(diff_file), "--model", "gpt-4o-mini"])
assert result.exit_code == 0
# Verify model was set in policy
call_args = mock_run.call_args
policy = call_args[0][1]
for config in policy.agents.values():
assert config.model == "gpt-4o-mini"
def test_static_analysis_flag(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", str(diff_file), "--no-static-analysis"])
assert result.exit_code == 0
# Verify static_analysis was False
call_args = mock_run.call_args
assert call_args.kwargs.get("static_analysis") is False
class TestNoArgsHelp:
def test_no_args_shows_help(self) -> None:
result = runner.invoke(app, [])
assert result.exit_code == 0
assert "A multi-agent code review system" in result.output
class TestOutputFormatting:
def test_severity_color(self) -> None:
assert _severity_color(Severity.CRITICAL) == "red bold"
assert _severity_color(Severity.HIGH) == "red"
assert _severity_color(Severity.MEDIUM) == "yellow"
assert _severity_color(Severity.LOW) == "blue"
assert _severity_color(Severity.INFO) == "dim"
def test_severity_icon(self) -> None:
assert _severity_icon(Severity.CRITICAL) == "!!"
assert _severity_icon(Severity.HIGH) == "!"
assert _severity_icon(Severity.MEDIUM) == "*"
assert _severity_icon(Severity.LOW) == "-"
assert _severity_icon(Severity.INFO) == "i"
def test_verdict_color(self) -> None:
assert _verdict_color(Verdict.APPROVE) == "green"
assert _verdict_color(Verdict.COMMENT) == "yellow"
assert _verdict_color(Verdict.REQUEST_CHANGES) == "red"
def test_verdict_icon(self) -> None:
assert _verdict_icon(Verdict.APPROVE) == "[ok]"
assert _verdict_icon(Verdict.COMMENT) == "[..]"
assert _verdict_icon(Verdict.REQUEST_CHANGES) == "[!!]"
class TestRichOutput:
def test_rich_format_with_findings(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
finding = Finding(
id="test-finding-1",
agent=AgentName.SECURITY,
file="test.py",
line_start=10,
line_end=15,
severity=Severity.HIGH,
confidence=0.9,
title="SQL Injection",
description="User input in query",
reasoning="Direct concatenation",
prompt_version="test-v1.0",
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return(findings=[finding])
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
assert result.exit_code == 0
def test_rich_format_no_findings(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
assert result.exit_code == 0
assert "No issues found" in result.output
def test_rich_format_critical_findings(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
finding = Finding(
id="test-finding-1",
agent=AgentName.SECURITY,
file="test.py",
line_start=10,
line_end=10,
severity=Severity.CRITICAL,
confidence=0.95,
title="Critical Issue",
description="This is critical",
reasoning="Very bad",
suggestion="Fix it immediately",
prompt_version="test-v1.0",
)
deliberation = DeliberationResult(
findings=[finding],
verdict=Verdict.REQUEST_CHANGES,
verdict_confidence=0.95,
verdict_reasoning="Critical issue found",
total_findings=1,
critical_count=1,
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = (
[
ReviewResult(
agent_name=AgentName.SECURITY,
findings=[finding],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
)
],
deliberation,
)
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
assert result.exit_code == 0
class TestMarkdownOutput:
def test_markdown_with_findings(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
finding = Finding(
id="test-finding-1",
agent=AgentName.SECURITY,
file="test.py",
line_start=10,
line_end=15,
severity=Severity.HIGH,
confidence=0.9,
title="SQL Injection",
description="User input in query",
reasoning="Direct concatenation",
suggestion="Use parameterized queries",
prompt_version="test-v1.0",
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return(findings=[finding])
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
assert result.exit_code == 0
assert "## Findings" in result.output
assert "SQL Injection" in result.output
def test_markdown_verdict_badges(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
for verdict in [Verdict.APPROVE, Verdict.COMMENT, Verdict.REQUEST_CHANGES]:
deliberation = DeliberationResult(
verdict=verdict,
verdict_confidence=0.9,
verdict_reasoning="Test",
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = (
[
ReviewResult(
agent_name=AgentName.SECURITY,
findings=[],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
)
],
deliberation,
)
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
assert result.exit_code == 0
assert verdict.value.upper() in result.output
class TestJsonOutput:
def test_json_with_conflicts(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
from arbiter.deliberation.conflicts import Conflict, ConflictNature
conflict = Conflict(
id="test-conflict",
finding_ids=["f1", "f2"],
nature=ConflictNature.TRADE_OFF,
description="Test conflict",
severity_weight=0.8,
)
deliberation = DeliberationResult(
verdict=Verdict.COMMENT,
verdict_confidence=0.7,
verdict_reasoning="Conflicts found",
conflicts=[conflict],
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = (
[
ReviewResult(
agent_name=AgentName.SECURITY,
findings=[],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
)
],
deliberation,
)
result = runner.invoke(app, ["review", str(diff_file), "--format", "json"])
assert result.exit_code == 0
output = json.loads(result.output)
assert "conflicts" in output
assert len(output["conflicts"]) == 1
class TestWorkDirHandling:
def test_work_dir_option(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
work_dir = tmp_path / "src"
work_dir.mkdir()
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = make_mock_return()
result = runner.invoke(app, ["review", str(diff_file), "--work-dir", str(work_dir)])
assert result.exit_code == 0
call_args = mock_run.call_args
assert call_args.kwargs.get("work_dir") == work_dir.resolve()
class TestRichOutputWithConflicts:
def test_rich_conflicts(self, tmp_path: Path) -> None:
from arbiter.deliberation.conflicts import Conflict, ConflictNature
from arbiter.deliberation.synthesis import Resolution
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
conflict = Conflict(
id="test-conflict",
finding_ids=["f1", "f2"],
nature=ConflictNature.TRADE_OFF,
description="Security vs complexity trade-off",
severity_weight=0.8,
)
resolution = Resolution(
conflict_id="test-conflict",
decision="prefer_first",
reasoning="Security takes priority",
confidence=0.9,
)
deliberation = DeliberationResult(
verdict=Verdict.COMMENT,
verdict_confidence=0.7,
verdict_reasoning="Conflicts found",
conflicts=[conflict],
resolutions=[resolution],
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = (
[
ReviewResult(
agent_name=AgentName.SECURITY,
findings=[],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
)
],
deliberation,
)
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
assert result.exit_code == 0
class TestMarkdownOutputWithConflicts:
def test_markdown_conflicts(self, tmp_path: Path) -> None:
from arbiter.deliberation.conflicts import Conflict, ConflictNature
from arbiter.deliberation.synthesis import Resolution
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
conflict = Conflict(
id="test-conflict",
finding_ids=["f1", "f2"],
nature=ConflictNature.CONTRADICTORY,
description="Contradictory recommendations",
severity_weight=0.9,
)
resolution = Resolution(
conflict_id="test-conflict",
decision="merge",
reasoning="Both concerns addressed by combined fix",
merged_suggestion="Do both things",
confidence=0.85,
)
deliberation = DeliberationResult(
verdict=Verdict.COMMENT,
verdict_confidence=0.7,
verdict_reasoning="Conflicts found",
conflicts=[conflict],
resolutions=[resolution],
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = (
[
ReviewResult(
agent_name=AgentName.SECURITY,
findings=[],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
)
],
deliberation,
)
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
assert result.exit_code == 0
assert "## Conflicts" in result.output
assert "Contradictory" in result.output
assert "Resolution" in result.output
def test_markdown_findings(self, tmp_path: Path) -> None:
diff_file = tmp_path / "test.diff"
diff_file.write_text("+ some change")
findings = [
Finding(
id="f1",
agent=AgentName.SECURITY,
file="test.py",
line_start=10,
line_end=15,
severity=Severity.HIGH,
confidence=0.9,
title="Security Issue",
description="Vulnerable code",
reasoning="Bad pattern",
suggestion="Fix it this way",
prompt_version="test-v1.0",
),
Finding(
id="f2",
agent=AgentName.STYLE,
file="test.py",
line_start=20,
line_end=25,
severity=Severity.LOW,
confidence=0.8,
title="Style Issue",
description="Could be cleaner",
reasoning="Convention",
prompt_version="test-v1.0",
),
]
deliberation = DeliberationResult(
findings=findings,
verdict=Verdict.COMMENT,
verdict_confidence=0.75,
verdict_reasoning="Issues found",
total_findings=2,
)
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
mock_run.return_value = (
[
ReviewResult(
agent_name=AgentName.SECURITY,
findings=[findings[0]],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
),
ReviewResult(
agent_name=AgentName.STYLE,
findings=[findings[1]],
duration_ms=100,
tokens_used=100,
cost_usd=0.001,
),
],
deliberation,
)
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
assert result.exit_code == 0
assert "Security Issue" in result.output
assert "Style Issue" in result.output
assert "Fix it this way" in result.output