feat(agents): implement agent framework and CLI
This commit is contained in:
561
tests/test_cli.py
Normal file
561
tests/test_cli.py
Normal file
@@ -0,0 +1,561 @@
|
||||
"""Tests for CLI commands."""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from arbiter.cli import (
|
||||
_severity_color,
|
||||
_severity_icon,
|
||||
_verdict_color,
|
||||
_verdict_icon,
|
||||
app,
|
||||
)
|
||||
from arbiter.deliberation import DeliberationResult
|
||||
from arbiter.models import AgentName, Finding, ReviewResult, Severity, Verdict
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
|
||||
def make_mock_return(
|
||||
findings: list[Finding] | None = None, verdict: Verdict = Verdict.APPROVE
|
||||
) -> tuple[list[ReviewResult], DeliberationResult]:
|
||||
"""Create a mock return value for _run_review."""
|
||||
agent_results = [
|
||||
ReviewResult(
|
||||
agent_name=AgentName.SECURITY,
|
||||
findings=findings or [],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
)
|
||||
]
|
||||
deliberation_result = DeliberationResult(
|
||||
findings=findings or [],
|
||||
verdict=verdict,
|
||||
verdict_confidence=0.9,
|
||||
verdict_reasoning="Test reasoning",
|
||||
total_findings=len(findings) if findings else 0,
|
||||
)
|
||||
return agent_results, deliberation_result
|
||||
|
||||
|
||||
class TestVersionCommand:
|
||||
def test_version_output(self) -> None:
|
||||
result = runner.invoke(app, ["version"])
|
||||
assert result.exit_code == 0
|
||||
assert "arbiter" in result.output
|
||||
assert "0.3.0" in result.output
|
||||
|
||||
|
||||
class TestReviewCommand:
|
||||
def test_file_not_found(self) -> None:
|
||||
result = runner.invoke(app, ["review", "nonexistent.diff"])
|
||||
assert result.exit_code == 1
|
||||
assert "File not found" in result.output
|
||||
|
||||
def test_empty_diff_warning(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "empty.diff"
|
||||
diff_file.write_text("")
|
||||
|
||||
result = runner.invoke(app, ["review", str(diff_file)])
|
||||
assert result.exit_code == 0
|
||||
assert "Empty diff" in result.output
|
||||
|
||||
def test_policy_not_found(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--policy", "nonexistent.yaml"])
|
||||
assert result.exit_code == 1
|
||||
assert "Policy file not found" in result.output
|
||||
|
||||
def test_reads_from_stdin(self) -> None:
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", "-"], input="+ added line\n")
|
||||
|
||||
assert result.exit_code == 0
|
||||
|
||||
def test_json_output_format(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "json"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert '"verdict"' in result.output
|
||||
assert '"findings"' in result.output
|
||||
|
||||
def test_markdown_output_format(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "# Arbiter Review" in result.output
|
||||
|
||||
def test_loads_policy_file(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
policy_file = tmp_path / "policy.yaml"
|
||||
policy_file.write_text("""
|
||||
version: "1.0"
|
||||
agents:
|
||||
security:
|
||||
enabled: true
|
||||
style:
|
||||
enabled: false
|
||||
complexity:
|
||||
enabled: false
|
||||
""")
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--policy", str(policy_file)])
|
||||
|
||||
assert result.exit_code == 0
|
||||
# Verify policy was passed to _run_review
|
||||
call_args = mock_run.call_args
|
||||
policy = call_args[0][1] # Second positional arg is policy
|
||||
assert len(policy.get_enabled_agents()) == 1
|
||||
|
||||
def test_model_override(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--model", "gpt-4o-mini"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
# Verify model was set in policy
|
||||
call_args = mock_run.call_args
|
||||
policy = call_args[0][1]
|
||||
for config in policy.agents.values():
|
||||
assert config.model == "gpt-4o-mini"
|
||||
|
||||
def test_static_analysis_flag(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--no-static-analysis"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
# Verify static_analysis was False
|
||||
call_args = mock_run.call_args
|
||||
assert call_args.kwargs.get("static_analysis") is False
|
||||
|
||||
|
||||
class TestNoArgsHelp:
|
||||
def test_no_args_shows_help(self) -> None:
|
||||
result = runner.invoke(app, [])
|
||||
assert result.exit_code == 0
|
||||
assert "A multi-agent code review system" in result.output
|
||||
|
||||
|
||||
class TestOutputFormatting:
|
||||
def test_severity_color(self) -> None:
|
||||
assert _severity_color(Severity.CRITICAL) == "red bold"
|
||||
assert _severity_color(Severity.HIGH) == "red"
|
||||
assert _severity_color(Severity.MEDIUM) == "yellow"
|
||||
assert _severity_color(Severity.LOW) == "blue"
|
||||
assert _severity_color(Severity.INFO) == "dim"
|
||||
|
||||
def test_severity_icon(self) -> None:
|
||||
assert _severity_icon(Severity.CRITICAL) == "!!"
|
||||
assert _severity_icon(Severity.HIGH) == "!"
|
||||
assert _severity_icon(Severity.MEDIUM) == "*"
|
||||
assert _severity_icon(Severity.LOW) == "-"
|
||||
assert _severity_icon(Severity.INFO) == "i"
|
||||
|
||||
def test_verdict_color(self) -> None:
|
||||
assert _verdict_color(Verdict.APPROVE) == "green"
|
||||
assert _verdict_color(Verdict.COMMENT) == "yellow"
|
||||
assert _verdict_color(Verdict.REQUEST_CHANGES) == "red"
|
||||
|
||||
def test_verdict_icon(self) -> None:
|
||||
assert _verdict_icon(Verdict.APPROVE) == "[ok]"
|
||||
assert _verdict_icon(Verdict.COMMENT) == "[..]"
|
||||
assert _verdict_icon(Verdict.REQUEST_CHANGES) == "[!!]"
|
||||
|
||||
|
||||
class TestRichOutput:
|
||||
def test_rich_format_with_findings(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
finding = Finding(
|
||||
id="test-finding-1",
|
||||
agent=AgentName.SECURITY,
|
||||
file="test.py",
|
||||
line_start=10,
|
||||
line_end=15,
|
||||
severity=Severity.HIGH,
|
||||
confidence=0.9,
|
||||
title="SQL Injection",
|
||||
description="User input in query",
|
||||
reasoning="Direct concatenation",
|
||||
prompt_version="test-v1.0",
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return(findings=[finding])
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
|
||||
def test_rich_format_no_findings(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "No issues found" in result.output
|
||||
|
||||
def test_rich_format_critical_findings(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
finding = Finding(
|
||||
id="test-finding-1",
|
||||
agent=AgentName.SECURITY,
|
||||
file="test.py",
|
||||
line_start=10,
|
||||
line_end=10,
|
||||
severity=Severity.CRITICAL,
|
||||
confidence=0.95,
|
||||
title="Critical Issue",
|
||||
description="This is critical",
|
||||
reasoning="Very bad",
|
||||
suggestion="Fix it immediately",
|
||||
prompt_version="test-v1.0",
|
||||
)
|
||||
|
||||
deliberation = DeliberationResult(
|
||||
findings=[finding],
|
||||
verdict=Verdict.REQUEST_CHANGES,
|
||||
verdict_confidence=0.95,
|
||||
verdict_reasoning="Critical issue found",
|
||||
total_findings=1,
|
||||
critical_count=1,
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (
|
||||
[
|
||||
ReviewResult(
|
||||
agent_name=AgentName.SECURITY,
|
||||
findings=[finding],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
)
|
||||
],
|
||||
deliberation,
|
||||
)
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
class TestMarkdownOutput:
|
||||
def test_markdown_with_findings(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
finding = Finding(
|
||||
id="test-finding-1",
|
||||
agent=AgentName.SECURITY,
|
||||
file="test.py",
|
||||
line_start=10,
|
||||
line_end=15,
|
||||
severity=Severity.HIGH,
|
||||
confidence=0.9,
|
||||
title="SQL Injection",
|
||||
description="User input in query",
|
||||
reasoning="Direct concatenation",
|
||||
suggestion="Use parameterized queries",
|
||||
prompt_version="test-v1.0",
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return(findings=[finding])
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "## Findings" in result.output
|
||||
assert "SQL Injection" in result.output
|
||||
|
||||
def test_markdown_verdict_badges(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
for verdict in [Verdict.APPROVE, Verdict.COMMENT, Verdict.REQUEST_CHANGES]:
|
||||
deliberation = DeliberationResult(
|
||||
verdict=verdict,
|
||||
verdict_confidence=0.9,
|
||||
verdict_reasoning="Test",
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (
|
||||
[
|
||||
ReviewResult(
|
||||
agent_name=AgentName.SECURITY,
|
||||
findings=[],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
)
|
||||
],
|
||||
deliberation,
|
||||
)
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert verdict.value.upper() in result.output
|
||||
|
||||
|
||||
class TestJsonOutput:
|
||||
def test_json_with_conflicts(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
from arbiter.deliberation.conflicts import Conflict, ConflictNature
|
||||
|
||||
conflict = Conflict(
|
||||
id="test-conflict",
|
||||
finding_ids=["f1", "f2"],
|
||||
nature=ConflictNature.TRADE_OFF,
|
||||
description="Test conflict",
|
||||
severity_weight=0.8,
|
||||
)
|
||||
|
||||
deliberation = DeliberationResult(
|
||||
verdict=Verdict.COMMENT,
|
||||
verdict_confidence=0.7,
|
||||
verdict_reasoning="Conflicts found",
|
||||
conflicts=[conflict],
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (
|
||||
[
|
||||
ReviewResult(
|
||||
agent_name=AgentName.SECURITY,
|
||||
findings=[],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
)
|
||||
],
|
||||
deliberation,
|
||||
)
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "json"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
output = json.loads(result.output)
|
||||
assert "conflicts" in output
|
||||
assert len(output["conflicts"]) == 1
|
||||
|
||||
|
||||
class TestWorkDirHandling:
|
||||
def test_work_dir_option(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
work_dir = tmp_path / "src"
|
||||
work_dir.mkdir()
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = make_mock_return()
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--work-dir", str(work_dir)])
|
||||
|
||||
assert result.exit_code == 0
|
||||
call_args = mock_run.call_args
|
||||
assert call_args.kwargs.get("work_dir") == work_dir.resolve()
|
||||
|
||||
|
||||
class TestRichOutputWithConflicts:
|
||||
def test_rich_conflicts(self, tmp_path: Path) -> None:
|
||||
from arbiter.deliberation.conflicts import Conflict, ConflictNature
|
||||
from arbiter.deliberation.synthesis import Resolution
|
||||
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
conflict = Conflict(
|
||||
id="test-conflict",
|
||||
finding_ids=["f1", "f2"],
|
||||
nature=ConflictNature.TRADE_OFF,
|
||||
description="Security vs complexity trade-off",
|
||||
severity_weight=0.8,
|
||||
)
|
||||
|
||||
resolution = Resolution(
|
||||
conflict_id="test-conflict",
|
||||
decision="prefer_first",
|
||||
reasoning="Security takes priority",
|
||||
confidence=0.9,
|
||||
)
|
||||
|
||||
deliberation = DeliberationResult(
|
||||
verdict=Verdict.COMMENT,
|
||||
verdict_confidence=0.7,
|
||||
verdict_reasoning="Conflicts found",
|
||||
conflicts=[conflict],
|
||||
resolutions=[resolution],
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (
|
||||
[
|
||||
ReviewResult(
|
||||
agent_name=AgentName.SECURITY,
|
||||
findings=[],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
)
|
||||
],
|
||||
deliberation,
|
||||
)
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
|
||||
|
||||
class TestMarkdownOutputWithConflicts:
|
||||
def test_markdown_conflicts(self, tmp_path: Path) -> None:
|
||||
from arbiter.deliberation.conflicts import Conflict, ConflictNature
|
||||
from arbiter.deliberation.synthesis import Resolution
|
||||
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
conflict = Conflict(
|
||||
id="test-conflict",
|
||||
finding_ids=["f1", "f2"],
|
||||
nature=ConflictNature.CONTRADICTORY,
|
||||
description="Contradictory recommendations",
|
||||
severity_weight=0.9,
|
||||
)
|
||||
|
||||
resolution = Resolution(
|
||||
conflict_id="test-conflict",
|
||||
decision="merge",
|
||||
reasoning="Both concerns addressed by combined fix",
|
||||
merged_suggestion="Do both things",
|
||||
confidence=0.85,
|
||||
)
|
||||
|
||||
deliberation = DeliberationResult(
|
||||
verdict=Verdict.COMMENT,
|
||||
verdict_confidence=0.7,
|
||||
verdict_reasoning="Conflicts found",
|
||||
conflicts=[conflict],
|
||||
resolutions=[resolution],
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (
|
||||
[
|
||||
ReviewResult(
|
||||
agent_name=AgentName.SECURITY,
|
||||
findings=[],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
)
|
||||
],
|
||||
deliberation,
|
||||
)
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "## Conflicts" in result.output
|
||||
assert "Contradictory" in result.output
|
||||
assert "Resolution" in result.output
|
||||
|
||||
def test_markdown_findings(self, tmp_path: Path) -> None:
|
||||
diff_file = tmp_path / "test.diff"
|
||||
diff_file.write_text("+ some change")
|
||||
|
||||
findings = [
|
||||
Finding(
|
||||
id="f1",
|
||||
agent=AgentName.SECURITY,
|
||||
file="test.py",
|
||||
line_start=10,
|
||||
line_end=15,
|
||||
severity=Severity.HIGH,
|
||||
confidence=0.9,
|
||||
title="Security Issue",
|
||||
description="Vulnerable code",
|
||||
reasoning="Bad pattern",
|
||||
suggestion="Fix it this way",
|
||||
prompt_version="test-v1.0",
|
||||
),
|
||||
Finding(
|
||||
id="f2",
|
||||
agent=AgentName.STYLE,
|
||||
file="test.py",
|
||||
line_start=20,
|
||||
line_end=25,
|
||||
severity=Severity.LOW,
|
||||
confidence=0.8,
|
||||
title="Style Issue",
|
||||
description="Could be cleaner",
|
||||
reasoning="Convention",
|
||||
prompt_version="test-v1.0",
|
||||
),
|
||||
]
|
||||
|
||||
deliberation = DeliberationResult(
|
||||
findings=findings,
|
||||
verdict=Verdict.COMMENT,
|
||||
verdict_confidence=0.75,
|
||||
verdict_reasoning="Issues found",
|
||||
total_findings=2,
|
||||
)
|
||||
|
||||
with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (
|
||||
[
|
||||
ReviewResult(
|
||||
agent_name=AgentName.SECURITY,
|
||||
findings=[findings[0]],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
),
|
||||
ReviewResult(
|
||||
agent_name=AgentName.STYLE,
|
||||
findings=[findings[1]],
|
||||
duration_ms=100,
|
||||
tokens_used=100,
|
||||
cost_usd=0.001,
|
||||
),
|
||||
],
|
||||
deliberation,
|
||||
)
|
||||
result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"])
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "Security Issue" in result.output
|
||||
assert "Style Issue" in result.output
|
||||
assert "Fix it this way" in result.output
|
||||
Reference in New Issue
Block a user