"""Tests for CLI commands.""" import json from pathlib import Path from unittest.mock import AsyncMock, patch from typer.testing import CliRunner from arbiter.cli import ( _severity_color, _severity_icon, _verdict_color, _verdict_icon, app, ) from arbiter.deliberation import DeliberationResult from arbiter.models import AgentName, Finding, ReviewResult, Severity, Verdict runner = CliRunner() def make_mock_return( findings: list[Finding] | None = None, verdict: Verdict = Verdict.APPROVE ) -> tuple[list[ReviewResult], DeliberationResult]: """Create a mock return value for _run_review.""" agent_results = [ ReviewResult( agent_name=AgentName.SECURITY, findings=findings or [], duration_ms=100, tokens_used=100, cost_usd=0.001, ) ] deliberation_result = DeliberationResult( findings=findings or [], verdict=verdict, verdict_confidence=0.9, verdict_reasoning="Test reasoning", total_findings=len(findings) if findings else 0, ) return agent_results, deliberation_result class TestVersionCommand: def test_version_output(self) -> None: result = runner.invoke(app, ["version"]) assert result.exit_code == 0 assert "arbiter" in result.output assert "0.3.0" in result.output class TestReviewCommand: def test_file_not_found(self) -> None: result = runner.invoke(app, ["review", "nonexistent.diff"]) assert result.exit_code == 1 assert "File not found" in result.output def test_empty_diff_warning(self, tmp_path: Path) -> None: diff_file = tmp_path / "empty.diff" diff_file.write_text("") result = runner.invoke(app, ["review", str(diff_file)]) assert result.exit_code == 0 assert "Empty diff" in result.output def test_policy_not_found(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") result = runner.invoke(app, ["review", str(diff_file), "--policy", "nonexistent.yaml"]) assert result.exit_code == 1 assert "Policy file not found" in result.output def test_reads_from_stdin(self) -> None: with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", "-"], input="+ added line\n") assert result.exit_code == 0 def test_json_output_format(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", str(diff_file), "--format", "json"]) assert result.exit_code == 0 assert '"verdict"' in result.output assert '"findings"' in result.output def test_markdown_output_format(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"]) assert result.exit_code == 0 assert "# Arbiter Review" in result.output def test_loads_policy_file(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") policy_file = tmp_path / "policy.yaml" policy_file.write_text(""" version: "1.0" agents: security: enabled: true style: enabled: false complexity: enabled: false """) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", str(diff_file), "--policy", str(policy_file)]) assert result.exit_code == 0 # Verify policy was passed to _run_review call_args = mock_run.call_args policy = call_args[0][1] # Second positional arg is policy assert len(policy.get_enabled_agents()) == 1 def test_model_override(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", str(diff_file), "--model", "gpt-4o-mini"]) assert result.exit_code == 0 # Verify model was set in policy call_args = mock_run.call_args policy = call_args[0][1] for config in policy.agents.values(): assert config.model == "gpt-4o-mini" def test_static_analysis_flag(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", str(diff_file), "--no-static-analysis"]) assert result.exit_code == 0 # Verify static_analysis was False call_args = mock_run.call_args assert call_args.kwargs.get("static_analysis") is False class TestNoArgsHelp: def test_no_args_shows_help(self) -> None: result = runner.invoke(app, []) assert result.exit_code == 0 assert "A multi-agent code review system" in result.output class TestOutputFormatting: def test_severity_color(self) -> None: assert _severity_color(Severity.CRITICAL) == "red bold" assert _severity_color(Severity.HIGH) == "red" assert _severity_color(Severity.MEDIUM) == "yellow" assert _severity_color(Severity.LOW) == "blue" assert _severity_color(Severity.INFO) == "dim" def test_severity_icon(self) -> None: assert _severity_icon(Severity.CRITICAL) == "!!" assert _severity_icon(Severity.HIGH) == "!" assert _severity_icon(Severity.MEDIUM) == "*" assert _severity_icon(Severity.LOW) == "-" assert _severity_icon(Severity.INFO) == "i" def test_verdict_color(self) -> None: assert _verdict_color(Verdict.APPROVE) == "green" assert _verdict_color(Verdict.COMMENT) == "yellow" assert _verdict_color(Verdict.REQUEST_CHANGES) == "red" def test_verdict_icon(self) -> None: assert _verdict_icon(Verdict.APPROVE) == "[ok]" assert _verdict_icon(Verdict.COMMENT) == "[..]" assert _verdict_icon(Verdict.REQUEST_CHANGES) == "[!!]" class TestRichOutput: def test_rich_format_with_findings(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") finding = Finding( id="test-finding-1", agent=AgentName.SECURITY, file="test.py", line_start=10, line_end=15, severity=Severity.HIGH, confidence=0.9, title="SQL Injection", description="User input in query", reasoning="Direct concatenation", prompt_version="test-v1.0", ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return(findings=[finding]) result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"]) assert result.exit_code == 0 def test_rich_format_no_findings(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"]) assert result.exit_code == 0 assert "No issues found" in result.output def test_rich_format_critical_findings(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") finding = Finding( id="test-finding-1", agent=AgentName.SECURITY, file="test.py", line_start=10, line_end=10, severity=Severity.CRITICAL, confidence=0.95, title="Critical Issue", description="This is critical", reasoning="Very bad", suggestion="Fix it immediately", prompt_version="test-v1.0", ) deliberation = DeliberationResult( findings=[finding], verdict=Verdict.REQUEST_CHANGES, verdict_confidence=0.95, verdict_reasoning="Critical issue found", total_findings=1, critical_count=1, ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = ( [ ReviewResult( agent_name=AgentName.SECURITY, findings=[finding], duration_ms=100, tokens_used=100, cost_usd=0.001, ) ], deliberation, ) result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"]) assert result.exit_code == 0 class TestMarkdownOutput: def test_markdown_with_findings(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") finding = Finding( id="test-finding-1", agent=AgentName.SECURITY, file="test.py", line_start=10, line_end=15, severity=Severity.HIGH, confidence=0.9, title="SQL Injection", description="User input in query", reasoning="Direct concatenation", suggestion="Use parameterized queries", prompt_version="test-v1.0", ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return(findings=[finding]) result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"]) assert result.exit_code == 0 assert "## Findings" in result.output assert "SQL Injection" in result.output def test_markdown_verdict_badges(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") for verdict in [Verdict.APPROVE, Verdict.COMMENT, Verdict.REQUEST_CHANGES]: deliberation = DeliberationResult( verdict=verdict, verdict_confidence=0.9, verdict_reasoning="Test", ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = ( [ ReviewResult( agent_name=AgentName.SECURITY, findings=[], duration_ms=100, tokens_used=100, cost_usd=0.001, ) ], deliberation, ) result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"]) assert result.exit_code == 0 assert verdict.value.upper() in result.output class TestJsonOutput: def test_json_with_conflicts(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") from arbiter.deliberation.conflicts import Conflict, ConflictNature conflict = Conflict( id="test-conflict", finding_ids=["f1", "f2"], nature=ConflictNature.TRADE_OFF, description="Test conflict", severity_weight=0.8, ) deliberation = DeliberationResult( verdict=Verdict.COMMENT, verdict_confidence=0.7, verdict_reasoning="Conflicts found", conflicts=[conflict], ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = ( [ ReviewResult( agent_name=AgentName.SECURITY, findings=[], duration_ms=100, tokens_used=100, cost_usd=0.001, ) ], deliberation, ) result = runner.invoke(app, ["review", str(diff_file), "--format", "json"]) assert result.exit_code == 0 output = json.loads(result.output) assert "conflicts" in output assert len(output["conflicts"]) == 1 class TestWorkDirHandling: def test_work_dir_option(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") work_dir = tmp_path / "src" work_dir.mkdir() with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = make_mock_return() result = runner.invoke(app, ["review", str(diff_file), "--work-dir", str(work_dir)]) assert result.exit_code == 0 call_args = mock_run.call_args assert call_args.kwargs.get("work_dir") == work_dir.resolve() class TestRichOutputWithConflicts: def test_rich_conflicts(self, tmp_path: Path) -> None: from arbiter.deliberation.conflicts import Conflict, ConflictNature from arbiter.deliberation.synthesis import Resolution diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") conflict = Conflict( id="test-conflict", finding_ids=["f1", "f2"], nature=ConflictNature.TRADE_OFF, description="Security vs complexity trade-off", severity_weight=0.8, ) resolution = Resolution( conflict_id="test-conflict", decision="prefer_first", reasoning="Security takes priority", confidence=0.9, ) deliberation = DeliberationResult( verdict=Verdict.COMMENT, verdict_confidence=0.7, verdict_reasoning="Conflicts found", conflicts=[conflict], resolutions=[resolution], ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = ( [ ReviewResult( agent_name=AgentName.SECURITY, findings=[], duration_ms=100, tokens_used=100, cost_usd=0.001, ) ], deliberation, ) result = runner.invoke(app, ["review", str(diff_file), "--format", "rich"]) assert result.exit_code == 0 class TestMarkdownOutputWithConflicts: def test_markdown_conflicts(self, tmp_path: Path) -> None: from arbiter.deliberation.conflicts import Conflict, ConflictNature from arbiter.deliberation.synthesis import Resolution diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") conflict = Conflict( id="test-conflict", finding_ids=["f1", "f2"], nature=ConflictNature.CONTRADICTORY, description="Contradictory recommendations", severity_weight=0.9, ) resolution = Resolution( conflict_id="test-conflict", decision="merge", reasoning="Both concerns addressed by combined fix", merged_suggestion="Do both things", confidence=0.85, ) deliberation = DeliberationResult( verdict=Verdict.COMMENT, verdict_confidence=0.7, verdict_reasoning="Conflicts found", conflicts=[conflict], resolutions=[resolution], ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = ( [ ReviewResult( agent_name=AgentName.SECURITY, findings=[], duration_ms=100, tokens_used=100, cost_usd=0.001, ) ], deliberation, ) result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"]) assert result.exit_code == 0 assert "## Conflicts" in result.output assert "Contradictory" in result.output assert "Resolution" in result.output def test_markdown_findings(self, tmp_path: Path) -> None: diff_file = tmp_path / "test.diff" diff_file.write_text("+ some change") findings = [ Finding( id="f1", agent=AgentName.SECURITY, file="test.py", line_start=10, line_end=15, severity=Severity.HIGH, confidence=0.9, title="Security Issue", description="Vulnerable code", reasoning="Bad pattern", suggestion="Fix it this way", prompt_version="test-v1.0", ), Finding( id="f2", agent=AgentName.STYLE, file="test.py", line_start=20, line_end=25, severity=Severity.LOW, confidence=0.8, title="Style Issue", description="Could be cleaner", reasoning="Convention", prompt_version="test-v1.0", ), ] deliberation = DeliberationResult( findings=findings, verdict=Verdict.COMMENT, verdict_confidence=0.75, verdict_reasoning="Issues found", total_findings=2, ) with patch("arbiter.cli._run_review", new_callable=AsyncMock) as mock_run: mock_run.return_value = ( [ ReviewResult( agent_name=AgentName.SECURITY, findings=[findings[0]], duration_ms=100, tokens_used=100, cost_usd=0.001, ), ReviewResult( agent_name=AgentName.STYLE, findings=[findings[1]], duration_ms=100, tokens_used=100, cost_usd=0.001, ), ], deliberation, ) result = runner.invoke(app, ["review", str(diff_file), "--format", "markdown"]) assert result.exit_code == 0 assert "Security Issue" in result.output assert "Style Issue" in result.output assert "Fix it this way" in result.output