"""Tests for review agents.""" import pytest from arbiter.agents import ComplexityAgent, ReviewContext, SecurityAgent, StyleAgent from arbiter.llm.prompts import PromptRegistry from arbiter.models import AgentConfig, AgentName, Policy, Severity from tests.conftest import MockLLMClient class TestSecurityAgent: @pytest.mark.asyncio async def test_review_returns_result( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["[]"]) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ some code", policy=Policy()) result = await agent.review(context) assert result.agent_name == AgentName.SECURITY assert result.findings == [] assert result.duration_ms >= 0 assert result.tokens_used == 150 # 100 in + 50 out from mock assert result.cost_usd == 0.001 @pytest.mark.asyncio async def test_parses_json_findings( self, prompt_registry: PromptRegistry, ) -> None: response = """```json [ { "file": "src/auth.py", "line_start": 10, "line_end": 15, "severity": "high", "confidence": 0.9, "title": "SQL Injection", "description": "User input concatenated", "reasoning": "Allows SQL injection", "suggestion": "Use parameterized queries", "references": ["https://owasp.org"] } ] ```""" mock_llm = MockLLMClient(responses=[response]) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ query = ...", policy=Policy()) result = await agent.review(context) assert len(result.findings) == 1 finding = result.findings[0] assert finding.file == "src/auth.py" assert finding.severity == Severity.HIGH assert finding.confidence == 0.9 assert finding.title == "SQL Injection" @pytest.mark.asyncio async def test_uses_configured_model( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["[]"]) agent = SecurityAgent(mock_llm, prompt_registry) policy = Policy( agents={ AgentName.SECURITY: AgentConfig(model="gpt-4o-mini"), AgentName.STYLE: AgentConfig(), AgentName.COMPLEXITY: AgentConfig(), } ) context = ReviewContext(diff="+ code", policy=policy) await agent.review(context) assert mock_llm.calls[0]["model"] == "gpt-4o-mini" @pytest.mark.asyncio async def test_filters_by_severity( self, prompt_registry: PromptRegistry, ) -> None: response = """[ {"file": "a.py", "line_start": 1, "line_end": 1, "severity": "high", "confidence": 0.9, "title": "High", "description": "", "reasoning": ""}, {"file": "b.py", "line_start": 1, "line_end": 1, "severity": "low", "confidence": 0.9, "title": "Low", "description": "", "reasoning": ""}, {"file": "c.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.9, "title": "Info", "description": "", "reasoning": ""} ]""" mock_llm = MockLLMClient(responses=[response]) agent = SecurityAgent(mock_llm, prompt_registry) policy = Policy( agents={ AgentName.SECURITY: AgentConfig(severity_threshold=Severity.MEDIUM), AgentName.STYLE: AgentConfig(), AgentName.COMPLEXITY: AgentConfig(), } ) context = ReviewContext(diff="+ code", policy=policy) result = await agent.review(context) # Only high severity should pass (medium threshold filters low and info) assert len(result.findings) == 1 assert result.findings[0].severity == Severity.HIGH class TestStyleAgent: @pytest.mark.asyncio async def test_review_returns_result( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["[]"]) agent = StyleAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ some code", policy=Policy()) result = await agent.review(context) assert result.agent_name == AgentName.STYLE assert result.findings == [] @pytest.mark.asyncio async def test_uses_default_model( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["[]"]) agent = StyleAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ code", policy=Policy()) await agent.review(context) assert mock_llm.calls[0]["model"] == "gpt-4o-mini" class TestComplexityAgent: @pytest.mark.asyncio async def test_review_returns_result( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["[]"]) agent = ComplexityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ some code", policy=Policy()) result = await agent.review(context) assert result.agent_name == AgentName.COMPLEXITY assert result.findings == [] @pytest.mark.asyncio async def test_parses_complexity_findings( self, prompt_registry: PromptRegistry, ) -> None: response = """[ { "file": "processor.py", "line_start": 1, "line_end": 50, "severity": "medium", "confidence": 0.8, "title": "High cyclomatic complexity", "description": "Function has 15 branches", "reasoning": "Makes testing and maintenance difficult" } ]""" mock_llm = MockLLMClient(responses=[response]) agent = ComplexityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ complex code", policy=Policy()) result = await agent.review(context) assert len(result.findings) == 1 assert result.findings[0].severity == Severity.MEDIUM assert "complexity" in result.findings[0].title.lower() class TestAgentResponseParsing: @pytest.mark.asyncio async def test_handles_empty_response( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=[""]) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ code", policy=Policy()) result = await agent.review(context) assert result.findings == [] @pytest.mark.asyncio async def test_handles_invalid_json( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["not valid json"]) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ code", policy=Policy()) result = await agent.review(context) assert result.findings == [] @pytest.mark.asyncio async def test_handles_json_without_code_block( self, prompt_registry: PromptRegistry, ) -> None: response = '[{"file": "a.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.5, "title": "Test", "description": "", "reasoning": ""}]' mock_llm = MockLLMClient(responses=[response]) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ code", policy=Policy()) result = await agent.review(context) assert len(result.findings) == 1 @pytest.mark.asyncio async def test_handles_malformed_finding( self, prompt_registry: PromptRegistry, ) -> None: response = """[ {"file": "a.py", "line_start": 1, "severity": "invalid_severity", "confidence": 0.5, "title": "Bad", "description": "", "reasoning": ""}, {"file": "b.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.5, "title": "Valid", "description": "", "reasoning": ""} ]""" mock_llm = MockLLMClient(responses=[response]) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ code", policy=Policy()) result = await agent.review(context) # Only the valid finding should be included (first has invalid severity) assert len(result.findings) == 1 assert result.findings[0].title == "Valid" @pytest.mark.asyncio async def test_includes_prompt_additions( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["[]"]) agent = SecurityAgent(mock_llm, prompt_registry) policy = Policy( agents={ AgentName.SECURITY: AgentConfig(prompt_additions="Focus on authentication"), AgentName.STYLE: AgentConfig(), AgentName.COMPLEXITY: AgentConfig(), } ) context = ReviewContext(diff="+ code", policy=policy) await agent.review(context) message_content = mock_llm.calls[0]["messages"][0]["content"] assert "Focus on authentication" in message_content @pytest.mark.asyncio async def test_handles_non_list_json( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=['{"not": "a list"}']) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ code", policy=Policy()) result = await agent.review(context) assert result.findings == [] @pytest.mark.asyncio async def test_handles_non_dict_items( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=['["string", 123, null]']) agent = SecurityAgent(mock_llm, prompt_registry) context = ReviewContext(diff="+ code", policy=Policy()) result = await agent.review(context) assert result.findings == [] @pytest.mark.asyncio async def test_agent_without_config_uses_defaults( self, prompt_registry: PromptRegistry, ) -> None: mock_llm = MockLLMClient(responses=["[]"]) agent = SecurityAgent(mock_llm, prompt_registry) # Create policy with empty agents dict policy = Policy(agents={}) context = ReviewContext(diff="+ code", policy=policy) result = await agent.review(context) # Should use default model (gpt-4o for security) assert mock_llm.calls[0]["model"] == "gpt-4o" assert result.findings == []