feat(agents): implement agent framework and CLI
This commit is contained in:
305
tests/test_agents.py
Normal file
305
tests/test_agents.py
Normal file
@@ -0,0 +1,305 @@
|
||||
"""Tests for review agents."""
|
||||
|
||||
import pytest
|
||||
|
||||
from arbiter.agents import ComplexityAgent, ReviewContext, SecurityAgent, StyleAgent
|
||||
from arbiter.llm.prompts import PromptRegistry
|
||||
from arbiter.models import AgentConfig, AgentName, Policy, Severity
|
||||
from tests.conftest import MockLLMClient
|
||||
|
||||
|
||||
class TestSecurityAgent:
    """Exercise ``SecurityAgent.review`` against a scripted ``MockLLMClient``."""

    @pytest.mark.asyncio
    async def test_review_returns_result(self, prompt_registry: PromptRegistry) -> None:
        """An empty JSON array yields no findings plus the mock's usage figures."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ some code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert outcome.agent_name == AgentName.SECURITY
        assert outcome.findings == []
        assert outcome.duration_ms >= 0
        assert outcome.tokens_used == 150  # 100 in + 50 out from mock
        assert outcome.cost_usd == 0.001

    @pytest.mark.asyncio
    async def test_parses_json_findings(self, prompt_registry: PromptRegistry) -> None:
        """A fenced ``json`` block with one item is parsed into one finding."""
        fenced_payload = """```json
[
{
"file": "src/auth.py",
"line_start": 10,
"line_end": 15,
"severity": "high",
"confidence": 0.9,
"title": "SQL Injection",
"description": "User input concatenated",
"reasoning": "Allows SQL injection",
"suggestion": "Use parameterized queries",
"references": ["https://owasp.org"]
}
]
```"""
        llm = MockLLMClient(responses=[fenced_payload])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ query = ...", policy=Policy())

        outcome = await reviewer.review(ctx)

        findings = outcome.findings
        assert len(findings) == 1
        parsed = findings[0]
        assert parsed.file == "src/auth.py"
        assert parsed.severity == Severity.HIGH
        assert parsed.confidence == 0.9
        assert parsed.title == "SQL Injection"

    @pytest.mark.asyncio
    async def test_uses_configured_model(self, prompt_registry: PromptRegistry) -> None:
        """The model named in the security agent's config is sent to the LLM."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = SecurityAgent(llm, prompt_registry)
        agent_configs = {
            AgentName.SECURITY: AgentConfig(model="gpt-4o-mini"),
            AgentName.STYLE: AgentConfig(),
            AgentName.COMPLEXITY: AgentConfig(),
        }
        ctx = ReviewContext(diff="+ code", policy=Policy(agents=agent_configs))

        await reviewer.review(ctx)

        first_call = llm.calls[0]
        assert first_call["model"] == "gpt-4o-mini"

    @pytest.mark.asyncio
    async def test_filters_by_severity(self, prompt_registry: PromptRegistry) -> None:
        """With a MEDIUM threshold, only the high-severity finding is kept."""
        payload = """[
{"file": "a.py", "line_start": 1, "line_end": 1, "severity": "high", "confidence": 0.9, "title": "High", "description": "", "reasoning": ""},
{"file": "b.py", "line_start": 1, "line_end": 1, "severity": "low", "confidence": 0.9, "title": "Low", "description": "", "reasoning": ""},
{"file": "c.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.9, "title": "Info", "description": "", "reasoning": ""}
]"""
        llm = MockLLMClient(responses=[payload])
        reviewer = SecurityAgent(llm, prompt_registry)
        agent_configs = {
            AgentName.SECURITY: AgentConfig(severity_threshold=Severity.MEDIUM),
            AgentName.STYLE: AgentConfig(),
            AgentName.COMPLEXITY: AgentConfig(),
        }
        ctx = ReviewContext(diff="+ code", policy=Policy(agents=agent_configs))

        outcome = await reviewer.review(ctx)

        # Only high severity should pass (medium threshold filters low and info)
        kept = outcome.findings
        assert len(kept) == 1
        assert kept[0].severity == Severity.HIGH
|
||||
|
||||
|
||||
class TestStyleAgent:
    """Exercise ``StyleAgent.review`` against a scripted ``MockLLMClient``."""

    @pytest.mark.asyncio
    async def test_review_returns_result(self, prompt_registry: PromptRegistry) -> None:
        """An empty JSON array yields a STYLE result with no findings."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = StyleAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ some code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert outcome.agent_name == AgentName.STYLE
        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_uses_default_model(self, prompt_registry: PromptRegistry) -> None:
        """With a default ``Policy()``, the LLM is called with ``gpt-4o-mini``."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = StyleAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ code", policy=Policy())

        await reviewer.review(ctx)

        first_call = llm.calls[0]
        assert first_call["model"] == "gpt-4o-mini"
|
||||
|
||||
|
||||
class TestComplexityAgent:
    """Exercise ``ComplexityAgent.review`` against a scripted ``MockLLMClient``."""

    @pytest.mark.asyncio
    async def test_review_returns_result(self, prompt_registry: PromptRegistry) -> None:
        """An empty JSON array yields a COMPLEXITY result with no findings."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = ComplexityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ some code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert outcome.agent_name == AgentName.COMPLEXITY
        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_parses_complexity_findings(self, prompt_registry: PromptRegistry) -> None:
        """A bare JSON array with one medium-severity item is parsed into one finding."""
        payload = """[
{
"file": "processor.py",
"line_start": 1,
"line_end": 50,
"severity": "medium",
"confidence": 0.8,
"title": "High cyclomatic complexity",
"description": "Function has 15 branches",
"reasoning": "Makes testing and maintenance difficult"
}
]"""
        llm = MockLLMClient(responses=[payload])
        reviewer = ComplexityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ complex code", policy=Policy())

        outcome = await reviewer.review(ctx)

        findings = outcome.findings
        assert len(findings) == 1
        only_finding = findings[0]
        assert only_finding.severity == Severity.MEDIUM
        assert "complexity" in only_finding.title.lower()
|
||||
|
||||
|
||||
class TestAgentResponseParsing:
    """Check how ``SecurityAgent.review`` copes with unusual LLM responses and policies."""

    @pytest.mark.asyncio
    async def test_handles_empty_response(self, prompt_registry: PromptRegistry) -> None:
        """An empty string response results in no findings."""
        llm = MockLLMClient(responses=[""])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_handles_invalid_json(self, prompt_registry: PromptRegistry) -> None:
        """Text that is not JSON results in no findings."""
        llm = MockLLMClient(responses=["not valid json"])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_handles_json_without_code_block(self, prompt_registry: PromptRegistry) -> None:
        """A bare JSON array (no Markdown fence) is still parsed."""
        payload = '[{"file": "a.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.5, "title": "Test", "description": "", "reasoning": ""}]'
        llm = MockLLMClient(responses=[payload])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert len(outcome.findings) == 1

    @pytest.mark.asyncio
    async def test_handles_malformed_finding(self, prompt_registry: PromptRegistry) -> None:
        """A malformed item is dropped while the valid item next to it is kept."""
        payload = """[
{"file": "a.py", "line_start": 1, "severity": "invalid_severity", "confidence": 0.5, "title": "Bad", "description": "", "reasoning": ""},
{"file": "b.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.5, "title": "Valid", "description": "", "reasoning": ""}
]"""
        llm = MockLLMClient(responses=[payload])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ code", policy=Policy())

        outcome = await reviewer.review(ctx)

        # Only the valid finding should be included (first has invalid severity)
        kept = outcome.findings
        assert len(kept) == 1
        assert kept[0].title == "Valid"

    @pytest.mark.asyncio
    async def test_includes_prompt_additions(self, prompt_registry: PromptRegistry) -> None:
        """Configured ``prompt_additions`` text appears in the first message sent."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = SecurityAgent(llm, prompt_registry)
        agent_configs = {
            AgentName.SECURITY: AgentConfig(prompt_additions="Focus on authentication"),
            AgentName.STYLE: AgentConfig(),
            AgentName.COMPLEXITY: AgentConfig(),
        }
        ctx = ReviewContext(diff="+ code", policy=Policy(agents=agent_configs))

        await reviewer.review(ctx)

        first_message = llm.calls[0]["messages"][0]
        assert "Focus on authentication" in first_message["content"]

    @pytest.mark.asyncio
    async def test_handles_non_list_json(self, prompt_registry: PromptRegistry) -> None:
        """A JSON object instead of an array results in no findings."""
        llm = MockLLMClient(responses=['{"not": "a list"}'])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_handles_non_dict_items(self, prompt_registry: PromptRegistry) -> None:
        """Array items that are not JSON objects result in no findings."""
        llm = MockLLMClient(responses=['["string", 123, null]'])
        reviewer = SecurityAgent(llm, prompt_registry)
        ctx = ReviewContext(diff="+ code", policy=Policy())

        outcome = await reviewer.review(ctx)

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_agent_without_config_uses_defaults(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """A policy with no per-agent config still produces a call using ``gpt-4o``."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = SecurityAgent(llm, prompt_registry)
        # Create policy with empty agents dict
        bare_policy = Policy(agents={})
        ctx = ReviewContext(diff="+ code", policy=bare_policy)

        outcome = await reviewer.review(ctx)

        # Should use default model (gpt-4o for security)
        first_call = llm.calls[0]
        assert first_call["model"] == "gpt-4o"
        assert outcome.findings == []
|
||||
Reference in New Issue
Block a user