feat(agents): implement agent framework and CLI

This commit is contained in:
2025-03-08 15:52:29 +00:00
parent 72268ff440
commit f22ca1d5bd
30 changed files with 3466 additions and 0 deletions

305
tests/test_agents.py Normal file
View File

@@ -0,0 +1,305 @@
"""Tests for review agents."""
import pytest
from arbiter.agents import ComplexityAgent, ReviewContext, SecurityAgent, StyleAgent
from arbiter.llm.prompts import PromptRegistry
from arbiter.models import AgentConfig, AgentName, Policy, Severity
from tests.conftest import MockLLMClient
class TestSecurityAgent:
    """Exercise ``SecurityAgent.review`` against a scripted ``MockLLMClient``."""

    @pytest.mark.asyncio
    async def test_review_returns_result(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """An empty JSON list yields a security result with no findings."""
        mock_llm = MockLLMClient(responses=["[]"])
        agent = SecurityAgent(mock_llm, prompt_registry)
        context = ReviewContext(diff="+ some code", policy=Policy())

        result = await agent.review(context)

        assert result.agent_name == AgentName.SECURITY
        assert result.findings == []
        assert result.duration_ms >= 0
        assert result.tokens_used == 150  # 100 in + 50 out from mock
        # Compare with a tolerance: exact float equality would break as soon
        # as the cost is derived arithmetically instead of passed through.
        assert result.cost_usd == pytest.approx(0.001)

    @pytest.mark.asyncio
    async def test_parses_json_findings(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """A finding wrapped in a fenced ``json`` block is parsed into a result."""
        response = """```json
[
{
"file": "src/auth.py",
"line_start": 10,
"line_end": 15,
"severity": "high",
"confidence": 0.9,
"title": "SQL Injection",
"description": "User input concatenated",
"reasoning": "Allows SQL injection",
"suggestion": "Use parameterized queries",
"references": ["https://owasp.org"]
}
]
```"""
        mock_llm = MockLLMClient(responses=[response])
        agent = SecurityAgent(mock_llm, prompt_registry)
        context = ReviewContext(diff="+ query = ...", policy=Policy())

        result = await agent.review(context)

        assert len(result.findings) == 1
        finding = result.findings[0]
        assert finding.file == "src/auth.py"
        assert finding.severity == Severity.HIGH
        assert finding.confidence == 0.9
        assert finding.title == "SQL Injection"

    @pytest.mark.asyncio
    async def test_uses_configured_model(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """The model set in the security ``AgentConfig`` is sent to the LLM."""
        mock_llm = MockLLMClient(responses=["[]"])
        agent = SecurityAgent(mock_llm, prompt_registry)
        policy = Policy(
            agents={
                AgentName.SECURITY: AgentConfig(model="gpt-4o-mini"),
                AgentName.STYLE: AgentConfig(),
                AgentName.COMPLEXITY: AgentConfig(),
            }
        )
        context = ReviewContext(diff="+ code", policy=policy)

        await agent.review(context)

        # The mock records every call; inspect the first one.
        assert mock_llm.calls[0]["model"] == "gpt-4o-mini"

    @pytest.mark.asyncio
    async def test_filters_by_severity(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """Findings below the configured severity threshold are dropped."""
        response = """[
{"file": "a.py", "line_start": 1, "line_end": 1, "severity": "high", "confidence": 0.9, "title": "High", "description": "", "reasoning": ""},
{"file": "b.py", "line_start": 1, "line_end": 1, "severity": "low", "confidence": 0.9, "title": "Low", "description": "", "reasoning": ""},
{"file": "c.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.9, "title": "Info", "description": "", "reasoning": ""}
]"""
        mock_llm = MockLLMClient(responses=[response])
        agent = SecurityAgent(mock_llm, prompt_registry)
        policy = Policy(
            agents={
                AgentName.SECURITY: AgentConfig(severity_threshold=Severity.MEDIUM),
                AgentName.STYLE: AgentConfig(),
                AgentName.COMPLEXITY: AgentConfig(),
            }
        )
        context = ReviewContext(diff="+ code", policy=policy)

        result = await agent.review(context)

        # Only high severity should pass (medium threshold filters low and info)
        assert len(result.findings) == 1
        assert result.findings[0].severity == Severity.HIGH
class TestStyleAgent:
    """Checks for ``StyleAgent.review`` driven by a scripted mock LLM."""

    @pytest.mark.asyncio
    async def test_review_returns_result(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """An empty JSON list produces a STYLE-tagged result with no findings."""
        llm = MockLLMClient(responses=["[]"])
        outcome = await StyleAgent(llm, prompt_registry).review(
            ReviewContext(diff="+ some code", policy=Policy())
        )

        assert outcome.agent_name == AgentName.STYLE
        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_uses_default_model(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """With a default ``Policy()`` the LLM is called with ``gpt-4o-mini``."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = StyleAgent(llm, prompt_registry)

        await reviewer.review(ReviewContext(diff="+ code", policy=Policy()))

        # The mock keeps a log of its calls; the first entry is this review.
        first_call = llm.calls[0]
        assert first_call["model"] == "gpt-4o-mini"
class TestComplexityAgent:
    """Checks for ``ComplexityAgent.review`` driven by a scripted mock LLM."""

    @pytest.mark.asyncio
    async def test_review_returns_result(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """An empty JSON list produces a COMPLEXITY-tagged result, no findings."""
        llm = MockLLMClient(responses=["[]"])
        outcome = await ComplexityAgent(llm, prompt_registry).review(
            ReviewContext(diff="+ some code", policy=Policy())
        )

        assert outcome.agent_name == AgentName.COMPLEXITY
        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_parses_complexity_findings(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """A bare JSON array with one medium finding is parsed and returned."""
        llm_reply = """[
{
"file": "processor.py",
"line_start": 1,
"line_end": 50,
"severity": "medium",
"confidence": 0.8,
"title": "High cyclomatic complexity",
"description": "Function has 15 branches",
"reasoning": "Makes testing and maintenance difficult"
}
]"""
        llm = MockLLMClient(responses=[llm_reply])
        reviewer = ComplexityAgent(llm, prompt_registry)

        outcome = await reviewer.review(
            ReviewContext(diff="+ complex code", policy=Policy())
        )

        assert len(outcome.findings) == 1
        only_finding = outcome.findings[0]
        assert only_finding.severity == Severity.MEDIUM
        # Title match is case-insensitive on purpose.
        assert "complexity" in only_finding.title.lower()
class TestAgentResponseParsing:
    """How ``SecurityAgent.review`` copes with odd or malformed LLM output."""

    @pytest.mark.asyncio
    async def test_handles_empty_response(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """An empty string reply results in no findings."""
        llm = MockLLMClient(responses=[""])
        outcome = await SecurityAgent(llm, prompt_registry).review(
            ReviewContext(diff="+ code", policy=Policy())
        )

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_handles_invalid_json(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """A reply that is not JSON results in no findings."""
        llm = MockLLMClient(responses=["not valid json"])
        outcome = await SecurityAgent(llm, prompt_registry).review(
            ReviewContext(diff="+ code", policy=Policy())
        )

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_handles_json_without_code_block(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """A raw JSON array (no Markdown fence) is still parsed."""
        llm_reply = '[{"file": "a.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.5, "title": "Test", "description": "", "reasoning": ""}]'
        llm = MockLLMClient(responses=[llm_reply])
        reviewer = SecurityAgent(llm, prompt_registry)

        outcome = await reviewer.review(
            ReviewContext(diff="+ code", policy=Policy())
        )

        assert len(outcome.findings) == 1

    @pytest.mark.asyncio
    async def test_handles_malformed_finding(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """A bad entry is skipped while a valid sibling entry is kept."""
        llm_reply = """[
{"file": "a.py", "line_start": 1, "severity": "invalid_severity", "confidence": 0.5, "title": "Bad", "description": "", "reasoning": ""},
{"file": "b.py", "line_start": 1, "line_end": 1, "severity": "info", "confidence": 0.5, "title": "Valid", "description": "", "reasoning": ""}
]"""
        llm = MockLLMClient(responses=[llm_reply])
        reviewer = SecurityAgent(llm, prompt_registry)

        outcome = await reviewer.review(
            ReviewContext(diff="+ code", policy=Policy())
        )

        # Only the valid finding should survive: the first entry has an
        # invalid severity (and, as written, no "line_end" key either).
        assert len(outcome.findings) == 1
        survivor = outcome.findings[0]
        assert survivor.title == "Valid"

    @pytest.mark.asyncio
    async def test_includes_prompt_additions(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """Configured ``prompt_additions`` text appears in the first message."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = SecurityAgent(llm, prompt_registry)
        agent_configs = {
            AgentName.SECURITY: AgentConfig(prompt_additions="Focus on authentication"),
            AgentName.STYLE: AgentConfig(),
            AgentName.COMPLEXITY: AgentConfig(),
        }
        custom_policy = Policy(agents=agent_configs)

        await reviewer.review(ReviewContext(diff="+ code", policy=custom_policy))

        # Inspect the first message of the first recorded LLM call.
        first_message = llm.calls[0]["messages"][0]
        message_content = first_message["content"]
        assert "Focus on authentication" in message_content

    @pytest.mark.asyncio
    async def test_handles_non_list_json(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """Valid JSON that is an object rather than a list gives no findings."""
        llm = MockLLMClient(responses=['{"not": "a list"}'])
        outcome = await SecurityAgent(llm, prompt_registry).review(
            ReviewContext(diff="+ code", policy=Policy())
        )

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_handles_non_dict_items(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """A list whose items are not objects gives no findings."""
        llm = MockLLMClient(responses=['["string", 123, null]'])
        outcome = await SecurityAgent(llm, prompt_registry).review(
            ReviewContext(diff="+ code", policy=Policy())
        )

        assert outcome.findings == []

    @pytest.mark.asyncio
    async def test_agent_without_config_uses_defaults(
        self,
        prompt_registry: PromptRegistry,
    ) -> None:
        """With no per-agent config in the policy, the default model is used."""
        llm = MockLLMClient(responses=["[]"])
        reviewer = SecurityAgent(llm, prompt_registry)
        # Policy deliberately carries an empty agents mapping.
        bare_policy = Policy(agents={})

        outcome = await reviewer.review(
            ReviewContext(diff="+ code", policy=bare_policy)
        )

        # Should use default model (gpt-4o for security)
        assert llm.calls[0]["model"] == "gpt-4o"
        assert outcome.findings == []