Files
arbiter/tests/test_conversation_detection.py

153 lines
6.0 KiB
Python

"""Tests for question detection in PR comments."""
import pytest
from arbiter.conversation.detection import QuestionDetector
from arbiter.models import AgentName, Finding, Severity
@pytest.fixture
def detector() -> QuestionDetector:
    """Provide a ``QuestionDetector`` built with a 0.5 confidence threshold."""
    threshold = 0.5
    return QuestionDetector(confidence_threshold=threshold)
@pytest.fixture
def sample_findings() -> list[Finding]:
    """Provide two findings located in ``src/auth.py``.

    The first is a high-severity SECURITY finding covering lines 10-15; the
    second is a low-severity STYLE finding covering lines 20-25.
    """
    sql_injection = Finding(
        id="f1234567-1234-1234-1234-123456789abc",
        agent=AgentName.SECURITY,
        file="src/auth.py",
        line_start=10,
        line_end=15,
        severity=Severity.HIGH,
        confidence=0.9,
        title="SQL Injection vulnerability",
        description="User input directly concatenated into SQL query",
        reasoning="String concatenation allows SQL injection",
        prompt_version="security-v1.0",
    )
    naming_convention = Finding(
        id="f2345678-2345-2345-2345-234567890bcd",
        agent=AgentName.STYLE,
        file="src/auth.py",
        line_start=20,
        line_end=25,
        severity=Severity.LOW,
        confidence=0.8,
        title="Inconsistent naming convention",
        description="Variable name does not follow snake_case",
        reasoning="PEP 8 recommends snake_case for variables",
        prompt_version="style-v1.0",
    )
    return [sql_injection, naming_convention]
class TestQuestionDetection:
    """Checks on the ``is_question`` flag returned by ``QuestionDetector.analyze``."""

    def test_detects_simple_question(self, detector: QuestionDetector) -> None:
        """A short question ending in ``?`` is flagged as a question."""
        assert detector.analyze("Why is this a problem?").is_question is True

    def test_detects_question_with_why(self, detector: QuestionDetector) -> None:
        """A "why" comment is flagged even without a trailing question mark."""
        assert detector.analyze("Why did you flag this line").is_question is True

    def test_detects_question_with_how(self, detector: QuestionDetector) -> None:
        """A "how" question is flagged as a question."""
        assert detector.analyze("How can I fix this issue?").is_question is True

    def test_detects_question_with_what(self, detector: QuestionDetector) -> None:
        """A "what" question is flagged as a question."""
        assert detector.analyze("What does this mean?").is_question is True

    def test_detects_explain_request(self, detector: QuestionDetector) -> None:
        """A request to explain counts as a question."""
        assert detector.analyze("Please explain this finding").is_question is True

    def test_statement_not_detected(self, detector: QuestionDetector) -> None:
        """A plain declarative statement is not flagged."""
        assert detector.analyze("I fixed the issue.").is_question is False

    def test_empty_string(self, detector: QuestionDetector) -> None:
        """An empty comment is not flagged."""
        assert detector.analyze("").is_question is False
class TestArbiterDirected:
    """Checks on the ``is_directed_at_arbiter`` flag of the analysis result."""

    def test_at_arbiter_mention(self, detector: QuestionDetector) -> None:
        """An explicit ``@arbiter`` mention marks the comment as directed."""
        result = detector.analyze("@arbiter Why is this flagged?")
        assert result.is_directed_at_arbiter is True

    def test_arbiter_keyword(self, detector: QuestionDetector) -> None:
        """Addressing "Arbiter" by name (no ``@``) also marks it as directed."""
        result = detector.analyze("Arbiter, can you explain?")
        assert result.is_directed_at_arbiter is True

    def test_not_directed(self, detector: QuestionDetector) -> None:
        """A question that never names arbiter is not marked as directed."""
        result = detector.analyze("Why is this a problem?")
        assert result.is_directed_at_arbiter is False
class TestAgentMentions:
    """Checks on which agents end up in ``mentioned_agents`` for a comment."""

    def test_security_keywords(self, detector: QuestionDetector) -> None:
        """Security wording yields the SECURITY agent."""
        agents = detector.analyze("Why is this a security vulnerability?").mentioned_agents
        assert AgentName.SECURITY in agents

    def test_style_keywords(self, detector: QuestionDetector) -> None:
        """Naming-convention wording yields the STYLE agent."""
        agents = detector.analyze("Is this naming convention wrong?").mentioned_agents
        assert AgentName.STYLE in agents

    def test_complexity_keywords(self, detector: QuestionDetector) -> None:
        """Refactoring/complexity wording yields the COMPLEXITY agent."""
        agents = detector.analyze("How can I refactor to reduce complexity?").mentioned_agents
        assert AgentName.COMPLEXITY in agents

    def test_multiple_agents(self, detector: QuestionDetector) -> None:
        """A comment touching two topics yields both agents."""
        result = detector.analyze("Is this a security vulnerability or just a style issue?")
        agents = result.mentioned_agents
        assert AgentName.SECURITY in agents
        assert AgentName.STYLE in agents
class TestFindingReferences:
    """Checks on ``mentioned_finding_ids`` when findings are passed to ``analyze``."""

    def test_extract_finding_id(
        self, detector: QuestionDetector, sample_findings: list[Finding]
    ) -> None:
        """An id belonging to a supplied finding is reported as mentioned."""
        expected_id = "f1234567-1234-1234-1234-123456789abc"
        comment = "Can you explain finding f1234567-1234-1234-1234-123456789abc?"
        result = detector.analyze(comment, findings=sample_findings)
        assert expected_id in result.mentioned_finding_ids

    def test_invalid_finding_id(
        self, detector: QuestionDetector, sample_findings: list[Finding]
    ) -> None:
        """An id that matches none of the supplied findings is not reported."""
        comment = "What about finding 00000000-0000-0000-0000-000000000000?"
        result = detector.analyze(comment, findings=sample_findings)
        assert len(result.mentioned_finding_ids) == 0
class TestConfidence:
    """Checks on the ``confidence`` value attached to the analysis result."""

    def test_high_confidence_directed_question(self, detector: QuestionDetector) -> None:
        """A question addressed to ``@arbiter`` scores at least 0.8."""
        score = detector.analyze("@arbiter Why is this a security issue?").confidence
        assert score >= 0.8

    def test_lower_confidence_generic_question(self, detector: QuestionDetector) -> None:
        """An undirected question scores in the half-open range [0.4, 0.8)."""
        score = detector.analyze("Why is this flagged?").confidence
        assert 0.4 <= score
        assert score < 0.8

    def test_zero_confidence_non_question(self, detector: QuestionDetector) -> None:
        """A plain statement scores exactly 0.0."""
        score = detector.analyze("I fixed the issue.").confidence
        assert score == 0.0
class TestQuestionTextExtraction:
    """Checks on the cleaned-up ``question_text`` of the analysis result."""

    def test_removes_at_mentions(self, detector: QuestionDetector) -> None:
        """``@``-mentions are stripped while the question itself is kept."""
        analysis = detector.analyze("@arbiter @someone Why is this wrong?")
        assert "@arbiter" not in analysis.question_text
        assert "@someone" not in analysis.question_text
        assert "Why is this wrong?" in analysis.question_text

    def test_collapses_whitespace(self, detector: QuestionDetector) -> None:
        """Runs of spaces in the comment do not survive into ``question_text``."""
        # Build the runs of spaces explicitly so the intent cannot be lost to
        # whitespace normalisation of the literal itself. With single spaces
        # only, nothing is collapsed and a "no space at all" assertion could
        # never hold for a multi-word question.
        messy_comment = (" " * 3).join(["Why", "is", "this", "wrong?"])
        double_space = " " * 2
        assert double_space in messy_comment  # sanity check on the input

        analysis = detector.analyze(messy_comment)
        assert double_space not in analysis.question_text