conversation detection tests
This commit is contained in:
152
tests/test_conversation_detection.py
Normal file
152
tests/test_conversation_detection.py
Normal file
@@ -0,0 +1,152 @@
|
||||
"""Tests for question detection in PR comments."""
|
||||
|
||||
import pytest
|
||||
|
||||
from arbiter.conversation.detection import QuestionDetector
|
||||
from arbiter.models import AgentName, Finding, Severity
|
||||
|
||||
|
||||
@pytest.fixture
def detector() -> QuestionDetector:
    """Provide a ``QuestionDetector`` configured with a 0.5 confidence threshold."""
    question_detector = QuestionDetector(confidence_threshold=0.5)
    return question_detector
|
||||
|
||||
|
||||
@pytest.fixture
def sample_findings() -> list[Finding]:
    """Provide two findings on ``src/auth.py`` for the finding-reference tests.

    The first is a high-severity security finding and the second is a
    low-severity style finding; each has its own distinct ``id``.
    """
    sql_injection = Finding(
        id="f1234567-1234-1234-1234-123456789abc",
        agent=AgentName.SECURITY,
        file="src/auth.py",
        line_start=10,
        line_end=15,
        severity=Severity.HIGH,
        confidence=0.9,
        title="SQL Injection vulnerability",
        description="User input directly concatenated into SQL query",
        reasoning="String concatenation allows SQL injection",
        prompt_version="security-v1.0",
    )
    naming_convention = Finding(
        id="f2345678-2345-2345-2345-234567890bcd",
        agent=AgentName.STYLE,
        file="src/auth.py",
        line_start=20,
        line_end=25,
        severity=Severity.LOW,
        confidence=0.8,
        title="Inconsistent naming convention",
        description="Variable name does not follow snake_case",
        reasoning="PEP 8 recommends snake_case for variables",
        prompt_version="style-v1.0",
    )
    return [sql_injection, naming_convention]
|
||||
|
||||
|
||||
class TestQuestionDetection:
    """Checks on the ``is_question`` flag returned by ``QuestionDetector.analyze``."""

    def test_detects_simple_question(self, detector: QuestionDetector) -> None:
        """A short question ending in ``?`` is flagged as a question."""
        assert detector.analyze("Why is this a problem?").is_question is True

    def test_detects_question_with_why(self, detector: QuestionDetector) -> None:
        """A "why" sentence is flagged even without a trailing question mark."""
        assert detector.analyze("Why did you flag this line").is_question is True

    def test_detects_question_with_how(self, detector: QuestionDetector) -> None:
        """A "how" question is flagged as a question."""
        assert detector.analyze("How can I fix this issue?").is_question is True

    def test_detects_question_with_what(self, detector: QuestionDetector) -> None:
        """A "what" question is flagged as a question."""
        assert detector.analyze("What does this mean?").is_question is True

    def test_detects_explain_request(self, detector: QuestionDetector) -> None:
        """A request to explain counts as a question despite being imperative."""
        assert detector.analyze("Please explain this finding").is_question is True

    def test_statement_not_detected(self, detector: QuestionDetector) -> None:
        """A plain declarative statement is not flagged."""
        assert detector.analyze("I fixed the issue.").is_question is False

    def test_empty_string(self, detector: QuestionDetector) -> None:
        """An empty comment is not flagged."""
        assert detector.analyze("").is_question is False
|
||||
|
||||
|
||||
class TestArbiterDirected:
    """Checks on the ``is_directed_at_arbiter`` flag of the analysis result."""

    def test_at_arbiter_mention(self, detector: QuestionDetector) -> None:
        """An explicit ``@arbiter`` mention marks the comment as directed."""
        result = detector.analyze("@arbiter Why is this flagged?")
        directed = result.is_directed_at_arbiter
        assert directed is True

    def test_arbiter_keyword(self, detector: QuestionDetector) -> None:
        """Addressing "Arbiter" by name, without ``@``, also counts as directed."""
        result = detector.analyze("Arbiter, can you explain?")
        directed = result.is_directed_at_arbiter
        assert directed is True

    def test_not_directed(self, detector: QuestionDetector) -> None:
        """A question that never names arbiter is not marked as directed."""
        result = detector.analyze("Why is this a problem?")
        directed = result.is_directed_at_arbiter
        assert directed is False
|
||||
|
||||
|
||||
class TestAgentMentions:
    """Checks on ``mentioned_agents``: which agents a comment's wording points to."""

    def test_security_keywords(self, detector: QuestionDetector) -> None:
        """Security wording maps to the SECURITY agent."""
        agents = detector.analyze("Why is this a security vulnerability?").mentioned_agents
        assert AgentName.SECURITY in agents

    def test_style_keywords(self, detector: QuestionDetector) -> None:
        """Naming-convention wording maps to the STYLE agent."""
        agents = detector.analyze("Is this naming convention wrong?").mentioned_agents
        assert AgentName.STYLE in agents

    def test_complexity_keywords(self, detector: QuestionDetector) -> None:
        """Refactoring and complexity wording maps to the COMPLEXITY agent."""
        agents = detector.analyze("How can I refactor to reduce complexity?").mentioned_agents
        assert AgentName.COMPLEXITY in agents

    def test_multiple_agents(self, detector: QuestionDetector) -> None:
        """A comment touching two topics reports both agents."""
        agents = detector.analyze(
            "Is this a security vulnerability or just a style issue?"
        ).mentioned_agents
        assert AgentName.SECURITY in agents
        assert AgentName.STYLE in agents
|
||||
|
||||
|
||||
class TestFindingReferences:
    """Checks on ``mentioned_finding_ids`` when known findings are supplied."""

    def test_extract_finding_id(
        self, detector: QuestionDetector, sample_findings: list[Finding]
    ) -> None:
        """An id that belongs to a supplied finding is reported."""
        known_id = "f1234567-1234-1234-1234-123456789abc"
        result = detector.analyze(
            "Can you explain finding f1234567-1234-1234-1234-123456789abc?",
            findings=sample_findings,
        )
        assert known_id in result.mentioned_finding_ids

    def test_invalid_finding_id(
        self, detector: QuestionDetector, sample_findings: list[Finding]
    ) -> None:
        """An id matching none of the supplied findings is not reported."""
        result = detector.analyze(
            "What about finding 00000000-0000-0000-0000-000000000000?",
            findings=sample_findings,
        )
        reported_ids = result.mentioned_finding_ids
        assert len(reported_ids) == 0
|
||||
|
||||
|
||||
class TestConfidence:
    """Checks on the ``confidence`` score attached to the analysis result."""

    def test_high_confidence_directed_question(self, detector: QuestionDetector) -> None:
        """A question addressed to ``@arbiter`` scores at least 0.8."""
        score = detector.analyze("@arbiter Why is this a security issue?").confidence
        assert score >= 0.8

    def test_lower_confidence_generic_question(self, detector: QuestionDetector) -> None:
        """An undirected question scores in the range [0.4, 0.8)."""
        score = detector.analyze("Why is this flagged?").confidence
        assert 0.4 <= score < 0.8

    def test_zero_confidence_non_question(self, detector: QuestionDetector) -> None:
        """A plain statement scores exactly zero."""
        score = detector.analyze("I fixed the issue.").confidence
        assert score == 0.0
|
||||
|
||||
|
||||
class TestQuestionTextExtraction:
    """Checks on the ``question_text`` produced by ``QuestionDetector.analyze``."""

    def test_removes_at_mentions(self, detector: QuestionDetector) -> None:
        """``@``-mentions are dropped while the question itself is kept."""
        analysis = detector.analyze("@arbiter @someone Why is this wrong?")
        assert "@arbiter" not in analysis.question_text
        assert "@someone" not in analysis.question_text
        assert "Why is this wrong?" in analysis.question_text

    def test_collapses_whitespace(self, detector: QuestionDetector) -> None:
        """Runs of spaces in the comment do not survive in ``question_text``."""
        # The input must contain repeated spaces and the check must look for a
        # double space. Asserting that no single space remains would contradict
        # test_removes_at_mentions, which expects the single-spaced
        # "Why is this wrong?" to appear in question_text.
        analysis = detector.analyze("Why   is    this   wrong?")
        assert "  " not in analysis.question_text
|
||||
Reference in New Issue
Block a user