conversation detection tests

2025-05-27 20:13:02 +00:00
parent f31272736e
commit e4f814efff
3 changed files with 638 additions and 0 deletions
--- a/tests/test_conversation_detection.py
+++ b/tests/test_conversation_detection.py
@@ -0,0 +1,152 @@
+"""Tests for question detection in PR comments."""
+
+import pytest
+
+from arbiter.conversation.detection import QuestionDetector
+from arbiter.models import AgentName, Finding, Severity
+
+
+@pytest.fixture
+def detector() -> QuestionDetector:  # detector instance injected into every test below
+    return QuestionDetector(confidence_threshold=0.5)  # NOTE(review): threshold meaning not shown here
+
+
+@pytest.fixture
+def sample_findings() -> list[Finding]:  # two Finding records, both located in src/auth.py
+    return [
+        Finding(  # first record: SECURITY agent, HIGH severity
+            id="f1234567-1234-1234-1234-123456789abc",  # id quoted by TestFindingReferences
+            agent=AgentName.SECURITY,
+            file="src/auth.py",
+            line_start=10,
+            line_end=15,
+            severity=Severity.HIGH,
+            confidence=0.9,
+            title="SQL Injection vulnerability",
+            description="User input directly concatenated into SQL query",
+            reasoning="String concatenation allows SQL injection",
+            prompt_version="security-v1.0",
+        ),
+        Finding(  # second record: STYLE agent, LOW severity
+            id="f2345678-2345-2345-2345-234567890bcd",
+            agent=AgentName.STYLE,
+            file="src/auth.py",
+            line_start=20,
+            line_end=25,
+            severity=Severity.LOW,
+            confidence=0.8,
+            title="Inconsistent naming convention",
+            description="Variable name does not follow snake_case",
+            reasoning="PEP 8 recommends snake_case for variables",
+            prompt_version="style-v1.0",
+        ),
+    ]
+
+
+class TestQuestionDetection:  # checks the is_question flag returned by detector.analyze()
+    def test_detects_simple_question(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Why is this a problem?")  # "Why" opener plus "?"
+        assert analysis.is_question is True  # "is True": a merely truthy value would fail
+
+    def test_detects_question_with_why(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Why did you flag this line")  # deliberately has no "?"
+        assert analysis.is_question is True
+
+    def test_detects_question_with_how(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("How can I fix this issue?")  # "How" opener plus "?"
+        assert analysis.is_question is True
+
+    def test_detects_question_with_what(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("What does this mean?")  # "What" opener plus "?"
+        assert analysis.is_question is True
+
+    def test_detects_explain_request(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Please explain this finding")  # imperative request, no "?"
+        assert analysis.is_question is True
+
+    def test_statement_not_detected(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("I fixed the issue.")  # plain declarative sentence
+        assert analysis.is_question is False  # "is False": None or "" would fail
+
+    def test_empty_string(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("")  # empty input must not raise
+        assert analysis.is_question is False
+
+
+class TestArbiterDirected:  # checks the is_directed_at_arbiter flag of the analysis
+    def test_at_arbiter_mention(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("@arbiter Why is this flagged?")  # explicit "@arbiter" handle
+        assert analysis.is_directed_at_arbiter is True
+
+    def test_arbiter_keyword(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Arbiter, can you explain?")  # capitalized name, no "@"
+        assert analysis.is_directed_at_arbiter is True
+
+    def test_not_directed(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Why is this a problem?")  # text never names arbiter
+        assert analysis.is_directed_at_arbiter is False
+
+
+class TestAgentMentions:  # checks which AgentName values land in analysis.mentioned_agents
+    def test_security_keywords(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Why is this a security vulnerability?")
+        assert AgentName.SECURITY in analysis.mentioned_agents  # membership only, extras allowed
+
+    def test_style_keywords(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Is this naming convention wrong?")  # no literal "style" here
+        assert AgentName.STYLE in analysis.mentioned_agents
+
+    def test_complexity_keywords(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("How can I refactor to reduce complexity?")
+        assert AgentName.COMPLEXITY in analysis.mentioned_agents
+
+    def test_multiple_agents(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Is this a security vulnerability or just a style issue?")
+        assert AgentName.SECURITY in analysis.mentioned_agents  # both agents must be reported
+        assert AgentName.STYLE in analysis.mentioned_agents
+
+
+class TestFindingReferences:  # checks analysis.mentioned_finding_ids when findings are passed
+    def test_extract_finding_id(
+        self, detector: QuestionDetector, sample_findings: list[Finding]
+    ) -> None:
+        analysis = detector.analyze(
+            "Can you explain finding f1234567-1234-1234-1234-123456789abc?",  # first fixture id
+            findings=sample_findings,
+        )
+        assert "f1234567-1234-1234-1234-123456789abc" in analysis.mentioned_finding_ids
+
+    def test_invalid_finding_id(
+        self, detector: QuestionDetector, sample_findings: list[Finding]
+    ) -> None:
+        analysis = detector.analyze(
+            "What about finding 00000000-0000-0000-0000-000000000000?",  # id absent from fixture
+            findings=sample_findings,
+        )
+        assert len(analysis.mentioned_finding_ids) == 0  # unknown ids must not be reported
+
+
+class TestConfidence:  # checks the numeric analysis.confidence for three kinds of input
+    def test_high_confidence_directed_question(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("@arbiter Why is this a security issue?")  # "@arbiter" + "?"
+        assert analysis.confidence >= 0.8  # lower bound only
+
+    def test_lower_confidence_generic_question(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Why is this flagged?")  # question that never names arbiter
+        assert 0.4 <= analysis.confidence < 0.8  # half-open band [0.4, 0.8)
+
+    def test_zero_confidence_non_question(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("I fixed the issue.")  # plain declarative sentence
+        assert analysis.confidence == 0.0  # exact float compare: expects exactly 0.0
+
+
+class TestQuestionTextExtraction:  # checks the cleaned-up analysis.question_text string
+    def test_removes_at_mentions(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("@arbiter @someone Why is this wrong?")  # two @-handles
+        assert "@arbiter" not in analysis.question_text
+        assert "@someone" not in analysis.question_text  # non-arbiter handles are dropped too
+        assert "Why is this wrong?" in analysis.question_text  # the question itself survives
+
+    def test_collapses_whitespace(self, detector: QuestionDetector) -> None:
+        analysis = detector.analyze("Why    is   this   wrong?")  # runs of 3-4 spaces
+        assert "  " not in analysis.question_text  # no two consecutive spaces may remain