tests for api, worker, cache

2025-03-22 11:04:46 +00:00
parent 41ab2f04df
commit 9a23e2c9c4
5 changed files with 2655 additions and 0 deletions
@@ -0,0 +1,164 @@
 """Tests for the LLM cache module."""
 import pytest
 from arbiter.llm.cache import LLMCache, compute_policy_hash
 from arbiter.llm.client import LLMResponse
 class TestComputePolicyHash:
     """Checks on ``compute_policy_hash``: stability, sensitivity and output shape."""
     def test_compute_policy_hash_deterministic(self) -> None:
         """Hashing the same policy twice yields equal digests."""
         sample = {"agents": {"security": {"enabled": True}}}
         first = compute_policy_hash(sample)
         second = compute_policy_hash(sample)
         assert first == second
     def test_policy_hash_varies(self) -> None:
         """Policies that differ in a single flag produce different digests."""
         enabled = {"agents": {"security": {"enabled": True}}}
         disabled = {"agents": {"security": {"enabled": False}}}
         assert compute_policy_hash(enabled) != compute_policy_hash(disabled)
     def test_compute_policy_hash_format(self) -> None:
         """The digest is 16 characters long and uses only lowercase hex digits."""
         digest = compute_policy_hash({"test": "data"})
         assert len(digest) == 16
         assert set(digest) <= set("0123456789abcdef")
 class MockRedisForCache:
    """Mock Redis client for cache testing."""
    def __init__(self) -> None:
        self._data: dict[str, str] = {}
    async def get(self, key: str) -> str | None:
        return self._data.get(key)
    async def set(self, key: str, value: str, ex: int | None = None) -> bool:  # noqa: ARG002
        self._data[key] = value
        return True
    async def delete(self, key: str) -> int:
        if key in self._data:
            del self._data[key]
            return 1
        return 0
    def scan_iter(self, match: str | None = None):  # noqa: ARG002
        async def _gen():
            for key in list(self._data.keys()):
                yield key
        return _gen()
 class TestLLMCache:
     """Exercise ``LLMCache`` against the in-memory ``MockRedisForCache`` double."""
     @pytest.fixture
     def cache(self) -> LLMCache:
         """Provide a fresh ``LLMCache`` backed by an empty mock Redis."""
         backend = MockRedisForCache()
         return LLMCache(backend)  # type: ignore[arg-type]
     @staticmethod
     def _response(content: str) -> LLMResponse:
         """Build the canned ``LLMResponse`` used by these tests, varying only ``content``."""
         return LLMResponse(
             content=content,
             model="gpt-4o",
             tokens_in=100,
             tokens_out=50,
             cost_usd=0.01,
         )
     def test_compute_key(self, cache: LLMCache) -> None:
         """A computed key starts with the cache prefix and is longer than 20 characters."""
         cache_key = cache._compute_key("diff content", "security", "v1.0", "policy123")
         assert cache_key.startswith("arbiter:llm:cache:")
         assert len(cache_key) > 20  # prefix + hash
     def test_compute_key_deterministic(self, cache: LLMCache) -> None:
         """Identical inputs map to the identical key."""
         first = cache._compute_key("diff", "security", "v1.0")
         second = cache._compute_key("diff", "security", "v1.0")
         assert first == second
     def test_compute_key_unique(self, cache: LLMCache) -> None:
         """Changing the diff, the agent or the version each changes the key."""
         variants = [
             ("diff1", "security", "v1.0"),
             ("diff2", "security", "v1.0"),
             ("diff1", "style", "v1.0"),
             ("diff1", "security", "v2.0"),
         ]
         distinct = {cache._compute_key(*args) for args in variants}
         assert len(distinct) == 4
     def test_serialize_deserialize_response(self, cache: LLMCache) -> None:
         """A response survives a serialize/deserialize round trip field by field."""
         original = self._response("test content")
         payload = cache._serialize_response(original)
         restored = cache._deserialize_response(payload)
         for field in ("content", "model", "tokens_in", "tokens_out", "cost_usd"):
             assert getattr(restored, field) == getattr(original, field)
     async def test_cache_get_miss(self, cache: LLMCache) -> None:
         """Looking up an unknown entry returns ``None`` and counts one miss."""
         outcome = await cache.get("diff", "security", "v1.0")
         assert outcome is None
         assert cache._misses == 1
         assert cache._hits == 0
     async def test_cache_set_and_get(self, cache: LLMCache) -> None:
         """A stored response is returned by a later lookup and counted as a hit."""
         await cache.set("diff", "security", "v1.0", self._response("cached content"))
         fetched = await cache.get("diff", "security", "v1.0")
         assert fetched is not None
         assert fetched.content == "cached content"
         assert cache._hits == 1
     async def test_cache_invalidate(self, cache: LLMCache) -> None:
         """Invalidating a stored entry reports ``True`` and the entry is gone afterwards."""
         lookup = ("diff", "security", "v1.0")
         await cache.set(*lookup, self._response("test"))
         was_deleted = await cache.invalidate(*lookup)
         assert was_deleted is True
         after = await cache.get(*lookup)
         assert after is None
     async def test_cache_invalidate_nonexistent(self, cache: LLMCache) -> None:
         """Invalidating an entry that was never stored reports ``False``."""
         was_deleted = await cache.invalidate("nonexistent", "security", "v1.0")
         assert was_deleted is False
     def test_get_stats(self, cache: LLMCache) -> None:
         """A cache with no traffic reports zero counters and a 0.0 hit rate."""
         fresh = cache.get_stats()
         for counter in ("hits", "misses", "total"):
             assert fresh[counter] == 0
         assert fresh["hit_rate"] == 0.0
     async def test_get_stats_after_operations(self, cache: LLMCache) -> None:
         """Two misses followed by one hit are reflected in the statistics."""
         for unseen in ("key1", "key2"):
             await cache.get(unseen, "agent", "v1")  # miss
         await cache.set("key1", "agent", "v1", self._response("test"))
         await cache.get("key1", "agent", "v1")  # hit
         report = cache.get_stats()
         assert report["hits"] == 1
         assert report["misses"] == 2
         assert report["total"] == 3
         assert report["hit_rate"] == pytest.approx(1 / 3)
@@ -0,0 +1,157 @@
 """Tests for the cost tracking module."""
 import pytest
 from arbiter.models.cost import AgentCost, CostEstimate, ReviewCost
 from arbiter.models.enums import AgentName
 class TestAgentCost:
     """Construction checks for the ``AgentCost`` model."""
     def test_agent_cost_creation(self) -> None:
         """Explicitly supplied values can be read back from the instance."""
         supplied = {
             "agent": AgentName.SECURITY,
             "tokens_in": 100,
             "tokens_out": 50,
             "total_tokens": 150,
             "cost_usd": 0.01,
         }
         agent_cost = AgentCost(**supplied)
         assert agent_cost.agent == AgentName.SECURITY
         assert agent_cost.total_tokens == 150
         assert agent_cost.cost_usd == 0.01
     def test_agent_cost_defaults(self) -> None:
         """With only the agent given, every token counter and the cost are zero."""
         blank = AgentCost(agent=AgentName.STYLE)
         for counter in ("tokens_in", "tokens_out", "total_tokens"):
             assert getattr(blank, counter) == 0
         assert blank.cost_usd == 0.0
 class TestReviewCost:
     """Tests for ``ReviewCost`` accumulation, export helpers and budget checks.
     Cost values produced by ``add_*`` calls are compared with ``pytest.approx``
     so the assertions do not depend on exact binary floating-point results.
     """
     def test_review_cost_defaults(self) -> None:
         """A new ``ReviewCost`` starts with zero totals, no agent costs and no cache counts."""
         cost = ReviewCost()
         assert cost.total_tokens == 0
         assert cost.total_cost_usd == 0.0
         assert cost.agent_costs == []
         assert cost.cache_hits == 0
         assert cost.cache_misses == 0
     def test_add_agent_cost(self) -> None:
         """Two agent entries are recorded and summed into the totals."""
         cost = ReviewCost()
         cost.add_agent_cost(AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01)
         cost.add_agent_cost(AgentName.STYLE, tokens_in=80, tokens_out=40, cost_usd=0.008)
         assert len(cost.agent_costs) == 2
         assert cost.total_tokens_in == 180
         assert cost.total_tokens_out == 90
         assert cost.total_tokens == 270
         assert cost.total_cost_usd == pytest.approx(0.018)
     def test_add_deliberation_cost(self) -> None:
         """Deliberation usage is stored on its own fields and counted in ``total_tokens``."""
         cost = ReviewCost()
         cost.add_deliberation_cost(tokens_in=50, tokens_out=100, cost_usd=0.005)
         assert cost.deliberation_tokens_in == 50
         assert cost.deliberation_tokens_out == 100
         # Float comparison kept tolerant, matching the other cost assertions.
         assert cost.deliberation_cost_usd == pytest.approx(0.005)
         assert cost.total_tokens == 150
     def test_combined_costs(self) -> None:
         """Agent and deliberation usage add up in the combined totals."""
         cost = ReviewCost()
         cost.add_agent_cost(AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01)
         cost.add_deliberation_cost(tokens_in=50, tokens_out=25, cost_usd=0.005)
         assert cost.total_tokens_in == 150
         assert cost.total_tokens_out == 75
         assert cost.total_tokens == 225
         assert cost.total_cost_usd == pytest.approx(0.015)
     def test_to_agent_dict(self) -> None:
         """``to_agent_dict`` maps each agent name to its total token count."""
         cost = ReviewCost()
         cost.add_agent_cost(AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01)
         cost.add_agent_cost(AgentName.STYLE, tokens_in=80, tokens_out=40, cost_usd=0.008)
         agent_dict = cost.to_agent_dict()
         assert agent_dict == {"security": 150, "style": 120}
     def test_to_cost_dict(self) -> None:
         """``to_cost_dict`` maps each agent name to its cost in USD."""
         cost = ReviewCost()
         cost.add_agent_cost(AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01)
         cost.add_agent_cost(AgentName.STYLE, tokens_in=80, tokens_out=40, cost_usd=0.008)
         cost_dict = cost.to_cost_dict()
         # approx on a dict still requires the key sets to match exactly.
         assert cost_dict == pytest.approx({"security": 0.01, "style": 0.008})
     def test_is_within_budget_true(self) -> None:
         """Usage below both limits is within budget."""
         cost = ReviewCost()
         cost.add_agent_cost(AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=0.01)
         assert cost.is_within_budget(max_tokens=1000, max_cost_usd=0.50) is True
     def test_is_within_budget_false_tokens(self) -> None:
         """1500 tokens against a 1000-token limit is over budget."""
         cost = ReviewCost()
         cost.add_agent_cost(AgentName.SECURITY, tokens_in=1000, tokens_out=500, cost_usd=0.01)
         assert cost.is_within_budget(max_tokens=1000, max_cost_usd=0.50) is False
     def test_is_within_budget_false_cost(self) -> None:
         """A 1.0 USD cost against a 0.50 USD limit is over budget."""
         cost = ReviewCost()
         cost.add_agent_cost(AgentName.SECURITY, tokens_in=100, tokens_out=50, cost_usd=1.0)
         assert cost.is_within_budget(max_tokens=10000, max_cost_usd=0.50) is False
 class TestCostEstimate:
     """Tests for the ``CostEstimate.estimate`` factory."""
     def test_estimate_small_diff(self) -> None:
         """A 1000-character diff with two agents yields a positive, in-budget estimate."""
         chosen_agents = [AgentName.SECURITY, AgentName.STYLE]
         result = CostEstimate.estimate(
             diff_size=1000,
             agents=chosen_agents,
             model="gpt-4o-mini",
         )
         assert result.estimated_tokens > 0
         assert result.estimated_cost_usd > 0
         assert result.agents_enabled == [AgentName.SECURITY, AgentName.STYLE]
         assert result.model == "gpt-4o-mini"
         assert result.within_budget is True
     def test_estimate_large_diff(self) -> None:
         """A large diff under tight limits is reported as over budget."""
         result = CostEstimate.estimate(
             diff_size=100000,
             agents=[AgentName.SECURITY, AgentName.STYLE, AgentName.COMPLEXITY],
             model="gpt-4o",
             max_tokens=10000,
             max_cost_usd=0.10,
         )
         # Large diff with expensive model should exceed budget
         assert result.within_budget is False
     def test_estimate_gpt4o_vs_mini(self) -> None:
         """For the same input, ``gpt-4o`` is estimated to cost more than ``gpt-4o-mini``."""
         by_model = {
             model_name: CostEstimate.estimate(
                 diff_size=10000,
                 agents=[AgentName.SECURITY],
                 model=model_name,
             )
             for model_name in ("gpt-4o", "gpt-4o-mini")
         }
         full, mini = by_model["gpt-4o"], by_model["gpt-4o-mini"]
         assert full.estimated_cost_usd > mini.estimated_cost_usd
     def test_estimate_more_agents_higher_cost(self) -> None:
         """Three agents are estimated to use more tokens and cost more than one."""
         single = CostEstimate.estimate(
             diff_size=5000,
             agents=[AgentName.SECURITY],
             model="gpt-4o",
         )
         triple = CostEstimate.estimate(
             diff_size=5000,
             agents=[AgentName.SECURITY, AgentName.STYLE, AgentName.COMPLEXITY],
             model="gpt-4o",
         )
         assert triple.estimated_tokens > single.estimated_tokens
         assert triple.estimated_cost_usd > single.estimated_cost_usd
@@ -0,0 +1,222 @@
 """Tests for database models."""
 from uuid import uuid4
 from arbiter.db.models import (
    Base,
    ConflictModel,
    DeliberationStepModel,
    FindingModel,
    PolicyModel,
    ReviewModel,
 )
 from arbiter.deliberation.conflicts import ConflictNature
 from arbiter.deliberation.coordinator import StepType
 from arbiter.models.enums import AgentName, Severity, Verdict
 class TestReviewModel:
     """Attribute round-trip checks for ``ReviewModel`` instances built in memory."""
     def test_review_model_creation(self) -> None:
         """Basic PR fields passed to the constructor are readable as attributes."""
         record = ReviewModel(
             id=str(uuid4()),
             repository="owner/repo",
             pr_number=42,
             pr_title="Test PR",
             base_sha="abc1234567890123456789012345678901234567",
             head_sha="def1234567890123456789012345678901234567",
             author="testuser",
             is_draft=False,
             status="pending",
         )
         assert (record.repository, record.pr_number) == ("owner/repo", 42)
         assert record.status == "pending"
         assert record.is_draft is False
     def test_review_model_with_verdict(self) -> None:
         """Verdict and confidence values are kept as given."""
         record = ReviewModel(
             id=str(uuid4()),
             repository="owner/repo",
             pr_number=1,
             base_sha="a" * 40,
             head_sha="b" * 40,
             status="completed",
             verdict=Verdict.COMMENT,
             verdict_confidence=0.75,
             verdict_reasoning="Found some issues",
         )
         assert record.verdict == Verdict.COMMENT
         assert record.verdict_confidence == 0.75
     def test_review_model_cost_tracking(self) -> None:
         """Token and cost totals, including the per-agent breakdown, are kept as given."""
         per_agent_tokens = {"security": 500, "style": 500, "complexity": 500}
         per_agent_cost = {"security": 0.005, "style": 0.005, "complexity": 0.005}
         record = ReviewModel(
             id=str(uuid4()),
             repository="owner/repo",
             pr_number=1,
             base_sha="a" * 40,
             head_sha="b" * 40,
             total_tokens=1500,
             total_cost_usd=0.015,
             tokens_by_agent=per_agent_tokens,
             cost_by_agent=per_agent_cost,
         )
         assert record.total_tokens == 1500
         assert record.total_cost_usd == 0.015
         assert record.tokens_by_agent["security"] == 500
 class TestFindingModel:
     """Attribute round-trip check for ``FindingModel``."""
     def test_finding_model_creation(self) -> None:
         """Agent, severity, confidence and line range are kept as given."""
         location = {"file": "src/auth.py", "line_start": 10, "line_end": 15}
         record = FindingModel(
             id=str(uuid4()),
             review_id=str(uuid4()),
             agent=AgentName.SECURITY,
             severity=Severity.HIGH,
             confidence=0.9,
             title="SQL Injection",
             description="User input concatenated in SQL",
             reasoning="String concatenation allows injection",
             suggestion="Use parameterized queries",
             references=["https://owasp.org"],
             prompt_version="security-v1.0",
             **location,
         )
         assert record.agent == AgentName.SECURITY
         assert record.severity == Severity.HIGH
         assert record.confidence == 0.9
         assert (record.line_start, record.line_end) == (10, 15)
 class TestConflictModel:
     """Attribute round-trip checks for ``ConflictModel``."""
     def test_conflict_model_creation(self) -> None:
         """Nature, finding ids and severity weight are kept as given."""
         involved = ["finding-1", "finding-2"]
         record = ConflictModel(
             id=str(uuid4()),
             review_id=str(uuid4()),
             finding_ids=involved,
             nature=ConflictNature.TRADE_OFF,
             description="Security vs simplicity trade-off",
             severity_weight=0.7,
         )
         assert record.nature == ConflictNature.TRADE_OFF
         assert len(record.finding_ids) == 2
         assert record.severity_weight == 0.7
     def test_conflict_model_with_resolution(self) -> None:
         """A resolution and a winning finding id can be supplied at construction."""
         record = ConflictModel(
             id=str(uuid4()),
             review_id=str(uuid4()),
             finding_ids=["finding-1", "finding-2"],
             nature=ConflictNature.CONTRADICTORY,
             description="Opposing recommendations",
             severity_weight=0.8,
             resolution="Security takes precedence",
             winning_finding_id="finding-1",
         )
         assert record.resolution is not None
         assert record.winning_finding_id == "finding-1"
 class TestDeliberationStepModel:
     """Attribute round-trip checks for ``DeliberationStepModel``."""
     def test_deliberation_step_creation(self) -> None:
         """Step type, sequence and the details mapping are kept as given."""
         record = DeliberationStepModel(
             id=str(uuid4()),
             review_id=str(uuid4()),
             step_type=StepType.MERGE,
             description="Merged 5 findings",
             details={"groups": 3, "unique": 5},
             sequence=0,
         )
         assert record.step_type == StepType.MERGE
         assert record.sequence == 0
         assert record.details["groups"] == 3
     def test_all_step_types(self) -> None:
         """One step per ``StepType`` member used here can be built for a single review."""
         review_id = str(uuid4())
         # (step type, description) in the order the steps are sequenced.
         plan = [
             (StepType.MERGE, "Merge step"),
             (StepType.CONFLICT_DETECTION, "Conflict detection step"),
             (StepType.SYNTHESIS, "Synthesis step"),
             (StepType.VERDICT, "Verdict step"),
         ]
         records = [
             DeliberationStepModel(
                 id=str(uuid4()),
                 review_id=review_id,
                 step_type=kind,
                 description=text,
                 sequence=position,
             )
             for position, (kind, text) in enumerate(plan)
         ]
         assert len(records) == 4
         assert records[0].step_type == StepType.MERGE
         assert records[3].step_type == StepType.VERDICT
 class TestPolicyModel:
     """Attribute round-trip checks for ``PolicyModel``."""
     def test_policy_model_creation(self) -> None:
         """Name, organization and the active flag are kept as given."""
         record = PolicyModel(
             id=str(uuid4()),
             name="default",
             organization="test-org",
             description="Default policy",
             is_active=True,
         )
         assert (record.name, record.organization) == ("default", "test-org")
         assert record.is_active is True
     def test_policy_model_with_config(self) -> None:
         """Nested configuration mappings can be read back by key."""
         agents = {
             "security": {"enabled": True, "model": "gpt-4o"},
             "style": {"enabled": True},
             "complexity": {"enabled": False},
         }
         limits = {
             "max_tokens": 50000,
             "max_cost_usd": 0.50,
         }
         thresholds = {
             "critical_threshold": 1,
             "high_threshold": 3,
         }
         record = PolicyModel(
             id=str(uuid4()),
             name="strict",
             agents_config=agents,
             cost_controls=limits,
             verdict_thresholds=thresholds,
         )
         assert record.agents_config["security"]["model"] == "gpt-4o"
         assert record.cost_controls["max_tokens"] == 50000
 class TestBase:
     """Sanity check on the shared ``Base`` class."""
     def test_base_is_declarative_base(self) -> None:
         """``Base`` exposes both a ``metadata`` and a ``registry`` attribute."""
         for attribute in ("metadata", "registry"):
             assert hasattr(Base, attribute)
@@ -0,0 +1,959 @@
 """Tests for the worker module."""
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 from unittest.mock import patch
 from uuid import uuid4
 import pytest
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 from arbiter.db.models import (
    ConflictModel,
    DeliberationStepModel,
    FindingModel,
    ReviewModel,
 )
 from arbiter.integrations import ARBITER_MARKER
 from arbiter.integrations.base import Comment, CommitStatus, Platform
 from arbiter.models.enums import AgentName, Severity, Verdict
 from arbiter.worker.queue import JobPriority, cancel_job, generate_job_id, get_job_status
 from arbiter.worker.tasks import (
    _post_or_update_comment,
    _verdict_to_status,
    detect_platform,
    get_platform_client,
    process_followup,
    process_review,
 )
 from tests.conftest import MockPlatformClient
 class TestJobQueue:
     """Tests for ``generate_job_id`` and the ``JobPriority`` values."""
     def test_generate_job_id_deterministic(self) -> None:
         """The same repo, PR number and SHA always give the same id."""
         first = generate_job_id("owner/repo", 42, "abc123")
         second = generate_job_id("owner/repo", 42, "abc123")
         assert first == second
     def test_job_id_unique(self) -> None:
         """Changing the SHA, the PR number or the repository changes the id."""
         inputs = [
             ("owner/repo", 42, "abc123"),
             ("owner/repo", 42, "def456"),  # Different SHA
             ("owner/repo", 43, "abc123"),  # Different PR
             ("other/repo", 42, "abc123"),  # Different repo
         ]
         ids = {generate_job_id(*args) for args in inputs}
         assert len(ids) == 4  # All unique
     def test_generate_job_id_format(self) -> None:
         """The id is 16 characters long and uses only lowercase hex digits."""
         produced = generate_job_id("owner/repo", 42, "abc123")
         assert len(produced) == 16
         assert set(produced) <= set("0123456789abcdef")
     def test_job_priority_ordering(self) -> None:
         """HIGH sorts before NORMAL before LOW, with integer values 1, 2 and 3."""
         assert JobPriority.HIGH < JobPriority.NORMAL < JobPriority.LOW
         expected_values = {JobPriority.HIGH: 1, JobPriority.NORMAL: 2, JobPriority.LOW: 3}
         for level, number in expected_values.items():
             assert int(level) == number
 class TestWorkerSettings:
     """Presence checks on the ``WorkerSettings`` class attributes."""
     def test_worker_settings_has_functions(self) -> None:
         """``functions`` is set and not empty."""
         from arbiter.worker.settings import WorkerSettings
         registered = WorkerSettings.functions
         assert registered is not None
         assert len(registered) > 0
     def test_worker_settings_has_cron_jobs(self) -> None:
         """``cron_jobs`` is set."""
         from arbiter.worker.settings import WorkerSettings
         assert WorkerSettings.cron_jobs is not None
     def test_worker_settings_lifecycle_hooks(self) -> None:
         """Both the startup and the shutdown hook are set."""
         from arbiter.worker.settings import WorkerSettings
         hooks = (WorkerSettings.on_startup, WorkerSettings.on_shutdown)
         assert hooks[0] is not None
         assert hooks[1] is not None
 class TestReviewTask:
     """Placeholder coverage for the ``process_review`` task."""
     @pytest.fixture
     def mock_context(self) -> dict[str, Any]:
         """Return a worker context whose ``settings`` and ``redis`` entries are ``None``."""
         return dict.fromkeys(("settings", "redis"))
     async def test_review_task_requires_diff(self, mock_context: dict[str, Any]) -> None:  # noqa: ARG002
         """Only confirms that ``process_review`` can be imported and is callable."""
         from arbiter.worker.tasks import process_review
         # Running the task end to end would need database fixtures, so for
         # now only the callable itself is checked.
         assert callable(process_review)
 class MockRedisForQueue:
    """Mock Redis with enqueue_job support."""
    def __init__(self) -> None:
        self._data: dict[str, Any] = {}
        self._jobs: list[dict[str, Any]] = []
    async def get(self, key: str) -> str | None:
        return self._data.get(key)
    async def set(self, key: str, value: str, ex: int | None = None) -> bool:  # noqa: ARG002
        self._data[key] = value
        return True
    async def delete(self, key: str) -> int:
        if key in self._data:
            del self._data[key]
            return 1
        return 0
    async def enqueue_job(self, func_name: str, **kwargs: Any) -> Any:
        job = {"func": func_name, "kwargs": kwargs}
        self._jobs.append(job)
        return type("Job", (), {"job_id": kwargs.get("_job_id", "test-id")})()
 class TestEnqueueReview:
     """Tests for ``enqueue_review`` with the Redis pool replaced by a mock."""
     @pytest.fixture
     def mock_redis_pool(self, monkeypatch: pytest.MonkeyPatch) -> MockRedisForQueue:
         """Patch ``arbiter.worker.queue.get_redis_pool`` to return one shared mock."""
         fake_pool = MockRedisForQueue()
         async def _fake_get_pool() -> MockRedisForQueue:
             return fake_pool
         monkeypatch.setattr("arbiter.worker.queue.get_redis_pool", _fake_get_pool)
         return fake_pool
     async def test_enqueue_review_creates_job(self, mock_redis_pool: MockRedisForQueue) -> None:
         """One ``process_review`` job is recorded and an id is returned."""
         from arbiter.worker.queue import enqueue_review
         returned_id = await enqueue_review(
             repository="owner/repo",
             pr_number=42,
             base_sha="abc123",
             head_sha="def456",
             pr_title="Test PR",
             author="testuser",
             is_draft=False,
         )
         assert returned_id is not None
         recorded = mock_redis_pool._jobs
         assert len(recorded) == 1
         assert recorded[0]["func"] == "process_review"
     async def test_enqueue_review_deduplication(
         self,
         mock_redis_pool: MockRedisForQueue,  # noqa: ARG002
     ) -> None:
         """A second call with identical arguments returns ``None``."""
         from arbiter.worker.queue import enqueue_review
         same_pr = {
             "repository": "owner/repo",
             "pr_number": 42,
             "base_sha": "abc123",
             "head_sha": "def456",
         }
         # First call should succeed
         first_id = await enqueue_review(**same_pr)
         assert first_id is not None
         # Second call with same params should be deduplicated
         second_id = await enqueue_review(**same_pr)
         assert second_id is None
     async def test_enqueue_review_draft_lower_priority(
         self, mock_redis_pool: MockRedisForQueue
     ) -> None:
         """A draft PR is enqueued with a queue name containing ``arbiter:queue:3``."""
         from arbiter.worker.queue import enqueue_review
         await enqueue_review(
             repository="owner/repo",
             pr_number=42,
             base_sha="abc123",
             head_sha="def456",
             is_draft=True,
         )
         recorded = mock_redis_pool._jobs
         assert len(recorded) == 1
         queue_name = recorded[0]["kwargs"]["_queue_name"]
         # Draft PRs should be in the low priority queue
         assert "arbiter:queue:3" in queue_name
 class TestJobStatusAndCancel:
     """Tests for ``get_job_status`` and ``cancel_job`` against a pre-seeded mock pool."""
     @pytest.fixture
     def mock_redis_pool_with_jobs(self, monkeypatch: pytest.MonkeyPatch) -> MockRedisForQueue:
         """Patch ``get_redis_pool`` to return a mock holding one ``pending`` job entry."""
         seeded = MockRedisForQueue()
         seeded._data["arbiter:job:test-job-id"] = "pending"
         async def _fake_get_pool() -> MockRedisForQueue:
             return seeded
         monkeypatch.setattr("arbiter.worker.queue.get_redis_pool", _fake_get_pool)
         return seeded
     async def test_get_job_status_found(
         self,
         mock_redis_pool_with_jobs: MockRedisForQueue,  # noqa: ARG002
     ) -> None:
         """The seeded job is reported with its id and ``pending`` status."""
         info = await get_job_status("test-job-id")
         assert info is not None
         assert info["job_id"] == "test-job-id"
         assert info["status"] == "pending"
     async def test_get_job_status_not_found(
         self,
         mock_redis_pool_with_jobs: MockRedisForQueue,  # noqa: ARG002
     ) -> None:
         """An unknown job id yields ``None``."""
         info = await get_job_status("nonexistent")
         assert info is None
     async def test_cancel_job_success(
         self,
         mock_redis_pool_with_jobs: MockRedisForQueue,  # noqa: ARG002
     ) -> None:
         """Cancelling the seeded job returns ``True``."""
         cancelled = await cancel_job("test-job-id")
         assert cancelled is True
     async def test_cancel_job_not_found(
         self,
         mock_redis_pool_with_jobs: MockRedisForQueue,  # noqa: ARG002
     ) -> None:
         """Cancelling an unknown job id returns ``False``."""
         cancelled = await cancel_job("nonexistent")
         assert cancelled is False
 class TestWorkerStartupShutdown:
     """Tests for the worker lifecycle hooks and ``WorkerSettings`` helpers.
     ``init_db`` and ``close_db`` are replaced with ``AsyncMock`` and verified
     with ``assert_awaited_once`` so that a hook which calls them without
     awaiting the result fails the test.
     """
     async def test_startup_hook(self) -> None:
         """``startup`` awaits ``init_db`` once and stores the settings in the context."""
         from unittest.mock import AsyncMock
         from arbiter.worker.settings import startup
         # Mock init_db and get_settings
         with (
             patch("arbiter.worker.settings.init_db", new_callable=AsyncMock) as mock_init,
             patch("arbiter.worker.settings.get_settings") as mock_settings,
         ):
             mock_settings.return_value = "mock_settings"
             ctx: dict[str, Any] = {}
             await startup(ctx)
             # assert_called_once would also pass for an un-awaited coroutine.
             mock_init.assert_awaited_once()
             assert ctx["settings"] == "mock_settings"
     async def test_shutdown_hook(self) -> None:
         """``shutdown`` awaits ``close_db`` exactly once."""
         from unittest.mock import AsyncMock
         from arbiter.worker.settings import shutdown
         with patch("arbiter.worker.settings.close_db", new_callable=AsyncMock) as mock_close:
             ctx: dict[str, Any] = {}
             await shutdown(ctx)
             mock_close.assert_awaited_once()
     async def test_health_check(self) -> None:
         """``health_check`` returns the string ``"healthy"`` for an empty context."""
         from arbiter.worker.settings import health_check
         ctx: dict[str, Any] = {}
         result = await health_check(ctx)
         assert result == "healthy"
     def test_worker_settings_redis_settings(self) -> None:
         """``WorkerSettings.redis_settings()`` returns a value."""
         from arbiter.worker.settings import WorkerSettings
         redis_settings = WorkerSettings.redis_settings()
         assert redis_settings is not None
     def test_worker_settings_get_functions(self) -> None:
         """``_get_functions`` returns exactly the review and follow-up tasks."""
         from arbiter.worker.settings import WorkerSettings
         functions = WorkerSettings._get_functions()
         assert len(functions) == 2
         # Verify the functions are the expected ones
         func_names = {f.__name__ for f in functions}
         assert "process_review" in func_names
         assert "process_followup" in func_names
 class TestDetectPlatform:
     """Tests for ``detect_platform`` with and without a platform hint."""
     def test_detect_platform_from_webhook_github(self) -> None:
         """The hint ``github`` resolves to ``Platform.GITHUB``."""
         assert detect_platform("owner/repo", "github") == Platform.GITHUB
     def test_detect_platform_from_webhook_gitlab(self) -> None:
         """The hint ``gitlab`` resolves to ``Platform.GITLAB``."""
         assert detect_platform("owner/repo", "gitlab") == Platform.GITLAB
     def test_detect_platform_case_insensitive(self) -> None:
         """Upper- and mixed-case hints resolve to the same platforms."""
         expectations = {"GITHUB": Platform.GITHUB, "GitLab": Platform.GITLAB}
         for hint, expected in expectations.items():
             assert detect_platform("owner/repo", hint) == expected
     def test_detect_platform_defaults_to_github(self) -> None:
         """Without a hint the result is ``Platform.GITHUB``."""
         detected = detect_platform("owner/repo")
         assert detected == Platform.GITHUB
 class TestGetPlatformClient:
     """Tests for ``get_platform_client`` using the settings fixtures."""
     def test_get_platform_client_github_no_token(self, mock_settings_no_github: Any) -> None:
         """With the ``mock_settings_no_github`` fixture, no GitHub client is returned."""
         assert get_platform_client(Platform.GITHUB, mock_settings_no_github) is None
     def test_get_platform_client_gitlab_no_token(self, mock_settings_no_github: Any) -> None:
         """With the ``mock_settings_no_github`` fixture, no GitLab client is returned."""
         assert get_platform_client(Platform.GITLAB, mock_settings_no_github) is None
     def test_github_client_with_token(self, mock_settings: Any) -> None:
         """With the ``mock_settings`` fixture, a ``GitHubClient`` is returned."""
         from arbiter.integrations import GitHubClient
         built = get_platform_client(Platform.GITHUB, mock_settings)
         assert built is not None
         assert isinstance(built, GitHubClient)
     def test_gitlab_client_with_token(self, mock_settings: Any) -> None:
         """With the ``mock_settings`` fixture, a ``GitLabClient`` is returned."""
         from arbiter.integrations import GitLabClient
         built = get_platform_client(Platform.GITLAB, mock_settings)
         assert built is not None
         assert isinstance(built, GitLabClient)
 class TestVerdictToStatus:
     """Tests for the verdict-to-commit-status mapping in ``_verdict_to_status``."""
     def test_approve_returns_success(self) -> None:
         """``APPROVE`` maps to ``SUCCESS``."""
         status = _verdict_to_status(Verdict.APPROVE)
         assert status == CommitStatus.SUCCESS
     def test_request_changes_returns_failure(self) -> None:
         """``REQUEST_CHANGES`` maps to ``FAILURE``."""
         status = _verdict_to_status(Verdict.REQUEST_CHANGES)
         assert status == CommitStatus.FAILURE
     def test_comment_returns_success(self) -> None:
         """``COMMENT`` maps to ``SUCCESS``."""
         status = _verdict_to_status(Verdict.COMMENT)
         assert status == CommitStatus.SUCCESS
 class TestPostOrUpdateComment:
     """Tests for ``_post_or_update_comment`` using the ``mock_platform_client`` fixture."""
     async def test_post_new_comment(self, mock_platform_client: MockPlatformClient) -> None:
         """With the fixture as provided, one comment is recorded and the URL names the repo."""
         text = f"Test comment {ARBITER_MARKER}"
         link = await _post_or_update_comment(mock_platform_client, "owner/repo", 42, text)
         assert link is not None
         assert "owner/repo" in link
         recorded = mock_platform_client._posted_comments
         assert len(recorded) == 1
         assert recorded[0]["body"] == text
     async def test_update_existing_comment(self, mock_platform_client: MockPlatformClient) -> None:
         """When a marked comment already exists, the recorded post carries its id."""
         # Add an existing Arbiter comment
         previous = Comment(
             id="existing-123",
             body=f"Old review {ARBITER_MARKER}",
             author="arbiter-bot",
             url="https://github.com/owner/repo/pull/42#comment-existing-123",
             created_at=datetime.now(UTC),
         )
         mock_platform_client._comments = [previous]
         text = f"Updated review {ARBITER_MARKER}"
         link = await _post_or_update_comment(mock_platform_client, "owner/repo", 42, text)
         assert link is not None
         recorded = mock_platform_client._posted_comments
         assert len(recorded) == 1
         # Should be an update, not a new post
         assert recorded[0].get("comment_id") == "existing-123"
     async def test_fallback_on_fetch_failure(
         self, mock_platform_client: MockPlatformClient
     ) -> None:
         """If fetching comments fails, a new comment is still posted."""
         mock_platform_client._fail_on.add("get_comments")
         text = f"Test comment {ARBITER_MARKER}"
         link = await _post_or_update_comment(mock_platform_client, "owner/repo", 42, text)
         # Should still post a new comment
         assert link is not None
         recorded = mock_platform_client._posted_comments
         assert len(recorded) == 1
         # Should be a new post since fetching failed
         assert recorded[0].get("comment_id") is None
     async def test_returns_none_on_post_failure(
         self, mock_platform_client: MockPlatformClient
     ) -> None:
         """If posting fails, the helper returns ``None``."""
         mock_platform_client._fail_on.add("post_comment")
         text = f"Test comment {ARBITER_MARKER}"
         link = await _post_or_update_comment(mock_platform_client, "owner/repo", 42, text)
         assert link is None
 class TestProcessReview:
    """Tests for ``process_review`` with its collaborators in ``arbiter.worker.tasks`` patched."""

    @staticmethod
    def _patched_worker(
        db_session: AsyncSession,
        *,
        pipeline: dict[str, Any] | None = None,
        platform_client: Any = None,
        settings: Any = None,
    ) -> Any:
        """Start the patches shared by every test here and return them as one context manager.

        Args:
            db_session: Session handed back by the callable that the patched
                ``async_session_factory`` returns.
            pipeline: Keyword arguments forwarded to ``patch`` for
                ``_run_review_pipeline`` (``return_value`` or ``side_effect``).
                ``None`` leaves the pipeline unpatched.
            platform_client: Return value of the patched ``get_platform_client``.
            settings: Return value of the patched ``get_settings``; ``None``
                leaves ``get_settings`` unpatched.

        Returns:
            A ``contextlib.ExitStack`` owning the started patches. Use it in a
            ``with`` statement; leaving the block undoes every patch.
        """
        from contextlib import ExitStack

        with ExitStack() as stack:
            if pipeline is not None:
                stack.enter_context(patch("arbiter.worker.tasks._run_review_pipeline", **pipeline))
            stack.enter_context(
                patch(
                    "arbiter.worker.tasks.async_session_factory",
                    return_value=lambda: db_session,
                )
            )
            stack.enter_context(
                patch("arbiter.worker.tasks.get_platform_client", return_value=platform_client)
            )
            if settings is not None:
                stack.enter_context(
                    patch("arbiter.worker.tasks.get_settings", return_value=settings)
                )
            # Transfer ownership to the caller. If starting any patch above
            # raised, the surrounding ``with`` already rolled back the others.
            return stack.pop_all()

    @pytest.fixture
    def mock_deliberation_result(self) -> Any:
        """Build a ``DeliberationResult`` with one finding, one conflict and one step."""
        from arbiter.deliberation import DeliberationResult, DeliberationStep
        from arbiter.deliberation.conflicts import Conflict, ConflictNature
        from arbiter.deliberation.coordinator import StepType
        from arbiter.models import Finding

        finding = Finding(
            id=str(uuid4()),
            agent=AgentName.SECURITY,
            file="src/auth.py",
            line_start=10,
            line_end=15,
            severity=Severity.HIGH,
            confidence=0.9,
            title="SQL Injection",
            description="User input concatenated into SQL",
            reasoning="Allows SQL injection attacks",
            prompt_version="security-v1.0",
        )
        return DeliberationResult(
            verdict=Verdict.COMMENT,
            verdict_confidence=0.75,
            verdict_reasoning="Found security issues",
            findings=[finding],
            conflicts=[
                Conflict(
                    id="conflict-1",
                    finding_ids=["f1", "f2"],
                    nature=ConflictNature.TRADE_OFF,
                    description="Trade-off detected",
                    severity_weight=0.5,
                )
            ],
            steps=[
                DeliberationStep(
                    step_type=StepType.MERGE,
                    timestamp=datetime.now(UTC),
                    description="Merged findings",
                    details={"count": 1},
                )
            ],
            tokens_used=500,
            cost_usd=0.005,
        )

    @pytest.fixture
    def mock_review_result(self) -> Any:
        """Build a security-agent ``ReviewResult`` that carries no findings of its own."""
        from arbiter.models import ReviewResult

        return ReviewResult(
            agent_name=AgentName.SECURITY,
            findings=[],
            duration_ms=1000,
            tokens_used=500,
            cost_usd=0.005,
        )

    async def test_process_review_creates_review_record(
        self,
        db_session: AsyncSession,
        mock_deliberation_result: Any,
        mock_review_result: Any,
    ) -> None:
        """A successful run stores a completed review row with the verdict."""
        pipeline = {"return_value": ([mock_review_result], mock_deliberation_result)}
        with self._patched_worker(db_session, pipeline=pipeline):
            review_id = await process_review(
                {},
                repository="owner/repo",
                pr_number=42,
                base_sha="abc123",
                head_sha="def456",
                pr_title="Test PR",
                author="testuser",
                diff_content="mock diff",
            )
            # Verify review was created
            result = await db_session.execute(
                select(ReviewModel).where(ReviewModel.id == review_id)
            )
            review = result.scalar_one()
            assert review.repository == "owner/repo"
            assert review.pr_number == 42
            assert review.status == "completed"
            assert review.verdict == Verdict.COMMENT

    async def test_process_review_stores_findings(
        self,
        db_session: AsyncSession,
        mock_deliberation_result: Any,
        mock_review_result: Any,
    ) -> None:
        """The finding from the deliberation result is stored against the review."""
        pipeline = {"return_value": ([mock_review_result], mock_deliberation_result)}
        with self._patched_worker(db_session, pipeline=pipeline):
            review_id = await process_review(
                {},
                repository="owner/repo",
                pr_number=42,
                base_sha="abc123",
                head_sha="def456",
                diff_content="mock diff",
            )
            # Verify findings were stored
            result = await db_session.execute(
                select(FindingModel).where(FindingModel.review_id == review_id)
            )
            findings = result.scalars().all()
            assert len(findings) == 1
            assert findings[0].title == "SQL Injection"
            assert findings[0].severity == Severity.HIGH

    async def test_process_review_stores_conflicts(
        self,
        db_session: AsyncSession,
        mock_deliberation_result: Any,
        mock_review_result: Any,
    ) -> None:
        """The conflict from the deliberation result is stored against the review."""
        pipeline = {"return_value": ([mock_review_result], mock_deliberation_result)}
        with self._patched_worker(db_session, pipeline=pipeline):
            review_id = await process_review(
                {},
                repository="owner/repo",
                pr_number=42,
                base_sha="abc123",
                head_sha="def456",
                diff_content="mock diff",
            )
            # Verify conflicts were stored
            result = await db_session.execute(
                select(ConflictModel).where(ConflictModel.review_id == review_id)
            )
            conflicts = result.scalars().all()
            assert len(conflicts) == 1
            assert conflicts[0].description == "Trade-off detected"

    async def test_process_review_stores_deliberation_steps(
        self,
        db_session: AsyncSession,
        mock_deliberation_result: Any,
        mock_review_result: Any,
    ) -> None:
        """The deliberation step from the result is stored against the review."""
        pipeline = {"return_value": ([mock_review_result], mock_deliberation_result)}
        with self._patched_worker(db_session, pipeline=pipeline):
            review_id = await process_review(
                {},
                repository="owner/repo",
                pr_number=42,
                base_sha="abc123",
                head_sha="def456",
                diff_content="mock diff",
            )
            # Verify deliberation steps were stored
            result = await db_session.execute(
                select(DeliberationStepModel).where(DeliberationStepModel.review_id == review_id)
            )
            steps = result.scalars().all()
            assert len(steps) == 1
            assert steps[0].description == "Merged findings"

    async def test_process_review_handles_errors(
        self,
        db_session: AsyncSession,
    ) -> None:
        """A pipeline error propagates and the review row is marked as failed."""
        pipeline = {"side_effect": ValueError("Test error")}
        with self._patched_worker(db_session, pipeline=pipeline):
            with pytest.raises(ValueError, match="Test error"):
                await process_review(
                    {},
                    repository="owner/repo",
                    pr_number=42,
                    base_sha="abc123",
                    head_sha="def456",
                    diff_content="mock diff",
                )
            # Verify review was marked as failed. No review id is available
            # because the call raised, so look the row up by repository.
            result = await db_session.execute(
                select(ReviewModel).where(ReviewModel.repository == "owner/repo")
            )
            review = result.scalar_one()
            assert review.status == "failed"
            assert "Test error" in (review.error_message or "")

    async def test_process_review_posts_comment(
        self,
        db_session: AsyncSession,
        mock_platform_client: MockPlatformClient,
        mock_deliberation_result: Any,
        mock_review_result: Any,
        mock_settings: Any,
    ) -> None:
        """With a platform client available, exactly one marker comment is posted."""
        pipeline = {"return_value": ([mock_review_result], mock_deliberation_result)}
        with self._patched_worker(
            db_session,
            pipeline=pipeline,
            platform_client=mock_platform_client,
            settings=mock_settings,
        ):
            await process_review(
                {},
                repository="owner/repo",
                pr_number=42,
                base_sha="abc123",
                head_sha="def456",
                diff_content="mock diff",
                platform="github",
            )
            # Verify comment was posted
            assert len(mock_platform_client._posted_comments) == 1
            assert ARBITER_MARKER in mock_platform_client._posted_comments[0]["body"]

    async def test_process_review_updates_status(
        self,
        db_session: AsyncSession,
        mock_platform_client: MockPlatformClient,
        mock_deliberation_result: Any,
        mock_review_result: Any,
        mock_settings: Any,
    ) -> None:
        """The last commit-status update recorded for the run is SUCCESS."""
        pipeline = {"return_value": ([mock_review_result], mock_deliberation_result)}
        with self._patched_worker(
            db_session,
            pipeline=pipeline,
            platform_client=mock_platform_client,
            settings=mock_settings,
        ):
            await process_review(
                {},
                repository="owner/repo",
                pr_number=42,
                base_sha="abc123",
                head_sha="def456",
                diff_content="mock diff",
                platform="github",
            )
            # Verify status was updated (pending then final)
            assert len(mock_platform_client._status_updates) >= 1
            # Last update should be the final status
            final_update = mock_platform_client._status_updates[-1]
            assert final_update["status"] == CommitStatus.SUCCESS

    async def test_process_review_requires_diff(
        self,
        db_session: AsyncSession,
    ) -> None:
        """Calling without ``diff_content`` raises ``ValueError``."""
        # The pipeline is deliberately left unpatched here.
        with (
            self._patched_worker(db_session),
            pytest.raises(ValueError, match="diff_content not provided"),
        ):
            await process_review(
                {},
                repository="owner/repo",
                pr_number=42,
                base_sha="abc123",
                head_sha="def456",
                # No diff_content
            )
 class TestProcessFollowup:
    """Cases in which ``process_followup`` is expected to return ``None``."""

    async def test_process_followup_no_review(
        self,
        db_session: AsyncSession,
        mock_settings: Any,
    ) -> None:
        """A follow-up for PR 999, for which no review fixture is loaded, yields ``None``."""
        session_patch = patch(
            "arbiter.worker.tasks.async_session_factory",
            return_value=lambda: db_session,
        )
        settings_patch = patch("arbiter.worker.tasks.get_settings", return_value=mock_settings)
        client_patch = patch("arbiter.worker.tasks.get_platform_client", return_value=None)

        with session_patch, settings_patch, client_patch:
            outcome = await process_followup(
                {},
                repository="owner/repo",
                pr_number=999,  # Non-existent PR
                comment_id="comment-123",
                comment_body="Why is this a security issue?",
                author="testuser",
                platform="github",
            )
            assert outcome is None

    async def test_process_followup_disabled(
        self,
        db_session: AsyncSession,
        completed_review_fixture: ReviewModel,  # noqa: ARG002
    ) -> None:
        """With ``followup_enabled`` false in settings, the follow-up yields ``None``."""

        class DisabledSettings:
            followup_enabled = False

        session_patch = patch(
            "arbiter.worker.tasks.async_session_factory",
            return_value=lambda: db_session,
        )
        settings_patch = patch(
            "arbiter.worker.tasks.get_settings",
            return_value=DisabledSettings(),
        )

        with session_patch, settings_patch:
            outcome = await process_followup(
                {},
                repository="owner/repo",
                pr_number=42,
                comment_id="comment-123",
                comment_body="Why is this a security issue?",
                author="testuser",
                platform="github",
            )
            assert outcome is None

    async def test_process_followup_not_a_question(
        self,
        db_session: AsyncSession,
        completed_review_fixture: ReviewModel,  # noqa: ARG002
        mock_settings: Any,
    ) -> None:
        """A comment that is a plain statement, not a question, yields ``None``."""
        session_patch = patch(
            "arbiter.worker.tasks.async_session_factory",
            return_value=lambda: db_session,
        )
        settings_patch = patch("arbiter.worker.tasks.get_settings", return_value=mock_settings)
        client_patch = patch("arbiter.worker.tasks.get_platform_client", return_value=None)

        with session_patch, settings_patch, client_patch:
            outcome = await process_followup(
                {},
                repository="owner/repo",
                pr_number=42,
                comment_id="comment-123",
                comment_body="This looks good to me.",  # Not a question
                author="testuser",
                platform="github",
            )
            assert outcome is None

    async def test_process_followup_low_confidence(
        self,
        db_session: AsyncSession,
        completed_review_fixture: ReviewModel,  # noqa: ARG002
    ) -> None:
        """With a 0.99 confidence threshold in settings, the question yields ``None``."""

        class HighThresholdSettings:
            followup_enabled = True
            followup_confidence_threshold = 0.99  # Very high threshold
            llm_timeout = 60
            llm_max_retries = 3
            templates_dir = Path("templates")
            post_comments = False

        session_patch = patch(
            "arbiter.worker.tasks.async_session_factory",
            return_value=lambda: db_session,
        )
        settings_patch = patch(
            "arbiter.worker.tasks.get_settings",
            return_value=HighThresholdSettings(),
        )
        client_patch = patch("arbiter.worker.tasks.get_platform_client", return_value=None)

        with session_patch, settings_patch, client_patch:
            outcome = await process_followup(
                {},
                repository="owner/repo",
                pr_number=42,
                comment_id="comment-123",
                comment_body="What does this mean?",  # A question but low confidence
                author="testuser",
                platform="github",
            )
            assert outcome is None
 class TestEnqueueFollowup:
    """Tests for ``enqueue_followup`` with the Redis pool replaced by a mock."""

    @pytest.fixture
    def mock_redis_pool_followup(self, monkeypatch: pytest.MonkeyPatch) -> "MockRedisForQueue":
        """Make ``arbiter.worker.queue.get_redis_pool`` return a ``MockRedisForQueue``."""
        fake_pool = MockRedisForQueue()

        async def _fake_get_redis_pool() -> MockRedisForQueue:
            return fake_pool

        monkeypatch.setattr("arbiter.worker.queue.get_redis_pool", _fake_get_redis_pool)
        return fake_pool

    async def test_enqueue_followup_creates_job(
        self, mock_redis_pool_followup: "MockRedisForQueue"
    ) -> None:
        """Enqueuing records a single ``process_followup`` job and returns its id."""
        from arbiter.worker.queue import enqueue_followup

        followup_args = {
            "repository": "owner/repo",
            "pr_number": 42,
            "comment_id": "comment-123",
            "comment_body": "Why is this a security issue?",
            "author": "testuser",
            "platform": "github",
        }
        job_id = await enqueue_followup(**followup_args)

        assert job_id is not None
        queued = mock_redis_pool_followup._jobs
        assert len(queued) == 1
        assert queued[0]["func"] == "process_followup"

    async def test_enqueue_followup_deduplication(
        self, mock_redis_pool_followup: "MockRedisForQueue"
    ) -> None:
        """A follow-up whose key is already present is skipped and returns ``None``."""
        from arbiter.worker.queue import enqueue_followup, generate_followup_job_id

        # Mark the job as already known before enqueuing.
        existing_id = generate_followup_job_id("owner/repo", 42, "comment-123")
        dedup_key = f"arbiter:followup:{existing_id}"
        mock_redis_pool_followup._data[dedup_key] = "pending"

        followup_args = {
            "repository": "owner/repo",
            "pr_number": 42,
            "comment_id": "comment-123",
            "comment_body": "Why is this a security issue?",
            "author": "testuser",
            "platform": "github",
        }
        outcome = await enqueue_followup(**followup_args)

        assert outcome is None
        # The duplicate must not have been queued.
        assert len(mock_redis_pool_followup._jobs) == 0
 class TestGenerateFollowupJobId:
    """Properties of ``generate_followup_job_id``."""

    def test_followup_job_id_stable(self) -> None:
        """Identical arguments produce identical job ids."""
        from arbiter.worker.queue import generate_followup_job_id

        args = ("owner/repo", 42, "comment-123")
        assert generate_followup_job_id(*args) == generate_followup_job_id(*args)

    def test_generate_followup_job_id_unique(self) -> None:
        """Changing the comment id, PR number or repository changes the job id."""
        from arbiter.worker.queue import generate_followup_job_id

        variants = [
            ("owner/repo", 42, "comment-123"),
            ("owner/repo", 42, "comment-456"),
            ("owner/repo", 43, "comment-123"),
            ("other/repo", 42, "comment-123"),
        ]
        job_ids = {generate_followup_job_id(*variant) for variant in variants}
        # Any collision would shrink the set below the number of variants.
        assert len(job_ids) == len(variants)