"""Tests for metric-based validators.""" import pytest from veritext.core.exceptions import InvalidThresholdError, ValidationError from veritext.core.types import ValidationContext from veritext.validators import bleu, lexical, rouge from veritext.validators.metric import BleuValidator, LexicalValidator, RougeValidator class TestBleuValidator: def test_bleu_passes_above_threshold(self) -> None: validator = BleuValidator(min_score=0.5, variant=4) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("the cat sat on the mat", context) assert result.passed is True assert result.name == "bleu-4" assert result.actual == 1.0 # Identical text assert result.threshold == 0.5 def test_bleu_fails_below_threshold(self) -> None: validator = BleuValidator(min_score=0.9, variant=4) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("a dog ran through the park", context) assert result.passed is False assert result.name == "bleu-4" assert result.actual < 0.9 assert "below minimum" in result.message def test_bleu_variant_selection(self) -> None: context = ValidationContext(reference="the quick brown fox jumps") for variant in (1, 2, 3, 4): validator = BleuValidator(min_score=0.0, variant=variant) # type: ignore[arg-type] result = validator.check("the quick brown fox", context) assert result.name == f"bleu-{variant}" def test_bleu_requires_reference(self) -> None: validator = BleuValidator(min_score=0.5) context = ValidationContext() with pytest.raises(ValidationError, match="requires reference text"): validator.check("some text", context) def test_bleu_rejects_invalid_score(self) -> None: with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"): BleuValidator(min_score=1.5) with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"): BleuValidator(min_score=-0.1) def test_bleu_rejects_invalid_variant(self) -> None: with pytest.raises(InvalidThresholdError, match="variant must be"): BleuValidator(min_score=0.5, variant=5) # type: ignore[arg-type] def test_bleu_factory(self) -> None: validator = bleu(min_score=0.6, variant=2) assert isinstance(validator, BleuValidator) assert validator.name == "bleu-2" class TestRougeValidator: def test_rouge_passes_above_threshold(self) -> None: validator = RougeValidator(min_score=0.5, variant="l") context = ValidationContext(reference="the cat sat on the mat") result = validator.check("the cat sat on the mat", context) assert result.passed is True assert result.name == "rouge-l" assert result.actual == 1.0 # Identical text assert result.threshold == 0.5 def test_rouge_fails_below_threshold(self) -> None: validator = RougeValidator(min_score=0.9, variant="l") context = ValidationContext(reference="the cat sat on the mat") result = validator.check("a dog ran through the park", context) assert result.passed is False assert result.actual < 0.9 assert "below minimum" in result.message def test_rouge_variant_selection(self) -> None: context = ValidationContext(reference="the quick brown fox jumps") for variant in ("1", "2", "l"): validator = RougeValidator(min_score=0.0, variant=variant) # type: ignore[arg-type] result = validator.check("the quick brown fox", context) assert result.name == f"rouge-{variant}" def test_rouge_requires_reference(self) -> None: validator = RougeValidator(min_score=0.5) context = ValidationContext() with pytest.raises(ValidationError, match="requires reference text"): validator.check("some text", context) def test_rouge_rejects_invalid_score(self) -> None: with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"): RougeValidator(min_score=1.5) def test_rouge_rejects_invalid_variant(self) -> None: with pytest.raises(InvalidThresholdError, match="variant must be"): RougeValidator(min_score=0.5, variant="3") # type: ignore[arg-type] def test_rouge_factory(self) -> None: validator = rouge(min_score=0.6, variant="2") assert isinstance(validator, RougeValidator) assert validator.name == "rouge-2" class TestLexicalValidator: def test_lexical_passes_jaccard(self) -> None: validator = LexicalValidator(min_jaccard=0.5) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("the cat sat on the mat", context) assert result.passed is True assert result.name == "lexical" assert result.actual["jaccard"] == 1.0 def test_lexical_fails_jaccard(self) -> None: validator = LexicalValidator(min_jaccard=0.9) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("a dog ran through the park", context) assert result.passed is False assert "Jaccard" in result.message assert "below minimum" in result.message def test_lexical_passes_overlap(self) -> None: validator = LexicalValidator(min_overlap=0.5) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("the cat sat on the mat", context) assert result.passed is True assert result.actual["token_overlap"] == 1.0 def test_lexical_fails_overlap(self) -> None: validator = LexicalValidator(min_overlap=0.9) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("a dog ran through", context) assert result.passed is False assert "overlap" in result.message def test_lexical_both_thresholds(self) -> None: validator = LexicalValidator(min_jaccard=0.3, min_overlap=0.5) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("the cat sat", context) # Should check both thresholds assert "min_jaccard" in result.threshold assert "min_overlap" in result.threshold def test_lexical_needs_threshold(self) -> None: with pytest.raises(InvalidThresholdError, match="At least one"): LexicalValidator() def test_lexical_rejects_bad_jaccard(self) -> None: with pytest.raises(InvalidThresholdError, match="min_jaccard"): LexicalValidator(min_jaccard=1.5) def test_lexical_rejects_bad_overlap(self) -> None: with pytest.raises(InvalidThresholdError, match="min_overlap"): LexicalValidator(min_overlap=-0.1) def test_lexical_requires_reference(self) -> None: validator = LexicalValidator(min_jaccard=0.5) context = ValidationContext() with pytest.raises(ValidationError, match="requires reference text"): validator.check("some text", context) def test_lexical_factory(self) -> None: validator = lexical(min_jaccard=0.5, min_overlap=0.6) assert isinstance(validator, LexicalValidator) assert validator.name == "lexical" # SemanticValidator tests - conditionally run if sentence-transformers is installed class TestSemanticValidator: @staticmethod def _skip_if_no_transformers() -> None: pytest.importorskip("sentence_transformers") def test_semantic_passes_above_threshold(self) -> None: self._skip_if_no_transformers() from veritext.validators.metric import SemanticValidator validator = SemanticValidator(min_score=0.5) context = ValidationContext(reference="the cat sat on the mat") result = validator.check("the cat sat on the mat", context) assert result.passed is True assert result.name == "semantic" assert result.actual >= 0.99 # Identical text assert result.threshold == 0.5 def test_semantic_fails_below_threshold(self) -> None: self._skip_if_no_transformers() from veritext.validators.metric import SemanticValidator validator = SemanticValidator(min_score=0.99) context = ValidationContext(reference="the cat sat on the mat") result = validator.check( "quantum physics describes particle behaviour", context ) assert result.passed is False assert result.name == "semantic" assert result.actual < 0.99 assert "below minimum" in result.message def test_semantic_requires_reference(self) -> None: self._skip_if_no_transformers() from veritext.validators.metric import SemanticValidator validator = SemanticValidator(min_score=0.5) context = ValidationContext() with pytest.raises(ValidationError, match="requires reference text"): validator.check("some text", context) def test_semantic_rejects_invalid_score(self) -> None: with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"): from veritext.validators.metric import SemanticValidator SemanticValidator(min_score=1.5) with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"): from veritext.validators.metric import SemanticValidator SemanticValidator(min_score=-0.1) def test_semantic_factory(self) -> None: self._skip_if_no_transformers() from veritext.validators import semantic from veritext.validators.metric import SemanticValidator validator = semantic(min_score=0.6) assert isinstance(validator, SemanticValidator) assert validator.name == "semantic"