test(validators): add validator tests
Add comprehensive tests for metric validators, constraint validators, and composite validators covering pass/fail cases and error handling.
This commit is contained in:
209
tests/test_validators/test_metric.py
Normal file
209
tests/test_validators/test_metric.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""Tests for metric-based validators."""
|
||||
|
||||
import pytest
|
||||
|
||||
from veritext.core.exceptions import InvalidThresholdError, ValidationError
|
||||
from veritext.core.types import ValidationContext
|
||||
from veritext.validators import bleu, lexical, rouge
|
||||
from veritext.validators.metric import BleuValidator, LexicalValidator, RougeValidator
|
||||
|
||||
|
||||
class TestBleuValidator:
|
||||
"""Tests for BleuValidator."""
|
||||
|
||||
def test_bleu_validator_passes_when_score_meets_threshold(self) -> None:
|
||||
"""Test that validator passes when BLEU score meets threshold."""
|
||||
validator = BleuValidator(min_score=0.5, variant=4)
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("the cat sat on the mat", context)
|
||||
|
||||
assert result.passed is True
|
||||
assert result.name == "bleu-4"
|
||||
assert result.actual == 1.0 # Identical text
|
||||
assert result.threshold == 0.5
|
||||
|
||||
def test_bleu_validator_fails_when_score_below_threshold(self) -> None:
|
||||
"""Test that validator fails when BLEU score is below threshold."""
|
||||
validator = BleuValidator(min_score=0.9, variant=4)
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("a dog ran through the park", context)
|
||||
|
||||
assert result.passed is False
|
||||
assert result.name == "bleu-4"
|
||||
assert result.actual < 0.9
|
||||
assert "below minimum" in result.message
|
||||
|
||||
def test_bleu_validator_variant_selection(self) -> None:
|
||||
"""Test different BLEU variants."""
|
||||
context = ValidationContext(reference="the quick brown fox jumps")
|
||||
|
||||
for variant in (1, 2, 3, 4):
|
||||
validator = BleuValidator(min_score=0.0, variant=variant) # type: ignore[arg-type]
|
||||
result = validator.check("the quick brown fox", context)
|
||||
assert result.name == f"bleu-{variant}"
|
||||
|
||||
def test_bleu_validator_raises_on_missing_reference(self) -> None:
|
||||
"""Test that validator raises when reference is missing."""
|
||||
validator = BleuValidator(min_score=0.5)
|
||||
context = ValidationContext()
|
||||
|
||||
with pytest.raises(ValidationError, match="requires reference text"):
|
||||
validator.check("some text", context)
|
||||
|
||||
def test_bleu_validator_raises_on_invalid_min_score(self) -> None:
|
||||
"""Test that invalid min_score raises error."""
|
||||
with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
|
||||
BleuValidator(min_score=1.5)
|
||||
|
||||
with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
|
||||
BleuValidator(min_score=-0.1)
|
||||
|
||||
def test_bleu_validator_raises_on_invalid_variant(self) -> None:
|
||||
"""Test that invalid variant raises error."""
|
||||
with pytest.raises(InvalidThresholdError, match="variant must be"):
|
||||
BleuValidator(min_score=0.5, variant=5) # type: ignore[arg-type]
|
||||
|
||||
def test_bleu_factory_function(self) -> None:
|
||||
"""Test the bleu() factory function."""
|
||||
validator = bleu(min_score=0.6, variant=2)
|
||||
assert isinstance(validator, BleuValidator)
|
||||
assert validator.name == "bleu-2"
|
||||
|
||||
|
||||
class TestRougeValidator:
|
||||
"""Tests for RougeValidator."""
|
||||
|
||||
def test_rouge_validator_passes_when_score_meets_threshold(self) -> None:
|
||||
"""Test that validator passes when ROUGE score meets threshold."""
|
||||
validator = RougeValidator(min_score=0.5, variant="l")
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("the cat sat on the mat", context)
|
||||
|
||||
assert result.passed is True
|
||||
assert result.name == "rouge-l"
|
||||
assert result.actual == 1.0 # Identical text
|
||||
assert result.threshold == 0.5
|
||||
|
||||
def test_rouge_validator_fails_when_score_below_threshold(self) -> None:
|
||||
"""Test that validator fails when ROUGE score is below threshold."""
|
||||
validator = RougeValidator(min_score=0.9, variant="l")
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("a dog ran through the park", context)
|
||||
|
||||
assert result.passed is False
|
||||
assert result.actual < 0.9
|
||||
assert "below minimum" in result.message
|
||||
|
||||
def test_rouge_validator_variant_selection(self) -> None:
|
||||
"""Test different ROUGE variants."""
|
||||
context = ValidationContext(reference="the quick brown fox jumps")
|
||||
|
||||
for variant in ("1", "2", "l"):
|
||||
validator = RougeValidator(min_score=0.0, variant=variant) # type: ignore[arg-type]
|
||||
result = validator.check("the quick brown fox", context)
|
||||
assert result.name == f"rouge-{variant}"
|
||||
|
||||
def test_rouge_validator_raises_on_missing_reference(self) -> None:
|
||||
"""Test that validator raises when reference is missing."""
|
||||
validator = RougeValidator(min_score=0.5)
|
||||
context = ValidationContext()
|
||||
|
||||
with pytest.raises(ValidationError, match="requires reference text"):
|
||||
validator.check("some text", context)
|
||||
|
||||
def test_rouge_validator_raises_on_invalid_min_score(self) -> None:
|
||||
"""Test that invalid min_score raises error."""
|
||||
with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
|
||||
RougeValidator(min_score=1.5)
|
||||
|
||||
def test_rouge_validator_raises_on_invalid_variant(self) -> None:
|
||||
"""Test that invalid variant raises error."""
|
||||
with pytest.raises(InvalidThresholdError, match="variant must be"):
|
||||
RougeValidator(min_score=0.5, variant="3") # type: ignore[arg-type]
|
||||
|
||||
def test_rouge_factory_function(self) -> None:
|
||||
"""Test the rouge() factory function."""
|
||||
validator = rouge(min_score=0.6, variant="2")
|
||||
assert isinstance(validator, RougeValidator)
|
||||
assert validator.name == "rouge-2"
|
||||
|
||||
|
||||
class TestLexicalValidator:
|
||||
"""Tests for LexicalValidator."""
|
||||
|
||||
def test_lexical_validator_passes_on_jaccard(self) -> None:
|
||||
"""Test that validator passes when Jaccard similarity meets threshold."""
|
||||
validator = LexicalValidator(min_jaccard=0.5)
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("the cat sat on the mat", context)
|
||||
|
||||
assert result.passed is True
|
||||
assert result.name == "lexical"
|
||||
assert result.actual["jaccard"] == 1.0
|
||||
|
||||
def test_lexical_validator_fails_on_jaccard(self) -> None:
|
||||
"""Test that validator fails when Jaccard is below threshold."""
|
||||
validator = LexicalValidator(min_jaccard=0.9)
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("a dog ran through the park", context)
|
||||
|
||||
assert result.passed is False
|
||||
assert "Jaccard" in result.message
|
||||
assert "below minimum" in result.message
|
||||
|
||||
def test_lexical_validator_passes_on_overlap(self) -> None:
|
||||
"""Test that validator passes when token overlap meets threshold."""
|
||||
validator = LexicalValidator(min_overlap=0.5)
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("the cat sat on the mat", context)
|
||||
|
||||
assert result.passed is True
|
||||
assert result.actual["token_overlap"] == 1.0
|
||||
|
||||
def test_lexical_validator_fails_on_overlap(self) -> None:
|
||||
"""Test that validator fails when overlap is below threshold."""
|
||||
validator = LexicalValidator(min_overlap=0.9)
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("a dog ran through", context)
|
||||
|
||||
assert result.passed is False
|
||||
assert "overlap" in result.message
|
||||
|
||||
def test_lexical_validator_with_both_thresholds(self) -> None:
|
||||
"""Test validator with both Jaccard and overlap thresholds."""
|
||||
validator = LexicalValidator(min_jaccard=0.3, min_overlap=0.5)
|
||||
context = ValidationContext(reference="the cat sat on the mat")
|
||||
result = validator.check("the cat sat", context)
|
||||
|
||||
# Should check both thresholds
|
||||
assert "min_jaccard" in result.threshold
|
||||
assert "min_overlap" in result.threshold
|
||||
|
||||
def test_lexical_validator_raises_when_no_threshold(self) -> None:
|
||||
"""Test that validator raises when no threshold is provided."""
|
||||
with pytest.raises(InvalidThresholdError, match="At least one"):
|
||||
LexicalValidator()
|
||||
|
||||
def test_lexical_validator_raises_on_invalid_jaccard(self) -> None:
|
||||
"""Test that invalid Jaccard threshold raises error."""
|
||||
with pytest.raises(InvalidThresholdError, match="min_jaccard"):
|
||||
LexicalValidator(min_jaccard=1.5)
|
||||
|
||||
def test_lexical_validator_raises_on_invalid_overlap(self) -> None:
|
||||
"""Test that invalid overlap threshold raises error."""
|
||||
with pytest.raises(InvalidThresholdError, match="min_overlap"):
|
||||
LexicalValidator(min_overlap=-0.1)
|
||||
|
||||
def test_lexical_validator_raises_on_missing_reference(self) -> None:
|
||||
"""Test that validator raises when reference is missing."""
|
||||
validator = LexicalValidator(min_jaccard=0.5)
|
||||
context = ValidationContext()
|
||||
|
||||
with pytest.raises(ValidationError, match="requires reference text"):
|
||||
validator.check("some text", context)
|
||||
|
||||
def test_lexical_factory_function(self) -> None:
|
||||
"""Test the lexical() factory function."""
|
||||
validator = lexical(min_jaccard=0.5, min_overlap=0.6)
|
||||
assert isinstance(validator, LexicalValidator)
|
||||
assert validator.name == "lexical"
|
||||
Reference in New Issue
Block a user