validate_text assertion helper

Primary API for text validation in pytest with keyword arguments
for BLEU, ROUGE, semantic similarity, length, readability, and
pattern matching. Includes detailed failure formatting.
This commit is contained in:
2025-04-06 15:13:34 +00:00
parent 7528c44dcc
commit 9f9a3da6cc
2 changed files with 215 additions and 0 deletions

View File

@@ -0,0 +1,135 @@
"""Assertion functions for text validation in pytest."""
from typing import TYPE_CHECKING
from veritext.core.types import ValidationContext, ValidationResult
from veritext.validators import all_of
if TYPE_CHECKING:
from veritext.validators.base import Check
def validate_text(
text: str,
*,
reference: str | list[str] | None = None,
min_bleu: float | None = None,
min_rouge: float | None = None,
min_semantic: float | None = None,
max_length: int | None = None,
min_length: int | None = None,
max_reading_grade: float | None = None,
must_contain: list[str] | None = None,
must_exclude: list[str] | None = None,
) -> None:
"""Assert text passes all specified validation criteria.
This is the primary assertion function for text validation in pytest.
It builds validators from keyword arguments and raises AssertionError
with detailed failure information if validation fails.
Args:
text: The text to validate.
reference: Reference text for comparison metrics (BLEU, ROUGE, semantic).
min_bleu: Minimum BLEU-4 score required (0.0 to 1.0).
min_rouge: Minimum ROUGE-L F-measure required (0.0 to 1.0).
min_semantic: Minimum semantic similarity required (0.0 to 1.0).
max_length: Maximum character count allowed.
min_length: Minimum character count required.
max_reading_grade: Maximum Flesch-Kincaid grade level.
must_contain: Patterns that must be present in the text.
must_exclude: Patterns that must not be present in the text.
Raises:
AssertionError: With detailed failure information if validation fails.
ValueError: If comparison metrics requested but reference not provided,
or if no validation criteria are specified.
Example:
>>> validate_text(
... "The quick brown fox jumps over the lazy dog.",
... min_length=10,
... max_length=100,
... max_reading_grade=8.0,
... )
"""
if any([min_bleu, min_rouge, min_semantic]) and reference is None:
raise ValueError(
"Reference text required for comparison metrics "
"(min_bleu, min_rouge, min_semantic)"
)
checks: list[Check] = []
if min_bleu is not None:
from veritext.validators import bleu
checks.append(bleu(min_score=min_bleu))
if min_rouge is not None:
from veritext.validators import rouge
checks.append(rouge(min_score=min_rouge))
if min_semantic is not None:
from veritext.validators import semantic
checks.append(semantic(min_score=min_semantic))
if max_length is not None or min_length is not None:
from veritext.validators import length
checks.append(length(min_chars=min_length, max_chars=max_length))
if max_reading_grade is not None:
from veritext.validators import readability
checks.append(readability(max_grade=max_reading_grade))
if must_contain is not None:
from veritext.validators import contains
checks.append(contains(patterns=must_contain))
if must_exclude is not None:
from veritext.validators import excludes
checks.append(excludes(patterns=must_exclude))
if not checks:
raise ValueError("At least one validation criterion must be specified")
context = ValidationContext(reference=reference)
validator = all_of(checks)
result = validator.check(text, context)
if not result.passed:
raise AssertionError(_format_failure(text, result))
def _format_failure(text: str, result: ValidationResult) -> str:
    """Build the multi-line message used for a failed validation.

    The message contains a header, a repr of the validated text (cut to the
    first 100 characters plus "..." when longer), and one entry per item in
    ``result.failed_checks`` showing its name and message. Entries whose
    ``threshold`` is not None also get "Expected"/"Actual" lines.

    Args:
        text: The text that was validated.
        result: The validation result whose ``failed_checks`` are reported.

    Returns:
        The report lines joined with newlines.
    """
    # Keep the report readable for long inputs: show only a 100-char prefix.
    if len(text) > 100:
        preview = text[:100] + "..."
    else:
        preview = text

    report = [
        "Text validation failed:",
        "",
        f" Text: {preview!r}",
        "",
        " Failed checks:",
    ]
    for failure in result.failed_checks:
        report.extend([f" - {failure.name}:", f" {failure.message}"])
        if failure.threshold is None:
            continue
        # NOTE(review): ">=" is hard-coded; presumably thresholds for
        # maximum-type checks would read better as "<=" — confirm against
        # the check result types before changing.
        report.extend(
            [
                f" Expected: >= {failure.threshold}",
                f" Actual: {failure.actual}",
            ]
        )
    return "\n".join(report)

View File

@@ -0,0 +1,80 @@
"""Pytest fixtures for text validation."""
from typing import TYPE_CHECKING, Any
import pytest
from veritext.core.types import ValidationContext, ValidationResult
from veritext.validators import all_of
from veritext.validators.base import Check
if TYPE_CHECKING:
from collections.abc import Callable
class ValidatorFactory:
    """Callable factory that turns a list of checks into a validation function."""

    def __call__(
        self,
        checks: list[Check],
        reference: str | list[str] | None = None,
    ) -> "Callable[[str], ValidationResult]":
        """Bundle checks and an optional reference into a single callable.

        The checks are combined once with ``all_of`` and a single
        ``ValidationContext`` is created; both are reused by every call of
        the returned function.

        Args:
            checks: List of validation checks to apply.
            reference: Optional reference text stored on the context.

        Returns:
            A function that takes text and returns the ValidationResult of
            running the combined checks against it.
        """
        combined = all_of(checks)
        shared_context = ValidationContext(reference=reference)

        def validate(text: str) -> ValidationResult:
            # Closes over the combined validator and context built above.
            outcome = combined.check(text, shared_context)
            return outcome

        return validate
@pytest.fixture
def text_validator() -> ValidatorFactory:
    """Fixture returning a fresh ValidatorFactory.

    Example:
        >>> def test_with_factory(text_validator):
        ...     from veritext.validators import bleu, length
        ...     validate = text_validator(
        ...         checks=[
        ...             bleu(min_score=0.5),
        ...             length(min_chars=10, max_chars=200),
        ...         ],
        ...         reference="The reference text.",
        ...     )
        ...     result = validate("Some candidate text.")
        ...     assert result.passed

    Returns:
        A new ValidatorFactory instance.
    """
    factory = ValidatorFactory()
    return factory
@pytest.fixture
def validation_context() -> "Callable[..., ValidationContext]":
    """Fixture returning a builder for ValidationContext objects.

    The builder accepts an optional ``reference`` and arbitrary extra
    keyword arguments; the extras are collected into a dict and passed to
    ValidationContext as ``metadata``.

    Example:
        >>> def test_with_context(validation_context):
        ...     ctx = validation_context(reference="The reference text.")
        ...     assert ctx.reference == "The reference text."

    Returns:
        A callable that creates ValidationContext objects.
    """

    def _build(
        reference: str | list[str] | None = None,
        **extra: Any,
    ) -> ValidationContext:
        # With no extra keywords, metadata is an empty dict.
        built = ValidationContext(reference=reference, metadata=extra)
        return built

    return _build