validator factory functions

Export all validators and provide factory functions for a clean API:
bleu(), rouge(), lexical(), length(), readability(), contains(),
excludes(), all_of(), any_of(), semantic().
This commit is contained in:
2025-03-29 13:12:53 +00:00
parent 62d78ab699
commit 8fd1dc4cd3

View File

@@ -0,0 +1,238 @@
"""Validators module: composable validation checks for text quality.
This module provides validators that apply thresholds to metrics and return
pass/fail decisions with diagnostics.
Example:
>>> from veritext.validators import bleu, length, all_of
>>> from veritext.core.types import ValidationContext
>>>
>>> validator = all_of([
... bleu(min_score=0.5),
... length(min_words=10),
... ])
>>> context = ValidationContext(reference="The quick brown fox.")
>>> result = validator.check("The quick brown fox jumps.", context)
>>> print(result.passed)
"""
from typing import Literal
from veritext.core.tokenisation import WordTokeniser
from veritext.validators.base import Check
from veritext.validators.composite import AllOf, AnyOf
from veritext.validators.constraint import (
ContainsValidator,
ExcludesValidator,
LengthValidator,
ReadabilityValidator,
)
from veritext.validators.metric import (
BleuValidator,
LexicalValidator,
RougeValidator,
SemanticValidator,
)
def bleu(
    min_score: float,
    variant: Literal[1, 2, 3, 4] = 4,
    tokeniser: WordTokeniser | None = None,
) -> BleuValidator:
    """Build a validator that enforces a minimum BLEU score.

    Args:
        min_score: Lowest acceptable BLEU score (0.0 to 1.0).
        variant: BLEU variant to apply: 1, 2, 3 or 4. Defaults to 4.
        tokeniser: Optional tokeniser. Left as ``None`` by default, in
            which case the validator is expected to use ``WordTokeniser()``.

    Returns:
        The configured BleuValidator.
    """
    validator = BleuValidator(
        min_score=min_score,
        variant=variant,
        tokeniser=tokeniser,
    )
    return validator
def rouge(
    min_score: float,
    variant: Literal["1", "2", "l"] = "l",
    tokeniser: WordTokeniser | None = None,
) -> RougeValidator:
    """Build a validator that enforces a minimum ROUGE F-measure.

    Args:
        min_score: Lowest acceptable ROUGE F-measure (0.0 to 1.0).
        variant: ROUGE variant to apply: "1", "2" or "l". Defaults to "l".
        tokeniser: Optional tokeniser. Left as ``None`` by default, in
            which case the validator is expected to use ``WordTokeniser()``.

    Returns:
        The configured RougeValidator.
    """
    validator = RougeValidator(
        min_score=min_score,
        variant=variant,
        tokeniser=tokeniser,
    )
    return validator
def lexical(
    min_jaccard: float | None = None,
    min_overlap: float | None = None,
    tokeniser: WordTokeniser | None = None,
) -> LexicalValidator:
    """Build a validator that thresholds lexical similarity.

    Every argument is forwarded unchanged to ``LexicalValidator``.

    Args:
        min_jaccard: Lowest acceptable Jaccard similarity (0.0 to 1.0).
        min_overlap: Lowest acceptable token overlap (0.0 to 1.0).
        tokeniser: Optional tokeniser. Left as ``None`` by default, in
            which case the validator is expected to use ``WordTokeniser()``.

    Returns:
        The configured LexicalValidator.
    """
    validator = LexicalValidator(
        min_jaccard=min_jaccard,
        min_overlap=min_overlap,
        tokeniser=tokeniser,
    )
    return validator
def length(
    min_chars: int | None = None,
    max_chars: int | None = None,
    min_words: int | None = None,
    max_words: int | None = None,
    tokeniser: WordTokeniser | None = None,
) -> LengthValidator:
    """Build a validator that bounds text length in characters and/or words.

    Every argument is forwarded unchanged to ``LengthValidator``.

    Args:
        min_chars: Inclusive lower bound on the character count.
        max_chars: Inclusive upper bound on the character count.
        min_words: Inclusive lower bound on the word count.
        max_words: Inclusive upper bound on the word count.
        tokeniser: Optional tokeniser used for word counting. Left as
            ``None`` by default, in which case the validator is expected
            to use ``WordTokeniser()``.

    Returns:
        The configured LengthValidator.
    """
    validator = LengthValidator(
        min_chars=min_chars,
        max_chars=max_chars,
        min_words=min_words,
        max_words=max_words,
        tokeniser=tokeniser,
    )
    return validator
def readability(
    max_grade: float | None = None,
    min_ease: float | None = None,
) -> ReadabilityValidator:
    """Build a validator that thresholds readability scores.

    Args:
        max_grade: Highest Flesch-Kincaid grade level allowed.
        min_ease: Lowest Flesch Reading Ease score required.

    Returns:
        The configured ReadabilityValidator.
    """
    validator = ReadabilityValidator(
        max_grade=max_grade,
        min_ease=min_ease,
    )
    return validator
def contains(
    patterns: list[str],
    case_sensitive: bool = False,
) -> ContainsValidator:
    """Build a validator that requires the given patterns to be present.

    Args:
        patterns: Substrings or regex patterns that must be present.
        case_sensitive: Match with case sensitivity when True.
            Defaults to False.

    Returns:
        The configured ContainsValidator.
    """
    validator = ContainsValidator(
        patterns=patterns,
        case_sensitive=case_sensitive,
    )
    return validator
def excludes(
    patterns: list[str],
    case_sensitive: bool = False,
) -> ExcludesValidator:
    """Build a validator that requires the given patterns to be absent.

    Args:
        patterns: Substrings or regex patterns that must not be present.
        case_sensitive: Match with case sensitivity when True.
            Defaults to False.

    Returns:
        The configured ExcludesValidator.
    """
    validator = ExcludesValidator(
        patterns=patterns,
        case_sensitive=case_sensitive,
    )
    return validator
def all_of(checks: list[Check]) -> AllOf:
    """Combine checks into an AllOf composite validator.

    Args:
        checks: The checks that must all pass.

    Returns:
        The AllOf composite wrapping ``checks``.
    """
    composite = AllOf(checks=checks)
    return composite
def any_of(checks: list[Check]) -> AnyOf:
    """Combine checks into an AnyOf composite validator.

    Args:
        checks: The checks of which at least one must pass.

    Returns:
        The AnyOf composite wrapping ``checks``.
    """
    composite = AnyOf(checks=checks)
    return composite
def semantic(
    min_score: float,
    model: str = "all-MiniLM-L6-v2",
    cache_embeddings: bool = True,
) -> SemanticValidator:
    """Build a validator that enforces a minimum semantic similarity.

    Requires the `veritext[semantic]` extra to be installed.

    Args:
        min_score: Lowest acceptable semantic similarity (0.0 to 1.0).
        model: Name of the sentence-transformers model to use.
            Defaults to "all-MiniLM-L6-v2".
        cache_embeddings: Cache embeddings for repeated texts when True.
            Defaults to True.

    Returns:
        The configured SemanticValidator.
    """
    validator = SemanticValidator(
        min_score=min_score,
        model=model,
        cache_embeddings=cache_embeddings,
    )
    return validator
# Explicit public API: the names exported by a star-import of this module.
__all__ = [
    # Classes re-exported from the base, composite, constraint and metric
    # submodules imported at the top of this file.
    "AllOf",
    "AnyOf",
    "BleuValidator",
    "Check",
    "ContainsValidator",
    "ExcludesValidator",
    "LengthValidator",
    "LexicalValidator",
    "ReadabilityValidator",
    "RougeValidator",
    "SemanticValidator",
    # Factory functions defined in this module.
    "all_of",
    "any_of",
    "bleu",
    "contains",
    "excludes",
    "length",
    "lexical",
    "readability",
    "rouge",
    "semantic",
]