validator factory functions
Export all validators and provide factory functions for clean API: bleu(), rouge(), lexical(), length(), readability(), contains(), excludes(), all_of(), any_of().
This commit is contained in:
238
src/veritext/validators/__init__.py
Normal file
238
src/veritext/validators/__init__.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""Validators module: composable validation checks for text quality.
|
||||
|
||||
This module provides validators that apply thresholds to metrics and return
|
||||
pass/fail decisions with diagnostics.
|
||||
|
||||
Example:
|
||||
>>> from veritext.validators import bleu, length, all_of
|
||||
>>> from veritext.core.types import ValidationContext
|
||||
>>>
|
||||
>>> validator = all_of([
|
||||
... bleu(min_score=0.5),
|
||||
... length(min_words=10),
|
||||
... ])
|
||||
>>> context = ValidationContext(reference="The quick brown fox.")
|
||||
>>> result = validator.check("The quick brown fox jumps.", context)
|
||||
>>> print(result.passed)
|
||||
"""
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from veritext.core.tokenisation import WordTokeniser
|
||||
from veritext.validators.base import Check
|
||||
from veritext.validators.composite import AllOf, AnyOf
|
||||
from veritext.validators.constraint import (
|
||||
ContainsValidator,
|
||||
ExcludesValidator,
|
||||
LengthValidator,
|
||||
ReadabilityValidator,
|
||||
)
|
||||
from veritext.validators.metric import (
|
||||
BleuValidator,
|
||||
LexicalValidator,
|
||||
RougeValidator,
|
||||
SemanticValidator,
|
||||
)
|
||||
|
||||
|
||||
def bleu(
    min_score: float,
    variant: Literal[1, 2, 3, 4] = 4,
    tokeniser: WordTokeniser | None = None,
) -> BleuValidator:
    """Build a validator that requires a minimum BLEU score.

    Args:
        min_score: Lowest acceptable BLEU score (0.0 to 1.0).
        variant: BLEU variant to use: 1, 2, 3, or 4. Defaults to 4.
        tokeniser: Tokeniser to use. Defaults to WordTokeniser(); a value of
            None is forwarded unchanged to the validator.

    Returns:
        The configured BleuValidator.
    """
    validator = BleuValidator(
        min_score=min_score,
        variant=variant,
        tokeniser=tokeniser,
    )
    return validator
|
||||
|
||||
|
||||
def rouge(
    min_score: float,
    variant: Literal["1", "2", "l"] = "l",
    tokeniser: WordTokeniser | None = None,
) -> RougeValidator:
    """Build a validator that requires a minimum ROUGE F-measure.

    Args:
        min_score: Lowest acceptable ROUGE F-measure (0.0 to 1.0).
        variant: ROUGE variant to use: "1", "2", or "l". Defaults to "l".
        tokeniser: Tokeniser to use. Defaults to WordTokeniser(); a value of
            None is forwarded unchanged to the validator.

    Returns:
        The configured RougeValidator.
    """
    validator = RougeValidator(
        min_score=min_score,
        variant=variant,
        tokeniser=tokeniser,
    )
    return validator
|
||||
|
||||
|
||||
def lexical(
    min_jaccard: float | None = None,
    min_overlap: float | None = None,
    tokeniser: WordTokeniser | None = None,
) -> LexicalValidator:
    """Build a validator based on lexical similarity thresholds.

    Args:
        min_jaccard: Lowest acceptable Jaccard similarity (0.0 to 1.0).
        min_overlap: Lowest acceptable token overlap (0.0 to 1.0).
        tokeniser: Tokeniser to use. Defaults to WordTokeniser(); a value of
            None is forwarded unchanged to the validator.

    Returns:
        The configured LexicalValidator.
    """
    validator = LexicalValidator(
        min_jaccard=min_jaccard,
        min_overlap=min_overlap,
        tokeniser=tokeniser,
    )
    return validator
|
||||
|
||||
|
||||
def length(
    min_chars: int | None = None,
    max_chars: int | None = None,
    min_words: int | None = None,
    max_words: int | None = None,
    tokeniser: WordTokeniser | None = None,
) -> LengthValidator:
    """Build a validator that bounds text length in characters and/or words.

    Args:
        min_chars: Smallest allowed character count (inclusive).
        max_chars: Largest allowed character count (inclusive).
        min_words: Smallest allowed word count (inclusive).
        max_words: Largest allowed word count (inclusive).
        tokeniser: Tokeniser used for word counting. Defaults to
            WordTokeniser(); a value of None is forwarded unchanged to the
            validator.

    Returns:
        The configured LengthValidator.
    """
    validator = LengthValidator(
        min_chars=min_chars,
        max_chars=max_chars,
        min_words=min_words,
        max_words=max_words,
        tokeniser=tokeniser,
    )
    return validator
|
||||
|
||||
|
||||
def readability(
    max_grade: float | None = None,
    min_ease: float | None = None,
) -> ReadabilityValidator:
    """Build a validator that enforces readability limits.

    Args:
        max_grade: Highest Flesch-Kincaid grade level allowed.
        min_ease: Lowest Flesch Reading Ease score required.

    Returns:
        The configured ReadabilityValidator.
    """
    validator = ReadabilityValidator(
        max_grade=max_grade,
        min_ease=min_ease,
    )
    return validator
|
||||
|
||||
|
||||
def contains(
    patterns: list[str],
    case_sensitive: bool = False,
) -> ContainsValidator:
    """Build a validator that requires the given patterns to be present.

    Args:
        patterns: Substrings or regex patterns that must be present.
        case_sensitive: Whether matching is case-sensitive. Defaults to False.

    Returns:
        The configured ContainsValidator.
    """
    validator = ContainsValidator(
        patterns=patterns,
        case_sensitive=case_sensitive,
    )
    return validator
|
||||
|
||||
|
||||
def excludes(
    patterns: list[str],
    case_sensitive: bool = False,
) -> ExcludesValidator:
    """Build a validator that requires the given patterns to be absent.

    Args:
        patterns: Substrings or regex patterns that must not be present.
        case_sensitive: Whether matching is case-sensitive. Defaults to False.

    Returns:
        The configured ExcludesValidator.
    """
    validator = ExcludesValidator(
        patterns=patterns,
        case_sensitive=case_sensitive,
    )
    return validator
|
||||
|
||||
|
||||
def all_of(checks: list[Check]) -> AllOf:
    """Combine checks into an AllOf composite validator.

    Args:
        checks: The checks that must all pass.

    Returns:
        An AllOf instance wrapping ``checks``.
    """
    composite = AllOf(checks=checks)
    return composite
|
||||
|
||||
|
||||
def any_of(checks: list[Check]) -> AnyOf:
    """Combine checks into an AnyOf composite validator.

    Args:
        checks: The checks of which at least one must pass.

    Returns:
        An AnyOf instance wrapping ``checks``.
    """
    composite = AnyOf(checks=checks)
    return composite
|
||||
|
||||
|
||||
def semantic(
    min_score: float,
    model: str = "all-MiniLM-L6-v2",
    cache_embeddings: bool = True,
) -> SemanticValidator:
    """Build a validator that requires a minimum semantic similarity.

    Requires the `veritext[semantic]` extra to be installed.

    Args:
        min_score: Lowest acceptable semantic similarity (0.0 to 1.0).
        model: Name of the sentence-transformers model to use.
        cache_embeddings: Whether to cache embeddings for repeated texts.

    Returns:
        The configured SemanticValidator.
    """
    validator = SemanticValidator(
        min_score=min_score,
        model=model,
        cache_embeddings=cache_embeddings,
    )
    return validator
|
||||
|
||||
|
||||
# Names exported by ``from veritext.validators import *``. Entries are kept in
# ASCII order, which places the re-exported types ahead of the lowercase
# factory helpers defined in this module.
__all__ = [
    # Re-exported types.
    "AllOf", "AnyOf",
    "BleuValidator",
    "Check", "ContainsValidator",
    "ExcludesValidator",
    "LengthValidator", "LexicalValidator",
    "ReadabilityValidator", "RougeValidator",
    "SemanticValidator",
    # Factory helpers.
    "all_of", "any_of",
    "bleu",
    "contains",
    "excludes",
    "length", "lexical",
    "readability", "rouge",
    "semantic",
]
|
||||
Reference in New Issue
Block a user