metric validators (threshold checks)

Implement BleuValidator, RougeValidator, and LexicalValidator for
validating text against a reference using metric thresholds.
This commit is contained in:
2025-03-22 11:59:22 +00:00
parent d17d3de06d
commit 3ef262d357

View File

@@ -0,0 +1,361 @@
"""Metric-based validators that require reference text."""
from typing import Literal
from veritext.core.exceptions import InvalidThresholdError, ValidationError
from veritext.core.tokenisation import WordTokeniser
from veritext.core.types import CheckResult, ValidationContext
from veritext.metrics.bleu import Bleu
from veritext.metrics.lexical import Lexical
from veritext.metrics.rouge import Rouge
class BleuValidator:
    """Threshold check on the BLEU score of a text against reference text."""

    def __init__(
        self,
        min_score: float,
        variant: Literal[1, 2, 3, 4] = 4,
        tokeniser: WordTokeniser | None = None,
    ) -> None:
        """
        Store the threshold and BLEU variant, and build the BLEU metric.

        Args:
            min_score: Lowest acceptable BLEU score; must lie in [0.0, 1.0].
            variant: Which BLEU-n score is compared (1, 2, 3, or 4).
                Defaults to 4.
            tokeniser: Forwarded unchanged to ``Bleu``. When None, the metric
                presumably falls back to its own default tokeniser.

        Raises:
            InvalidThresholdError: If min_score is outside [0.0, 1.0], or if
                variant is not one of 1, 2, 3, 4.
        """
        # Chained comparison on purpose: NaN fails it and is rejected too.
        score_in_range = 0.0 <= min_score <= 1.0
        if not score_in_range:
            raise InvalidThresholdError(
                f"min_score must be between 0.0 and 1.0, got {min_score}"
            )

        allowed_variants = (1, 2, 3, 4)
        if variant not in allowed_variants:
            raise InvalidThresholdError(f"variant must be 1, 2, 3, or 4, got {variant}")

        self._min_score = min_score
        self._variant = variant
        self._metric = Bleu(tokeniser=tokeniser)

    @property
    def name(self) -> str:
        """Identifier of this check, e.g. ``bleu-4``."""
        return f"bleu-{self._variant}"

    def check(self, text: str, context: ValidationContext) -> CheckResult:
        """
        Score ``text`` against the context's reference and apply the threshold.

        Args:
            text: The text to validate.
            context: Validation context; its ``reference`` must not be None.

        Returns:
            CheckResult carrying the selected BLEU score, the configured
            minimum, the pass/fail flag and a human-readable message.

        Raises:
            ValidationError: If the context has no reference text.
        """
        reference = context.reference
        if reference is None:
            raise ValidationError(f"{self.name} requires reference text in context")

        scores = self._metric.score(text, reference)
        # All four scores are read up front; the configured variant picks one.
        by_variant = dict(
            zip(
                (1, 2, 3, 4),
                (scores.bleu1, scores.bleu2, scores.bleu3, scores.bleu4),
            )
        )
        actual_score = by_variant[self._variant]
        floor = self._min_score

        passed = actual_score >= floor
        verdict = "meets" if passed else "below"
        message = (
            f"BLEU-{self._variant} score {actual_score:.2f} "
            f"{verdict} minimum {floor:.2f}"
        )

        return CheckResult(
            name=self.name,
            passed=passed,
            actual=actual_score,
            threshold=floor,
            message=message,
        )
class RougeValidator:
    """Threshold check on the ROUGE F-measure of a text against reference text."""

    def __init__(
        self,
        min_score: float,
        variant: Literal["1", "2", "l"] = "l",
        tokeniser: WordTokeniser | None = None,
    ) -> None:
        """
        Store the threshold and ROUGE variant, and build the ROUGE metric.

        Args:
            min_score: Lowest acceptable ROUGE F-measure; must lie in
                [0.0, 1.0].
            variant: Which ROUGE score is compared ("1", "2", or "l").
                Defaults to "l". The comparison is case-sensitive.
            tokeniser: Forwarded unchanged to ``Rouge``. When None, the metric
                presumably falls back to its own default tokeniser.

        Raises:
            InvalidThresholdError: If min_score is outside [0.0, 1.0], or if
                variant is not one of "1", "2", "l".
        """
        # Chained comparison on purpose: NaN fails it and is rejected too.
        score_in_range = 0.0 <= min_score <= 1.0
        if not score_in_range:
            raise InvalidThresholdError(
                f"min_score must be between 0.0 and 1.0, got {min_score}"
            )

        allowed_variants = ("1", "2", "l")
        if variant not in allowed_variants:
            raise InvalidThresholdError(
                f"variant must be '1', '2', or 'l', got '{variant}'"
            )

        self._min_score = min_score
        self._variant = variant
        self._metric = Rouge(tokeniser=tokeniser)

    @property
    def name(self) -> str:
        """Identifier of this check, e.g. ``rouge-l``."""
        return f"rouge-{self._variant}"

    def check(self, text: str, context: ValidationContext) -> CheckResult:
        """
        Score ``text`` against the context's reference and apply the threshold.

        Args:
            text: The text to validate.
            context: Validation context; its ``reference`` must not be None.

        Returns:
            CheckResult carrying the selected ROUGE F-measure, the configured
            minimum, the pass/fail flag and a human-readable message.

        Raises:
            ValidationError: If the context has no reference text.
        """
        reference = context.reference
        if reference is None:
            raise ValidationError(f"{self.name} requires reference text in context")

        scores = self._metric.score(text, reference)
        # All three F-measures are read up front; the variant picks one.
        by_variant = dict(
            zip(
                ("1", "2", "l"),
                (
                    scores.rouge1.fmeasure,
                    scores.rouge2.fmeasure,
                    scores.rouge_l.fmeasure,
                ),
            )
        )
        actual_score = by_variant[self._variant]
        floor = self._min_score

        passed = actual_score >= floor
        verdict = "meets" if passed else "below"
        # The variant is upper-cased for display only ("l" -> "ROUGE-L").
        label = self._variant.upper()
        message = (
            f"ROUGE-{label} score {actual_score:.2f} "
            f"{verdict} minimum {floor:.2f}"
        )

        return CheckResult(
            name=self.name,
            passed=passed,
            actual=actual_score,
            threshold=floor,
            message=message,
        )
class LexicalValidator:
    """Threshold check on Jaccard similarity and/or token overlap."""

    def __init__(
        self,
        min_jaccard: float | None = None,
        min_overlap: float | None = None,
        tokeniser: WordTokeniser | None = None,
    ) -> None:
        """
        Store the configured thresholds and build the lexical metric.

        Args:
            min_jaccard: Lowest acceptable Jaccard similarity in [0.0, 1.0],
                or None to skip that comparison.
            min_overlap: Lowest acceptable token overlap in [0.0, 1.0],
                or None to skip that comparison.
            tokeniser: Forwarded unchanged to ``Lexical``. When None, the
                metric presumably falls back to its own default tokeniser.

        Raises:
            InvalidThresholdError: If both thresholds are None, or if a
                provided threshold is outside [0.0, 1.0].
        """
        if min_jaccard is None and min_overlap is None:
            raise InvalidThresholdError(
                "At least one of min_jaccard or min_overlap must be provided"
            )

        # Same range rule for both thresholds, checked in declaration order.
        for label, value in (("min_jaccard", min_jaccard), ("min_overlap", min_overlap)):
            if value is not None and not 0.0 <= value <= 1.0:
                raise InvalidThresholdError(
                    f"{label} must be between 0.0 and 1.0, got {value}"
                )

        self._min_jaccard = min_jaccard
        self._min_overlap = min_overlap
        self._metric = Lexical(tokeniser=tokeniser)

    @property
    def name(self) -> str:
        """Identifier of this check."""
        return "lexical"

    def check(self, text: str, context: ValidationContext) -> CheckResult:
        """
        Score ``text`` against the context's reference and apply the thresholds.

        The check passes only when every configured threshold is met.

        Args:
            text: The text to validate.
            context: Validation context; its ``reference`` must not be None.

        Returns:
            CheckResult whose ``actual`` is a dict with both measured values
            and whose ``threshold`` is a dict holding only the configured
            minimums.

        Raises:
            ValidationError: If the context has no reference text.
        """
        reference = context.reference
        if reference is None:
            raise ValidationError(f"{self.name} requires reference text in context")

        scores = self._metric.score(text, reference)
        jaccard = scores.jaccard
        overlap = scores.token_overlap
        jaccard_floor = self._min_jaccard
        overlap_floor = self._min_overlap

        # One entry per configured threshold that the measured value misses.
        shortfalls = []
        if jaccard_floor is not None and jaccard < jaccard_floor:
            shortfalls.append(
                f"Jaccard {jaccard:.2f} below minimum {jaccard_floor:.2f}"
            )
        if overlap_floor is not None and overlap < overlap_floor:
            shortfalls.append(
                f"token overlap {overlap:.2f} below minimum {overlap_floor:.2f}"
            )

        passed = not shortfalls
        if passed:
            satisfied = []
            if jaccard_floor is not None:
                satisfied.append(f"Jaccard {jaccard:.2f} >= {jaccard_floor:.2f}")
            if overlap_floor is not None:
                satisfied.append(f"overlap {overlap:.2f} >= {overlap_floor:.2f}")
            detail = ", ".join(satisfied)
        else:
            detail = "; ".join(shortfalls)
        message = "Lexical similarity: " + detail

        # Both measurements are always reported; only set thresholds are echoed.
        actual = {"jaccard": jaccard, "token_overlap": overlap}
        threshold = {
            key: value
            for key, value in (
                ("min_jaccard", jaccard_floor),
                ("min_overlap", overlap_floor),
            )
            if value is not None
        }

        return CheckResult(
            name=self.name,
            passed=passed,
            actual=actual,
            threshold=threshold,
            message=message,
        )
class SemanticValidator:
    """Threshold check on semantic similarity against reference text.

    Requires the `veritext[semantic]` extra to be installed.
    """

    def __init__(
        self,
        min_score: float,
        model: str = "all-MiniLM-L6-v2",
        cache_embeddings: bool = True,
    ) -> None:
        """
        Store the threshold and build the semantic similarity metric.

        Args:
            min_score: Lowest acceptable similarity; must lie in [0.0, 1.0].
            model: Model name forwarded to ``SemanticSimilarity``.
            cache_embeddings: Forwarded to ``SemanticSimilarity`` unchanged.

        Raises:
            InvalidThresholdError: If min_score is outside [0.0, 1.0].
            DependencyError: Presumably raised by the deferred import or by
                ``SemanticSimilarity`` when sentence-transformers is missing;
                nothing in this class raises it directly.
        """
        # Chained comparison on purpose: NaN fails it and is rejected too.
        score_in_range = 0.0 <= min_score <= 1.0
        if not score_in_range:
            raise InvalidThresholdError(
                f"min_score must be between 0.0 and 1.0, got {min_score}"
            )
        self._min_score = min_score

        # Imported here, after validation, rather than at module level, so
        # importing this module does not itself pull in the semantic package.
        from veritext.semantic.similarity import SemanticSimilarity

        self._metric: SemanticSimilarity = SemanticSimilarity(
            model=model, cache_embeddings=cache_embeddings
        )

    @property
    def name(self) -> str:
        """Identifier of this check."""
        return "semantic"

    def check(self, text: str, context: ValidationContext) -> CheckResult:
        """
        Score ``text`` against the context's reference and apply the threshold.

        Args:
            text: The text to validate.
            context: Validation context; its ``reference`` must not be None.

        Returns:
            CheckResult carrying the measured similarity, the configured
            minimum, the pass/fail flag and a human-readable message.

        Raises:
            ValidationError: If the context has no reference text.
        """
        reference = context.reference
        if reference is None:
            raise ValidationError(f"{self.name} requires reference text in context")

        similarity = self._metric.score(text, reference).similarity
        floor = self._min_score

        passed = similarity >= floor
        verdict = "meets" if passed else "below"
        message = (
            f"Semantic similarity {similarity:.2f} "
            f"{verdict} minimum {floor:.2f}"
        )

        return CheckResult(
            name=self.name,
            passed=passed,
            actual=similarity,
            threshold=floor,
            message=message,
        )