# src/veritext/validators/constraint.py
# Introduced by commit 75cd7b68de7a
# ("feat(validators): add constraint validators").
"""Constraint validators that do not require reference text.

Provides LengthValidator, ReadabilityValidator, ContainsValidator and
ExcludesValidator. Each exposes a ``name`` property and a
``check(text, context)`` method returning a ``CheckResult``.
"""

import re

from veritext.core.exceptions import InvalidThresholdError
from veritext.core.tokenisation import WordTokeniser
from veritext.core.types import CheckResult, ValidationContext
from veritext.metrics.readability import Readability


def _compile_patterns(
    patterns: list[str],
    case_sensitive: bool,
) -> list[re.Pattern[str]]:
    """
    Compile the regex patterns shared by the contains/excludes validators.

    Args:
        patterns: Regex patterns (plain substrings are valid regexes as long
            as they contain no special characters).
        case_sensitive: When False, patterns are compiled with re.IGNORECASE.

    Returns:
        Compiled patterns, in the same order as ``patterns``.

    Raises:
        InvalidThresholdError: If ``patterns`` is empty or any entry is not a
            valid regular expression.
    """
    if not patterns:
        raise InvalidThresholdError("patterns list cannot be empty")

    flags = 0 if case_sensitive else re.IGNORECASE
    compiled = []
    for pattern in patterns:
        try:
            compiled.append(re.compile(pattern, flags))
        except re.error as exc:
            # Fail at construction time instead of letting re.error escape
            # from the first check() call.
            raise InvalidThresholdError(
                f"Invalid regex pattern {pattern!r}: {exc}"
            ) from exc
    return compiled


class LengthValidator:
    """Validates text length constraints."""

    def __init__(
        self,
        min_chars: int | None = None,
        max_chars: int | None = None,
        min_words: int | None = None,
        max_words: int | None = None,
        tokeniser: WordTokeniser | None = None,
    ) -> None:
        """
        Initialise the length validator.

        Args:
            min_chars: Minimum character count (inclusive).
            max_chars: Maximum character count (inclusive).
            min_words: Minimum word count (inclusive).
            max_words: Maximum word count (inclusive).
            tokeniser: Tokeniser to use for word counting. Defaults to
                WordTokeniser().

        Raises:
            InvalidThresholdError: If no constraints are provided, any
                constraint is negative, or a minimum exceeds its maximum.
        """
        limits = {
            "min_chars": min_chars,
            "max_chars": max_chars,
            "min_words": min_words,
            "max_words": max_words,
        }

        if all(value is None for value in limits.values()):
            raise InvalidThresholdError("At least one length constraint must be set")

        for label, value in limits.items():
            if value is not None and value < 0:
                raise InvalidThresholdError(f"{label} must be >= 0, got {value}")

        for unit in ("chars", "words"):
            low = limits[f"min_{unit}"]
            high = limits[f"max_{unit}"]
            if low is not None and high is not None and low > high:
                raise InvalidThresholdError(
                    f"min_{unit} ({low}) cannot exceed max_{unit} ({high})"
                )

        self._min_chars = min_chars
        self._max_chars = max_chars
        self._min_words = min_words
        self._max_words = max_words
        # Compare against None explicitly: a caller-supplied tokeniser that
        # happens to be falsy must not be silently swapped for the default.
        self._tokeniser = WordTokeniser() if tokeniser is None else tokeniser

    @property
    def name(self) -> str:
        """Return the name of this check."""
        return "length"

    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
        """
        Run the length check.

        Characters are counted with ``len(text)``; words are counted as the
        number of items returned by the tokeniser's ``tokenise(text)``.

        Args:
            text: The text to validate.
            context: Validation context (not used for length checks).

        Returns:
            CheckResult with pass/fail status, the measured counts in
            ``actual`` and the configured limits in ``threshold``.
        """
        char_count = len(text)
        word_count = len(self._tokeniser.tokenise(text))

        failures = []
        if self._min_chars is not None and char_count < self._min_chars:
            failures.append(f"{char_count} chars < min {self._min_chars}")
        if self._max_chars is not None and char_count > self._max_chars:
            failures.append(f"{char_count} chars > max {self._max_chars}")
        if self._min_words is not None and word_count < self._min_words:
            failures.append(f"{word_count} words < min {self._min_words}")
        if self._max_words is not None and word_count > self._max_words:
            failures.append(f"{word_count} words > max {self._max_words}")

        passed = not failures
        if passed:
            message = f"Length check passed: {char_count} chars, {word_count} words"
        else:
            message = "Length check failed: " + "; ".join(failures)

        configured = (
            ("min_chars", self._min_chars),
            ("max_chars", self._max_chars),
            ("min_words", self._min_words),
            ("max_words", self._max_words),
        )
        # Only limits that were actually configured are reported.
        threshold = {key: value for key, value in configured if value is not None}

        return CheckResult(
            name=self.name,
            passed=passed,
            actual={"chars": char_count, "words": word_count},
            threshold=threshold,
            message=message,
        )


class ReadabilityValidator:
    """Validates Flesch-Kincaid readability."""

    def __init__(
        self,
        max_grade: float | None = None,
        min_ease: float | None = None,
    ) -> None:
        """
        Initialise the readability validator.

        Args:
            max_grade: Maximum Flesch-Kincaid grade level allowed.
            min_ease: Minimum Flesch Reading Ease score required.

        Raises:
            InvalidThresholdError: If no constraints provided.
        """
        if max_grade is None and min_ease is None:
            raise InvalidThresholdError(
                "At least one of max_grade or min_ease must be provided"
            )

        self._max_grade = max_grade
        self._min_ease = min_ease
        self._metric = Readability()

    @property
    def name(self) -> str:
        """Return the name of this check."""
        return "readability"

    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
        """
        Run the readability check.

        Scores the text with the Readability metric and compares its
        ``flesch_kincaid_grade`` and ``flesch_reading_ease`` values against
        whichever limits were configured.

        Args:
            text: The text to validate.
            context: Validation context (not used for readability checks).

        Returns:
            CheckResult with pass/fail status, both scores in ``actual`` and
            the configured limits in ``threshold``.
        """
        result = self._metric.score(text)
        grade = result.flesch_kincaid_grade
        ease = result.flesch_reading_ease

        failures = []
        if self._max_grade is not None and grade > self._max_grade:
            failures.append(f"grade level {grade:.1f} > max {self._max_grade:.1f}")
        if self._min_ease is not None and ease < self._min_ease:
            failures.append(f"reading ease {ease:.1f} < min {self._min_ease:.1f}")

        passed = not failures
        if passed:
            parts = []
            if self._max_grade is not None:
                parts.append(f"grade {grade:.1f} <= {self._max_grade:.1f}")
            if self._min_ease is not None:
                parts.append(f"ease {ease:.1f} >= {self._min_ease:.1f}")
            message = "Readability: " + ", ".join(parts)
        else:
            message = "Readability: " + "; ".join(failures)

        configured = (
            ("max_grade", self._max_grade),
            ("min_ease", self._min_ease),
        )
        threshold = {key: value for key, value in configured if value is not None}

        return CheckResult(
            name=self.name,
            passed=passed,
            actual={"grade": grade, "ease": ease},
            threshold=threshold,
            message=message,
        )


class ContainsValidator:
    """Validates text contains required patterns."""

    def __init__(
        self,
        patterns: list[str],
        case_sensitive: bool = False,
    ) -> None:
        """
        Initialise the contains validator.

        Args:
            patterns: List of substrings or regex patterns that must be
                present. Each entry is interpreted as a regular expression.
            case_sensitive: Whether matching is case-sensitive. Defaults to
                False.

        Raises:
            InvalidThresholdError: If patterns list is empty or contains an
                invalid regular expression.
        """
        # Compile once up front: validates every pattern and avoids repeating
        # the pattern lookup on each check() call.
        self._compiled = _compile_patterns(patterns, case_sensitive)
        # Keep a private copy so later mutation of the caller's list cannot
        # change what this validator enforces.
        self._patterns = list(patterns)
        self._case_sensitive = case_sensitive
        self._flags = 0 if case_sensitive else re.IGNORECASE

    @property
    def name(self) -> str:
        """Return the name of this check."""
        return "contains"

    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
        """
        Run the contains check.

        The check passes only if every configured pattern matches somewhere
        in ``text`` (``search`` semantics, not a full match).

        Args:
            text: The text to validate.
            context: Validation context (not used for contains checks).

        Returns:
            CheckResult with pass/fail status; ``actual`` holds the number of
            patterns found and the list of missing patterns.
        """
        missing = [
            regex.pattern for regex in self._compiled if not regex.search(text)
        ]
        passed = not missing

        if passed:
            message = f"Text contains all {len(self._patterns)} required pattern(s)"
        else:
            message = f"Text missing {len(missing)} pattern(s): {missing}"

        return CheckResult(
            name=self.name,
            passed=passed,
            actual={"found": len(self._patterns) - len(missing), "missing": missing},
            # Hand out a copy so result consumers cannot mutate validator state.
            threshold={"patterns": list(self._patterns)},
            message=message,
        )


class ExcludesValidator:
    """Validates text excludes forbidden patterns."""

    def __init__(
        self,
        patterns: list[str],
        case_sensitive: bool = False,
    ) -> None:
        """
        Initialise the excludes validator.

        Args:
            patterns: List of substrings or regex patterns that must not be
                present. Each entry is interpreted as a regular expression.
            case_sensitive: Whether matching is case-sensitive. Defaults to
                False.

        Raises:
            InvalidThresholdError: If patterns list is empty or contains an
                invalid regular expression.
        """
        # Compile once up front: validates every pattern and avoids repeating
        # the pattern lookup on each check() call.
        self._compiled = _compile_patterns(patterns, case_sensitive)
        # Keep a private copy so later mutation of the caller's list cannot
        # change what this validator enforces.
        self._patterns = list(patterns)
        self._case_sensitive = case_sensitive
        self._flags = 0 if case_sensitive else re.IGNORECASE

    @property
    def name(self) -> str:
        """Return the name of this check."""
        return "excludes"

    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
        """
        Run the excludes check.

        The check passes only if none of the configured patterns matches
        anywhere in ``text`` (``search`` semantics, not a full match).

        Args:
            text: The text to validate.
            context: Validation context (not used for excludes checks).

        Returns:
            CheckResult with pass/fail status; ``actual`` holds the number of
            patterns successfully excluded and the list of patterns found.
        """
        found = [regex.pattern for regex in self._compiled if regex.search(text)]
        passed = not found

        if passed:
            message = f"Text excludes all {len(self._patterns)} forbidden pattern(s)"
        else:
            message = f"Text contains {len(found)} forbidden pattern(s): {found}"

        return CheckResult(
            name=self.name,
            passed=passed,
            actual={"excluded": len(self._patterns) - len(found), "found": found},
            # Hand out a copy so result consumers cannot mutate validator state.
            threshold={"patterns": list(self._patterns)},
            message=message,
        )