docs(changelog): add semantic similarity entries

test(semantic): add semantic similarity tests
feat(validators): add SemanticValidator
2026-02-03 17:31:14 +00:00 · 2026-02-03 17:31:07 +00:00 · 2026-02-03 17:31:01 +00:00 · 2026-02-03 17:30:56 +00:00 · 2026-02-03 17:30:50 +00:00 · 2026-02-03 17:14:37 +00:00
16 changed files with 2357 additions and 0 deletions
@@ -21,3 +21,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - ROUGE metric (ROUGE-1, ROUGE-2, ROUGE-L with precision/recall/F-measure)
 - Flesch-Kincaid readability metrics (grade level and reading ease)
 - Batch scoring with aggregate statistics for all metrics
+- Validators module with `Check` protocol for validation checks
+- Metric-based validators: `BleuValidator`, `RougeValidator`, `LexicalValidator`
+- Constraint validators: `LengthValidator`, `ReadabilityValidator`, `ContainsValidator`, `ExcludesValidator`
+- Composite validators: `AllOf` (all checks must pass), `AnyOf` (any check must pass)
+- Factory functions for clean validator API (`bleu()`, `rouge()`, `lexical()`, `length()`, `readability()`, `contains()`, `excludes()`, `all_of()`, `any_of()`)
+- Semantic similarity module with embedding-based text comparison (requires `veritext[semantic]` extra)
+- `SemanticSimilarity` metric using sentence-transformers for semantic relatedness
+- `SemanticValidator` for threshold-based semantic similarity validation
+- `semantic()` factory function for creating semantic validators
+- Embedding caching for performance optimisation in repeated comparisons
@@ -10,6 +10,7 @@ from veritext.metrics.results import (
    ReadabilityResult,
    RougeResult,
    RougeScore,
+    SemanticResult,
 )
 from veritext.metrics.rouge import Rouge

@@ -26,4 +27,5 @@ __all__ = [
    "Rouge",
    "RougeResult",
    "RougeScore",
+    "SemanticResult",
 ]
@@ -91,3 +91,20 @@ class ReadabilityResult(BaseModel):
    def score(self) -> float:
        """Return Flesch reading ease as the primary score."""
        return self.flesch_reading_ease
+
+
+class SemanticResult(BaseModel):
+    """Result of semantic similarity computation.
+
+    Carries the similarity value together with the name of the embedding
+    model that produced it, and exposes the similarity as the primary
+    ``score``.
+    """
+
+    # Frozen configuration: instances are intended to be read-only once built.
+    model_config = ConfigDict(frozen=True)
+
+    # NOTE(review): no range constraint is declared on this field, so the
+    # 0.0-1.0 range documented below depends on the producer supplying an
+    # in-range value - confirm if results are ever constructed elsewhere.
+    similarity: float
+    """Cosine similarity score (0.0 to 1.0)."""
+
+    model: str
+    """Name of the embedding model used."""
+
+    @property
+    def score(self) -> float:
+        """Return the primary score for this result.
+
+        Returns:
+            The ``similarity`` value, unchanged.
+        """
+        return self.similarity
@@ -0,0 +1,16 @@
+"""Semantic similarity module: embedding-based text comparison.
+
+This module provides semantic similarity using sentence-transformers.
+It requires the `veritext[semantic]` extra to be installed.
+
+Example:
+    >>> from veritext.semantic import SemanticSimilarity
+    >>>
+    >>> metric = SemanticSimilarity()
+    >>> result = metric.score("The cat sat on the mat", "A feline rested on the rug")
+    >>> print(f"Similarity: {result.similarity:.2f}")
+"""
+
+from veritext.semantic.similarity import SemanticSimilarity
+
+__all__ = ["SemanticSimilarity"]
@@ -0,0 +1,188 @@
+"""Embedding-based semantic similarity using sentence-transformers."""
+
+from typing import Any
+
+from veritext.core.exceptions import DependencyError
+from veritext.metrics.base import AggregateStats, BatchResult
+from veritext.metrics.results import SemanticResult
+
+
+class SemanticSimilarity:
+    """
+    Embedding-based semantic similarity using sentence-transformers.
+
+    Computes cosine similarity between text embeddings to measure semantic
+    relatedness. This metric captures meaning beyond lexical overlap.
+
+    When caching is enabled, embeddings are kept in an in-memory dict keyed by
+    the exact text that was encoded. The cache has no size limit; call
+    `clear_cache()` to release it in long-running processes.
+
+    Requires the `veritext[semantic]` extra to be installed.
+    """
+
+    def __init__(
+        self,
+        model: str = "all-MiniLM-L6-v2",
+        cache_embeddings: bool = True,
+    ) -> None:
+        """
+        Initialise the semantic similarity metric.
+
+        Args:
+            model: Name of the sentence-transformers model to use.
+                   Defaults to "all-MiniLM-L6-v2" (22MB, good quality/size tradeoff).
+            cache_embeddings: Whether to cache embeddings for repeated texts.
+                              Defaults to True.
+
+        Raises:
+            DependencyError: If sentence-transformers is not installed.
+        """
+        # Imported lazily so the package stays importable without the extra.
+        try:
+            from sentence_transformers import SentenceTransformer
+        except ImportError as err:
+            raise DependencyError(
+                "Install veritext[semantic] for semantic similarity: "
+                "pip install veritext[semantic]"
+            ) from err
+
+        self._model_name = model
+        self._model: Any = SentenceTransformer(model)
+        # None (rather than an empty dict) signals that caching is disabled.
+        self._cache: dict[str, Any] | None = {} if cache_embeddings else None
+
+    @property
+    def name(self) -> str:
+        """Return the name of this metric."""
+        return "semantic"
+
+    @property
+    def requires_reference(self) -> bool:
+        """Return whether this metric requires reference text."""
+        return True
+
+    def _get_embedding(self, text: str) -> Any:
+        """
+        Get embedding for text, using cache if available.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            The embedding tensor.
+        """
+        if self._cache is not None and text in self._cache:
+            return self._cache[text]
+
+        embedding = self._model.encode(text, convert_to_tensor=True)
+
+        if self._cache is not None:
+            self._cache[text] = embedding
+
+        return embedding
+
+    def _cosine_similarity(self, embedding1: Any, embedding2: Any) -> float:
+        """
+        Compute cosine similarity between two embeddings.
+
+        Args:
+            embedding1: First embedding tensor.
+            embedding2: Second embedding tensor.
+
+        Returns:
+            Cosine similarity score (0.0 to 1.0). A NaN similarity is
+            reported as 0.0.
+        """
+        import math
+
+        from sentence_transformers import util
+
+        similarity: float = util.cos_sim(embedding1, embedding2).item()
+        # NaN compares false against everything, so the clamp below would turn
+        # it into 1.0 (a perfect match). Report "no similarity" instead.
+        if math.isnan(similarity):
+            return 0.0
+        # Clamp to [0, 1] as negative similarities are possible but not meaningful
+        return max(0.0, min(1.0, similarity))
+
+    def score(
+        self, candidate: str, reference: str | list[str] | None = None
+    ) -> SemanticResult:
+        """
+        Compute semantic similarity between candidate and reference.
+
+        When multiple references are provided, returns the maximum similarity
+        across all references. Candidate and references are stripped of
+        surrounding whitespace before being embedded.
+
+        Args:
+            candidate: The text to score.
+            reference: Reference text(s) for comparison.
+
+        Returns:
+            SemanticResult with similarity score and model name. A blank
+            candidate yields a similarity of 0.0 without embedding anything.
+
+        Raises:
+            ValueError: If reference is None or an empty list, or if the
+                candidate is non-blank and every reference is blank.
+        """
+        if reference is None:
+            raise ValueError("Semantic similarity requires reference text")
+
+        # Normalise reference to list
+        references = [reference] if isinstance(reference, str) else reference
+
+        if not references:
+            raise ValueError("Reference text cannot be empty")
+
+        # A blank candidate short-circuits before the references are inspected.
+        candidate_stripped = candidate.strip()
+        if not candidate_stripped:
+            return SemanticResult(similarity=0.0, model=self._model_name)
+
+        # Strip each reference once and drop the blank ones.
+        valid_references = [
+            stripped for ref in references if (stripped := ref.strip())
+        ]
+        if not valid_references:
+            raise ValueError("Reference text cannot be empty")
+
+        candidate_embedding = self._get_embedding(candidate_stripped)
+
+        # Best match across references; each similarity is already in [0, 1].
+        max_similarity = max(
+            self._cosine_similarity(candidate_embedding, self._get_embedding(ref))
+            for ref in valid_references
+        )
+
+        return SemanticResult(similarity=max_similarity, model=self._model_name)
+
+    def batch_score(
+        self,
+        candidates: list[str],
+        references: list[str] | list[list[str]] | None = None,
+    ) -> BatchResult[SemanticResult]:
+        """
+        Compute semantic similarity for a batch of candidates.
+
+        Args:
+            candidates: List of texts to score.
+            references: Reference text(s) for each candidate, aligned by
+                position with `candidates`.
+
+        Returns:
+            BatchResult containing individual results and aggregate statistics.
+
+        Raises:
+            ValueError: If references is None or length mismatch.
+        """
+        if references is None:
+            raise ValueError("Semantic similarity requires reference texts")
+
+        if len(candidates) != len(references):
+            raise ValueError(
+                f"Number of candidates ({len(candidates)}) must match "
+                f"number of references ({len(references)})"
+            )
+
+        # Lengths were validated above, so strict=True is only a safety net.
+        results: list[SemanticResult] = [
+            self.score(cand, ref)
+            for cand, ref in zip(candidates, references, strict=True)
+        ]
+
+        # Compute aggregate statistics
+        stats = {
+            "similarity": AggregateStats.from_values([r.similarity for r in results]),
+        }
+
+        return BatchResult(results=results, count=len(results), stats=stats)
+
+    def clear_cache(self) -> None:
+        """Clear the embedding cache (no-op when caching is disabled)."""
+        if self._cache is not None:
+            self._cache.clear()
@@ -0,0 +1,239 @@
+"""Validators module: composable validation checks for text quality.
+
+This module provides validators that apply thresholds to metrics and return
+pass/fail decisions with diagnostics.
+
+Example:
+    >>> from veritext.validators import bleu, length, all_of
+    >>> from veritext.core.types import ValidationContext
+    >>>
+    >>> validator = all_of([
+    ...     bleu(min_score=0.5),
+    ...     length(min_words=10),
+    ... ])
+    >>> context = ValidationContext(reference="The quick brown fox.")
+    >>> result = validator.check("The quick brown fox jumps.", context)
+    >>> print(result.passed)
+"""
+
+from typing import Literal
+
+from veritext.core.tokenisation import WordTokeniser
+from veritext.validators.base import Check
+from veritext.validators.composite import AllOf, AnyOf
+from veritext.validators.constraint import (
+    ContainsValidator,
+    ExcludesValidator,
+    LengthValidator,
+    ReadabilityValidator,
+)
+from veritext.validators.metric import (
+    BleuValidator,
+    LexicalValidator,
+    RougeValidator,
+    SemanticValidator,
+)
+
+
+# Factory functions for clean API
+def bleu(
+    min_score: float,
+    variant: Literal[1, 2, 3, 4] = 4,
+    tokeniser: WordTokeniser | None = None,
+) -> BleuValidator:
+    """Build a `BleuValidator` enforcing a minimum BLEU score.
+
+    Args:
+        min_score: Lowest acceptable BLEU score (0.0 to 1.0).
+        variant: BLEU variant to use (1, 2, 3, or 4); 4 when omitted.
+        tokeniser: Tokeniser forwarded to the validator. ``None`` is passed
+            through so the validator applies its own default.
+
+    Returns:
+        The configured BleuValidator.
+    """
+    validator = BleuValidator(
+        min_score=min_score,
+        variant=variant,
+        tokeniser=tokeniser,
+    )
+    return validator
+
+
+def rouge(
+    min_score: float,
+    variant: Literal["1", "2", "l"] = "l",
+    tokeniser: WordTokeniser | None = None,
+) -> RougeValidator:
+    """Build a `RougeValidator` enforcing a minimum ROUGE F-measure.
+
+    Args:
+        min_score: Lowest acceptable ROUGE F-measure (0.0 to 1.0).
+        variant: ROUGE variant ("1", "2", or "l"); "l" when omitted.
+        tokeniser: Tokeniser forwarded to the validator. ``None`` is passed
+            through so the validator applies its own default.
+
+    Returns:
+        The configured RougeValidator.
+    """
+    validator = RougeValidator(
+        min_score=min_score,
+        variant=variant,
+        tokeniser=tokeniser,
+    )
+    return validator
+
+
+def lexical(
+    min_jaccard: float | None = None,
+    min_overlap: float | None = None,
+    tokeniser: WordTokeniser | None = None,
+) -> LexicalValidator:
+    """Build a `LexicalValidator` for token-level similarity thresholds.
+
+    Args:
+        min_jaccard: Lowest acceptable Jaccard similarity (0.0 to 1.0).
+        min_overlap: Lowest acceptable token overlap (0.0 to 1.0).
+        tokeniser: Tokeniser forwarded to the validator. ``None`` is passed
+            through so the validator applies its own default.
+
+    Returns:
+        The configured LexicalValidator.
+    """
+    validator = LexicalValidator(
+        min_jaccard=min_jaccard,
+        min_overlap=min_overlap,
+        tokeniser=tokeniser,
+    )
+    return validator
+
+
+def length(
+    min_chars: int | None = None,
+    max_chars: int | None = None,
+    min_words: int | None = None,
+    max_words: int | None = None,
+    tokeniser: WordTokeniser | None = None,
+) -> LengthValidator:
+    """Build a `LengthValidator` from character and/or word bounds.
+
+    Args:
+        min_chars: Smallest allowed character count (inclusive).
+        max_chars: Largest allowed character count (inclusive).
+        min_words: Smallest allowed word count (inclusive).
+        max_words: Largest allowed word count (inclusive).
+        tokeniser: Tokeniser used for word counting. ``None`` is passed
+            through so the validator falls back to WordTokeniser().
+
+    Returns:
+        The configured LengthValidator.
+    """
+    validator = LengthValidator(
+        min_chars=min_chars,
+        max_chars=max_chars,
+        min_words=min_words,
+        max_words=max_words,
+        tokeniser=tokeniser,
+    )
+    return validator
+
+
+def readability(
+    max_grade: float | None = None,
+    min_ease: float | None = None,
+) -> ReadabilityValidator:
+    """Build a `ReadabilityValidator` from Flesch-Kincaid thresholds.
+
+    Args:
+        max_grade: Highest Flesch-Kincaid grade level allowed.
+        min_ease: Lowest Flesch Reading Ease score required.
+
+    Returns:
+        The configured ReadabilityValidator.
+    """
+    validator = ReadabilityValidator(
+        max_grade=max_grade,
+        min_ease=min_ease,
+    )
+    return validator
+
+
+def contains(
+    patterns: list[str],
+    case_sensitive: bool = False,
+) -> ContainsValidator:
+    """Build a `ContainsValidator` requiring every pattern to be present.
+
+    Args:
+        patterns: Substrings or regex patterns that must all appear.
+        case_sensitive: Match with case sensitivity; off by default.
+
+    Returns:
+        The configured ContainsValidator.
+    """
+    validator = ContainsValidator(
+        patterns=patterns,
+        case_sensitive=case_sensitive,
+    )
+    return validator
+
+
+def excludes(
+    patterns: list[str],
+    case_sensitive: bool = False,
+) -> ExcludesValidator:
+    """Build an `ExcludesValidator` forbidding every listed pattern.
+
+    Args:
+        patterns: Substrings or regex patterns that must not appear.
+        case_sensitive: Match with case sensitivity; off by default.
+
+    Returns:
+        The configured ExcludesValidator.
+    """
+    validator = ExcludesValidator(
+        patterns=patterns,
+        case_sensitive=case_sensitive,
+    )
+    return validator
+
+
+def all_of(checks: list[Check]) -> AllOf:
+    """Combine checks into an `AllOf` composite (every check must pass).
+
+    Args:
+        checks: The checks to combine.
+
+    Returns:
+        The AllOf composite wrapping `checks`.
+    """
+    composite = AllOf(checks=checks)
+    return composite
+
+
+def any_of(checks: list[Check]) -> AnyOf:
+    """Combine checks into an `AnyOf` composite (at least one must pass).
+
+    Args:
+        checks: The checks to combine.
+
+    Returns:
+        The AnyOf composite wrapping `checks`.
+    """
+    composite = AnyOf(checks=checks)
+    return composite
+
+
+def semantic(
+    min_score: float,
+    model: str = "all-MiniLM-L6-v2",
+    cache_embeddings: bool = True,
+) -> SemanticValidator:
+    """Build a `SemanticValidator` enforcing a minimum semantic similarity.
+
+    Requires the `veritext[semantic]` extra to be installed.
+
+    Args:
+        min_score: Lowest acceptable semantic similarity (0.0 to 1.0).
+        model: Name of the sentence-transformers model to use.
+        cache_embeddings: Whether embeddings of repeated texts are cached.
+
+    Returns:
+        The configured SemanticValidator.
+    """
+    validator = SemanticValidator(
+        min_score=min_score,
+        model=model,
+        cache_embeddings=cache_embeddings,
+    )
+    return validator
+
+
+__all__ = [
+    "AllOf",
+    "AnyOf",
+    "BleuValidator",
+    "Check",
+    "ContainsValidator",
+    "ExcludesValidator",
+    "LengthValidator",
+    "LexicalValidator",
+    "ReadabilityValidator",
+    "RougeValidator",
+    "SemanticValidator",
+    "all_of",
+    "any_of",
+    "bleu",
+    "contains",
+    "excludes",
+    "length",
+    "lexical",
+    "readability",
+    "rouge",
+    "semantic",
+]
@@ -0,0 +1,31 @@
+"""Base types and protocols for validation checks."""
+
+from typing import Protocol, runtime_checkable
+
+from veritext.core.types import CheckResult, ValidationContext
+
+
+# runtime_checkable allows isinstance() checks against this protocol; such
+# checks only verify that the members exist, not their signatures.
+@runtime_checkable
+class Check(Protocol):
+    """Protocol for validation checks.
+
+    A Check computes a score or property of text and compares it against
+    a threshold to produce a pass/fail result.
+
+    Implementations need a ``name`` property and a ``check`` method; no
+    inheritance from this class is required (structural typing).
+    """
+
+    @property
+    def name(self) -> str:
+        """Return the name of this check."""
+        ...
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:
+        """Run the check and return a result.
+
+        Args:
+            text: The text to validate.
+            context: Validation context containing reference text and metadata.
+
+        Returns:
+            CheckResult with pass/fail status and diagnostics.
+        """
+        ...
@@ -0,0 +1,90 @@
+"""Composite validators for combining multiple checks."""
+
+from veritext.core.types import CheckResult, ValidationContext, ValidationResult
+from veritext.validators.base import Check
+
+
+class AllOf:
+    """Composite check that succeeds only when every wrapped check succeeds."""
+
+    def __init__(self, checks: list[Check]) -> None:
+        """
+        Store the checks to be combined.
+
+        Args:
+            checks: Checks that must all pass; the list is kept by reference.
+
+        Raises:
+            ValueError: If checks list is empty.
+        """
+        if len(checks) == 0:
+            raise ValueError("checks list cannot be empty")
+
+        self._checks = checks
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this composite check."""
+        return "all_of"
+
+    def check(self, text: str, context: ValidationContext) -> ValidationResult:
+        """
+        Evaluate every wrapped check and combine the outcomes.
+
+        All checks are run, in order, even after one has failed, so the
+        result carries diagnostics for each of them.
+
+        NOTE(review): this returns ValidationResult, whereas the Check
+        protocol declares CheckResult - confirm that nesting composites
+        inside one another is supported.
+
+        Args:
+            text: The text to validate.
+            context: Validation context containing reference text and metadata.
+
+        Returns:
+            ValidationResult that passes only if all checks pass.
+        """
+        outcomes: list[CheckResult] = [
+            member.check(text, context) for member in self._checks
+        ]
+        return ValidationResult(
+            passed=all(outcome.passed for outcome in outcomes),
+            checks=outcomes,
+        )
+
+
+class AnyOf:
+    """Composite check that succeeds when at least one wrapped check succeeds."""
+
+    def __init__(self, checks: list[Check]) -> None:
+        """
+        Store the checks to be combined.
+
+        Args:
+            checks: Checks of which at least one must pass; the list is kept
+                by reference.
+
+        Raises:
+            ValueError: If checks list is empty.
+        """
+        if len(checks) == 0:
+            raise ValueError("checks list cannot be empty")
+
+        self._checks = checks
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this composite check."""
+        return "any_of"
+
+    def check(self, text: str, context: ValidationContext) -> ValidationResult:
+        """
+        Evaluate every wrapped check and combine the outcomes.
+
+        All checks are run, in order, even after one has passed, so the
+        result carries diagnostics for each of them.
+
+        NOTE(review): this returns ValidationResult, whereas the Check
+        protocol declares CheckResult - confirm that nesting composites
+        inside one another is supported.
+
+        Args:
+            text: The text to validate.
+            context: Validation context containing reference text and metadata.
+
+        Returns:
+            ValidationResult that passes if any check passes.
+        """
+        outcomes: list[CheckResult] = [
+            member.check(text, context) for member in self._checks
+        ]
+        return ValidationResult(
+            passed=any(outcome.passed for outcome in outcomes),
+            checks=outcomes,
+        )
@@ -0,0 +1,337 @@
+"""Constraint validators that do not require reference text."""
+
+import re
+
+from veritext.core.exceptions import InvalidThresholdError
+from veritext.core.tokenisation import WordTokeniser
+from veritext.core.types import CheckResult, ValidationContext
+from veritext.metrics.readability import Readability
+
+
+class LengthValidator:
+    """Check that text length stays within character and/or word bounds."""
+
+    def __init__(
+        self,
+        min_chars: int | None = None,
+        max_chars: int | None = None,
+        min_words: int | None = None,
+        max_words: int | None = None,
+        tokeniser: WordTokeniser | None = None,
+    ) -> None:
+        """
+        Validate and store the length bounds.
+
+        Args:
+            min_chars: Smallest allowed character count (inclusive).
+            max_chars: Largest allowed character count (inclusive).
+            min_words: Smallest allowed word count (inclusive).
+            max_words: Largest allowed word count (inclusive).
+            tokeniser: Tokeniser used for word counting. Defaults to WordTokeniser().
+
+        Raises:
+            InvalidThresholdError: If no bound is given, a bound is negative,
+                or a minimum exceeds its matching maximum.
+        """
+        # Insertion order matters: it fixes which negative bound is reported first.
+        limits = {
+            "min_chars": min_chars,
+            "max_chars": max_chars,
+            "min_words": min_words,
+            "max_words": max_words,
+        }
+
+        if all(limit is None for limit in limits.values()):
+            raise InvalidThresholdError("At least one length constraint must be set")
+
+        for label, limit in limits.items():
+            if limit is not None and limit < 0:
+                raise InvalidThresholdError(f"{label} must be >= 0, got {limit}")
+
+        if min_chars is not None and max_chars is not None and min_chars > max_chars:
+            raise InvalidThresholdError(
+                f"min_chars ({min_chars}) cannot exceed max_chars ({max_chars})"
+            )
+        if min_words is not None and max_words is not None and min_words > max_words:
+            raise InvalidThresholdError(
+                f"min_words ({min_words}) cannot exceed max_words ({max_words})"
+            )
+
+        self._min_chars = min_chars
+        self._max_chars = max_chars
+        self._min_words = min_words
+        self._max_words = max_words
+        self._tokeniser = tokeniser or WordTokeniser()
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this check."""
+        return "length"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
+        """
+        Compare the text's character and word counts with the stored bounds.
+
+        Args:
+            text: The text to validate.
+            context: Validation context (not used for length checks).
+
+        Returns:
+            CheckResult with the measured counts, the configured bounds and a
+            message listing every violated bound.
+        """
+        n_chars = len(text)
+        n_words = len(self._tokeniser.tokenise(text))
+
+        lo_chars, hi_chars = self._min_chars, self._max_chars
+        lo_words, hi_words = self._min_words, self._max_words
+
+        problems = []
+        if lo_chars is not None and n_chars < lo_chars:
+            problems.append(f"{n_chars} chars < min {lo_chars}")
+        if hi_chars is not None and n_chars > hi_chars:
+            problems.append(f"{n_chars} chars > max {hi_chars}")
+        if lo_words is not None and n_words < lo_words:
+            problems.append(f"{n_words} words < min {lo_words}")
+        if hi_words is not None and n_words > hi_words:
+            problems.append(f"{n_words} words > max {hi_words}")
+
+        if problems:
+            message = "Length check failed: " + "; ".join(problems)
+        else:
+            message = f"Length check passed: {n_chars} chars, {n_words} words"
+
+        # Only bounds that were actually configured are reported back.
+        configured = (
+            ("min_chars", lo_chars),
+            ("max_chars", hi_chars),
+            ("min_words", lo_words),
+            ("max_words", hi_words),
+        )
+        threshold = {key: bound for key, bound in configured if bound is not None}
+
+        return CheckResult(
+            name=self.name,
+            passed=not problems,
+            actual={"chars": n_chars, "words": n_words},
+            threshold=threshold,
+            message=message,
+        )
+
+
+class ReadabilityValidator:
+    """Check Flesch-Kincaid grade level and/or Flesch Reading Ease."""
+
+    def __init__(
+        self,
+        max_grade: float | None = None,
+        min_ease: float | None = None,
+    ) -> None:
+        """
+        Store the readability thresholds.
+
+        Args:
+            max_grade: Highest Flesch-Kincaid grade level allowed.
+            min_ease: Lowest Flesch Reading Ease score required.
+
+        Raises:
+            InvalidThresholdError: If neither threshold is provided.
+        """
+        if (max_grade, min_ease) == (None, None):
+            raise InvalidThresholdError(
+                "At least one of max_grade or min_ease must be provided"
+            )
+
+        self._max_grade = max_grade
+        self._min_ease = min_ease
+        self._metric = Readability()
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this check."""
+        return "readability"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
+        """
+        Score the text and compare it with the configured thresholds.
+
+        Args:
+            text: The text to validate.
+            context: Validation context (not used for readability checks).
+
+        Returns:
+            CheckResult with the measured grade and ease, the configured
+            thresholds and a message describing the outcome.
+        """
+        result = self._metric.score(text)
+        grade = result.flesch_kincaid_grade
+        ease = result.flesch_reading_ease
+        max_grade = self._max_grade
+        min_ease = self._min_ease
+
+        # Each configured threshold lands in exactly one of these lists.
+        violated = []
+        satisfied = []
+        threshold = {}
+
+        if max_grade is not None:
+            threshold["max_grade"] = max_grade
+            if grade > max_grade:
+                violated.append(f"grade level {grade:.1f} > max {max_grade:.1f}")
+            else:
+                satisfied.append(f"grade {grade:.1f} <= {max_grade:.1f}")
+
+        if min_ease is not None:
+            threshold["min_ease"] = min_ease
+            if ease < min_ease:
+                violated.append(f"reading ease {ease:.1f} < min {min_ease:.1f}")
+            else:
+                satisfied.append(f"ease {ease:.1f} >= {min_ease:.1f}")
+
+        if violated:
+            message = "Readability: " + "; ".join(violated)
+        else:
+            message = "Readability: " + ", ".join(satisfied)
+
+        return CheckResult(
+            name=self.name,
+            passed=not violated,
+            actual={"grade": grade, "ease": ease},
+            threshold=threshold,
+            message=message,
+        )
+
+
+class ContainsValidator:
+    """Check that every required pattern occurs in the text."""
+
+    def __init__(
+        self,
+        patterns: list[str],
+        case_sensitive: bool = False,
+    ) -> None:
+        """
+        Store the required patterns and matching mode.
+
+        Args:
+            patterns: Patterns that must be present. Each one is handed to
+                `re.search`, so regex metacharacters are interpreted.
+            case_sensitive: Match with case sensitivity. Defaults to False.
+
+        Raises:
+            InvalidThresholdError: If patterns list is empty.
+        """
+        if len(patterns) == 0:
+            raise InvalidThresholdError("patterns list cannot be empty")
+
+        self._patterns = patterns
+        self._case_sensitive = case_sensitive
+        self._flags = re.IGNORECASE if not case_sensitive else 0
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this check."""
+        return "contains"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
+        """
+        Search the text for each required pattern.
+
+        Args:
+            text: The text to validate.
+            context: Validation context (not used for contains checks).
+
+        Returns:
+            CheckResult that passes when no pattern is missing; the missing
+            patterns are listed in `actual` and in the message.
+        """
+        total = len(self._patterns)
+        absent = [
+            pattern
+            for pattern in self._patterns
+            if re.search(pattern, text, self._flags) is None
+        ]
+
+        if absent:
+            message = f"Text missing {len(absent)} pattern(s): {absent}"
+        else:
+            message = f"Text contains all {total} required pattern(s)"
+
+        return CheckResult(
+            name=self.name,
+            passed=not absent,
+            actual={"found": total - len(absent), "missing": absent},
+            threshold={"patterns": self._patterns},
+            message=message,
+        )
+
+
+class ExcludesValidator:
+    """Check that no forbidden pattern occurs in the text."""
+
+    def __init__(
+        self,
+        patterns: list[str],
+        case_sensitive: bool = False,
+    ) -> None:
+        """
+        Store the forbidden patterns and matching mode.
+
+        Args:
+            patterns: Patterns that must not be present. Each one is handed
+                to `re.search`, so regex metacharacters are interpreted.
+            case_sensitive: Match with case sensitivity. Defaults to False.
+
+        Raises:
+            InvalidThresholdError: If patterns list is empty.
+        """
+        if len(patterns) == 0:
+            raise InvalidThresholdError("patterns list cannot be empty")
+
+        self._patterns = patterns
+        self._case_sensitive = case_sensitive
+        self._flags = re.IGNORECASE if not case_sensitive else 0
+
+    @property
+    def name(self) -> str:
+        """Return the identifier of this check."""
+        return "excludes"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:  # noqa: ARG002
+        """
+        Search the text for each forbidden pattern.
+
+        Args:
+            text: The text to validate.
+            context: Validation context (not used for excludes checks).
+
+        Returns:
+            CheckResult that passes when no forbidden pattern matches; the
+            matching patterns are listed in `actual` and in the message.
+        """
+        total = len(self._patterns)
+        offending = [
+            pattern
+            for pattern in self._patterns
+            if re.search(pattern, text, self._flags) is not None
+        ]
+
+        if offending:
+            message = f"Text contains {len(offending)} forbidden pattern(s): {offending}"
+        else:
+            message = f"Text excludes all {total} forbidden pattern(s)"
+
+        return CheckResult(
+            name=self.name,
+            passed=not offending,
+            actual={"excluded": total - len(offending), "found": offending},
+            threshold={"patterns": self._patterns},
+            message=message,
+        )
@@ -0,0 +1,370 @@
+"""Metric-based validators that require reference text."""
+
+from typing import Literal
+
+from veritext.core.exceptions import InvalidThresholdError, ValidationError
+from veritext.core.tokenisation import WordTokeniser
+from veritext.core.types import CheckResult, ValidationContext
+from veritext.metrics.bleu import Bleu
+from veritext.metrics.lexical import Lexical
+from veritext.metrics.rouge import Rouge
+
+
+class BleuValidator:
+    """Pass/fail check comparing one BLEU variant against a minimum score."""
+
+    def __init__(
+        self,
+        min_score: float,
+        variant: Literal[1, 2, 3, 4] = 4,
+        tokeniser: WordTokeniser | None = None,
+    ) -> None:
+        """
+        Configure a minimum-score check on one BLEU variant.
+
+        Args:
+            min_score: Lowest acceptable BLEU score, within [0.0, 1.0].
+            variant: Which BLEU-n score to compare (1, 2, 3, or 4). Defaults to 4.
+            tokeniser: Tokeniser handed to the Bleu metric; may be None.
+
+        Raises:
+            InvalidThresholdError: If min_score or variant is out of range.
+        """
+        if not 0.0 <= min_score <= 1.0:
+            problem = f"min_score must be between 0.0 and 1.0, got {min_score}"
+            raise InvalidThresholdError(problem)
+        if variant not in (1, 2, 3, 4):
+            problem = f"variant must be 1, 2, 3, or 4, got {variant}"
+            raise InvalidThresholdError(problem)
+
+        self._metric = Bleu(tokeniser=tokeniser)
+        self._variant = variant
+        self._min_score = min_score
+
+    @property
+    def name(self) -> str:
+        """Return the check name, which embeds the configured variant."""
+        return f"bleu-{self._variant}"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:
+        """
+        Score the text against the reference and compare with the minimum.
+
+        Args:
+            text: The text to validate.
+            context: Validation context; its reference must not be None.
+
+        Returns:
+            CheckResult whose actual value is the selected BLEU score.
+
+        Raises:
+            ValidationError: If reference text is missing from context.
+        """
+        reference = context.reference
+        if reference is None:
+            raise ValidationError(f"{self.name} requires reference text in context")
+
+        result = self._metric.score(text, reference)
+
+        # All four orders are read from the result; only the configured one is used.
+        per_order = {
+            1: result.bleu1,
+            2: result.bleu2,
+            3: result.bleu3,
+            4: result.bleu4,
+        }
+        actual_score = per_order[self._variant]
+        # The threshold is inclusive: a score equal to min_score passes.
+        passed = actual_score >= self._min_score
+
+        # Identical wording for both outcomes apart from "meets" versus "below".
+        message = (
+            f"BLEU-{self._variant} score {actual_score:.2f} "
+            f"meets minimum {self._min_score:.2f}"
+            if passed
+            else f"BLEU-{self._variant} score {actual_score:.2f} "
+            f"below minimum {self._min_score:.2f}"
+        )
+
+        return CheckResult(
+            name=self.name,
+            passed=passed,
+            actual=actual_score,
+            threshold=self._min_score,
+            message=message,
+        )
+
+
+class RougeValidator:
+    """Pass/fail check comparing one ROUGE F-measure against a minimum score."""
+
+    def __init__(
+        self,
+        min_score: float,
+        variant: Literal["1", "2", "l"] = "l",
+        tokeniser: WordTokeniser | None = None,
+    ) -> None:
+        """
+        Configure a minimum-score check on one ROUGE variant.
+
+        Args:
+            min_score: Lowest acceptable ROUGE F-measure, within [0.0, 1.0].
+            variant: ROUGE variant to compare ("1", "2", or "l"). Defaults to "l".
+            tokeniser: Tokeniser handed to the Rouge metric; may be None.
+
+        Raises:
+            InvalidThresholdError: If min_score or variant is out of range.
+        """
+        # Arguments are validated before the metric object is constructed.
+        if not 0.0 <= min_score <= 1.0:
+            problem = f"min_score must be between 0.0 and 1.0, got {min_score}"
+            raise InvalidThresholdError(problem)
+        # The variant is matched exactly, so only a lowercase "l" is accepted.
+        if variant not in ("1", "2", "l"):
+            problem = f"variant must be '1', '2', or 'l', got '{variant}'"
+            raise InvalidThresholdError(problem)
+
+        self._metric = Rouge(tokeniser=tokeniser)
+        self._variant = variant
+        self._min_score = min_score
+
+    @property
+    def name(self) -> str:
+        """Return the check name, which embeds the configured variant."""
+        return f"rouge-{self._variant}"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:
+        """
+        Score the text against the reference and compare with the minimum.
+
+        Args:
+            text: The text to validate.
+            context: Validation context; its reference must not be None.
+
+        Returns:
+            CheckResult whose actual value is the selected ROUGE F-measure.
+
+        Raises:
+            ValidationError: If reference text is missing from context.
+        """
+        reference = context.reference
+        if reference is None:
+            raise ValidationError(f"{self.name} requires reference text in context")
+
+        result = self._metric.score(text, reference)
+
+        # The F-measure of each variant is read; only the configured one is used.
+        fmeasures = {
+            "1": result.rouge1.fmeasure,
+            "2": result.rouge2.fmeasure,
+            "l": result.rouge_l.fmeasure,
+        }
+        actual_score = fmeasures[self._variant]
+        # The threshold is inclusive: a score equal to min_score passes.
+        passed = actual_score >= self._min_score
+
+        # Identical wording for both outcomes apart from "meets" versus "below".
+        message = (
+            f"ROUGE-{self._variant.upper()} score {actual_score:.2f} "
+            f"meets minimum {self._min_score:.2f}"
+            if passed
+            else f"ROUGE-{self._variant.upper()} score {actual_score:.2f} "
+            f"below minimum {self._min_score:.2f}"
+        )
+
+        return CheckResult(
+            name=self.name,
+            passed=passed,
+            actual=actual_score,
+            threshold=self._min_score,
+            message=message,
+        )
+
+
+class LexicalValidator:
+    """Pass/fail check on Jaccard similarity and/or token overlap minimums."""
+
+    def __init__(
+        self,
+        min_jaccard: float | None = None,
+        min_overlap: float | None = None,
+        tokeniser: WordTokeniser | None = None,
+    ) -> None:
+        """
+        Configure the lexical check with one or both minimum bounds.
+
+        Args:
+            min_jaccard: Lowest acceptable Jaccard similarity, within [0.0, 1.0].
+            min_overlap: Lowest acceptable token overlap, within [0.0, 1.0].
+            tokeniser: Tokeniser handed to the Lexical metric; may be None.
+
+        Raises:
+            InvalidThresholdError: If no bound is given or a bound is out of range.
+        """
+        if all(bound is None for bound in (min_jaccard, min_overlap)):
+            raise InvalidThresholdError(
+                "At least one of min_jaccard or min_overlap must be provided"
+            )
+
+        # Each supplied bound must lie within [0.0, 1.0].
+        if min_jaccard is not None:
+            if not 0.0 <= min_jaccard <= 1.0:
+                problem = f"min_jaccard must be between 0.0 and 1.0, got {min_jaccard}"
+                raise InvalidThresholdError(problem)
+        if min_overlap is not None:
+            if not 0.0 <= min_overlap <= 1.0:
+                problem = f"min_overlap must be between 0.0 and 1.0, got {min_overlap}"
+                raise InvalidThresholdError(problem)
+
+        self._metric = Lexical(tokeniser=tokeniser)
+        self._min_overlap = min_overlap
+        self._min_jaccard = min_jaccard
+
+    @property
+    def name(self) -> str:
+        """Return the fixed name of this check."""
+        return "lexical"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:
+        """
+        Score the text against the reference and test each configured bound.
+
+        Args:
+            text: The text to validate.
+            context: Validation context; its reference must not be None.
+
+        Returns:
+            CheckResult that passes only when no configured bound is missed.
+
+        Raises:
+            ValidationError: If reference text is missing from context.
+        """
+        reference = context.reference
+        if reference is None:
+            raise ValidationError(f"{self.name} requires reference text in context")
+
+        result = self._metric.score(text, reference)
+
+        failures = []
+        if self._min_jaccard is not None and result.jaccard < self._min_jaccard:
+            failures.append(
+                f"Jaccard {result.jaccard:.2f} below minimum {self._min_jaccard:.2f}"
+            )
+        if self._min_overlap is not None and result.token_overlap < self._min_overlap:
+            failures.append(
+                f"token overlap {result.token_overlap:.2f} "
+                f"below minimum {self._min_overlap:.2f}"
+            )
+        passed = not failures
+
+        if failures:
+            message = "Lexical similarity: " + "; ".join(failures)
+        else:
+            parts = []
+            if self._min_jaccard is not None:
+                parts.append(f"Jaccard {result.jaccard:.2f} >= {self._min_jaccard:.2f}")
+            if self._min_overlap is not None:
+                parts.append(
+                    f"overlap {result.token_overlap:.2f} >= {self._min_overlap:.2f}"
+                )
+            message = "Lexical similarity: " + ", ".join(parts)
+
+        actual = {"jaccard": result.jaccard, "token_overlap": result.token_overlap}
+        threshold = {
+            key: bound
+            for key, bound in (
+                ("min_jaccard", self._min_jaccard),
+                ("min_overlap", self._min_overlap),
+            )
+            if bound is not None
+        }
+
+        return CheckResult(
+            name=self.name,
+            passed=passed,
+            actual=actual,
+            threshold=threshold,
+            message=message,
+        )
+
+
+class SemanticValidator:
+    """Checks embedding-based similarity to a reference against a minimum.
+
+    Requires the `veritext[semantic]` extra to be installed.
+    """
+
+    def __init__(
+        self,
+        min_score: float,
+        model: str = "all-MiniLM-L6-v2",
+        cache_embeddings: bool = True,
+    ) -> None:
+        """
+        Configure a minimum-similarity check backed by SemanticSimilarity.
+
+        Args:
+            min_score: Lowest acceptable semantic similarity, within [0.0, 1.0].
+            model: Name of the sentence-transformers model to use.
+            cache_embeddings: Forwarded to SemanticSimilarity as cache_embeddings.
+
+        Raises:
+            InvalidThresholdError: If min_score is not in range [0.0, 1.0].
+            DependencyError: If sentence-transformers is not installed.
+        """
+        if not 0.0 <= min_score <= 1.0:
+            problem = f"min_score must be between 0.0 and 1.0, got {min_score}"
+            raise InvalidThresholdError(problem)
+
+        # Imported here, after validation, so the heavy optional dependency
+        # (PyTorch) is only loaded when a semantic validator is really built.
+        from veritext.semantic.similarity import SemanticSimilarity
+
+        self._min_score = min_score
+        self._metric: SemanticSimilarity = SemanticSimilarity(
+            model=model, cache_embeddings=cache_embeddings
+        )
+
+    @property
+    def name(self) -> str:
+        """Return the fixed name of this check."""
+        return "semantic"
+
+    def check(self, text: str, context: ValidationContext) -> CheckResult:
+        """
+        Score the text against the reference and compare with the minimum.
+
+        Args:
+            text: The text to validate.
+            context: Validation context; its reference must not be None.
+
+        Returns:
+            CheckResult whose actual value is the measured similarity.
+
+        Raises:
+            ValidationError: If reference text is missing from context.
+        """
+        reference = context.reference
+        if reference is None:
+            raise ValidationError(f"{self.name} requires reference text in context")
+
+        result = self._metric.score(text, reference)
+        # The threshold is inclusive: a similarity equal to min_score passes.
+        passed = result.similarity >= self._min_score
+
+        # Identical wording for both outcomes apart from "meets" versus "below".
+        message = (
+            f"Semantic similarity {result.similarity:.2f} "
+            f"meets minimum {self._min_score:.2f}"
+            if passed
+            else f"Semantic similarity {result.similarity:.2f} "
+            f"below minimum {self._min_score:.2f}"
+        )
+
+        return CheckResult(
+            name=self.name,
+            passed=passed,
+            actual=result.similarity,
+            threshold=self._min_score,
+            message=message,
+        )
@@ -0,0 +1 @@
+"""Tests for semantic similarity module."""
@@ -0,0 +1,240 @@
+"""Tests for the semantic similarity metric."""
+
+import pytest
+
+# Skip all tests if sentence-transformers is not installed
+pytest.importorskip("sentence_transformers")
+
+from veritext.metrics.results import SemanticResult
+from veritext.semantic import SemanticSimilarity
+
+
+class TestSemanticSimilarity:
+    """Single-pair scoring behaviour of the SemanticSimilarity metric."""
+
+    @pytest.fixture
+    def semantic(self) -> SemanticSimilarity:
+        """Build a default-configured SemanticSimilarity for each test."""
+        return SemanticSimilarity()
+
+    def test_name(self, semantic: SemanticSimilarity) -> None:
+        """The metric reports its name as 'semantic'."""
+        assert semantic.name == "semantic"
+
+    def test_requires_reference(self, semantic: SemanticSimilarity) -> None:
+        """The metric declares that it needs reference text."""
+        assert semantic.requires_reference is True
+
+    def test_identical_texts(self, semantic: SemanticSimilarity) -> None:
+        """Scoring a sentence against itself gives near-perfect similarity."""
+        sentence = "The cat sat on the mat"
+        outcome = semantic.score(sentence, sentence)
+
+        # Allow a little floating-point slack below 1.0.
+        assert outcome.similarity >= 0.99
+        assert outcome.model == "all-MiniLM-L6-v2"
+
+    def test_semantically_similar_texts(self, semantic: SemanticSimilarity) -> None:
+        """Paraphrases with different wording still score above a modest floor."""
+        outcome = semantic.score(
+            "The cat sat on the mat",
+            "A feline rested on the rug",
+        )
+
+        assert outcome.similarity > 0.3
+
+    def test_unrelated_texts(self, semantic: SemanticSimilarity) -> None:
+        """Sentences on unrelated topics stay below a mid-range ceiling."""
+        outcome = semantic.score(
+            "The quick brown fox",
+            "Quantum physics describes particle behaviour",
+        )
+
+        assert outcome.similarity < 0.5
+
+    def test_empty_candidate(self, semantic: SemanticSimilarity) -> None:
+        """An empty candidate string scores exactly zero."""
+        outcome = semantic.score("", "The cat sat on the mat")
+        assert outcome.similarity == 0.0
+
+    def test_whitespace_only_candidate(self, semantic: SemanticSimilarity) -> None:
+        """A candidate made only of whitespace scores exactly zero."""
+        outcome = semantic.score("   \t\n  ", "The cat sat on the mat")
+        assert outcome.similarity == 0.0
+
+    def test_none_reference_raises(self, semantic: SemanticSimilarity) -> None:
+        """Passing None as the reference is rejected with ValueError."""
+        with pytest.raises(ValueError, match="requires reference"):
+            semantic.score("The cat sat", None)
+
+    def test_empty_reference_raises(self, semantic: SemanticSimilarity) -> None:
+        """Passing an empty reference string is rejected with ValueError."""
+        with pytest.raises(ValueError, match="cannot be empty"):
+            semantic.score("The cat sat", "")
+
+    def test_whitespace_reference_raises(self, semantic: SemanticSimilarity) -> None:
+        """Passing a whitespace-only reference is rejected with ValueError."""
+        with pytest.raises(ValueError, match="cannot be empty"):
+            semantic.score("The cat sat", "   \t\n  ")
+
+    def test_multiple_references(self, semantic: SemanticSimilarity) -> None:
+        """An exact match among several references yields a near-perfect score."""
+        sentence = "The cat sat on the mat"
+        outcome = semantic.score(
+            sentence,
+            ["A dog ran through the park", sentence],
+        )
+
+        # The second reference is the candidate itself, so the best match is
+        # expected to be near-perfect.
+        assert outcome.similarity >= 0.99
+
+    def test_multiple_references_takes_max(self, semantic: SemanticSimilarity) -> None:
+        """The better-matching reference is expected to set the reported score."""
+        candidate = "The cat sat on the mat"
+        unrelated = "Quantum physics is complex"
+        paraphrase = "A feline rested on the rug"
+
+        outcome = semantic.score(candidate, [unrelated, paraphrase])
+
+        # The floor is expected to be cleared by the paraphrase, not by the
+        # unrelated sentence.
+        assert outcome.similarity > 0.3
+
+    def test_result_score_property(self, semantic: SemanticSimilarity) -> None:
+        """The result's score property mirrors its similarity field."""
+        outcome = semantic.score("The cat sat", "The cat sat")
+        assert outcome.score == outcome.similarity
+
+    def test_caching_behaviour(self) -> None:
+        """Cached scoring is repeatable, both before and after clearing the cache."""
+        metric = SemanticSimilarity(cache_embeddings=True)
+        sentence = "The cat sat on the mat"
+
+        # Two back-to-back calls on the same input must agree with each
+        # other exactly.
+        first = metric.score(sentence, sentence)
+        second = metric.score(sentence, sentence)
+        assert first.similarity == second.similarity
+
+        # Emptying the cache and scoring again must give the same value as
+        # before.
+        metric.clear_cache()
+        third = metric.score(sentence, sentence)
+        assert third.similarity == first.similarity
+
+    def test_caching_disabled(self) -> None:
+        """Scoring still works, and clear_cache() is harmless, with caching off."""
+        metric = SemanticSimilarity(cache_embeddings=False)
+        sentence = "The cat sat on the mat"
+
+        first = metric.score(sentence, sentence)
+        second = metric.score(sentence, sentence)
+
+        # Disabling the cache must not make results differ between calls.
+        assert first.similarity == second.similarity
+
+        # clear_cache() must not raise even though caching is switched off.
+        metric.clear_cache()
+
+    def test_custom_model(self) -> None:
+        """An explicitly passed model name is echoed back on the result."""
+        # NOTE: this names the default model, so only the pass-through is exercised.
+        model_name = "all-MiniLM-L6-v2"
+        outcome = SemanticSimilarity(model=model_name).score("Test text", "Test text")
+        assert outcome.model == model_name
+
+
+class TestSemanticSimilarityBatch:
+    """Batch scoring behaviour of the SemanticSimilarity metric."""
+
+    @pytest.fixture
+    def semantic(self) -> SemanticSimilarity:
+        """Build a default-configured SemanticSimilarity for each test."""
+        return SemanticSimilarity()
+
+    def test_batch_score_basic(self, semantic: SemanticSimilarity) -> None:
+        """A batch of self-matching pairs yields one high score per pair."""
+        texts = ["The cat sat on the mat", "A quick brown dog runs fast"]
+        batch = semantic.batch_score(texts, list(texts))
+
+        assert batch.count == 2
+        assert len(batch.results) == 2
+        # Each candidate is scored against an identical reference.
+        for item in batch.results:
+            assert item.similarity >= 0.99
+
+    def test_batch_score_statistics(self, semantic: SemanticSimilarity) -> None:
+        """Batch results expose summary statistics keyed by 'similarity'."""
+        batch = semantic.batch_score(
+            ["The cat sat", "Quantum physics is complex"],
+            ["The cat sat", "The cat sat"],
+        )
+
+        assert "similarity" in batch.stats
+
+        # The mean must lie within the observed range.
+        summary = batch.stats["similarity"]
+        assert summary.min <= summary.mean <= summary.max
+
+    def test_batch_score_percentiles(self, semantic: SemanticSimilarity) -> None:
+        """The similarity statistics include the 25/50/75/95 percentiles."""
+        letters = ["a", "b", "c", "d", "e"]
+        batch = semantic.batch_score(letters, list(letters))
+
+        percentiles = batch.stats["similarity"].percentiles
+        # Each expected percentile key must be reported.
+        assert 25 in percentiles
+        assert 50 in percentiles
+        assert 75 in percentiles
+        assert 95 in percentiles
+
+    def test_batch_score_none_references_raises(
+        self, semantic: SemanticSimilarity
+    ) -> None:
+        """Passing None for the references is rejected with ValueError."""
+        with pytest.raises(ValueError, match="requires reference"):
+            semantic.batch_score(["text"], None)
+
+    def test_batch_score_length_mismatch_raises(
+        self, semantic: SemanticSimilarity
+    ) -> None:
+        """Unequal numbers of candidates and references raise ValueError."""
+        with pytest.raises(ValueError, match="must match"):
+            semantic.batch_score(["a", "b"], ["a"])
+
+    def test_batch_score_with_multiple_references(
+        self, semantic: SemanticSimilarity
+    ) -> None:
+        """Each candidate may be paired with its own list of references."""
+        cat_text = "The cat sat on the mat"
+        dog_text = "A quick brown dog runs fast"
+        references = [
+            [cat_text, "A cat rests on floor"],
+            [dog_text, "Dogs run very quickly"],
+        ]
+
+        batch = semantic.batch_score([cat_text, dog_text], references)
+
+        assert batch.count == 2
+        # Every candidate appears verbatim among its own references, so both
+        # pairs are expected to score near-perfect.
+        assert batch.results[0].similarity >= 0.99
+        assert batch.results[1].similarity >= 0.99
+
+
+class TestSemanticResult:
+    """Behaviour of the SemanticResult model itself."""
+
+    def test_frozen(self) -> None:
+        """Assigning to a field of an existing SemanticResult is rejected."""
+        from pydantic import ValidationError
+
+        frozen = SemanticResult(similarity=0.85, model="test-model")
+        with pytest.raises(ValidationError):
+            frozen.similarity = 0.9  # type: ignore[misc]
+
+    def test_score_property(self) -> None:
+        """The score property returns the similarity the result was built with."""
+        built = SemanticResult(similarity=0.75, model="test-model")
+        assert built.score == 0.75
@@ -0,0 +1 @@
+"""Tests for the validators module."""
@@ -0,0 +1,198 @@
+"""Tests for composite validators."""
+
+import pytest
+
+from veritext.core.types import ValidationContext
+from veritext.validators import all_of, any_of, bleu, contains, excludes, length
+from veritext.validators.composite import AllOf, AnyOf
+
+
+class TestAllOf:
+    """Behaviour of the AllOf composite, which needs every check to pass."""
+
+    def test_all_of_passes_when_all_checks_pass(self) -> None:
+        """Both member checks succeed, so the composite succeeds."""
+        checks = [
+            length(min_words=2),
+            contains(patterns=["hello"]),
+        ]
+        validator = AllOf(checks=checks)
+        context = ValidationContext()
+
+        outcome = validator.check("hello world", context)
+
+        assert outcome.passed is True
+        assert len(outcome.checks) == 2
+        assert all(c.passed for c in outcome.checks)
+
+    def test_all_of_fails_when_one_check_fails(self) -> None:
+        """A single failing member check makes the composite fail."""
+        checks = [
+            length(min_words=2),
+            contains(patterns=["goodbye"]),
+        ]
+        validator = AllOf(checks=checks)
+        context = ValidationContext()
+
+        outcome = validator.check("hello world", context)
+
+        assert outcome.passed is False
+        assert len(outcome.checks) == 2
+        assert len(outcome.failed_checks) == 1
+
+    def test_all_of_fails_when_all_checks_fail(self) -> None:
+        """When every member check fails, each one is listed as failed."""
+        checks = [
+            length(min_words=10),
+            contains(patterns=["goodbye"]),
+        ]
+        validator = AllOf(checks=checks)
+        context = ValidationContext()
+
+        outcome = validator.check("hello", context)
+
+        assert outcome.passed is False
+        assert len(outcome.failed_checks) == 2
+
+    def test_all_of_with_metric_validators(self) -> None:
+        """A reference-based check and a constraint check can be combined."""
+        checks = [
+            bleu(min_score=0.5),
+            length(min_words=3),
+        ]
+        validator = AllOf(checks=checks)
+        context = ValidationContext(reference="the quick brown fox")
+
+        outcome = validator.check("the quick brown fox jumps", context)
+
+        assert outcome.passed is True
+        assert len(outcome.checks) == 2
+
+    def test_all_of_failure_summary(self) -> None:
+        """The failure summary names each check that did not pass."""
+        checks = [
+            length(min_words=10),
+            contains(patterns=["goodbye"]),
+        ]
+        validator = AllOf(checks=checks)
+        context = ValidationContext()
+
+        # Both checks fail for a one-word text that lacks "goodbye".
+        summary = validator.check("hello", context).failure_summary
+
+        assert "failed" in summary.lower()
+        assert "length" in summary
+        assert "contains" in summary
+
+    def test_all_of_raises_on_empty_checks(self) -> None:
+        """Constructing AllOf with no checks raises ValueError."""
+        with pytest.raises(ValueError, match="cannot be empty"):
+            AllOf(checks=[])
+
+    def test_all_of_name_property(self) -> None:
+        """The composite reports its name as 'all_of'."""
+        # Any single valid check is enough to construct the composite.
+        assert AllOf(checks=[length(min_chars=1)]).name == "all_of"
+
+    def test_all_of_factory_function(self) -> None:
+        """The all_of() factory returns an AllOf instance."""
+        built = all_of(checks=[length(min_chars=1)])
+        assert isinstance(built, AllOf)
+
+
+class TestAnyOf:
+    """Behaviour of the AnyOf composite, which needs one check to pass."""
+
+    def test_any_of_passes_when_any_check_passes(self) -> None:
+        """One passing member check is enough for the composite to pass."""
+        checks = [
+            length(min_words=10),  # Will fail
+            contains(patterns=["hello"]),  # Will pass
+        ]
+        validator = AnyOf(checks=checks)
+        context = ValidationContext()
+
+        outcome = validator.check("hello world", context)
+
+        assert outcome.passed is True
+        assert len(outcome.checks) == 2
+        # The individual results must include at least one pass.
+        assert any(c.passed for c in outcome.checks)
+
+    def test_any_of_passes_when_all_checks_pass(self) -> None:
+        """The composite also passes when every member check passes."""
+        checks = [
+            length(min_words=2),
+            contains(patterns=["hello"]),
+        ]
+        validator = AnyOf(checks=checks)
+        context = ValidationContext()
+
+        outcome = validator.check("hello world", context)
+
+        assert outcome.passed is True
+        assert all(c.passed for c in outcome.checks)
+
+    def test_any_of_fails_when_all_checks_fail(self) -> None:
+        """The composite fails only when no member check passes."""
+        checks = [
+            length(min_words=10),
+            contains(patterns=["goodbye"]),
+        ]
+        validator = AnyOf(checks=checks)
+        context = ValidationContext()
+
+        outcome = validator.check("hello", context)
+
+        assert outcome.passed is False
+        assert not any(c.passed for c in outcome.checks)
+
+    def test_any_of_with_metric_validators(self) -> None:
+        """A passing constraint check carries the composite past a strict BLEU."""
+        checks = [
+            bleu(min_score=0.9),  # Might fail
+            length(min_words=3),  # Should pass
+        ]
+        validator = AnyOf(checks=checks)
+        context = ValidationContext(reference="different text entirely")
+
+        outcome = validator.check("the quick brown fox jumps", context)
+
+        assert outcome.passed is True  # Length check passes
+
+    def test_any_of_with_excludes(self) -> None:
+        """Two excludes checks together fail only when both words are present."""
+        validator = AnyOf(
+            checks=[
+                excludes(patterns=["error"]),
+                excludes(patterns=["warning"]),
+            ]
+        )
+        context = ValidationContext()
+
+        # Each entry pairs an input text with the expected composite outcome.
+        cases = [
+            # Neither forbidden word appears: both checks pass.
+            ("All is well", True),
+            # Only "error" appears: the "warning" check still passes.
+            ("This is an error", True),
+            # Both words appear: no check passes.
+            ("error and warning", False),
+        ]
+        for sample, expected in cases:
+            assert validator.check(sample, context).passed is expected
+
+    def test_any_of_raises_on_empty_checks(self) -> None:
+        """Constructing AnyOf with no checks raises ValueError."""
+        with pytest.raises(ValueError, match="cannot be empty"):
+            AnyOf(checks=[])
+
+    def test_any_of_name_property(self) -> None:
+        """The composite reports its name as 'any_of'."""
+        # Any single valid check is enough to construct the composite.
+        assert AnyOf(checks=[length(min_chars=1)]).name == "any_of"
+
+    def test_any_of_factory_function(self) -> None:
+        """The any_of() factory returns an AnyOf instance."""
+        built = any_of(checks=[length(min_chars=1)])
+        assert isinstance(built, AnyOf)
@@ -0,0 +1,334 @@
+"""Tests for constraint validators."""
+
+import pytest
+
+from veritext.core.exceptions import InvalidThresholdError
+from veritext.core.types import ValidationContext
+from veritext.validators import contains, excludes, length, readability
+from veritext.validators.constraint import (
+    ContainsValidator,
+    ExcludesValidator,
+    LengthValidator,
+    ReadabilityValidator,
+)
+
+
+class TestLengthValidator:
+    """Tests for LengthValidator."""
+
+    def test_length_validator_min_chars_passes(self) -> None:
+        """Test that validator passes when char count meets minimum."""
+        validator = LengthValidator(min_chars=10)
+        context = ValidationContext()  # no reference text is supplied
+        result = validator.check("hello world!", context)
+
+        assert result.passed is True
+        assert result.name == "length"
+        assert result.actual["chars"] == 12  # len("hello world!") == 12
+
+    def test_length_validator_min_chars_fails(self) -> None:
+        """Test that validator fails when char count below minimum."""
+        validator = LengthValidator(min_chars=20)
+        context = ValidationContext()
+        result = validator.check("hello", context)  # 5 characters, below the minimum of 20
+
+        assert result.passed is False
+        assert "< min" in result.message  # only this fragment of the message is pinned
+
+    def test_length_validator_max_chars_passes(self) -> None:
+        """Test that validator passes when char count within maximum."""
+        validator = LengthValidator(max_chars=20)
+        context = ValidationContext()
+        result = validator.check("hello world", context)
+
+        assert result.passed is True
+        assert result.actual["chars"] == 11  # len("hello world") == 11
+
+    def test_length_validator_max_chars_fails(self) -> None:
+        """Test that validator fails when char count exceeds maximum."""
+        validator = LengthValidator(max_chars=5)
+        context = ValidationContext()
+        result = validator.check("hello world", context)  # 11 characters, above the maximum of 5
+
+        assert result.passed is False
+        assert "> max" in result.message  # only this fragment of the message is pinned
+
+    def test_length_validator_min_words_passes(self) -> None:
+        """Test that validator passes when word count meets minimum."""
+        validator = LengthValidator(min_words=3)
+        context = ValidationContext()
+        result = validator.check("the quick brown fox", context)
+
+        assert result.passed is True
+        assert result.actual["words"] == 4  # "the quick brown fox" is expected to count as 4 words
+
+    def test_length_validator_min_words_fails(self) -> None:
+        """Test that validator fails when word count below minimum."""
+        validator = LengthValidator(min_words=10)
+        context = ValidationContext()
+        result = validator.check("hello world", context)  # two words, below the minimum of 10
+
+        assert result.passed is False
+        assert "words < min" in result.message  # message must name the word constraint
+
+    def test_length_validator_max_words_passes(self) -> None:
+        """Test that validator passes when word count within maximum."""
+        validator = LengthValidator(max_words=5)
+        context = ValidationContext()
+        result = validator.check("hello world", context)  # two words, within the maximum of 5
+
+        assert result.passed is True
+
+    def test_length_validator_max_words_fails(self) -> None:
+        """Test that validator fails when word count exceeds maximum."""
+        validator = LengthValidator(max_words=2)
+        context = ValidationContext()
+        result = validator.check("the quick brown fox", context)  # four words, above the maximum of 2
+
+        assert result.passed is False
+        assert "words > max" in result.message  # message must name the word constraint
+
+    def test_length_validator_combined_constraints(self) -> None:
+        """Test validator with multiple constraints."""
+        validator = LengthValidator(
+            min_chars=5, max_chars=50, min_words=2, max_words=10
+        )
+        context = ValidationContext()
+        result = validator.check("the quick brown fox", context)  # 19 chars / 4 words: inside every bound
+
+        assert result.passed is True
+        assert "min_chars" in result.threshold  # each configured bound is expected to appear in threshold
+        assert "max_chars" in result.threshold
+        assert "min_words" in result.threshold
+        assert "max_words" in result.threshold
+
+    def test_length_validator_raises_when_no_constraints(self) -> None:
+        """Test that validator raises when no constraints provided."""
+        with pytest.raises(InvalidThresholdError, match="At least one"):
+            LengthValidator()  # no bound at all must be rejected at construction
+
+    def test_length_validator_raises_on_negative_values(self) -> None:
+        """Test that negative constraint values raise error."""
+        with pytest.raises(InvalidThresholdError, match="min_chars must be >= 0"):  # one block per bound so each message is checked separately
+            LengthValidator(min_chars=-1)
+
+        with pytest.raises(InvalidThresholdError, match="max_chars must be >= 0"):
+            LengthValidator(max_chars=-1)
+
+        with pytest.raises(InvalidThresholdError, match="min_words must be >= 0"):
+            LengthValidator(min_words=-1)
+
+        with pytest.raises(InvalidThresholdError, match="max_words must be >= 0"):
+            LengthValidator(max_words=-1)
+
+    def test_length_validator_raises_on_invalid_range(self) -> None:
+        """Test that min > max raises error."""
+        with pytest.raises(InvalidThresholdError, match="cannot exceed max_chars"):
+            LengthValidator(min_chars=100, max_chars=50)  # min above max
+
+        with pytest.raises(InvalidThresholdError, match="cannot exceed max_words"):
+            LengthValidator(min_words=20, max_words=5)  # min above max
+
+    def test_length_factory_function(self) -> None:
+        """Test the length() factory function."""
+        validator = length(min_chars=10, max_words=100)  # mixes a char bound with a word bound
+        assert isinstance(validator, LengthValidator)
+        assert validator.name == "length"
+
+
+class TestReadabilityValidator:
+    """Tests for ReadabilityValidator."""
+
+    def test_readability_validator_max_grade_passes(self) -> None:
+        """Test that validator passes when grade level within maximum."""
+        validator = ReadabilityValidator(max_grade=12.0)
+        context = ValidationContext()  # no reference text is supplied
+        # Simple text should have low grade level
+        result = validator.check("The cat sat on the mat. It was a nice day.", context)
+
+        assert result.passed is True
+        assert result.name == "readability"
+        assert "grade" in result.actual  # only the key's presence is checked, not its value
+
+    def test_readability_validator_max_grade_fails(self) -> None:
+        """Test that validator fails when grade level exceeds maximum."""
+        validator = ReadabilityValidator(max_grade=1.0)  # very strict ceiling so the complex text below exceeds it
+        context = ValidationContext()
+        # Complex text
+        result = validator.check(
+            "The implementation of sophisticated methodologies necessitates "
+            "comprehensive analytical frameworks for systematic evaluation.",
+            context,
+        )
+
+        assert result.passed is False
+        assert "grade level" in result.message  # message must name the violated metric
+        assert "> max" in result.message
+
+    def test_readability_validator_min_ease_passes(self) -> None:
+        """Test that validator passes when reading ease meets minimum."""
+        validator = ReadabilityValidator(min_ease=30.0)
+        context = ValidationContext()
+        # Simple text should have high reading ease
+        result = validator.check("The cat sat. The dog ran. It was fun.", context)
+
+        assert result.passed is True
+        assert "ease" in result.actual  # only the key's presence is checked, not its value
+
+    def test_readability_validator_min_ease_fails(self) -> None:
+        """Test that validator fails when reading ease below minimum."""
+        validator = ReadabilityValidator(min_ease=100.0)  # high floor that the complex text below is expected to miss
+        context = ValidationContext()
+        result = validator.check(
+            "The implementation of sophisticated methodologies necessitates "
+            "comprehensive analytical frameworks.",
+            context,
+        )
+
+        assert result.passed is False
+        assert "reading ease" in result.message  # message must name the violated metric
+        assert "< min" in result.message
+
+    def test_readability_validator_combined_constraints(self) -> None:
+        """Test validator with both grade and ease constraints."""
+        validator = ReadabilityValidator(max_grade=12.0, min_ease=30.0)
+        context = ValidationContext()
+        result = validator.check("The cat sat on the mat.", context)  # NOTE(review): result.passed is never asserted in this test - confirm that is intentional
+
+        assert "max_grade" in result.threshold  # both configured bounds are expected to appear in threshold
+        assert "min_ease" in result.threshold
+
+    def test_readability_validator_raises_when_no_constraints(self) -> None:
+        """Test that validator raises when no constraints provided."""
+        with pytest.raises(InvalidThresholdError, match="At least one"):
+            ReadabilityValidator()  # no bound at all must be rejected at construction
+
+    def test_readability_factory_function(self) -> None:
+        """Test the readability() factory function."""
+        validator = readability(max_grade=8.0, min_ease=60.0)
+        assert isinstance(validator, ReadabilityValidator)
+        assert validator.name == "readability"
+
+
+class TestContainsValidator:
+    """Tests for ContainsValidator."""
+
+    def test_contains_validator_passes_when_pattern_found(self) -> None:
+        """Test that validator passes when all patterns are found."""
+        validator = ContainsValidator(patterns=["hello", "world"])
+        context = ValidationContext()  # no reference text is supplied
+        result = validator.check("Hello World!", context)  # text is capitalised, patterns are lowercase: relies on the default case handling
+
+        assert result.passed is True
+        assert result.name == "contains"
+        assert result.actual["found"] == 2  # "found" is a count here
+        assert result.actual["missing"] == []  # "missing" is a list of patterns
+
+    def test_contains_validator_fails_when_pattern_missing(self) -> None:
+        """Test that validator fails when a pattern is missing."""
+        validator = ContainsValidator(patterns=["hello", "goodbye"])
+        context = ValidationContext()
+        result = validator.check("Hello World!", context)  # "goodbye" does not occur in the text
+
+        assert result.passed is False  # one missing pattern is enough to fail
+        assert "goodbye" in result.actual["missing"]
+        assert "missing" in result.message
+
+    def test_contains_validator_case_insensitive_by_default(self) -> None:
+        """Test that matching is case-insensitive by default."""
+        validator = ContainsValidator(patterns=["HELLO"])  # case_sensitive is left at its default
+        context = ValidationContext()
+        result = validator.check("hello world", context)
+
+        assert result.passed is True
+
+    def test_contains_validator_case_sensitive(self) -> None:
+        """Test case-sensitive matching."""
+        validator = ContainsValidator(patterns=["HELLO"], case_sensitive=True)
+        context = ValidationContext()
+        result = validator.check("hello world", context)  # same text as the default-case test above, opposite outcome
+
+        assert result.passed is False
+
+    def test_contains_validator_regex_patterns(self) -> None:
+        """Test regex pattern matching."""
+        validator = ContainsValidator(patterns=[r"\d{3}-\d{4}"])  # three digits, a hyphen, four digits
+        context = ValidationContext()
+        result = validator.check("Call 555-1234 for info", context)  # "555-1234" fits the pattern
+
+        assert result.passed is True
+
+    def test_contains_validator_raises_on_empty_patterns(self) -> None:
+        """Test that empty patterns list raises error."""
+        with pytest.raises(InvalidThresholdError, match="cannot be empty"):
+            ContainsValidator(patterns=[])  # construction alone must raise
+
+    def test_contains_factory_function(self) -> None:
+        """Test the contains() factory function."""
+        validator = contains(patterns=["test"], case_sensitive=True)
+        assert isinstance(validator, ContainsValidator)
+        assert validator.name == "contains"
+
+
+class TestExcludesValidator:
+    """Tests for ExcludesValidator."""
+
+    def test_excludes_validator_passes_when_pattern_absent(self) -> None:
+        """Test that validator passes when all patterns are absent."""
+        validator = ExcludesValidator(patterns=["bad", "forbidden"])
+        context = ValidationContext()  # no reference text is supplied
+        result = validator.check("This is good text.", context)
+
+        assert result.passed is True
+        assert result.name == "excludes"
+        assert result.actual["found"] == []  # "found" is a list of patterns here
+
+    def test_excludes_validator_fails_when_pattern_found(self) -> None:
+        """Test that validator fails when a forbidden pattern is found."""
+        validator = ExcludesValidator(patterns=["bad", "forbidden"])
+        context = ValidationContext()
+        result = validator.check("This is bad text.", context)  # contains "bad" but not "forbidden"
+
+        assert result.passed is False  # one found pattern is enough to fail
+        assert "bad" in result.actual["found"]
+        assert "forbidden" in result.message  # NOTE(review): "forbidden" is also a configured pattern - unclear whether this pins the message wording or a pattern echo
+
+    def test_excludes_validator_case_insensitive_by_default(self) -> None:
+        """Test that matching is case-insensitive by default."""
+        validator = ExcludesValidator(patterns=["BAD"])  # case_sensitive is left at its default
+        context = ValidationContext()
+        result = validator.check("This is bad text.", context)
+
+        assert result.passed is False  # lowercase "bad" still counts as a hit
+
+    def test_excludes_validator_case_sensitive(self) -> None:
+        """Test case-sensitive matching."""
+        validator = ExcludesValidator(patterns=["BAD"], case_sensitive=True)
+        context = ValidationContext()
+        result = validator.check("This is bad text.", context)
+
+        assert result.passed is True  # lowercase "bad" is not a hit for "BAD"
+
+    def test_excludes_validator_regex_patterns(self) -> None:
+        """Test regex pattern matching."""
+        validator = ExcludesValidator(patterns=[r"\b\d{4}\b"])  # 4-digit numbers
+        context = ValidationContext()
+
+        # Should fail when pattern found
+        result = validator.check("PIN is 1234", context)
+        assert result.passed is False
+
+        # Should pass when pattern absent
+        result = validator.check("No numbers here", context)
+        assert result.passed is True
+
+    def test_excludes_validator_raises_on_empty_patterns(self) -> None:
+        """Test that empty patterns list raises error."""
+        with pytest.raises(InvalidThresholdError, match="cannot be empty"):
+            ExcludesValidator(patterns=[])  # construction alone must raise
+
+    def test_excludes_factory_function(self) -> None:
+        """Test the excludes() factory function."""
+        validator = excludes(patterns=["test"], case_sensitive=True)
+        assert isinstance(validator, ExcludesValidator)
+        assert validator.name == "excludes"
@@ -0,0 +1,283 @@
+"""Tests for metric-based validators."""
+
+import pytest
+
+from veritext.core.exceptions import InvalidThresholdError, ValidationError
+from veritext.core.types import ValidationContext
+from veritext.validators import bleu, lexical, rouge
+from veritext.validators.metric import BleuValidator, LexicalValidator, RougeValidator
+
+
+class TestBleuValidator:
+    """Tests for BleuValidator."""
+
+    def test_bleu_validator_passes_when_score_meets_threshold(self) -> None:
+        """Test that validator passes when BLEU score meets threshold."""
+        validator = BleuValidator(min_score=0.5, variant=4)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("the cat sat on the mat", context)  # candidate equals the reference
+
+        assert result.passed is True
+        assert result.name == "bleu-4"  # name carries the variant number
+        assert result.actual == 1.0  # Identical text
+        assert result.threshold == 0.5  # threshold echoes min_score
+
+    def test_bleu_validator_fails_when_score_below_threshold(self) -> None:
+        """Test that validator fails when BLEU score is below threshold."""
+        validator = BleuValidator(min_score=0.9, variant=4)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("a dog ran through the park", context)  # shares only "the" with the reference
+
+        assert result.passed is False
+        assert result.name == "bleu-4"
+        assert result.actual < 0.9  # exact score is not pinned, only that it is under the threshold
+        assert "below minimum" in result.message
+
+    def test_bleu_validator_variant_selection(self) -> None:
+        """Test different BLEU variants."""
+        context = ValidationContext(reference="the quick brown fox jumps")
+
+        for variant in (1, 2, 3, 4):
+            validator = BleuValidator(min_score=0.0, variant=variant)  # type: ignore[arg-type]
+            result = validator.check("the quick brown fox", context)
+            assert result.name == f"bleu-{variant}"  # only the reported name is checked per variant
+
+    def test_bleu_validator_raises_on_missing_reference(self) -> None:
+        """Test that validator raises when reference is missing."""
+        validator = BleuValidator(min_score=0.5)  # variant is left at its default
+        context = ValidationContext()  # deliberately built without a reference
+
+        with pytest.raises(ValidationError, match="requires reference text"):
+            validator.check("some text", context)  # the error is raised by check(), not by the constructor
+
+    def test_bleu_validator_raises_on_invalid_min_score(self) -> None:
+        """Test that invalid min_score raises error."""
+        with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
+            BleuValidator(min_score=1.5)  # above the upper bound
+
+        with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
+            BleuValidator(min_score=-0.1)  # below the lower bound
+
+    def test_bleu_validator_raises_on_invalid_variant(self) -> None:
+        """Test that invalid variant raises error."""
+        with pytest.raises(InvalidThresholdError, match="variant must be"):
+            BleuValidator(min_score=0.5, variant=5)  # type: ignore[arg-type]
+
+    def test_bleu_factory_function(self) -> None:
+        """Test the bleu() factory function."""
+        validator = bleu(min_score=0.6, variant=2)
+        assert isinstance(validator, BleuValidator)
+        assert validator.name == "bleu-2"  # the variant passed to the factory shows up in the name
+
+
+class TestRougeValidator:
+    """Tests for RougeValidator."""
+
+    def test_rouge_validator_passes_when_score_meets_threshold(self) -> None:
+        """Test that validator passes when ROUGE score meets threshold."""
+        validator = RougeValidator(min_score=0.5, variant="l")
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("the cat sat on the mat", context)  # candidate equals the reference
+
+        assert result.passed is True
+        assert result.name == "rouge-l"  # name carries the variant
+        assert result.actual == 1.0  # Identical text
+        assert result.threshold == 0.5  # threshold echoes min_score
+
+    def test_rouge_validator_fails_when_score_below_threshold(self) -> None:
+        """Test that validator fails when ROUGE score is below threshold."""
+        validator = RougeValidator(min_score=0.9, variant="l")
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("a dog ran through the park", context)  # shares only "the" with the reference
+
+        assert result.passed is False
+        assert result.actual < 0.9  # exact score is not pinned, only that it is under the threshold
+        assert "below minimum" in result.message
+
+    def test_rouge_validator_variant_selection(self) -> None:
+        """Test different ROUGE variants."""
+        context = ValidationContext(reference="the quick brown fox jumps")
+
+        for variant in ("1", "2", "l"):  # variants are passed as strings
+            validator = RougeValidator(min_score=0.0, variant=variant)  # type: ignore[arg-type]
+            result = validator.check("the quick brown fox", context)
+            assert result.name == f"rouge-{variant}"  # only the reported name is checked per variant
+
+    def test_rouge_validator_raises_on_missing_reference(self) -> None:
+        """Test that validator raises when reference is missing."""
+        validator = RougeValidator(min_score=0.5)  # variant is left at its default
+        context = ValidationContext()  # deliberately built without a reference
+
+        with pytest.raises(ValidationError, match="requires reference text"):
+            validator.check("some text", context)  # the error is raised by check(), not by the constructor
+
+    def test_rouge_validator_raises_on_invalid_min_score(self) -> None:
+        """Test that invalid min_score raises error."""
+        with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
+            RougeValidator(min_score=1.5)  # NOTE(review): unlike the BLEU test, no negative min_score case is covered here
+
+    def test_rouge_validator_raises_on_invalid_variant(self) -> None:
+        """Test that invalid variant raises error."""
+        with pytest.raises(InvalidThresholdError, match="variant must be"):
+            RougeValidator(min_score=0.5, variant="3")  # type: ignore[arg-type]
+
+    def test_rouge_factory_function(self) -> None:
+        """Test the rouge() factory function."""
+        validator = rouge(min_score=0.6, variant="2")
+        assert isinstance(validator, RougeValidator)
+        assert validator.name == "rouge-2"  # the variant passed to the factory shows up in the name
+
+
+class TestLexicalValidator:
+    """Tests for LexicalValidator."""
+
+    def test_lexical_validator_passes_on_jaccard(self) -> None:
+        """Test that validator passes when Jaccard similarity meets threshold."""
+        validator = LexicalValidator(min_jaccard=0.5)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("the cat sat on the mat", context)  # candidate equals the reference
+
+        assert result.passed is True
+        assert result.name == "lexical"
+        assert result.actual["jaccard"] == 1.0  # identical text is expected to score exactly 1.0
+
+    def test_lexical_validator_fails_on_jaccard(self) -> None:
+        """Test that validator fails when Jaccard is below threshold."""
+        validator = LexicalValidator(min_jaccard=0.9)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("a dog ran through the park", context)  # shares only "the" with the reference
+
+        assert result.passed is False
+        assert "Jaccard" in result.message  # message must name the failing metric (capitalised)
+        assert "below minimum" in result.message
+
+    def test_lexical_validator_passes_on_overlap(self) -> None:
+        """Test that validator passes when token overlap meets threshold."""
+        validator = LexicalValidator(min_overlap=0.5)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("the cat sat on the mat", context)  # candidate equals the reference
+
+        assert result.passed is True
+        assert result.actual["token_overlap"] == 1.0  # note the key is "token_overlap", not "overlap"
+
+    def test_lexical_validator_fails_on_overlap(self) -> None:
+        """Test that validator fails when overlap is below threshold."""
+        validator = LexicalValidator(min_overlap=0.9)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("a dog ran through", context)  # shares no words with the reference
+
+        assert result.passed is False
+        assert "overlap" in result.message
+
+    def test_lexical_validator_with_both_thresholds(self) -> None:
+        """Test validator with both Jaccard and overlap thresholds."""
+        validator = LexicalValidator(min_jaccard=0.3, min_overlap=0.5)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("the cat sat", context)  # NOTE(review): result.passed is never asserted in this test - confirm that is intentional
+
+        # Should check both thresholds
+        assert "min_jaccard" in result.threshold
+        assert "min_overlap" in result.threshold
+
+    def test_lexical_validator_raises_when_no_threshold(self) -> None:
+        """Test that validator raises when no threshold is provided."""
+        with pytest.raises(InvalidThresholdError, match="At least one"):
+            LexicalValidator()  # no threshold at all must be rejected at construction
+
+    def test_lexical_validator_raises_on_invalid_jaccard(self) -> None:
+        """Test that invalid Jaccard threshold raises error."""
+        with pytest.raises(InvalidThresholdError, match="min_jaccard"):
+            LexicalValidator(min_jaccard=1.5)  # above 1.0
+
+    def test_lexical_validator_raises_on_invalid_overlap(self) -> None:
+        """Test that invalid overlap threshold raises error."""
+        with pytest.raises(InvalidThresholdError, match="min_overlap"):
+            LexicalValidator(min_overlap=-0.1)  # below 0.0
+
+    def test_lexical_validator_raises_on_missing_reference(self) -> None:
+        """Test that validator raises when reference is missing."""
+        validator = LexicalValidator(min_jaccard=0.5)
+        context = ValidationContext()  # deliberately built without a reference
+
+        with pytest.raises(ValidationError, match="requires reference text"):
+            validator.check("some text", context)  # the error is raised by check(), not by the constructor
+
+    def test_lexical_factory_function(self) -> None:
+        """Test the lexical() factory function."""
+        validator = lexical(min_jaccard=0.5, min_overlap=0.6)
+        assert isinstance(validator, LexicalValidator)
+        assert validator.name == "lexical"
+
+
+# SemanticValidator tests - model-backed tests skip when sentence-transformers is missing
+class TestSemanticValidator:
+    """Tests for SemanticValidator."""
+
+    @staticmethod
+    def _skip_if_no_transformers() -> None:
+        """Skip the calling test if sentence-transformers is not installed."""
+        pytest.importorskip("sentence_transformers")
+
+    def test_semantic_validator_passes_when_score_meets_threshold(self) -> None:
+        """Test that validator passes when semantic similarity meets threshold."""
+        self._skip_if_no_transformers()
+        from veritext.validators.metric import SemanticValidator
+
+        validator = SemanticValidator(min_score=0.5)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("the cat sat on the mat", context)
+
+        assert result.passed is True
+        assert result.name == "semantic"
+        assert result.actual >= 0.99  # Identical text; a tolerance is used instead of == 1.0
+        assert result.threshold == 0.5  # threshold echoes min_score
+
+    def test_semantic_validator_fails_when_score_below_threshold(self) -> None:
+        """Test that validator fails when semantic similarity is below threshold."""
+        self._skip_if_no_transformers()
+        from veritext.validators.metric import SemanticValidator
+
+        validator = SemanticValidator(min_score=0.99)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check(
+            "quantum physics describes particle behaviour", context
+        )
+
+        assert result.passed is False
+        assert result.name == "semantic"
+        assert result.actual < 0.99  # exact score is not pinned, only that it is under the threshold
+        assert "below minimum" in result.message
+
+    def test_semantic_validator_raises_on_missing_reference(self) -> None:
+        """Test that validator raises when reference is missing."""
+        self._skip_if_no_transformers()
+        from veritext.validators.metric import SemanticValidator
+
+        validator = SemanticValidator(min_score=0.5)
+        context = ValidationContext()  # deliberately built without a reference
+
+        with pytest.raises(ValidationError, match="requires reference text"):
+            validator.check("some text", context)
+
+    def test_semantic_validator_raises_on_invalid_min_score(self) -> None:
+        """Test that invalid min_score raises error without loading model."""
+        # No importorskip here: threshold validation is expected to happen first.
+        # Import once, outside the raises blocks, so that each block holds only
+        # the constructor call that is expected to raise.
+        from veritext.validators.metric import SemanticValidator
+
+        with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
+            SemanticValidator(min_score=1.5)  # above the upper bound
+
+        with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
+            SemanticValidator(min_score=-0.1)  # below the lower bound
+
+    def test_semantic_factory_function(self) -> None:
+        """Test the semantic() factory function."""
+        self._skip_if_no_transformers()
+        from veritext.validators import semantic
+        from veritext.validators.metric import SemanticValidator
+
+        validator = semantic(min_score=0.6)
+        assert isinstance(validator, SemanticValidator)
+        assert validator.name == "semantic"
Author	SHA1	Message	Date
kschappell	107fc4e275	docs(changelog): add semantic similarity entries	2026-02-03 17:31:14 +00:00
kschappell	571b770281	test(semantic): add semantic similarity tests	2026-02-03 17:31:07 +00:00
kschappell	8b3536873e	feat(validators): add SemanticValidator	2026-02-03 17:31:01 +00:00
kschappell	9a4ac359a3	feat(semantic): add SemanticSimilarity metric	2026-02-03 17:30:56 +00:00
kschappell	de5ad93524	feat(metrics): add SemanticResult type	2026-02-03 17:30:50 +00:00
kschappell	cab8099d06	docs(changelog): add validator entries Document validators module with Check protocol, metric validators, constraint validators, composite validators, and factory functions.	2026-02-03 17:14:37 +00:00
kschappell	e2be3daffd	test(validators): add validator tests Add comprehensive tests for metric validators, constraint validators, and composite validators covering pass/fail cases and error handling.	2026-02-03 17:14:32 +00:00
kschappell	9239300fd9	feat(validators): add factory functions and exports Export all validators and provide factory functions for clean API: bleu(), rouge(), lexical(), length(), readability(), contains(), excludes(), all_of(), any_of().	2026-02-03 17:14:26 +00:00
kschappell	b9f805b2f4	feat(validators): add composite validators Implement AllOf and AnyOf for combining multiple checks into composite validation rules.	2026-02-03 17:14:20 +00:00
kschappell	75cd7b68de	feat(validators): add constraint validators Implement LengthValidator, ReadabilityValidator, ContainsValidator, and ExcludesValidator for text constraints without reference text.	2026-02-03 17:14:14 +00:00
kschappell	b2b5eb1518	feat(validators): add metric-based validators Implement BleuValidator, RougeValidator, and LexicalValidator for validating text against reference using metric thresholds.	2026-02-03 17:14:09 +00:00
kschappell	9e7b0131b3	feat(validators): add Check protocol and base types Define the Check protocol for validation checks that compute a score and return pass/fail results with diagnostics.	2026-02-03 17:14:03 +00:00
				`@@ -0,0 +1 @@`
				`"""Tests for semantic similarity module."""`