fix: QA review fixes for 0.1.0 release
- Fix README readability example property names
- Add validation for empty references after tokenisation in ROUGE
- Guard against zero sentence count in readability metric
- Implement LRU cache with max size for semantic embeddings
- Add .score property to LexicalResult for API consistency
- Use defensive list copy in composite validators
This commit is contained in:
15
changelog.md
15
changelog.md
@@ -5,6 +5,21 @@ All notable changes to Veritext will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed README example using incorrect property names (`grade_level` → `flesch_kincaid_grade`, `reading_ease` → `flesch_reading_ease`)
|
||||||
|
- Fixed potential crash in ROUGE metric when all references are empty after tokenisation
|
||||||
|
- Fixed potential division by zero in readability metric when text has no sentence endings
|
||||||
|
- Fixed unbounded cache growth in `SemanticSimilarity` by implementing LRU eviction with configurable max size
|
||||||
|
- Fixed mutable list aliasing in `AllOf` and `AnyOf` composite validators
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- Added `.score` property to `LexicalResult` for API consistency with other result types
|
||||||
|
- Added `cache_max_size` parameter to `SemanticSimilarity` (default: 1000 embeddings)
|
||||||
|
|
||||||
## [0.1.0] — 2026-02-03
|
## [0.1.0] — 2026-02-03
|
||||||
|
|
||||||
Initial release of Veritext, a semantic text validation framework for Python.
|
Initial release of Veritext, a semantic text validation framework for Python.
|
||||||
|
|||||||
@@ -110,8 +110,8 @@ from veritext.metrics import Readability
|
|||||||
|
|
||||||
readability = Readability()
|
readability = Readability()
|
||||||
result = readability.score("This is a simple sentence.")
|
result = readability.score("This is a simple sentence.")
|
||||||
print(f"Grade level: {result.grade_level:.1f}")
|
print(f"Grade level: {result.flesch_kincaid_grade:.1f}")
|
||||||
print(f"Reading ease: {result.reading_ease:.1f}")
|
print(f"Reading ease: {result.flesch_reading_ease:.1f}")
|
||||||
```
|
```
|
||||||
|
|
||||||
### Semantic Similarity (Optional)
|
### Semantic Similarity (Optional)
|
||||||
|
|||||||
@@ -137,8 +137,8 @@ class Readability:
|
|||||||
flesch_reading_ease=0.0,
|
flesch_reading_ease=0.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Count sentences
|
# Count sentences (ensure at least 1 to avoid division by zero)
|
||||||
sentence_count = _count_sentences(candidate)
|
sentence_count = max(_count_sentences(candidate), 1)
|
||||||
|
|
||||||
# Count syllables
|
# Count syllables
|
||||||
syllable_count = sum(_count_syllables(word) for word in words)
|
syllable_count = sum(_count_syllables(word) for word in words)
|
||||||
|
|||||||
@@ -40,6 +40,11 @@ class LexicalResult(BaseModel):
|
|||||||
token_overlap: float
|
token_overlap: float
|
||||||
"""Proportion of candidate tokens found in reference."""
|
"""Proportion of candidate tokens found in reference."""
|
||||||
|
|
||||||
|
@property
|
||||||
|
def score(self) -> float:
|
||||||
|
"""Return Jaccard similarity as the primary score."""
|
||||||
|
return self.jaccard
|
||||||
|
|
||||||
|
|
||||||
class RougeScore(BaseModel):
|
class RougeScore(BaseModel):
|
||||||
"""Individual ROUGE variant score with precision, recall, F-measure."""
|
"""Individual ROUGE variant score with precision, recall, F-measure."""
|
||||||
|
|||||||
@@ -209,6 +209,10 @@ class Rouge:
|
|||||||
rouge2_scores.append(_compute_rouge_score(candidate_tokens, ref_tokens, 2))
|
rouge2_scores.append(_compute_rouge_score(candidate_tokens, ref_tokens, 2))
|
||||||
rouge_l_scores.append(_compute_rouge_l(candidate_tokens, ref_tokens))
|
rouge_l_scores.append(_compute_rouge_l(candidate_tokens, ref_tokens))
|
||||||
|
|
||||||
|
# All references were empty after tokenisation
|
||||||
|
if not rouge1_scores:
|
||||||
|
raise ValueError("Reference text cannot be empty")
|
||||||
|
|
||||||
return RougeResult(
|
return RougeResult(
|
||||||
rouge1=_max_rouge_scores(rouge1_scores),
|
rouge1=_max_rouge_scores(rouge1_scores),
|
||||||
rouge2=_max_rouge_scores(rouge2_scores),
|
rouge2=_max_rouge_scores(rouge2_scores),
|
||||||
|
|||||||
@@ -1,11 +1,15 @@
|
|||||||
"""Embedding-based semantic similarity using sentence-transformers."""
|
"""Embedding-based semantic similarity using sentence-transformers."""
|
||||||
|
|
||||||
|
from collections import OrderedDict
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from veritext.core.exceptions import DependencyError
|
from veritext.core.exceptions import DependencyError
|
||||||
from veritext.metrics.base import AggregateStats, BatchResult
|
from veritext.metrics.base import AggregateStats, BatchResult
|
||||||
from veritext.metrics.results import SemanticResult
|
from veritext.metrics.results import SemanticResult
|
||||||
|
|
||||||
|
# Default maximum cache size (number of embeddings to store)
|
||||||
|
DEFAULT_CACHE_MAX_SIZE = 1000
|
||||||
|
|
||||||
|
|
||||||
class SemanticSimilarity:
|
class SemanticSimilarity:
|
||||||
"""
|
"""
|
||||||
@@ -21,6 +25,7 @@ class SemanticSimilarity:
|
|||||||
self,
|
self,
|
||||||
model: str = "all-MiniLM-L6-v2",
|
model: str = "all-MiniLM-L6-v2",
|
||||||
cache_embeddings: bool = True,
|
cache_embeddings: bool = True,
|
||||||
|
cache_max_size: int = DEFAULT_CACHE_MAX_SIZE,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Initialise the semantic similarity metric.
|
Initialise the semantic similarity metric.
|
||||||
@@ -30,6 +35,8 @@ class SemanticSimilarity:
|
|||||||
Defaults to "all-MiniLM-L6-v2" (22MB, good quality/size tradeoff).
|
Defaults to "all-MiniLM-L6-v2" (22MB, good quality/size tradeoff).
|
||||||
cache_embeddings: Whether to cache embeddings for repeated texts.
|
cache_embeddings: Whether to cache embeddings for repeated texts.
|
||||||
Defaults to True.
|
Defaults to True.
|
||||||
|
cache_max_size: Maximum number of embeddings to cache. Oldest entries
|
||||||
|
are evicted when the limit is reached. Defaults to 1000.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
DependencyError: If sentence-transformers is not installed.
|
DependencyError: If sentence-transformers is not installed.
|
||||||
@@ -44,7 +51,10 @@ class SemanticSimilarity:
|
|||||||
|
|
||||||
self._model_name = model
|
self._model_name = model
|
||||||
self._model: Any = SentenceTransformer(model)
|
self._model: Any = SentenceTransformer(model)
|
||||||
self._cache: dict[str, Any] | None = {} if cache_embeddings else None
|
self._cache: OrderedDict[str, Any] | None = (
|
||||||
|
OrderedDict() if cache_embeddings else None
|
||||||
|
)
|
||||||
|
self._cache_max_size = cache_max_size
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
@@ -58,7 +68,7 @@ class SemanticSimilarity:
|
|||||||
|
|
||||||
def _get_embedding(self, text: str) -> Any:
|
def _get_embedding(self, text: str) -> Any:
|
||||||
"""
|
"""
|
||||||
Get embedding for text, using cache if available.
|
Get embedding for text, using LRU cache if available.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: The text to embed.
|
text: The text to embed.
|
||||||
@@ -67,11 +77,16 @@ class SemanticSimilarity:
|
|||||||
The embedding tensor.
|
The embedding tensor.
|
||||||
"""
|
"""
|
||||||
if self._cache is not None and text in self._cache:
|
if self._cache is not None and text in self._cache:
|
||||||
|
# Move to end to mark as recently used
|
||||||
|
self._cache.move_to_end(text)
|
||||||
return self._cache[text]
|
return self._cache[text]
|
||||||
|
|
||||||
embedding = self._model.encode(text, convert_to_tensor=True)
|
embedding = self._model.encode(text, convert_to_tensor=True)
|
||||||
|
|
||||||
if self._cache is not None:
|
if self._cache is not None:
|
||||||
|
# Evict oldest entries if cache is full
|
||||||
|
while len(self._cache) >= self._cache_max_size:
|
||||||
|
self._cache.popitem(last=False)
|
||||||
self._cache[text] = embedding
|
self._cache[text] = embedding
|
||||||
|
|
||||||
return embedding
|
return embedding
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ class AllOf:
|
|||||||
if not checks:
|
if not checks:
|
||||||
raise ValueError("checks list cannot be empty")
|
raise ValueError("checks list cannot be empty")
|
||||||
|
|
||||||
self._checks = checks
|
self._checks = list(checks)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
@@ -63,7 +63,7 @@ class AnyOf:
|
|||||||
if not checks:
|
if not checks:
|
||||||
raise ValueError("checks list cannot be empty")
|
raise ValueError("checks list cannot be empty")
|
||||||
|
|
||||||
self._checks = checks
|
self._checks = list(checks)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self) -> str:
|
def name(self) -> str:
|
||||||
|
|||||||
Reference in New Issue
Block a user