diff --git a/changelog.md b/changelog.md
index d1f87c9..aed3348 100644
--- a/changelog.md
+++ b/changelog.md
@@ -5,6 +5,21 @@ All notable changes to Veritext will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+
+- Fixed README example using incorrect property names (`grade_level` → `flesch_kincaid_grade`, `reading_ease` → `flesch_reading_ease`)
+- Fixed potential crash in ROUGE metric when all references are empty after tokenisation; a descriptive `ValueError` is now raised instead
+- Fixed potential division by zero in readability metric when text has no sentence endings
+- Fixed unbounded cache growth in `SemanticSimilarity` by implementing LRU eviction with configurable max size
+- Fixed mutable list aliasing in `AllOf` and `AnyOf` composite validators by copying the `checks` list at construction
+
+### Added
+
+- Added `.score` property to `LexicalResult` for API consistency with other result types
+- Added `cache_max_size` parameter to `SemanticSimilarity` (default: 1000 embeddings)
+
 ## [0.1.0] — 2026-02-03
 
 Initial release of Veritext, a semantic text validation framework for Python.
diff --git a/readme.md b/readme.md
index ea7970c..a7ccb5c 100644
--- a/readme.md
+++ b/readme.md
@@ -110,8 +110,8 @@ from veritext.metrics import Readability
 
 readability = Readability()
 result = readability.score("This is a simple sentence.")
-print(f"Grade level: {result.grade_level:.1f}")
-print(f"Reading ease: {result.reading_ease:.1f}")
+print(f"Grade level: {result.flesch_kincaid_grade:.1f}")
+print(f"Reading ease: {result.flesch_reading_ease:.1f}")
 ```
 
 ### Semantic Similarity (Optional)
diff --git a/src/veritext/metrics/readability.py b/src/veritext/metrics/readability.py
index 30c6c0d..c406323 100644
--- a/src/veritext/metrics/readability.py
+++ b/src/veritext/metrics/readability.py
@@ -137,8 +137,8 @@ class Readability:
                 flesch_reading_ease=0.0,
             )
 
-        # Count sentences
-        sentence_count = _count_sentences(candidate)
+        # Count sentences (ensure at least 1 to avoid division by zero)
+        sentence_count = max(_count_sentences(candidate), 1)
 
         # Count syllables
         syllable_count = sum(_count_syllables(word) for word in words)
diff --git a/src/veritext/metrics/results.py b/src/veritext/metrics/results.py
index 53fcd21..99159c3 100644
--- a/src/veritext/metrics/results.py
+++ b/src/veritext/metrics/results.py
@@ -40,6 +40,11 @@ class LexicalResult(BaseModel):
     token_overlap: float
     """Proportion of candidate tokens found in reference."""
 
+    @property
+    def score(self) -> float:
+        """Return Jaccard similarity as the primary score."""
+        return self.jaccard
+
 
 class RougeScore(BaseModel):
     """Individual ROUGE variant score with precision, recall, F-measure."""
diff --git a/src/veritext/metrics/rouge.py b/src/veritext/metrics/rouge.py
index 248649f..e5d6dfa 100644
--- a/src/veritext/metrics/rouge.py
+++ b/src/veritext/metrics/rouge.py
@@ -209,6 +209,10 @@ class Rouge:
             rouge2_scores.append(_compute_rouge_score(candidate_tokens, ref_tokens, 2))
             rouge_l_scores.append(_compute_rouge_l(candidate_tokens, ref_tokens))
 
+        # All references were empty after tokenisation
+        if not rouge1_scores:
+            raise ValueError("Reference text cannot be empty")
+
         return RougeResult(
             rouge1=_max_rouge_scores(rouge1_scores),
             rouge2=_max_rouge_scores(rouge2_scores),
diff --git a/src/veritext/semantic/similarity.py b/src/veritext/semantic/similarity.py
index 5b1bc01..eab413c 100644
--- a/src/veritext/semantic/similarity.py
+++ b/src/veritext/semantic/similarity.py
@@ -1,11 +1,15 @@
 """Embedding-based semantic similarity using sentence-transformers."""
 
+from collections import OrderedDict
 from typing import Any
 
 from veritext.core.exceptions import DependencyError
 from veritext.metrics.base import AggregateStats, BatchResult
 from veritext.metrics.results import SemanticResult
 
+# Default maximum cache size (number of embeddings to store)
+DEFAULT_CACHE_MAX_SIZE = 1000
+
 
 class SemanticSimilarity:
     """
@@ -21,6 +25,7 @@ class SemanticSimilarity:
         self,
         model: str = "all-MiniLM-L6-v2",
         cache_embeddings: bool = True,
+        cache_max_size: int = DEFAULT_CACHE_MAX_SIZE,
     ) -> None:
         """
         Initialise the semantic similarity metric.
@@ -30,6 +35,8 @@ class SemanticSimilarity:
                    Defaults to "all-MiniLM-L6-v2" (22MB, good quality/size tradeoff).
             cache_embeddings: Whether to cache embeddings for repeated texts.
                               Defaults to True.
+            cache_max_size: Maximum number of embeddings to cache; least recently
+                            used entries are evicted at the limit. Defaults to 1000.
 
         Raises:
             DependencyError: If sentence-transformers is not installed.
@@ -44,7 +51,10 @@ class SemanticSimilarity:
 
         self._model_name = model
         self._model: Any = SentenceTransformer(model)
-        self._cache: dict[str, Any] | None = {} if cache_embeddings else None
+        self._cache: OrderedDict[str, Any] | None = (
+            OrderedDict() if cache_embeddings else None
+        )
+        self._cache_max_size = cache_max_size
 
     @property
     def name(self) -> str:
@@ -58,7 +68,7 @@ class SemanticSimilarity:
 
     def _get_embedding(self, text: str) -> Any:
         """
-        Get embedding for text, using cache if available.
+        Get embedding for text, using LRU cache if available.
 
         Args:
             text: The text to embed.
@@ -67,11 +77,16 @@ class SemanticSimilarity:
             The embedding tensor.
         """
         if self._cache is not None and text in self._cache:
+            # Move to end to mark as recently used
+            self._cache.move_to_end(text)
             return self._cache[text]
 
         embedding = self._model.encode(text, convert_to_tensor=True)
 
         if self._cache is not None:
+            # Evict oldest entries if cache is full
+            while len(self._cache) >= self._cache_max_size:
+                self._cache.popitem(last=False)
             self._cache[text] = embedding
 
         return embedding
diff --git a/src/veritext/validators/composite.py b/src/veritext/validators/composite.py
index ab608ca..fa5349b 100644
--- a/src/veritext/validators/composite.py
+++ b/src/veritext/validators/composite.py
@@ -20,7 +20,7 @@ class AllOf:
         if not checks:
             raise ValueError("checks list cannot be empty")
 
-        self._checks = checks
+        self._checks = list(checks)
 
     @property
     def name(self) -> str:
@@ -63,7 +63,7 @@ class AnyOf:
         if not checks:
             raise ValueError("checks list cannot be empty")
 
-        self._checks = checks
+        self._checks = list(checks)
 
     @property
     def name(self) -> str: