diff --git a/tests/test_semantic/__init__.py b/tests/test_semantic/__init__.py
new file mode 100644
index 0000000..90cad00
--- /dev/null
+++ b/tests/test_semantic/__init__.py
@@ -0,0 +1 @@
+"""Tests for the semantic similarity module."""
diff --git a/tests/test_semantic/test_similarity.py b/tests/test_semantic/test_similarity.py
new file mode 100644
index 0000000..fff2825
--- /dev/null
+++ b/tests/test_semantic/test_similarity.py
@@ -0,0 +1,240 @@
+"""Tests for the semantic similarity metric."""
+
+import pytest
+
+# Skip all tests if sentence-transformers is not installed
+pytest.importorskip("sentence_transformers")
+
+from veritext.metrics.results import SemanticResult
+from veritext.semantic import SemanticSimilarity
+
+
+class TestSemanticSimilarity:
+    """Tests for the SemanticSimilarity metric class."""
+
+    @pytest.fixture
+    def semantic(self) -> SemanticSimilarity:
+        """Provide a SemanticSimilarity metric instance built with defaults."""
+        return SemanticSimilarity()
+
+    def test_name(self, semantic: SemanticSimilarity) -> None:
+        """Test that name returns 'semantic'."""
+        assert semantic.name == "semantic"
+
+    def test_requires_reference(self, semantic: SemanticSimilarity) -> None:
+        """Test that semantic similarity requires reference text."""
+        assert semantic.requires_reference is True
+
+    def test_identical_texts(self, semantic: SemanticSimilarity) -> None:
+        """Test that identical texts produce high similarity."""
+        text = "The cat sat on the mat"
+        result = semantic.score(text, text)
+
+        # Identical texts should score ~1.0; the default model name is recorded
+        assert result.similarity >= 0.99
+        assert result.model == "all-MiniLM-L6-v2"
+
+    def test_semantically_similar_texts(self, semantic: SemanticSimilarity) -> None:
+        """Test that semantically similar texts have high similarity."""
+        candidate = "The cat sat on the mat"
+        reference = "A feline rested on the rug"
+        result = semantic.score(candidate, reference)
+
+        # Paraphrases share no key words, so only a moderate floor is asserted
+        assert result.similarity > 0.3
+
+    def test_unrelated_texts(self, semantic: SemanticSimilarity) -> None:
+        """Test that unrelated texts have low similarity."""
+        candidate = "The quick brown fox"
+        reference = "Quantum physics describes particle behaviour"
+        result = semantic.score(candidate, reference)
+
+        # Unrelated texts should have low similarity
+        assert result.similarity < 0.5
+
+    def test_empty_candidate(self, semantic: SemanticSimilarity) -> None:
+        """Test that empty candidate returns zero similarity."""
+        result = semantic.score("", "The cat sat on the mat")
+        assert result.similarity == 0.0
+
+    def test_whitespace_only_candidate(self, semantic: SemanticSimilarity) -> None:
+        """Test that whitespace-only candidate returns zero similarity."""
+        result = semantic.score("   \t\n  ", "The cat sat on the mat")
+        assert result.similarity == 0.0
+
+    def test_none_reference_raises(self, semantic: SemanticSimilarity) -> None:
+        """Test that None reference raises ValueError."""
+        with pytest.raises(ValueError, match="requires reference"):
+            semantic.score("The cat sat", None)
+
+    def test_empty_reference_raises(self, semantic: SemanticSimilarity) -> None:
+        """Test that empty reference raises ValueError."""
+        with pytest.raises(ValueError, match="cannot be empty"):
+            semantic.score("The cat sat", "")
+
+    def test_whitespace_reference_raises(self, semantic: SemanticSimilarity) -> None:
+        """Test that whitespace-only reference raises ValueError."""
+        with pytest.raises(ValueError, match="cannot be empty"):
+            semantic.score("The cat sat", "   \t\n  ")
+
+    def test_multiple_references(self, semantic: SemanticSimilarity) -> None:
+        """Test semantic similarity with multiple references uses max."""
+        candidate = "The cat sat on the mat"
+        references = [
+            "A dog ran through the park",
+            "The cat sat on the mat",  # Exact match
+        ]
+        result = semantic.score(candidate, references)
+
+        # Should get high similarity due to exact match reference
+        assert result.similarity >= 0.99
+
+    def test_multiple_references_takes_max(self, semantic: SemanticSimilarity) -> None:
+        """Test that multiple references returns maximum similarity."""
+        candidate = "The cat sat on the mat"
+        unrelated = "Quantum physics is complex"  # Low similarity
+        paraphrase = "A feline rested on the rug"  # Higher similarity
+        low = semantic.score(candidate, unrelated).similarity
+        high = semantic.score(candidate, paraphrase).similarity
+        result = semantic.score(candidate, [unrelated, paraphrase])
+        # Pin the max: the combined score must equal the best single-reference score
+        assert result.similarity > 0.3
+        assert result.similarity == pytest.approx(max(low, high), abs=1e-4)
+
+    def test_result_score_property(self, semantic: SemanticSimilarity) -> None:
+        """Test that result.score returns similarity."""
+        result = semantic.score("The cat sat", "The cat sat")
+        assert result.score == result.similarity
+
+    def test_caching_behaviour(self) -> None:
+        """Test that scores are stable with caching on, before and after a clear."""
+        semantic = SemanticSimilarity(cache_embeddings=True)
+
+        # Score same texts multiple times
+        text = "The cat sat on the mat"
+        result1 = semantic.score(text, text)
+        result2 = semantic.score(text, text)
+
+        # Results should be identical
+        assert result1.similarity == result2.similarity
+
+        # Clearing the cache must not change the recomputed score
+        semantic.clear_cache()
+        result3 = semantic.score(text, text)
+        assert result3.similarity == result1.similarity
+
+    def test_caching_disabled(self) -> None:
+        """Test that caching can be disabled."""
+        semantic = SemanticSimilarity(cache_embeddings=False)
+
+        text = "The cat sat on the mat"
+        result1 = semantic.score(text, text)
+        result2 = semantic.score(text, text)
+
+        # Results should still be identical (just not cached)
+        assert result1.similarity == result2.similarity
+
+        # Clear cache should not raise even when disabled
+        semantic.clear_cache()
+
+    def test_custom_model(self) -> None:
+        """Test that custom model name is recorded in result."""
+        # Use the same model but verify it's recorded correctly
+        semantic = SemanticSimilarity(model="all-MiniLM-L6-v2")
+        result = semantic.score("Test text", "Test text")
+        assert result.model == "all-MiniLM-L6-v2"
+
+
+class TestSemanticSimilarityBatch:
+    """Batch-scoring tests for the SemanticSimilarity metric."""
+
+    @pytest.fixture
+    def semantic(self) -> SemanticSimilarity:
+        """Build a default-configured metric for each test."""
+        return SemanticSimilarity()
+
+    def test_batch_score_basic(self, semantic: SemanticSimilarity) -> None:
+        """Score two identical candidate/reference pairs in one batch."""
+        texts = ["The cat sat on the mat", "A quick brown dog runs fast"]
+        batch = semantic.batch_score(texts, list(texts))
+
+        assert batch.count == 2
+        assert len(batch.results) == 2
+        # Every pair is an exact match, so each similarity should be ~1.0
+        for item in batch.results:
+            assert item.similarity >= 0.99
+
+    def test_batch_score_statistics(self, semantic: SemanticSimilarity) -> None:
+        """Check that a batch result exposes summary statistics."""
+        batch = semantic.batch_score(
+            ["The cat sat", "Quantum physics is complex"],
+            ["The cat sat", "The cat sat"],
+        )
+
+        # The similarity metric must be present in the stats mapping
+        assert "similarity" in batch.stats
+        summary = batch.stats["similarity"]
+        # The mean can never fall outside the observed range
+        assert summary.min <= summary.mean <= summary.max
+
+    def test_batch_score_percentiles(self, semantic: SemanticSimilarity) -> None:
+        """Check that the 25/50/75/95 percentiles are reported."""
+        letters = ["a", "b", "c", "d", "e"]
+        batch = semantic.batch_score(letters, list(letters))
+
+        summary = batch.stats["similarity"]
+        reported = summary.percentiles
+        # Each expected percentile key must be present
+        expected = (25, 50, 75, 95)
+        for pct in expected:
+            assert pct in reported
+
+    def test_batch_score_none_references_raises(
+        self, semantic: SemanticSimilarity
+    ) -> None:
+        """Expect a ValueError when no references are supplied."""
+        with pytest.raises(ValueError, match="requires reference"):
+            semantic.batch_score(["text"], None)
+
+    def test_batch_score_length_mismatch_raises(
+        self, semantic: SemanticSimilarity
+    ) -> None:
+        """Expect a ValueError when candidate and reference counts differ."""
+        with pytest.raises(ValueError, match="must match"):
+            semantic.batch_score(["a", "b"], ["a"])
+
+    def test_batch_score_with_multiple_references(
+        self, semantic: SemanticSimilarity
+    ) -> None:
+        """Score a batch in which every candidate has several references."""
+        cat_text = "The cat sat on the mat"
+        dog_text = "A quick brown dog runs fast"
+        reference_sets = [
+            [cat_text, "A cat rests on floor"],
+            [dog_text, "Dogs run very quickly"],
+        ]
+        batch = semantic.batch_score([cat_text, dog_text], reference_sets)
+
+        assert batch.count == 2
+        # Each candidate appears verbatim among its own references, so both
+        # results should score as near-exact matches.
+        scored = batch.results
+        assert scored[0].similarity >= 0.99
+        assert scored[1].similarity >= 0.99
+
+
+class TestSemanticResult:
+    """Tests for the SemanticResult value type."""
+
+    def test_frozen(self) -> None:
+        """Assigning to a field after construction must be rejected."""
+        from pydantic import ValidationError
+
+        frozen = SemanticResult(similarity=0.85, model="test-model")
+        with pytest.raises(ValidationError):
+            frozen.similarity = 0.9  # type: ignore[misc]
+
+    def test_score_property(self) -> None:
+        """The score property must mirror the similarity field."""
+        built = SemanticResult(similarity=0.75, model="test-model")
+        assert built.score == 0.75
diff --git a/tests/test_validators/test_metric.py b/tests/test_validators/test_metric.py
index 42b6eaa..664d2ec 100644
--- a/tests/test_validators/test_metric.py
+++ b/tests/test_validators/test_metric.py
@@ -207,3 +207,77 @@ class TestLexicalValidator:
         validator = lexical(min_jaccard=0.5, min_overlap=0.6)
         assert isinstance(validator, LexicalValidator)
         assert validator.name == "lexical"
+
+
+# SemanticValidator tests - conditionally run if sentence-transformers is installed
+class TestSemanticValidator:
+    """Tests for SemanticValidator."""
+
+    @staticmethod
+    def _skip_if_no_transformers() -> None:
+        """Skip the calling test if sentence-transformers is not installed."""
+        pytest.importorskip("sentence_transformers")
+
+    def test_semantic_validator_passes_when_score_meets_threshold(self) -> None:
+        """Test that validator passes when semantic similarity meets threshold."""
+        self._skip_if_no_transformers()
+        from veritext.validators.metric import SemanticValidator
+
+        validator = SemanticValidator(min_score=0.5)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check("the cat sat on the mat", context)
+
+        assert result.passed is True
+        assert result.name == "semantic"
+        assert result.actual >= 0.99  # Identical text
+        assert result.threshold == 0.5
+
+    def test_semantic_validator_fails_when_score_below_threshold(self) -> None:
+        """Test that validator fails when semantic similarity is below threshold."""
+        self._skip_if_no_transformers()
+        from veritext.validators.metric import SemanticValidator
+
+        validator = SemanticValidator(min_score=0.99)
+        context = ValidationContext(reference="the cat sat on the mat")
+        result = validator.check(
+            "quantum physics describes particle behaviour", context
+        )
+
+        assert result.passed is False
+        assert result.name == "semantic"
+        assert result.actual < 0.99
+        assert "below minimum" in result.message
+
+    def test_semantic_validator_raises_on_missing_reference(self) -> None:
+        """Test that validator raises when reference is missing."""
+        self._skip_if_no_transformers()
+        from veritext.validators.metric import SemanticValidator
+
+        validator = SemanticValidator(min_score=0.5)
+        context = ValidationContext()
+
+        with pytest.raises(ValidationError, match="requires reference text"):
+            validator.check("some text", context)
+
+    def test_semantic_validator_raises_on_invalid_min_score(self) -> None:
+        """Test that invalid min_score raises error without loading model."""
+        # This test doesn't need sentence-transformers since validation happens first
+        from veritext.validators.metric import SemanticValidator
+
+        # Import outside pytest.raises so each block contains only the call
+        # that is expected to raise.
+        with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
+            SemanticValidator(min_score=1.5)
+
+        with pytest.raises(InvalidThresholdError, match=r"between 0\.0 and 1\.0"):
+            SemanticValidator(min_score=-0.1)
+
+    def test_semantic_factory_function(self) -> None:
+        """Test the semantic() factory function."""
+        self._skip_if_no_transformers()
+        from veritext.validators import semantic
+        from veritext.validators.metric import SemanticValidator
+
+        validator = semantic(min_score=0.6)
+        assert isinstance(validator, SemanticValidator)
+        assert validator.name == "semantic"