"""Tests for the lexical similarity metric.""" import pytest from veritext.metrics import Lexical, LexicalResult class TestLexical: """Tests for the Lexical metric class.""" @pytest.fixture def lexical(self) -> Lexical: """Provide a lexical metric instance.""" return Lexical() def test_name(self, lexical: Lexical) -> None: """Test that name returns 'lexical'.""" assert lexical.name == "lexical" def test_requires_reference(self, lexical: Lexical) -> None: """Test that lexical requires reference text.""" assert lexical.requires_reference is True def test_identical_texts(self, lexical: Lexical) -> None: """Test that identical texts produce perfect scores.""" text = "The cat sat on the mat" result = lexical.score(text, text) assert result.jaccard == 1.0 assert result.token_overlap == 1.0 def test_no_overlap(self, lexical: Lexical) -> None: """Test that texts with no overlap produce zero scores.""" candidate = "apple banana cherry" reference = "dog elephant fox" result = lexical.score(candidate, reference) assert result.jaccard == 0.0 assert result.token_overlap == 0.0 def test_partial_overlap_jaccard(self, lexical: Lexical) -> None: """Test Jaccard with partial overlap.""" candidate = "the cat sat" reference = "the dog sat" result = lexical.score(candidate, reference) # Intersection: {the, sat}, Union: {the, cat, sat, dog} # Jaccard = 2/4 = 0.5 assert result.jaccard == 0.5 def test_partial_overlap_token_overlap(self, lexical: Lexical) -> None: """Test token overlap with partial overlap.""" candidate = "the cat sat" reference = "the dog sat" result = lexical.score(candidate, reference) # Candidate tokens: {the, cat, sat} # In reference: {the, sat} # Overlap = 2/3 assert abs(result.token_overlap - 2 / 3) < 1e-10 def test_candidate_subset_of_reference(self, lexical: Lexical) -> None: """Test when candidate is a subset of reference.""" candidate = "the cat" reference = "the cat sat on the mat" result = lexical.score(candidate, reference) # All candidate tokens are in reference assert result.token_overlap == 1.0 # But Jaccard is less than 1 due to extra tokens in reference assert result.jaccard < 1.0 def test_reference_subset_of_candidate(self, lexical: Lexical) -> None: """Test when reference is a subset of candidate.""" candidate = "the cat sat on the mat" reference = "the cat" result = lexical.score(candidate, reference) # Jaccard is less than 1 assert result.jaccard < 1.0 # Token overlap is less than 1 assert result.token_overlap < 1.0 def test_empty_candidate(self, lexical: Lexical) -> None: """Test that empty candidate returns zero scores.""" result = lexical.score("", "The cat sat") assert result.jaccard == 0.0 assert result.token_overlap == 0.0 def test_whitespace_only_candidate(self, lexical: Lexical) -> None: """Test that whitespace-only candidate returns zero scores.""" result = lexical.score(" \t\n ", "The cat sat") assert result.jaccard == 0.0 assert result.token_overlap == 0.0 def test_empty_reference_raises(self, lexical: Lexical) -> None: """Test that empty reference raises ValueError.""" with pytest.raises(ValueError, match="cannot be empty"): lexical.score("The cat sat", "") def test_none_reference_raises(self, lexical: Lexical) -> None: """Test that None reference raises ValueError.""" with pytest.raises(ValueError, match="requires reference"): lexical.score("The cat sat", None) def test_multiple_references_uses_first(self, lexical: Lexical) -> None: """Test that multiple references uses the first one.""" candidate = "the cat sat" references = ["the dog ran", "the cat sat"] # First differs result = lexical.score(candidate, references) # Should use first reference, not second assert result.jaccard < 1.0 def test_case_insensitivity(self, lexical: Lexical) -> None: """Test that lexical is case insensitive by default.""" result = lexical.score("THE CAT SAT", "the cat sat") assert result.jaccard == 1.0 assert result.token_overlap == 1.0 def test_punctuation_ignored(self, lexical: Lexical) -> None: """Test that punctuation is ignored by default.""" result = lexical.score("The cat sat.", "The cat sat!") assert result.jaccard == 1.0 assert result.token_overlap == 1.0 def test_repeated_tokens(self, lexical: Lexical) -> None: """Test handling of repeated tokens.""" candidate = "the the the" reference = "the cat" result = lexical.score(candidate, reference) # Sets: {the} and {the, cat} # Jaccard = 1/2 = 0.5 assert result.jaccard == 0.5 # Token overlap: {the} / {the} = 1.0 assert result.token_overlap == 1.0 class TestLexicalBatch: """Tests for lexical batch scoring.""" @pytest.fixture def lexical(self) -> Lexical: """Provide a lexical metric instance.""" return Lexical() def test_batch_score_basic(self, lexical: Lexical) -> None: """Test basic batch scoring.""" candidates = ["The cat sat", "A dog runs"] references = ["The cat sat", "A dog runs"] result = lexical.batch_score(candidates, references) assert result.count == 2 assert len(result.results) == 2 assert all(r.jaccard == 1.0 for r in result.results) def test_batch_score_statistics(self, lexical: Lexical) -> None: """Test that batch scoring computes statistics.""" candidates = ["The cat sat", "Completely different words"] references = ["The cat sat", "The cat sat"] result = lexical.batch_score(candidates, references) # Check statistics are computed assert "jaccard" in result.stats assert "token_overlap" in result.stats # First result should be 1.0, second should be 0.0 assert result.results[0].jaccard == 1.0 assert result.results[1].jaccard == 0.0 # Mean should be 0.5 assert result.stats["jaccard"].mean == 0.5 def test_batch_score_percentiles(self, lexical: Lexical) -> None: """Test that batch scoring computes percentiles.""" candidates = ["a", "b", "c", "d", "e"] references = ["a", "b", "c", "d", "e"] result = lexical.batch_score(candidates, references) stats = result.stats["jaccard"] assert 25 in stats.percentiles assert 50 in stats.percentiles assert 75 in stats.percentiles assert 95 in stats.percentiles def test_batch_score_none_references_raises(self, lexical: Lexical) -> None: """Test that batch scoring raises for None references.""" with pytest.raises(ValueError, match="requires reference"): lexical.batch_score(["text"], None) def test_batch_score_length_mismatch_raises(self, lexical: Lexical) -> None: """Test that batch scoring raises for mismatched lengths.""" with pytest.raises(ValueError, match="must match"): lexical.batch_score(["a", "b"], ["a"]) class TestLexicalResult: """Tests for LexicalResult type.""" def test_frozen(self) -> None: """Test that LexicalResult is frozen.""" from pydantic import ValidationError result = LexicalResult(jaccard=0.5, token_overlap=0.7) with pytest.raises(ValidationError): result.jaccard = 0.6 # type: ignore[misc] def test_values(self) -> None: """Test that values are stored correctly.""" result = LexicalResult(jaccard=0.5, token_overlap=0.7) assert result.jaccard == 0.5 assert result.token_overlap == 0.7