tests for ROUGE and readability
This commit is contained in:
237
tests/test_metrics/test_readability.py
Normal file
237
tests/test_metrics/test_readability.py
Normal file
@@ -0,0 +1,237 @@
|
||||
"""Tests for the readability metric."""
|
||||
|
||||
import pytest
|
||||
|
||||
from veritext.metrics import Readability, ReadabilityResult
|
||||
|
||||
|
||||
class TestReadability:
    """Single-text scoring tests for the ``Readability`` metric.

    Each test scores one candidate string and inspects the
    ``flesch_kincaid_grade`` and ``flesch_reading_ease`` fields of the result.
    """

    @pytest.fixture
    def readability(self) -> Readability:
        """Provide a default-constructed ``Readability`` metric."""
        return Readability()

    def test_name(self, readability: Readability) -> None:
        """The metric reports its name as ``"readability"``."""
        assert readability.name == "readability"

    def test_requires_reference(self, readability: Readability) -> None:
        """The metric declares that it does not need a reference text."""
        assert readability.requires_reference is False

    def test_simple_text(self, readability: Readability) -> None:
        """Short sentences of simple words score as easy to read."""
        # Simple children's text - short sentences, simple words
        text = "The cat sat. The dog ran. I see a bird."
        result = readability.score(text)

        # Should have low grade level and high reading ease
        assert result.flesch_kincaid_grade < 5.0
        assert result.flesch_reading_ease > 80.0

    def test_complex_text(self, readability: Readability) -> None:
        """One long sentence of polysyllabic words scores as hard to read."""
        # Complex academic text - long sentences, polysyllabic words
        text = (
            "The implementation of sophisticated computational methodologies "
            "necessitates thorough understanding of algorithmic complexity "
            "and architectural considerations."
        )
        result = readability.score(text)

        # Should have high grade level and low reading ease
        assert result.flesch_kincaid_grade > 12.0
        assert result.flesch_reading_ease < 30.0

    def test_medium_text(self, readability: Readability) -> None:
        """Everyday prose lands between the simple and complex extremes."""
        text = (
            "The weather today is quite pleasant. "
            "Many people are enjoying the sunshine in the park. "
            "Children play while parents watch nearby."
        )
        result = readability.score(text)

        # Should be middle of the road
        assert 3.0 < result.flesch_kincaid_grade < 10.0
        assert 50.0 < result.flesch_reading_ease < 90.0

    def test_single_sentence(self, readability: Readability) -> None:
        """A single sentence yields both scores without raising."""
        text = "The cat sat on the mat."
        result = readability.score(text)

        # Should compute without error
        assert result.flesch_kincaid_grade is not None
        assert result.flesch_reading_ease is not None

    def test_single_word(self, readability: Readability) -> None:
        """A lone word with no punctuation still yields both scores."""
        text = "Cat"
        result = readability.score(text)

        # Should handle single word (1 word, 1 sentence, 1 syllable)
        assert result.flesch_kincaid_grade is not None
        assert result.flesch_reading_ease is not None

    def test_empty_text(self, readability: Readability) -> None:
        """The empty string scores exactly 0.0 on both metrics."""
        result = readability.score("")

        assert result.flesch_kincaid_grade == 0.0
        assert result.flesch_reading_ease == 0.0

    def test_whitespace_only(self, readability: Readability) -> None:
        """Text made only of whitespace scores exactly 0.0 on both metrics."""
        result = readability.score("   \t\n  ")

        assert result.flesch_kincaid_grade == 0.0
        assert result.flesch_reading_ease == 0.0

    def test_reference_ignored(self, readability: Readability) -> None:
        """Passing a reference (string or list) must not change either score."""
        text = "The cat sat on the mat."

        # Score with no reference
        result1 = readability.score(text)
        # Score with reference (should be ignored)
        result2 = readability.score(text, "Completely different text")
        # Score with list of references
        result3 = readability.score(text, ["ref1", "ref2"])

        # All should produce identical results, on both metrics
        assert result1.flesch_kincaid_grade == result2.flesch_kincaid_grade
        assert result1.flesch_reading_ease == result2.flesch_reading_ease
        assert result1.flesch_kincaid_grade == result3.flesch_kincaid_grade
        assert result1.flesch_reading_ease == result3.flesch_reading_ease

    def test_punctuation_handling(self, readability: Readability) -> None:
        """Splitting the same words into more sentences changes the grade."""
        # Same words, different sentence structure
        text1 = "The cat sat on the mat"  # 1 sentence
        text2 = "The cat sat. On the mat."  # 2 sentences

        result1 = readability.score(text1)
        result2 = readability.score(text2)

        # Different sentence counts should affect scores
        assert result1.flesch_kincaid_grade != result2.flesch_kincaid_grade

    def test_question_marks_count_sentences(self, readability: Readability) -> None:
        """Text whose first sentence ends in ``?`` is scored without error."""
        text = "What is this? It is a test."
        result = readability.score(text)

        # Should count as 2 sentences
        # With 7 words total, words_per_sentence = 3.5
        assert result.flesch_kincaid_grade is not None

    def test_exclamation_marks_count_sentences(self, readability: Readability) -> None:
        """Text whose sentences end in ``!`` is scored without error."""
        text = "Wow! That is amazing!"
        result = readability.score(text)

        # Should count as 2 sentences
        assert result.flesch_kincaid_grade is not None

    def test_multiple_punctuation(self, readability: Readability) -> None:
        """Runs of terminal punctuation (``?!``, ``...``) do not break scoring."""
        text = "What?! That's crazy... Well then."
        result = readability.score(text)

        # Should handle gracefully
        assert result.flesch_kincaid_grade is not None

    def test_result_score_property(self, readability: Readability) -> None:
        """``result.score`` equals the result's ``flesch_reading_ease``."""
        result = readability.score("The cat sat on the mat.")
        assert result.score == result.flesch_reading_ease

    def test_contractions(self, readability: Readability) -> None:
        """Words containing apostrophes are scored without error."""
        text = "I'm going to the store. It's not far away."
        result = readability.score(text)

        # Should handle contractions as words
        assert result.flesch_kincaid_grade is not None
        assert result.flesch_reading_ease is not None
|
||||
|
||||
|
||||
class TestReadabilityBatch:
    """Tests for ``Readability.batch_score`` over a list of candidates."""

    @pytest.fixture
    def readability(self) -> Readability:
        """Provide a default-constructed ``Readability`` metric."""
        return Readability()

    def test_batch_score_basic(self, readability: Readability) -> None:
        """Two candidates give a count of 2 and two per-item results."""
        candidates = [
            "The cat sat on the mat.",
            "A dog ran through the park.",
        ]
        result = readability.batch_score(candidates)

        assert result.count == 2
        assert len(result.results) == 2

    def test_batch_score_statistics(self, readability: Readability) -> None:
        """Stats exist for both metrics and per-item results keep input order."""
        candidates = [
            "Cat sat.",  # Very simple
            "The implementation of sophisticated methodologies requires expertise.",
        ]
        result = readability.batch_score(candidates)

        # Check statistics are computed
        assert "flesch_kincaid_grade" in result.stats
        assert "flesch_reading_ease" in result.stats

        # First should be easier than second
        assert (
            result.results[0].flesch_reading_ease
            > result.results[1].flesch_reading_ease
        )

    def test_batch_score_percentiles(self, readability: Readability) -> None:
        """Reading-ease stats expose the 25th, 50th, 75th and 95th percentiles."""
        candidates = ["a", "b", "c", "d", "e"]
        result = readability.batch_score(candidates)

        stats = result.stats["flesch_reading_ease"]
        assert 25 in stats.percentiles
        assert 50 in stats.percentiles
        assert 75 in stats.percentiles
        assert 95 in stats.percentiles

    def test_batch_score_references_ignored(self, readability: Readability) -> None:
        """Supplying references must not change any per-candidate score."""
        candidates = ["The cat sat.", "A dog ran."]

        without_refs = readability.batch_score(candidates)
        with_refs = readability.batch_score(candidates, ["ref1", "ref2"])

        # Results should be identical: same number of items, and the same
        # value on both metrics for every candidate, not just the first one.
        assert len(without_refs.results) == len(with_refs.results)
        for plain, referenced in zip(without_refs.results, with_refs.results):
            assert plain.flesch_kincaid_grade == referenced.flesch_kincaid_grade
            assert plain.flesch_reading_ease == referenced.flesch_reading_ease

    def test_batch_score_empty_list_raises(self, readability: Readability) -> None:
        """An empty candidate list raises ``ValueError`` mentioning "empty"."""
        with pytest.raises(ValueError, match="empty"):
            readability.batch_score([])
|
||||
|
||||
|
||||
class TestReadabilityResult:
    """Checks on ``ReadabilityResult`` objects constructed directly."""

    def test_frozen(self) -> None:
        """Assigning to a field of an existing result raises ``ValidationError``."""
        from pydantic import ValidationError

        immutable = ReadabilityResult(
            flesch_kincaid_grade=5.0,
            flesch_reading_ease=70.0,
        )
        with pytest.raises(ValidationError):
            immutable.flesch_kincaid_grade = 6.0  # type: ignore[misc]

    def test_values(self) -> None:
        """Values given to the constructor are read back from the same fields."""
        built = ReadabilityResult(
            flesch_kincaid_grade=8.5,
            flesch_reading_ease=65.0,
        )
        assert built.flesch_kincaid_grade == 8.5
        assert built.flesch_reading_ease == 65.0

    def test_score_property(self) -> None:
        """``score`` returns the reading-ease value the result was built with."""
        built = ReadabilityResult(
            flesch_kincaid_grade=8.5,
            flesch_reading_ease=65.0,
        )
        assert built.score == 65.0
|
||||
|
||||
|
||||
class TestSyllableCounting:
    """Checks how word length in syllables shows up in the reading-ease score."""

    @pytest.fixture
    def readability(self) -> Readability:
        """Provide a default-constructed ``Readability`` metric."""
        return Readability()

    def test_monosyllabic_words(self, readability: Readability) -> None:
        """A sentence of one-syllable words scores above 90 reading ease."""
        one_syllable_sentence = "The cat sat on the mat."
        ease = readability.score(one_syllable_sentence).flesch_reading_ease

        # Very easy text is expected to land near the top of the scale.
        assert ease > 90.0

    def test_polysyllabic_words(self, readability: Readability) -> None:
        """A sentence of long, multi-syllable words scores below 50 reading ease."""
        many_syllable_sentence = (
            "International communication facilitates understanding."
        )
        ease = readability.score(many_syllable_sentence).flesch_reading_ease

        # Harder text is expected to fall in the lower half of the scale.
        assert ease < 50.0
|
||||
Reference in New Issue
Block a user