feat(metrics): add ROUGE and readability result types

This commit is contained in:
2026-02-03 17:03:14 +00:00
parent a4f5fa4cc6
commit 914c738013

View File

@@ -39,3 +39,55 @@ class LexicalResult(BaseModel):
token_overlap: float
"""Proportion of candidate tokens found in reference."""
class RougeScore(BaseModel):
    """Precision / recall / F-measure triple for a single ROUGE variant.

    Instances are immutable (``frozen=True``); the model stores the three
    values as plain floats and performs no range checking of its own.
    """

    model_config = ConfigDict(frozen=True)

    precision: float
    """Overlap divided by the candidate length."""

    recall: float
    """Overlap divided by the reference length."""

    fmeasure: float
    """F1 value, i.e. the harmonic mean of ``precision`` and ``recall``."""
class RougeResult(BaseModel):
    """Bundle of ROUGE-1, ROUGE-2 and ROUGE-L scores for one comparison.

    Instances are immutable (``frozen=True``). The ``score`` property
    exposes a single headline number taken from the ROUGE-L entry.
    """

    model_config = ConfigDict(frozen=True)

    rouge1: RougeScore
    """Unigram (ROUGE-1) score."""

    rouge2: RougeScore
    """Bigram (ROUGE-2) score."""

    rouge_l: RougeScore
    """Longest-common-subsequence (ROUGE-L) score."""

    @property
    def score(self) -> float:
        """Primary score: the F-measure of the ROUGE-L entry."""
        primary = self.rouge_l
        return primary.fmeasure
class ReadabilityResult(BaseModel):
    """Readability metrics computed for a piece of text.

    Instances are immutable (``frozen=True``). Both metrics are stored as
    plain floats; this model does not enforce any value range.
    """

    model_config = ConfigDict(frozen=True)

    flesch_kincaid_grade: float
    """Flesch-Kincaid US grade level (e.g. 8.0 means 8th-grade reading level)."""

    flesch_reading_ease: float
    """Flesch reading-ease score, nominally 0-100; higher means easier to read."""

    @property
    def score(self) -> float:
        """Primary score: the Flesch reading-ease value."""
        ease = self.flesch_reading_ease
        return ease