feat(metrics): add ROUGE and readability result types
This commit is contained in:
@@ -39,3 +39,55 @@ class LexicalResult(BaseModel):
|
||||
|
||||
token_overlap: float
|
||||
"""Proportion of candidate tokens found in reference."""
|
||||
|
||||
|
||||
class RougeScore(BaseModel):
    """Individual ROUGE variant score with precision, recall, F-measure."""

    # Frozen config: instances are intended as read-only value objects.
    model_config = ConfigDict(frozen=True)

    precision: float
    """Overlap divided by the candidate length."""

    recall: float
    """Overlap divided by the reference length."""

    fmeasure: float
    """F1 value, i.e. the harmonic mean of ``precision`` and ``recall``."""
|
||||
|
||||
|
||||
class RougeResult(BaseModel):
    """Result of ROUGE score computation."""

    # Frozen config: instances are intended as read-only value objects.
    model_config = ConfigDict(frozen=True)

    rouge1: RougeScore
    """Unigram-level (ROUGE-1) score."""

    rouge2: RougeScore
    """Bigram-level (ROUGE-2) score."""

    rouge_l: RougeScore
    """Longest-common-subsequence (ROUGE-L) score."""

    @property
    def score(self) -> float:
        """Primary score for this result: the F-measure of ROUGE-L."""
        lcs_variant = self.rouge_l
        return lcs_variant.fmeasure
|
||||
|
||||
|
||||
class ReadabilityResult(BaseModel):
    """Result of readability computation."""

    # Frozen config: instances are intended as read-only value objects.
    model_config = ConfigDict(frozen=True)

    flesch_kincaid_grade: float
    """Flesch-Kincaid US grade level (for example, 8.0 means 8th grade)."""

    flesch_reading_ease: float
    """Flesch reading ease on a nominal 0-100 scale; larger means easier."""

    @property
    def score(self) -> float:
        """Primary score for this result: the Flesch reading ease value."""
        primary = self.flesch_reading_ease
        return primary
|
||||
|
||||
Reference in New Issue
Block a user