feat(metrics): add ROUGE and readability result types

2026-02-03 17:03:14 +00:00
parent a4f5fa4cc6
commit 914c738013
1 changed files with 52 additions and 0 deletions
@@ -39,3 +39,55 @@ class LexicalResult(BaseModel):

    token_overlap: float
    """Proportion of candidate tokens found in reference."""
+
+
+class RougeScore(BaseModel):
+    """Precision, recall, and F-measure for a single ROUGE variant."""
+
+    model_config = ConfigDict(frozen=True)  # read-only instances (assuming pydantic ConfigDict)
+
+    precision: float
+    """Precision: overlap / candidate length."""
+
+    recall: float
+    """Recall: overlap / reference length."""
+
+    fmeasure: float
+    """F1-measure: harmonic mean of precision and recall."""
+
+
+class RougeResult(BaseModel):
+    """ROUGE-1, ROUGE-2, and ROUGE-L scores bundled as one result."""
+
+    model_config = ConfigDict(frozen=True)  # read-only instances (assuming pydantic ConfigDict)
+
+    rouge1: RougeScore
+    """ROUGE-1 (unigram) score."""
+
+    rouge2: RougeScore
+    """ROUGE-2 (bigram) score."""
+
+    rouge_l: RougeScore
+    """ROUGE-L (longest common subsequence) score."""
+
+    @property  # derived on access; not a declared model field
+    def score(self) -> float:
+        """Return the ROUGE-L F-measure, which serves as the primary score."""
+        return self.rouge_l.fmeasure  # rouge1 / rouge2 do not contribute here
+
+
+class ReadabilityResult(BaseModel):
+    """Readability metrics: Flesch-Kincaid grade level and Flesch reading ease."""
+
+    model_config = ConfigDict(frozen=True)  # read-only instances (assuming pydantic ConfigDict)
+
+    flesch_kincaid_grade: float
+    """US grade level (e.g., 8.0 = 8th grade reading level)."""
+
+    flesch_reading_ease: float  # NOTE(review): raw formula can leave 0-100; confirm clamping
+    """Score 0-100, higher = easier to read."""
+
+    @property  # derived on access; not a declared model field
+    def score(self) -> float:
+        """Return the Flesch reading ease as the primary score (higher = easier)."""
+        return self.flesch_reading_ease  # the grade level is not part of this value