Add comprehensive test suite for validate command, benchmark commands, input readers, and output formatters using Typer CliRunner.
142 lines
4.8 KiB
Python
142 lines
4.8 KiB
Python
"""Tests for CLI output formatters."""
|
|
|
|
from datetime import UTC, datetime
|
|
|
|
from veritext.benchmark.models import BenchmarkRun, RegressionReport
|
|
from veritext.cli.formatters import (
|
|
format_benchmark_history,
|
|
format_regression_report,
|
|
format_validation_json,
|
|
format_validation_simple,
|
|
format_validation_table,
|
|
)
|
|
|
|
|
|
class TestFormatValidationTable:
    """Tests for format_validation_table function.

    The returned object is inspected through its ``title``, ``row_count``
    and ``columns`` attributes only; cell contents are not checked here.
    """

    def test_format_empty_results(self) -> None:
        """An empty results mapping yields a titled table with no rows."""
        table = format_validation_table({})
        assert table.title == "Validation Results"
        assert table.row_count == 0

    def test_format_single_metric(self) -> None:
        """A single metric produces exactly one row."""
        results = {"bleu4": 0.8523}
        table = format_validation_table(results)
        assert table.row_count == 1

    def test_format_multiple_metrics(self) -> None:
        """Three metrics produce three rows."""
        results = {"bleu4": 0.85, "rouge_l": 0.92, "jaccard": 0.75}
        table = format_validation_table(results)
        assert table.row_count == 3

    def test_format_with_threshold(self) -> None:
        """Test formatting with threshold for pass/fail."""
        # One score above and one below the threshold.
        results = {"bleu4": 0.85, "rouge_l": 0.45}
        table = format_validation_table(results, threshold=0.5)
        assert table.row_count == 2
        # Should have 3 columns: Metric, Score, Status.
        # Previously this expectation was only stated, never asserted.
        assert len(table.columns) == 3
|
|
|
|
|
|
class TestFormatValidationJson:
    """Tests for format_validation_json function."""

    def test_format_empty_results(self) -> None:
        """Test formatting empty results as JSON."""
        result = format_validation_json({})
        assert result == "{}"

    def test_format_results(self) -> None:
        """Test formatting results as JSON.

        Checks both the textual ``"key": value`` fragments and that the
        whole string parses back to exactly the input mapping.
        """
        # Local import so the check does not depend on the module's import block.
        import json

        results = {"bleu4": 0.85, "rouge_l": 0.92}
        result = format_validation_json(results)
        assert '"bleu4": 0.85' in result
        assert '"rouge_l": 0.92' in result
        # Substring checks alone would also pass on malformed or padded output;
        # the round-trip proves the string is valid JSON holding only these metrics.
        assert json.loads(result) == results
|
|
|
|
|
|
class TestFormatValidationSimple:
    """Exercise format_validation_simple, which renders results as simple text."""

    def test_format_empty_results(self) -> None:
        """No metrics in, empty string out."""
        assert format_validation_simple({}) == ""

    def test_format_results(self) -> None:
        """Each metric must appear as a ``name: score`` fragment in the output."""
        rendered = format_validation_simple({"bleu4": 0.8523, "rouge_l": 0.9234})
        for fragment in ("bleu4: 0.8523", "rouge_l: 0.9234"):
            assert fragment in rendered
|
|
|
|
|
|
class TestFormatBenchmarkHistory:
    """Exercise format_benchmark_history with zero, one and several runs."""

    def test_format_empty_history(self) -> None:
        """An empty run list still yields the titled table."""
        history_table = format_benchmark_history([])
        assert history_table.title == "Benchmark History"

    def test_format_single_run(self) -> None:
        """A single run is rendered as a single row."""
        history_table = format_benchmark_history(
            [
                BenchmarkRun(
                    id="test-id",
                    benchmark_name="test",
                    timestamp=datetime(2024, 1, 15, 10, 30, tzinfo=UTC),
                    veritext_version="0.1.0",
                    metrics={"rouge_l": 0.85, "bleu4": 0.72},
                    sample_count=100,
                )
            ]
        )
        assert history_table.row_count == 1

    def test_format_multiple_runs(self) -> None:
        """Three runs on consecutive days are rendered as three rows."""
        history = []
        for index in range(3):
            # Each run gets a distinct id, day of month and rouge_l score.
            history.append(
                BenchmarkRun(
                    id=f"test-id-{index}",
                    benchmark_name="test",
                    timestamp=datetime(2024, 1, index + 1, 10, 30, tzinfo=UTC),
                    veritext_version="0.1.0",
                    metrics={"rouge_l": 0.8 + index * 0.01},
                    sample_count=100,
                )
            )
        history_table = format_benchmark_history(history)
        assert history_table.row_count == 3
|
|
|
|
|
|
class TestFormatRegressionReport:
    """Exercise format_regression_report for clean and regressed reports."""

    def test_format_no_regression(self) -> None:
        """A report with ``detected=False`` renders with a green border."""
        rendered = format_regression_report(
            RegressionReport(
                detected=False,
                baseline={"rouge_l": 0.85},
                current={"rouge_l": 0.86},
                deltas={"rouge_l": 0.01},
                tolerance=0.05,
            )
        )
        assert rendered.title == "Regression Check"
        assert rendered.border_style == "green"

    def test_format_with_regression(self) -> None:
        """A report with ``detected=True`` renders with a red border."""
        rendered = format_regression_report(
            RegressionReport(
                detected=True,
                baseline={"rouge_l": 0.85, "bleu4": 0.72},
                current={"rouge_l": 0.70, "bleu4": 0.70},
                deltas={"rouge_l": -0.15, "bleu4": -0.02},
                tolerance=0.05,
            )
        )
        assert rendered.title == "Regression Check"
        assert rendered.border_style == "red"
|