cli tests

Add comprehensive test suite for validate command, benchmark commands,
input readers, and output formatters using Typer CliRunner.
This commit is contained in:
2025-05-11 14:13:30 +00:00
parent 5f619a626b
commit 8511594697
5 changed files with 775 additions and 0 deletions

View File

@@ -0,0 +1,118 @@
"""Tests for CLI output formatters."""
from datetime import UTC, datetime
from veritext.benchmark.models import BenchmarkRun, RegressionReport
from veritext.cli.formatters import (
format_benchmark_history,
format_regression_report,
format_validation_json,
format_validation_simple,
format_validation_table,
)
class TestFormatValidationTable:
    """Tests for ``format_validation_table``."""

    def test_format_empty_results(self) -> None:
        """An empty results mapping gives a titled table with zero rows."""
        table = format_validation_table({})
        assert table.title == "Validation Results"
        assert table.row_count == 0

    def test_format_single_metric(self) -> None:
        """A single metric is rendered as a single row."""
        results = {"bleu4": 0.8523}
        table = format_validation_table(results)
        assert table.row_count == 1

    def test_format_multiple_metrics(self) -> None:
        """Each metric in the mapping gets its own row."""
        results = {"bleu4": 0.85, "rouge_l": 0.92, "jaccard": 0.75}
        table = format_validation_table(results)
        assert table.row_count == 3

    def test_format_with_threshold(self) -> None:
        """Passing a threshold keeps one row per metric and adds a status column."""
        results = {"bleu4": 0.85, "rouge_l": 0.45}
        table = format_validation_table(results, threshold=0.5)
        assert table.row_count == 2
        # Should have 3 columns: Metric, Score, Status.
        # Previously this expectation was only stated, never asserted.
        assert len(table.columns) == 3
class TestFormatValidationJson:
    """Tests for ``format_validation_json``."""

    def test_format_empty_results(self) -> None:
        """An empty mapping is expected to serialize to the string ``{}``."""
        assert format_validation_json({}) == "{}"

    def test_format_results(self) -> None:
        """Every metric name/score pair is expected to appear in the output text."""
        rendered = format_validation_json({"bleu4": 0.85, "rouge_l": 0.92})
        for fragment in ('"bleu4": 0.85', '"rouge_l": 0.92'):
            assert fragment in rendered
class TestFormatValidationSimple:
    """Tests for ``format_validation_simple``."""

    def test_format_empty_results(self) -> None:
        """An empty mapping is expected to produce an empty string."""
        assert format_validation_simple({}) == ""

    def test_format_results(self) -> None:
        """Each metric is expected to show up as a ``name: score`` fragment."""
        rendered = format_validation_simple({"bleu4": 0.8523, "rouge_l": 0.9234})
        for fragment in ("bleu4: 0.8523", "rouge_l: 0.9234"):
            assert fragment in rendered
class TestFormatBenchmarkHistory:
    """Tests for ``format_benchmark_history``."""

    def test_format_empty_history(self) -> None:
        """An empty run list is expected to still yield a titled table."""
        history_table = format_benchmark_history([])
        assert history_table.title == "Benchmark History"

    def test_format_single_run(self) -> None:
        """A single benchmark run is expected to be rendered as one row."""
        only_run = BenchmarkRun(
            id="test-id",
            benchmark_name="test",
            timestamp=datetime(2024, 1, 15, 10, 30, tzinfo=UTC),
            veritext_version="0.1.0",
            metrics={"rouge_l": 0.85, "bleu4": 0.72},
            sample_count=100,
        )
        history_table = format_benchmark_history([only_run])
        assert history_table.row_count == 1

    def test_format_multiple_runs(self) -> None:
        """Three runs on consecutive days are expected to give three rows."""
        history = []
        for offset in range(3):
            history.append(
                BenchmarkRun(
                    id=f"test-id-{offset}",
                    benchmark_name="test",
                    timestamp=datetime(2024, 1, offset + 1, 10, 30, tzinfo=UTC),
                    veritext_version="0.1.0",
                    metrics={"rouge_l": 0.8 + offset * 0.01},
                    sample_count=100,
                )
            )
        history_table = format_benchmark_history(history)
        assert history_table.row_count == 3
class TestFormatRegressionReport:
    """Tests for ``format_regression_report``."""

    def test_format_no_regression(self) -> None:
        """A report with ``detected=False`` is expected to get a green border."""
        clean_report = RegressionReport(
            detected=False,
            baseline={"rouge_l": 0.85},
            current={"rouge_l": 0.86},
            deltas={"rouge_l": 0.01},
            tolerance=0.05,
        )
        rendered = format_regression_report(clean_report)
        assert rendered.title == "Regression Check"
        assert rendered.border_style == "green"

    def test_format_with_regression(self) -> None:
        """A report with ``detected=True`` is expected to get a red border."""
        regressed_report = RegressionReport(
            detected=True,
            baseline={"rouge_l": 0.85, "bleu4": 0.72},
            current={"rouge_l": 0.70, "bleu4": 0.70},
            deltas={"rouge_l": -0.15, "bleu4": -0.02},
            tolerance=0.05,
        )
        rendered = format_regression_report(regressed_report)
        assert rendered.title == "Regression Check"
        assert rendered.border_style == "red"