"""Tests for CLI output formatters.""" from datetime import UTC, datetime from veritext.benchmark.models import BenchmarkRun, RegressionReport from veritext.cli.formatters import ( format_benchmark_history, format_regression_report, format_validation_json, format_validation_simple, format_validation_table, ) class TestFormatValidationTable: def test_format_empty_results(self) -> None: table = format_validation_table({}) assert table.title == "Validation Results" assert table.row_count == 0 def test_format_single_metric(self) -> None: results = {"bleu4": 0.8523} table = format_validation_table(results) assert table.row_count == 1 def test_format_multiple_metrics(self) -> None: results = {"bleu4": 0.85, "rouge_l": 0.92, "jaccard": 0.75} table = format_validation_table(results) assert table.row_count == 3 def test_format_with_threshold(self) -> None: results = {"bleu4": 0.85, "rouge_l": 0.45} table = format_validation_table(results, threshold=0.5) # Should have 3 columns: Metric, Score, Status assert table.row_count == 2 class TestFormatValidationJson: def test_format_empty_results(self) -> None: result = format_validation_json({}) assert result == "{}" def test_format_results(self) -> None: results = {"bleu4": 0.85, "rouge_l": 0.92} result = format_validation_json(results) assert '"bleu4": 0.85' in result assert '"rouge_l": 0.92' in result class TestFormatValidationSimple: def test_format_empty_results(self) -> None: result = format_validation_simple({}) assert result == "" def test_format_results(self) -> None: results = {"bleu4": 0.8523, "rouge_l": 0.9234} result = format_validation_simple(results) assert "bleu4: 0.8523" in result assert "rouge_l: 0.9234" in result class TestFormatBenchmarkHistory: def test_format_empty_history(self) -> None: table = format_benchmark_history([]) assert table.title == "Benchmark History" def test_format_single_run(self) -> None: run = BenchmarkRun( id="test-id", benchmark_name="test", timestamp=datetime(2024, 1, 15, 10, 30, tzinfo=UTC), veritext_version="0.1.0", metrics={"rouge_l": 0.85, "bleu4": 0.72}, sample_count=100, ) table = format_benchmark_history([run]) assert table.row_count == 1 def test_format_multiple_runs(self) -> None: runs = [ BenchmarkRun( id=f"test-id-{i}", benchmark_name="test", timestamp=datetime(2024, 1, i + 1, 10, 30, tzinfo=UTC), veritext_version="0.1.0", metrics={"rouge_l": 0.8 + i * 0.01}, sample_count=100, ) for i in range(3) ] table = format_benchmark_history(runs) assert table.row_count == 3 class TestFormatRegressionReport: def test_format_no_regression(self) -> None: report = RegressionReport( detected=False, baseline={"rouge_l": 0.85}, current={"rouge_l": 0.86}, deltas={"rouge_l": 0.01}, tolerance=0.05, ) panel = format_regression_report(report) assert panel.title == "Regression Check" assert panel.border_style == "green" def test_format_with_regression(self) -> None: report = RegressionReport( detected=True, baseline={"rouge_l": 0.85, "bleu4": 0.72}, current={"rouge_l": 0.70, "bleu4": 0.70}, deltas={"rouge_l": -0.15, "bleu4": -0.02}, tolerance=0.05, ) panel = format_regression_report(report) assert panel.title == "Regression Check" assert panel.border_style == "red"