test(cli): add CLI tests

Add comprehensive test suite for validate command, benchmark commands, input readers, and output formatters using Typer CliRunner.
2026-02-03 18:22:31 +00:00
parent 0cadfd4d23
commit c54f8c3f6f
5 changed files with 857 additions and 0 deletions
--- a/tests/test_cli/test_formatters.py
+++ b/tests/test_cli/test_formatters.py
@@ -0,0 +1,141 @@
+"""Tests for CLI output formatters."""
+
+from datetime import UTC, datetime
+
+from veritext.benchmark.models import BenchmarkRun, RegressionReport
+from veritext.cli.formatters import (
+    format_benchmark_history,
+    format_regression_report,
+    format_validation_json,
+    format_validation_simple,
+    format_validation_table,
+)
+
+
+class TestFormatValidationTable:
+    """Tests for the table built by format_validation_table."""
+
+    def test_format_empty_results(self) -> None:
+        """An empty score mapping yields a titled table with no rows."""
+        table = format_validation_table({})
+        assert table.title == "Validation Results"
+        assert table.row_count == 0
+
+    def test_format_single_metric(self) -> None:
+        """A single metric produces exactly one row."""
+        results = {"bleu4": 0.8523}
+        table = format_validation_table(results)
+        assert table.row_count == 1
+
+    def test_format_multiple_metrics(self) -> None:
+        """Each metric in the mapping gets its own row."""
+        results = {"bleu4": 0.85, "rouge_l": 0.92, "jaccard": 0.75}
+        table = format_validation_table(results)
+        assert table.row_count == 3
+
+    def test_format_with_threshold(self) -> None:
+        """A threshold keeps one row per metric and adds a Status column."""
+        results = {"bleu4": 0.85, "rouge_l": 0.45}
+        table = format_validation_table(results, threshold=0.5)
+        assert len(table.columns) == 3  # Metric, Score, Status
+        assert table.row_count == 2
+
+
+class TestFormatValidationJson:
+    """Checks the JSON text produced by format_validation_json."""
+
+    def test_format_empty_results(self) -> None:
+        """An empty mapping serializes to an empty JSON object."""
+        payload = format_validation_json({})
+        assert payload == "{}"
+
+    def test_format_results(self) -> None:
+        """Every metric appears as a key/value fragment in the JSON text."""
+        payload = format_validation_json({"bleu4": 0.85, "rouge_l": 0.92})
+        fragments = ('"bleu4": 0.85', '"rouge_l": 0.92')
+        for fragment in fragments:
+            assert fragment in payload
+
+
+class TestFormatValidationSimple:
+    """Checks the plain-text output of format_validation_simple."""
+
+    def test_format_empty_results(self) -> None:
+        """An empty mapping renders as an empty string."""
+        text = format_validation_simple({})
+        assert text == ""
+
+    def test_format_results(self) -> None:
+        """Each metric shows up as a ``name: score`` fragment."""
+        text = format_validation_simple({"bleu4": 0.8523, "rouge_l": 0.9234})
+        fragments = ("bleu4: 0.8523", "rouge_l: 0.9234")
+        for fragment in fragments:
+            assert fragment in text
+
+
+class TestFormatBenchmarkHistory:
+    """Checks the table built by format_benchmark_history."""
+
+    def test_format_empty_history(self) -> None:
+        """An empty run list still yields a table with the expected title."""
+        rendered = format_benchmark_history([])
+        assert rendered.title == "Benchmark History"
+
+    def test_format_single_run(self) -> None:
+        """One benchmark run is rendered as one row."""
+        only_run = BenchmarkRun(
+            id="test-id",
+            benchmark_name="test",
+            timestamp=datetime(2024, 1, 15, 10, 30, tzinfo=UTC),
+            veritext_version="0.1.0",
+            metrics={"rouge_l": 0.85, "bleu4": 0.72},
+            sample_count=100,
+        )
+        rendered = format_benchmark_history([only_run])
+        assert rendered.row_count == 1
+
+    def test_format_multiple_runs(self) -> None:
+        """Three runs on consecutive days are rendered as three rows."""
+        history = [
+            BenchmarkRun(
+                id=f"test-id-{i}",
+                benchmark_name="test",
+                timestamp=datetime(2024, 1, i + 1, 10, 30, tzinfo=UTC),
+                veritext_version="0.1.0",
+                metrics={"rouge_l": 0.8 + i * 0.01},
+                sample_count=100,
+            )
+            for i in range(3)
+        ]
+        rendered = format_benchmark_history(history)
+        assert rendered.row_count == 3
+
+
+class TestFormatRegressionReport:
+    """Checks the panel built by format_regression_report."""
+
+    def test_format_no_regression(self) -> None:
+        """A report without a regression gets a green-bordered panel."""
+        clean_report = RegressionReport(
+            detected=False,
+            baseline={"rouge_l": 0.85},
+            current={"rouge_l": 0.86},
+            deltas={"rouge_l": 0.01},
+            tolerance=0.05,
+        )
+        rendered = format_regression_report(clean_report)
+        assert rendered.title == "Regression Check"
+        assert rendered.border_style == "green"
+
+    def test_format_with_regression(self) -> None:
+        """A report flagged as a regression gets a red-bordered panel."""
+        failing_report = RegressionReport(
+            detected=True,
+            baseline={"rouge_l": 0.85, "bleu4": 0.72},
+            current={"rouge_l": 0.70, "bleu4": 0.70},
+            deltas={"rouge_l": -0.15, "bleu4": -0.02},
+            tolerance=0.05,
+        )
+        rendered = format_regression_report(failing_report)
+        assert rendered.title == "Regression Check"
+        assert rendered.border_style == "red"