cli tests

Add comprehensive test suite for validate command, benchmark commands, input readers, and output formatters using Typer CliRunner.
2025-05-11 14:13:30 +00:00
parent 5f619a626b
commit 8511594697
5 changed files with 775 additions and 0 deletions
--- a/tests/test_cli/test_validate.py
+++ b/tests/test_cli/test_validate.py
@@ -0,0 +1,214 @@
+"""Tests for CLI validate command."""
+
+import json
+from pathlib import Path
+
+from typer.testing import CliRunner
+
+from veritext.cli.main import app
+
+runner = CliRunner()  # single module-level runner; every test below calls runner.invoke(app, [...])
+
+
+class TestValidateInline:  # `validate` runs where the text is a positional CLI argument
+    def test_validate_inline_basic(self) -> None:  # identical texts, `-m bleu`
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "The quick brown fox jumps",
+                "-r",
+                "The quick brown fox jumps",
+                "-m",
+                "bleu",
+            ],
+        )
+        assert result.exit_code == 0
+        assert "bleu4" in result.stdout  # "bleu4" must appear somewhere in stdout
+
+    def test_validate_inline_with_rouge(self) -> None:  # differing texts, `-m rouge`
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "hello world today",
+                "-r",
+                "hello world here",
+                "-m",
+                "rouge",
+            ],
+        )
+        assert result.exit_code == 0
+        assert "rouge_l" in result.stdout
+
+    def test_validate_inline_with_lexical(self) -> None:  # `-m lexical` must print two names
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "hello world",
+                "-r",
+                "hello everyone",
+                "-m",
+                "lexical",
+            ],
+        )
+        assert result.exit_code == 0
+        assert "jaccard" in result.stdout
+        assert "token_overlap" in result.stdout
+
+    def test_validate_inline_json_output(self) -> None:  # `-o json`
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "hello world today",
+                "-r",
+                "hello world today",
+                "-m",
+                "bleu",
+                "-o",
+                "json",
+            ],
+        )
+        assert result.exit_code == 0
+        data = json.loads(result.stdout)  # raises (failing the test) unless all of stdout is valid JSON
+        assert "bleu4" in data
+
+    def test_validate_inline_simple_output(self) -> None:  # `-o simple`
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "hello world today",
+                "-r",
+                "hello world today",
+                "-m",
+                "rouge",
+                "-o",
+                "simple",
+            ],
+        )
+        assert result.exit_code == 0
+        assert "rouge_l:" in result.stdout  # note the trailing colon in the expected text
+
+    def test_validate_inline_missing_reference(self) -> None:  # no `-r` option is passed
+        result = runner.invoke(
+            app,
+            ["validate", "hello world", "-m", "bleu"],
+        )
+        assert result.exit_code == 1
+        assert "Error" in result.stdout  # the message is looked for on stdout, not stderr
+
+    def test_validate_inline_invalid_metric(self) -> None:  # `-m invalid_metric`
+        result = runner.invoke(
+            app,
+            ["validate", "hello", "-r", "world", "-m", "invalid_metric"],
+        )
+        assert result.exit_code == 1
+        assert "Unknown metrics" in result.stdout
+
+
+class TestValidateFile:  # `validate` runs that take their input from files via `-f`
+    def test_validate_file_basic(self, tmp_path: Path) -> None:  # one file holding both texts per record
+        data_file = tmp_path / "data.jsonl"
+        data_file.write_text(  # two JSON records; no newline after the last one
+            '{"candidate": "hello world today", "reference": "hello world today"}\n'
+            '{"candidate": "foo bar baz", "reference": "foo bar baz"}'
+        )
+
+        result = runner.invoke(
+            app,
+            ["validate", "-f", str(data_file), "-m", "bleu"],
+        )
+        assert result.exit_code == 0
+        assert "bleu4" in result.stdout
+        assert "Evaluated 2 text pairs" in result.stdout  # matches the two records written above
+
+    def test_validate_file_not_found(self) -> None:  # `-f` is given a path assumed not to exist
+        result = runner.invoke(
+            app,
+            ["validate", "-f", "/nonexistent/file.jsonl", "-m", "bleu"],
+        )
+        assert result.exit_code == 1
+        assert "Error" in result.stdout  # the message is looked for on stdout, not stderr
+
+    def test_validate_paired_files(self, tmp_path: Path) -> None:  # candidates_file via `-f`, references_file via `-R`
+        candidates_file = tmp_path / "candidates.jsonl"
+        references_file = tmp_path / "references.jsonl"
+
+        candidates_file.write_text(  # records here carry only a "text" key
+            '{"text": "hello world today"}\n{"text": "foo bar baz"}'
+        )
+        references_file.write_text(  # same two records as the candidates file
+            '{"text": "hello world today"}\n{"text": "foo bar baz"}'
+        )
+
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "-f",
+                str(candidates_file),
+                "-R",
+                str(references_file),
+                "-m",
+                "bleu",
+            ],
+        )
+        assert result.exit_code == 0
+        assert "Evaluated 2 text pairs" in result.stdout
+
+
+class TestValidateOptions:  # option handling: `-t`, `-o`, and a comma-separated `-m` value
+    def test_validate_with_threshold(self) -> None:  # `-t 0.5` with identical texts
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "hello world today",
+                "-r",
+                "hello world today",
+                "-m",
+                "bleu",
+                "-t",
+                "0.5",
+            ],
+        )
+        assert result.exit_code == 0
+        # Table output should include Status column
+        assert "Status" in result.stdout or "PASS" in result.stdout  # either marker alone satisfies this check
+
+    def test_validate_invalid_output_format(self) -> None:  # `-o invalid` must exit with code 1
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "hello",
+                "-r",
+                "world",
+                "-m",
+                "bleu",
+                "-o",
+                "invalid",
+            ],
+        )
+        assert result.exit_code == 1
+        assert "Invalid output format" in result.stdout
+
+    def test_validate_multiple_metrics(self) -> None:  # three metrics in one comma-separated `-m` value
+        result = runner.invoke(
+            app,
+            [
+                "validate",
+                "The quick brown fox",
+                "-r",
+                "The quick brown fox",
+                "-m",
+                "bleu,rouge,lexical",
+            ],
+        )
+        assert result.exit_code == 0
+        assert "bleu4" in result.stdout  # one expected name per requested metric
+        assert "rouge_l" in result.stdout
+        assert "jaccard" in result.stdout