cli tests

Add comprehensive test suite for validate command, benchmark commands,
input readers, and output formatters using Typer CliRunner.
This commit is contained in:
2025-05-11 14:13:30 +00:00
parent 5f619a626b
commit 8511594697
5 changed files with 775 additions and 0 deletions

View File

@@ -0,0 +1,214 @@
"""Tests for CLI validate command."""
import json
from pathlib import Path
from typer.testing import CliRunner
from veritext.cli.main import app
# Shared Typer test runner: the tests below call ``runner.invoke(app, [...])``
# to run the CLI in-process and inspect the exit code and captured stdout.
runner = CliRunner()
class TestValidateInline:
    """Run ``validate`` with candidate and reference passed on the command line."""

    def test_validate_inline_basic(self) -> None:
        """BLEU on identical inline texts must exit 0 and print ``bleu4``."""
        text = "The quick brown fox jumps"
        argv = ["validate", text, "-r", text, "-m", "bleu"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "bleu4" in outcome.stdout

    def test_validate_inline_with_rouge(self) -> None:
        """The ``rouge`` metric must exit 0 and print ``rouge_l``."""
        candidate = "hello world today"
        reference = "hello world here"
        argv = ["validate", candidate, "-r", reference, "-m", "rouge"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "rouge_l" in outcome.stdout

    def test_validate_inline_with_lexical(self) -> None:
        """The ``lexical`` metric must print both of its expected score names."""
        argv = ["validate", "hello world", "-r", "hello everyone", "-m", "lexical"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        for score_name in ("jaccard", "token_overlap"):
            assert score_name in outcome.stdout

    def test_validate_inline_json_output(self) -> None:
        """With ``-o json`` stdout must parse as JSON containing ``bleu4``."""
        text = "hello world today"
        argv = ["validate", text, "-r", text, "-m", "bleu", "-o", "json"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        parsed = json.loads(outcome.stdout)
        assert "bleu4" in parsed

    def test_validate_inline_simple_output(self) -> None:
        """With ``-o simple`` stdout must contain the ``rouge_l:`` label."""
        text = "hello world today"
        argv = ["validate", text, "-r", text, "-m", "rouge", "-o", "simple"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "rouge_l:" in outcome.stdout

    def test_validate_inline_missing_reference(self) -> None:
        """Omitting the reference must exit 1 with ``Error`` in stdout."""
        argv = ["validate", "hello world", "-m", "bleu"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Error" in outcome.stdout

    def test_validate_inline_invalid_metric(self) -> None:
        """An unrecognised metric name must exit 1 with ``Unknown metrics``."""
        argv = ["validate", "hello", "-r", "world", "-m", "invalid_metric"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Unknown metrics" in outcome.stdout
class TestValidateFile:
    """Run ``validate`` with its input read from JSONL files."""

    def test_validate_file_basic(self, tmp_path: Path) -> None:
        """A two-record candidate/reference file must report two evaluated pairs."""
        records = (
            '{"candidate": "hello world today", "reference": "hello world today"}',
            '{"candidate": "foo bar baz", "reference": "foo bar baz"}',
        )
        dataset = tmp_path / "data.jsonl"
        # Records are newline-separated with no trailing newline.
        dataset.write_text("\n".join(records))
        argv = ["validate", "-f", str(dataset), "-m", "bleu"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "bleu4" in outcome.stdout
        assert "Evaluated 2 text pairs" in outcome.stdout

    def test_validate_file_not_found(self) -> None:
        """A path that does not exist must exit 1 with ``Error`` in stdout."""
        argv = ["validate", "-f", "/nonexistent/file.jsonl", "-m", "bleu"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Error" in outcome.stdout

    def test_validate_paired_files(self, tmp_path: Path) -> None:
        """Separate candidate (``-f``) and reference (``-R``) files are paired up."""
        # Both files carry the same two ``text`` records.
        rows = '{"text": "hello world today"}\n{"text": "foo bar baz"}'
        candidates = tmp_path / "candidates.jsonl"
        references = tmp_path / "references.jsonl"
        for target in (candidates, references):
            target.write_text(rows)
        file_flags = ["-f", str(candidates), "-R", str(references)]

        outcome = runner.invoke(app, ["validate", *file_flags, "-m", "bleu"])

        assert outcome.exit_code == 0
        assert "Evaluated 2 text pairs" in outcome.stdout
class TestValidateOptions:
    """Cover the threshold, output-format and multi-metric options of ``validate``."""

    def test_validate_with_threshold(self) -> None:
        """Passing ``-t 0.5`` must exit 0 and show a status marker in stdout."""
        text = "hello world today"
        argv = ["validate", text, "-r", text, "-m", "bleu", "-t", "0.5"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        # The table output is expected to carry a Status column (or a PASS cell).
        assert any(marker in outcome.stdout for marker in ("Status", "PASS"))

    def test_validate_invalid_output_format(self) -> None:
        """An unsupported ``-o`` value must exit 1 with ``Invalid output format``."""
        argv = ["validate", "hello", "-r", "world", "-m", "bleu", "-o", "invalid"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Invalid output format" in outcome.stdout

    def test_validate_multiple_metrics(self) -> None:
        """A comma-separated metric list must print one score name per metric."""
        text = "The quick brown fox"
        argv = ["validate", text, "-r", text, "-m", "bleu,rouge,lexical"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        for score_name in ("bleu4", "rouge_l", "jaccard"):
            assert score_name in outcome.stdout