cli tests
Add comprehensive test suite for validate command, benchmark commands, input readers, and output formatters using Typer CliRunner.
This commit is contained in:
214
tests/test_cli/test_validate.py
Normal file
214
tests/test_cli/test_validate.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""Tests for CLI validate command."""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from veritext.cli.main import app
|
||||
|
||||
# Module-level CliRunner instance; every test below invokes ``app`` through it.
runner = CliRunner()
|
||||
|
||||
|
||||
class TestValidateInline:
    """Tests that pass candidate and reference text to ``validate`` inline."""

    def test_validate_inline_basic(self) -> None:
        """Expect exit code 0 and ``bleu4`` in stdout for identical texts."""
        sentence = "The quick brown fox jumps"
        outcome = runner.invoke(
            app, ["validate", sentence, "-r", sentence, "-m", "bleu"]
        )

        assert outcome.exit_code == 0
        assert "bleu4" in outcome.stdout

    def test_validate_inline_with_rouge(self) -> None:
        """Expect exit code 0 and ``rouge_l`` in stdout with ``-m rouge``."""
        argv = [
            "validate",
            "hello world today",
            "-r",
            "hello world here",
            "-m",
            "rouge",
        ]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "rouge_l" in outcome.stdout

    def test_validate_inline_with_lexical(self) -> None:
        """Expect ``jaccard`` and ``token_overlap`` in stdout with ``-m lexical``."""
        argv = ["validate", "hello world", "-r", "hello everyone", "-m", "lexical"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "jaccard" in outcome.stdout
        assert "token_overlap" in outcome.stdout

    def test_validate_inline_json_output(self) -> None:
        """Expect stdout to parse as JSON containing ``bleu4`` with ``-o json``."""
        sentence = "hello world today"
        argv = ["validate", sentence, "-r", sentence, "-m", "bleu", "-o", "json"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        payload = json.loads(outcome.stdout)
        assert "bleu4" in payload

    def test_validate_inline_simple_output(self) -> None:
        """Expect ``rouge_l:`` in stdout with ``-o simple``."""
        sentence = "hello world today"
        argv = ["validate", sentence, "-r", sentence, "-m", "rouge", "-o", "simple"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "rouge_l:" in outcome.stdout

    def test_validate_inline_missing_reference(self) -> None:
        """Expect exit code 1 and ``Error`` in stdout when ``-r`` is omitted."""
        outcome = runner.invoke(app, ["validate", "hello world", "-m", "bleu"])

        assert outcome.exit_code == 1
        assert "Error" in outcome.stdout

    def test_validate_inline_invalid_metric(self) -> None:
        """Expect exit code 1 and ``Unknown metrics`` for an unrecognised metric."""
        argv = ["validate", "hello", "-r", "world", "-m", "invalid_metric"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Unknown metrics" in outcome.stdout
|
||||
|
||||
|
||||
class TestValidateFile:
    """Tests that feed ``validate`` its input through files (``-f`` / ``-R``)."""

    def test_validate_file_basic(self, tmp_path: Path) -> None:
        """Expect two pairs to be evaluated from a two-line JSONL file."""
        rows = [
            '{"candidate": "hello world today", "reference": "hello world today"}',
            '{"candidate": "foo bar baz", "reference": "foo bar baz"}',
        ]
        data_file = tmp_path / "data.jsonl"
        # Joined with a single newline and no trailing newline.
        data_file.write_text("\n".join(rows))

        argv = ["validate", "-f", str(data_file), "-m", "bleu"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "bleu4" in outcome.stdout
        assert "Evaluated 2 text pairs" in outcome.stdout

    def test_validate_file_not_found(self) -> None:
        """Expect exit code 1 and ``Error`` in stdout for a nonexistent path."""
        argv = ["validate", "-f", "/nonexistent/file.jsonl", "-m", "bleu"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Error" in outcome.stdout

    def test_validate_paired_files(self, tmp_path: Path) -> None:
        """Expect two pairs to be evaluated from separate ``-f`` and ``-R`` files."""
        # Both files receive the same two ``{"text": ...}`` records.
        payload = '{"text": "hello world today"}\n{"text": "foo bar baz"}'

        candidates_file = tmp_path / "candidates.jsonl"
        references_file = tmp_path / "references.jsonl"
        candidates_file.write_text(payload)
        references_file.write_text(payload)

        argv = [
            "validate",
            "-f",
            str(candidates_file),
            "-R",
            str(references_file),
            "-m",
            "bleu",
        ]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "Evaluated 2 text pairs" in outcome.stdout
|
||||
|
||||
|
||||
class TestValidateOptions:
    """Tests for the ``-t``, ``-o`` and multi-metric ``-m`` options of ``validate``."""

    def test_validate_with_threshold(self) -> None:
        """Expect ``Status`` or ``PASS`` in stdout when ``-t 0.5`` is given."""
        sentence = "hello world today"
        argv = ["validate", sentence, "-r", sentence, "-m", "bleu", "-t", "0.5"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        # Table output should include Status column
        assert any(marker in outcome.stdout for marker in ("Status", "PASS"))

    def test_validate_invalid_output_format(self) -> None:
        """Expect exit code 1 and ``Invalid output format`` for ``-o invalid``."""
        argv = ["validate", "hello", "-r", "world", "-m", "bleu", "-o", "invalid"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Invalid output format" in outcome.stdout

    def test_validate_multiple_metrics(self) -> None:
        """Expect one key per metric family for ``-m bleu,rouge,lexical``."""
        sentence = "The quick brown fox"
        argv = ["validate", sentence, "-r", sentence, "-m", "bleu,rouge,lexical"]
        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        for expected_key in ("bleu4", "rouge_l", "jaccard"):
            assert expected_key in outcome.stdout
|
||||
Reference in New Issue
Block a user