Add comprehensive test suite for validate command, benchmark commands, input readers, and output formatters using Typer CliRunner.
234 lines
6.7 KiB
Python
234 lines
6.7 KiB
Python
"""Tests for CLI validate command."""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
from typer.testing import CliRunner
|
|
|
|
from veritext.cli.main import app
|
|
|
|
# Shared Typer test runner; the tests below drive the CLI via runner.invoke(app, [...]).
runner = CliRunner()
|
|
|
|
|
|
class TestValidateInline:
    """Exercise ``validate`` with candidate and reference given on the command line."""

    def test_validate_inline_basic(self) -> None:
        """Identical texts scored with BLEU: expect exit code 0 and a bleu4 entry."""
        sentence = "The quick brown fox jumps"
        argv = ["validate", sentence, "-r", sentence, "-m", "bleu"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "bleu4" in outcome.stdout

    def test_validate_inline_with_rouge(self) -> None:
        """Selecting the rouge metric should print a rouge_l entry."""
        argv = [
            "validate", "hello world today",
            "-r", "hello world here",
            "-m", "rouge",
        ]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "rouge_l" in outcome.stdout

    def test_validate_inline_with_lexical(self) -> None:
        """Selecting the lexical metric should print jaccard and token_overlap."""
        argv = ["validate", "hello world", "-r", "hello everyone", "-m", "lexical"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        # Checked in the same order as before so the first failure is unchanged.
        for score_name in ("jaccard", "token_overlap"):
            assert score_name in outcome.stdout

    def test_validate_inline_json_output(self) -> None:
        """With ``-o json`` stdout must parse as JSON and contain a bleu4 key."""
        text = "hello world today"
        argv = ["validate", text, "-r", text, "-m", "bleu", "-o", "json"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        payload = json.loads(outcome.stdout)
        assert "bleu4" in payload

    def test_validate_inline_simple_output(self) -> None:
        """With ``-o simple`` the score is expected as a ``rouge_l:`` line."""
        text = "hello world today"
        argv = ["validate", text, "-r", text, "-m", "rouge", "-o", "simple"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        assert "rouge_l:" in outcome.stdout

    def test_validate_inline_missing_reference(self) -> None:
        """Omitting ``-r`` should fail with exit code 1 and an error message."""
        argv = ["validate", "hello world", "-m", "bleu"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Error" in outcome.stdout

    def test_validate_inline_invalid_metric(self) -> None:
        """An unrecognised metric name should fail with exit code 1."""
        argv = ["validate", "hello", "-r", "world", "-m", "invalid_metric"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Unknown metrics" in outcome.stdout
|
|
|
|
|
|
class TestValidateFile:
    """Tests for file-based validation mode (``-f`` and ``-R`` options).

    Fixture files are written with an explicit UTF-8 encoding so their bytes
    do not depend on the locale of the machine running the tests.
    """

    def test_validate_file_basic(self, tmp_path: Path) -> None:
        """Test basic file-based validation.

        Writes a two-record JSONL file whose records carry both ``candidate``
        and ``reference`` keys, then expects exit code 0, a bleu4 entry and a
        summary line reporting two evaluated pairs.
        """
        data_file = tmp_path / "data.jsonl"
        data_file.write_text(
            '{"candidate": "hello world today", "reference": "hello world today"}\n'
            '{"candidate": "foo bar baz", "reference": "foo bar baz"}',
            encoding="utf-8",
        )

        result = runner.invoke(
            app,
            ["validate", "-f", str(data_file), "-m", "bleu"],
        )

        assert result.exit_code == 0
        assert "bleu4" in result.stdout
        assert "Evaluated 2 text pairs" in result.stdout

    def test_validate_file_not_found(self) -> None:
        """Test file-based validation with non-existent file.

        Expects exit code 1 and an error message on stdout.
        """
        result = runner.invoke(
            app,
            ["validate", "-f", "/nonexistent/file.jsonl", "-m", "bleu"],
        )

        assert result.exit_code == 1
        assert "Error" in result.stdout

    def test_validate_paired_files(self, tmp_path: Path) -> None:
        """Test validation with separate candidate and reference files.

        Both files hold the same two ``text`` records; the candidates file is
        passed with ``-f`` and the references file with ``-R``.
        """
        candidates_file = tmp_path / "candidates.jsonl"
        references_file = tmp_path / "references.jsonl"

        # Same payload for both sides, so every pair is an exact match.
        records = '{"text": "hello world today"}\n{"text": "foo bar baz"}'
        candidates_file.write_text(records, encoding="utf-8")
        references_file.write_text(records, encoding="utf-8")

        result = runner.invoke(
            app,
            [
                "validate",
                "-f",
                str(candidates_file),
                "-R",
                str(references_file),
                "-m",
                "bleu",
            ],
        )

        assert result.exit_code == 0
        assert "Evaluated 2 text pairs" in result.stdout
|
|
|
|
|
|
class TestValidateOptions:
    """Exercise the extra options accepted by the ``validate`` command."""

    def test_validate_with_threshold(self) -> None:
        """Passing ``-t 0.5`` should succeed and show threshold status output."""
        text = "hello world today"
        argv = ["validate", text, "-r", text, "-m", "bleu", "-t", "0.5"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        # Either the table's Status column header or a PASS marker is accepted.
        assert any(marker in outcome.stdout for marker in ("Status", "PASS"))

    def test_validate_invalid_output_format(self) -> None:
        """An unsupported ``-o`` value should fail with exit code 1."""
        argv = ["validate", "hello", "-r", "world", "-m", "bleu", "-o", "invalid"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 1
        assert "Invalid output format" in outcome.stdout

    def test_validate_multiple_metrics(self) -> None:
        """A comma-separated ``-m`` list should report a score from each metric."""
        sentence = "The quick brown fox"
        argv = ["validate", sentence, "-r", sentence, "-m", "bleu,rouge,lexical"]

        outcome = runner.invoke(app, argv)

        assert outcome.exit_code == 0
        # Checked in the same order as before so the first failure is unchanged.
        for score_name in ("bleu4", "rouge_l", "jaccard"):
            assert score_name in outcome.stdout
|