Add comprehensive test suite for validate command, benchmark commands, input readers, and output formatters using Typer CliRunner.
127 lines
4.6 KiB
Python
127 lines
4.6 KiB
Python
"""Tests for CLI input readers."""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from veritext.cli.readers import TextPair, read_jsonl, read_paired_jsonl
|
|
|
|
|
|
class TestTextPair:
    """Checks for the ``TextPair`` value object."""

    def test_create_text_pair(self) -> None:
        """A pair built from keyword arguments exposes both fields unchanged."""
        built = TextPair(candidate="hello", reference="world")

        assert built.candidate == "hello"
        assert built.reference == "world"
|
|
|
|
|
|
class TestReadJsonl:
    """Exercises ``read_jsonl`` on well-formed, empty, and malformed input files."""

    @staticmethod
    def _write(directory: Path, filename: str, text: str) -> Path:
        """Create ``filename`` inside ``directory`` holding ``text``; return its path."""
        target = directory / filename
        target.write_text(text)
        return target

    def test_read_valid_jsonl(self, tmp_path: Path) -> None:
        """Two records, one per line, are returned as two pairs in file order."""
        records = [
            {"candidate": "foo", "reference": "bar"},
            {"candidate": "baz", "reference": "qux"},
        ]
        source = self._write(tmp_path, "data.jsonl", "\n".join(map(json.dumps, records)))

        loaded = read_jsonl(source)

        assert len(loaded) == 2
        first, second = loaded
        assert first.candidate == "foo"
        assert first.reference == "bar"
        assert second.candidate == "baz"
        assert second.reference == "qux"

    def test_read_empty_file(self, tmp_path: Path) -> None:
        """A zero-length file is expected to produce an empty list."""
        source = self._write(tmp_path, "empty.jsonl", "")

        loaded = read_jsonl(source)

        assert loaded == []

    def test_read_file_with_blank_lines(self, tmp_path: Path) -> None:
        """A blank line between two records must not change the pair count."""
        content = '{"candidate": "a", "reference": "b"}\n\n{"candidate": "c", "reference": "d"}\n'
        source = self._write(tmp_path, "data.jsonl", content)

        loaded = read_jsonl(source)

        assert len(loaded) == 2

    def test_read_file_not_found(self, tmp_path: Path) -> None:
        """Reading a path that was never created raises ``FileNotFoundError``."""
        missing = tmp_path / "nonexistent.jsonl"

        with pytest.raises(FileNotFoundError):
            read_jsonl(missing)

    def test_read_invalid_json(self, tmp_path: Path) -> None:
        """Unparseable content raises ``ValueError`` that names line 1."""
        source = self._write(tmp_path, "invalid.jsonl", "not valid json")

        with pytest.raises(ValueError, match="Invalid JSON on line 1"):
            read_jsonl(source)

    def test_read_missing_candidate_key(self, tmp_path: Path) -> None:
        """A record without ``candidate`` raises ``ValueError`` that names line 1."""
        source = self._write(tmp_path, "data.jsonl", '{"reference": "bar"}')

        with pytest.raises(ValueError, match="Missing 'candidate' key on line 1"):
            read_jsonl(source)

    def test_read_missing_reference_key(self, tmp_path: Path) -> None:
        """A record without ``reference`` raises ``ValueError`` that names line 1."""
        source = self._write(tmp_path, "data.jsonl", '{"candidate": "foo"}')

        with pytest.raises(ValueError, match="Missing 'reference' key on line 1"):
            read_jsonl(source)
|
|
|
|
|
|
class TestReadPairedJsonl:
    """Exercises ``read_paired_jsonl`` with separate candidate and reference files."""

    @staticmethod
    def _write(directory: Path, filename: str, text: str) -> Path:
        """Create ``filename`` inside ``directory`` holding ``text``; return its path."""
        target = directory / filename
        target.write_text(text)
        return target

    def test_read_paired_valid(self, tmp_path: Path) -> None:
        """Lines at the same position in both files are zipped into one pair each."""
        cand_path = self._write(
            tmp_path, "candidates.jsonl", '{"text": "foo"}\n{"text": "bar"}'
        )
        ref_path = self._write(
            tmp_path, "references.jsonl", '{"text": "baz"}\n{"text": "qux"}'
        )

        loaded = read_paired_jsonl(cand_path, ref_path)

        assert len(loaded) == 2
        first, second = loaded
        assert first.candidate == "foo"
        assert first.reference == "baz"
        assert second.candidate == "bar"
        assert second.reference == "qux"

    def test_read_paired_length_mismatch(self, tmp_path: Path) -> None:
        """Two candidates against one reference raises a "does not match" ``ValueError``."""
        cand_path = self._write(
            tmp_path, "candidates.jsonl", '{"text": "foo"}\n{"text": "bar"}'
        )
        ref_path = self._write(tmp_path, "references.jsonl", '{"text": "baz"}')

        with pytest.raises(ValueError, match="does not match"):
            read_paired_jsonl(cand_path, ref_path)

    def test_read_paired_candidates_not_found(self, tmp_path: Path) -> None:
        """An absent candidates file raises ``FileNotFoundError`` naming that file's role."""
        ref_path = self._write(tmp_path, "references.jsonl", '{"text": "baz"}')
        missing = tmp_path / "nonexistent.jsonl"

        with pytest.raises(FileNotFoundError, match="Candidates file not found"):
            read_paired_jsonl(missing, ref_path)

    def test_read_paired_references_not_found(self, tmp_path: Path) -> None:
        """An absent references file raises ``FileNotFoundError`` naming that file's role."""
        cand_path = self._write(tmp_path, "candidates.jsonl", '{"text": "foo"}')
        missing = tmp_path / "nonexistent.jsonl"

        with pytest.raises(FileNotFoundError, match="References file not found"):
            read_paired_jsonl(cand_path, missing)

    def test_read_paired_missing_text_key(self, tmp_path: Path) -> None:
        """A candidate record lacking ``text`` raises ``ValueError`` naming the candidates file."""
        cand_path = self._write(tmp_path, "candidates.jsonl", '{"value": "foo"}')
        ref_path = self._write(tmp_path, "references.jsonl", '{"text": "baz"}')

        with pytest.raises(ValueError, match="Missing 'text' key in candidates file"):
            read_paired_jsonl(cand_path, ref_path)
|