Files
veritext/tests/test_cli/test_readers.py
Kai Chappell 8511594697 cli tests
Add comprehensive test suite for validate command, benchmark commands,
input readers, and output formatters using Typer CliRunner.
2025-05-11 14:13:30 +00:00

127 lines
4.6 KiB
Python

"""Tests for CLI input readers."""
import json
from pathlib import Path
import pytest
from veritext.cli.readers import TextPair, read_jsonl, read_paired_jsonl
class TestTextPair:
    """Checks for the ``TextPair`` container imported from the CLI readers."""

    def test_create_text_pair(self) -> None:
        """Values passed by keyword are readable back as attributes."""
        built = TextPair(candidate="hello", reference="world")
        observed = (built.candidate, built.reference)
        assert observed == ("hello", "world")
class TestReadJsonl:
    """Exercises ``read_jsonl`` against valid, empty and malformed files.

    Every test writes its own fixture file under pytest's ``tmp_path``.
    """

    def test_read_valid_jsonl(self, tmp_path: Path) -> None:
        """Two records on separate lines are expected back as two pairs, in order."""
        records = [
            {"candidate": "foo", "reference": "bar"},
            {"candidate": "baz", "reference": "qux"},
        ]
        source = tmp_path / "data.jsonl"
        serialized = [json.dumps(record) for record in records]
        # No trailing newline: the last record ends the file.
        source.write_text("\n".join(serialized))

        loaded = read_jsonl(source)

        assert len(loaded) == 2
        first, second = loaded[0], loaded[1]
        assert (first.candidate, first.reference) == ("foo", "bar")
        assert (second.candidate, second.reference) == ("baz", "qux")

    def test_read_empty_file(self, tmp_path: Path) -> None:
        """A zero-byte file is expected to produce an empty list."""
        source = tmp_path / "empty.jsonl"
        source.write_text("")

        loaded = read_jsonl(source)

        assert loaded == []

    def test_read_file_with_blank_lines(self, tmp_path: Path) -> None:
        """An empty line between two records is expected not to add a pair."""
        source = tmp_path / "data.jsonl"
        payload = (
            '{"candidate": "a", "reference": "b"}\n'
            "\n"
            '{"candidate": "c", "reference": "d"}\n'
        )
        source.write_text(payload)

        loaded = read_jsonl(source)

        assert len(loaded) == 2

    def test_read_file_not_found(self, tmp_path: Path) -> None:
        """A path that was never created is expected to raise FileNotFoundError."""
        missing = tmp_path / "nonexistent.jsonl"
        with pytest.raises(FileNotFoundError):
            read_jsonl(missing)

    def test_read_invalid_json(self, tmp_path: Path) -> None:
        """Unparseable content is expected to raise ValueError naming line 1."""
        source = tmp_path / "invalid.jsonl"
        source.write_text("not valid json")

        with pytest.raises(ValueError, match="Invalid JSON on line 1"):
            read_jsonl(source)

    def test_read_missing_candidate_key(self, tmp_path: Path) -> None:
        """A record without 'candidate' is expected to raise ValueError naming line 1."""
        source = tmp_path / "data.jsonl"
        source.write_text('{"reference": "bar"}')

        with pytest.raises(ValueError, match="Missing 'candidate' key on line 1"):
            read_jsonl(source)

    def test_read_missing_reference_key(self, tmp_path: Path) -> None:
        """A record without 'reference' is expected to raise ValueError naming line 1."""
        source = tmp_path / "data.jsonl"
        source.write_text('{"candidate": "foo"}')

        with pytest.raises(ValueError, match="Missing 'reference' key on line 1"):
            read_jsonl(source)
class TestReadPairedJsonl:
    """Exercises ``read_paired_jsonl`` with separate candidate and reference files.

    Each fixture line is a JSON object carrying its string under the "text" key.
    """

    def test_read_paired_valid(self, tmp_path: Path) -> None:
        """Equal-length files are expected to be paired line by line."""
        cand_path = tmp_path / "candidates.jsonl"
        ref_path = tmp_path / "references.jsonl"
        cand_path.write_text('{"text": "foo"}\n{"text": "bar"}')
        ref_path.write_text('{"text": "baz"}\n{"text": "qux"}')

        merged = read_paired_jsonl(cand_path, ref_path)

        expected = [("foo", "baz"), ("bar", "qux")]
        assert len(merged) == 2
        for index, (cand_text, ref_text) in enumerate(expected):
            assert merged[index].candidate == cand_text
            assert merged[index].reference == ref_text

    def test_read_paired_length_mismatch(self, tmp_path: Path) -> None:
        """Two candidates against one reference is expected to raise ValueError."""
        cand_path = tmp_path / "candidates.jsonl"
        ref_path = tmp_path / "references.jsonl"
        cand_path.write_text('{"text": "foo"}\n{"text": "bar"}')
        ref_path.write_text('{"text": "baz"}')

        with pytest.raises(ValueError, match="does not match"):
            read_paired_jsonl(cand_path, ref_path)

    def test_read_paired_candidates_not_found(self, tmp_path: Path) -> None:
        """A missing candidates file is expected to be named in the error."""
        ref_path = tmp_path / "references.jsonl"
        ref_path.write_text('{"text": "baz"}')
        absent = tmp_path / "nonexistent.jsonl"

        with pytest.raises(FileNotFoundError, match="Candidates file not found"):
            read_paired_jsonl(absent, ref_path)

    def test_read_paired_references_not_found(self, tmp_path: Path) -> None:
        """A missing references file is expected to be named in the error."""
        cand_path = tmp_path / "candidates.jsonl"
        cand_path.write_text('{"text": "foo"}')
        absent = tmp_path / "nonexistent.jsonl"

        with pytest.raises(FileNotFoundError, match="References file not found"):
            read_paired_jsonl(cand_path, absent)

    def test_read_paired_missing_text_key(self, tmp_path: Path) -> None:
        """A candidate record lacking 'text' is expected to raise ValueError."""
        cand_path = tmp_path / "candidates.jsonl"
        ref_path = tmp_path / "references.jsonl"
        # The candidate uses "value" instead of the "text" key.
        cand_path.write_text('{"value": "foo"}')
        ref_path.write_text('{"text": "baz"}')

        with pytest.raises(ValueError, match="Missing 'text' key in candidates file"):
            read_paired_jsonl(cand_path, ref_path)