Add comprehensive test suite for validate command, benchmark commands, input readers, and output formatters using Typer CliRunner.
146 lines
5.4 KiB
Python
146 lines
5.4 KiB
Python
"""Tests for CLI input readers."""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from veritext.cli.readers import TextPair, read_jsonl, read_paired_jsonl
|
|
|
|
|
|
class TestTextPair:
    """Checks that a ``TextPair`` can be constructed and read back."""

    def test_create_text_pair(self) -> None:
        """Keyword-constructed pair exposes both values through its attributes."""
        pair = TextPair(candidate="hello", reference="world")

        assert (pair.candidate, pair.reference) == ("hello", "world")
|
|
|
|
|
|
class TestReadJsonl:
    """Tests for ``read_jsonl``: valid input, empty input, and error reporting.

    Every fixture file is written with an explicit UTF-8 encoding so the
    bytes on disk do not depend on the platform's default locale.
    """

    def test_read_valid_jsonl(self, tmp_path: Path) -> None:
        """Two well-formed records are returned as two pairs, in file order."""
        data = [
            {"candidate": "foo", "reference": "bar"},
            {"candidate": "baz", "reference": "qux"},
        ]
        jsonl_file = tmp_path / "data.jsonl"
        # No trailing newline: the final record must still be read.
        jsonl_file.write_text(
            "\n".join(json.dumps(d) for d in data), encoding="utf-8"
        )

        pairs = read_jsonl(jsonl_file)

        assert len(pairs) == 2
        assert pairs[0].candidate == "foo"
        assert pairs[0].reference == "bar"
        assert pairs[1].candidate == "baz"
        assert pairs[1].reference == "qux"

    def test_read_empty_file(self, tmp_path: Path) -> None:
        """A zero-byte file yields an empty result rather than an error."""
        jsonl_file = tmp_path / "empty.jsonl"
        jsonl_file.write_text("", encoding="utf-8")

        pairs = read_jsonl(jsonl_file)

        assert pairs == []

    def test_read_file_with_blank_lines(self, tmp_path: Path) -> None:
        """Blank lines between records are skipped and the records kept intact."""
        jsonl_file = tmp_path / "data.jsonl"
        content = '{"candidate": "a", "reference": "b"}\n\n{"candidate": "c", "reference": "d"}\n'
        jsonl_file.write_text(content, encoding="utf-8")

        pairs = read_jsonl(jsonl_file)

        assert len(pairs) == 2
        # Check the contents too: a count alone would not catch a reader that
        # returned the wrong records around the blank line.
        assert [(p.candidate, p.reference) for p in pairs] == [
            ("a", "b"),
            ("c", "d"),
        ]

    def test_read_file_not_found(self, tmp_path: Path) -> None:
        """A path that does not exist raises ``FileNotFoundError``."""
        with pytest.raises(FileNotFoundError):
            read_jsonl(tmp_path / "nonexistent.jsonl")

    def test_read_invalid_json(self, tmp_path: Path) -> None:
        """Unparseable content raises ``ValueError`` naming the offending line."""
        jsonl_file = tmp_path / "invalid.jsonl"
        jsonl_file.write_text("not valid json", encoding="utf-8")

        with pytest.raises(ValueError, match="Invalid JSON on line 1"):
            read_jsonl(jsonl_file)

    def test_read_missing_candidate_key(self, tmp_path: Path) -> None:
        """A record without ``candidate`` raises ``ValueError`` with the line number."""
        jsonl_file = tmp_path / "data.jsonl"
        jsonl_file.write_text('{"reference": "bar"}', encoding="utf-8")

        with pytest.raises(ValueError, match="Missing 'candidate' key on line 1"):
            read_jsonl(jsonl_file)

    def test_read_missing_reference_key(self, tmp_path: Path) -> None:
        """A record without ``reference`` raises ``ValueError`` with the line number."""
        jsonl_file = tmp_path / "data.jsonl"
        jsonl_file.write_text('{"candidate": "foo"}', encoding="utf-8")

        with pytest.raises(ValueError, match="Missing 'reference' key on line 1"):
            read_jsonl(jsonl_file)
|
|
|
|
|
|
class TestReadPairedJsonl:
    """Checks for ``read_paired_jsonl`` reading separate candidate/reference files."""

    def test_read_paired_valid(self, tmp_path: Path) -> None:
        """Records at the same position in both files are combined into one pair."""
        cand_path = tmp_path / "candidates.jsonl"
        ref_path = tmp_path / "references.jsonl"
        cand_path.write_text('{"text": "foo"}\n{"text": "bar"}')
        ref_path.write_text('{"text": "baz"}\n{"text": "qux"}')

        result = read_paired_jsonl(cand_path, ref_path)

        assert len(result) == 2
        first, second = result[0], result[1]
        assert (first.candidate, first.reference) == ("foo", "baz")
        assert (second.candidate, second.reference) == ("bar", "qux")

    def test_read_paired_length_mismatch(self, tmp_path: Path) -> None:
        """Two candidates against one reference raises a 'does not match' ValueError."""
        cand_path = tmp_path / "candidates.jsonl"
        ref_path = tmp_path / "references.jsonl"
        cand_path.write_text('{"text": "foo"}\n{"text": "bar"}')
        ref_path.write_text('{"text": "baz"}')

        with pytest.raises(ValueError, match="does not match"):
            read_paired_jsonl(cand_path, ref_path)

    def test_read_paired_candidates_not_found(self, tmp_path: Path) -> None:
        """A missing candidates file is reported by name in the FileNotFoundError."""
        ref_path = tmp_path / "references.jsonl"
        ref_path.write_text('{"text": "baz"}')
        missing_path = tmp_path / "nonexistent.jsonl"

        with pytest.raises(FileNotFoundError, match="Candidates file not found"):
            read_paired_jsonl(missing_path, ref_path)

    def test_read_paired_references_not_found(self, tmp_path: Path) -> None:
        """A missing references file is reported by name in the FileNotFoundError."""
        cand_path = tmp_path / "candidates.jsonl"
        cand_path.write_text('{"text": "foo"}')
        missing_path = tmp_path / "nonexistent.jsonl"

        with pytest.raises(FileNotFoundError, match="References file not found"):
            read_paired_jsonl(cand_path, missing_path)

    def test_read_paired_missing_text_key(self, tmp_path: Path) -> None:
        """A candidate record lacking ``text`` raises ValueError citing the candidates file."""
        cand_path = tmp_path / "candidates.jsonl"
        ref_path = tmp_path / "references.jsonl"
        # The candidate record uses "value" instead of the expected "text" key.
        cand_path.write_text('{"value": "foo"}')
        ref_path.write_text('{"text": "baz"}')

        with pytest.raises(ValueError, match="Missing 'text' key in candidates file"):
            read_paired_jsonl(cand_path, ref_path)
|