"""Tests for CLI input readers.""" import json from pathlib import Path import pytest from veritext.cli.readers import TextPair, read_jsonl, read_paired_jsonl class TestTextPair: """Tests for TextPair dataclass.""" def test_create_text_pair(self) -> None: """Test creating a TextPair.""" pair = TextPair(candidate="hello", reference="world") assert pair.candidate == "hello" assert pair.reference == "world" class TestReadJsonl: """Tests for read_jsonl function.""" def test_read_valid_jsonl(self, tmp_path: Path) -> None: """Test reading a valid JSONL file.""" data = [ {"candidate": "foo", "reference": "bar"}, {"candidate": "baz", "reference": "qux"}, ] jsonl_file = tmp_path / "data.jsonl" jsonl_file.write_text("\n".join(json.dumps(d) for d in data)) pairs = read_jsonl(jsonl_file) assert len(pairs) == 2 assert pairs[0].candidate == "foo" assert pairs[0].reference == "bar" assert pairs[1].candidate == "baz" assert pairs[1].reference == "qux" def test_read_empty_file(self, tmp_path: Path) -> None: """Test reading an empty JSONL file.""" jsonl_file = tmp_path / "empty.jsonl" jsonl_file.write_text("") pairs = read_jsonl(jsonl_file) assert pairs == [] def test_read_file_with_blank_lines(self, tmp_path: Path) -> None: """Test reading a JSONL file with blank lines.""" jsonl_file = tmp_path / "data.jsonl" content = '{"candidate": "a", "reference": "b"}\n\n{"candidate": "c", "reference": "d"}\n' jsonl_file.write_text(content) pairs = read_jsonl(jsonl_file) assert len(pairs) == 2 def test_read_file_not_found(self, tmp_path: Path) -> None: """Test reading a non-existent file.""" with pytest.raises(FileNotFoundError): read_jsonl(tmp_path / "nonexistent.jsonl") def test_read_invalid_json(self, tmp_path: Path) -> None: """Test reading a file with invalid JSON.""" jsonl_file = tmp_path / "invalid.jsonl" jsonl_file.write_text("not valid json") with pytest.raises(ValueError, match="Invalid JSON on line 1"): read_jsonl(jsonl_file) def test_read_missing_candidate_key(self, tmp_path: Path) -> None: """Test reading a file missing the candidate key.""" jsonl_file = tmp_path / "data.jsonl" jsonl_file.write_text('{"reference": "bar"}') with pytest.raises(ValueError, match="Missing 'candidate' key on line 1"): read_jsonl(jsonl_file) def test_read_missing_reference_key(self, tmp_path: Path) -> None: """Test reading a file missing the reference key.""" jsonl_file = tmp_path / "data.jsonl" jsonl_file.write_text('{"candidate": "foo"}') with pytest.raises(ValueError, match="Missing 'reference' key on line 1"): read_jsonl(jsonl_file) class TestReadPairedJsonl: """Tests for read_paired_jsonl function.""" def test_read_paired_valid(self, tmp_path: Path) -> None: """Test reading valid paired JSONL files.""" candidates_file = tmp_path / "candidates.jsonl" references_file = tmp_path / "references.jsonl" candidates_file.write_text('{"text": "foo"}\n{"text": "bar"}') references_file.write_text('{"text": "baz"}\n{"text": "qux"}') pairs = read_paired_jsonl(candidates_file, references_file) assert len(pairs) == 2 assert pairs[0].candidate == "foo" assert pairs[0].reference == "baz" assert pairs[1].candidate == "bar" assert pairs[1].reference == "qux" def test_read_paired_length_mismatch(self, tmp_path: Path) -> None: """Test reading paired files with different lengths.""" candidates_file = tmp_path / "candidates.jsonl" references_file = tmp_path / "references.jsonl" candidates_file.write_text('{"text": "foo"}\n{"text": "bar"}') references_file.write_text('{"text": "baz"}') with pytest.raises(ValueError, match="does not match"): read_paired_jsonl(candidates_file, references_file) def test_read_paired_candidates_not_found(self, tmp_path: Path) -> None: """Test reading when candidates file doesn't exist.""" references_file = tmp_path / "references.jsonl" references_file.write_text('{"text": "baz"}') with pytest.raises(FileNotFoundError, match="Candidates file not found"): read_paired_jsonl(tmp_path / "nonexistent.jsonl", references_file) def test_read_paired_references_not_found(self, tmp_path: Path) -> None: """Test reading when references file doesn't exist.""" candidates_file = tmp_path / "candidates.jsonl" candidates_file.write_text('{"text": "foo"}') with pytest.raises(FileNotFoundError, match="References file not found"): read_paired_jsonl(candidates_file, tmp_path / "nonexistent.jsonl") def test_read_paired_missing_text_key(self, tmp_path: Path) -> None: """Test reading paired files with missing text key.""" candidates_file = tmp_path / "candidates.jsonl" references_file = tmp_path / "references.jsonl" candidates_file.write_text('{"value": "foo"}') references_file.write_text('{"text": "baz"}') with pytest.raises(ValueError, match="Missing 'text' key in candidates file"): read_paired_jsonl(candidates_file, references_file)