chore(project): add pyproject.toml and project configuration

Configure Python project with pydantic, structlog, typer, rich dependencies. Set up ruff, mypy, pytest tooling with strict type checking.
2026-02-03 16:15:48 +00:00
parent 818e241ab2
commit 60aaa33327
5 changed files with 1943 additions and 0 deletions
@@ -0,0 +1,116 @@
+# CLAUDE.md
+
+Guidelines for working on the Veritext project.
+
+## Project Overview
+
+Veritext is a semantic text validation framework for Python. It validates text outputs
+against quality criteria using metrics like BLEU, ROUGE, and semantic similarity.
+
+## Directory Structure
+
+```
+veritext/
+├── src/veritext/          # Package source
+│   ├── core/              # Shared types, tokenisation, config
+│   ├── metrics/           # BLEU, ROUGE, lexical, readability
+│   ├── semantic/          # Optional embedding-based similarity
+│   ├── validators/        # Composable validation checks
+│   ├── benchmark/         # Quality tracking & regression detection
+│   ├── pytest_plugin/     # Native pytest integration
+│   └── cli/               # Command-line interface
+├── tests/                 # Test suite (mirrors src structure)
+├── docs/                  # Project documentation
+└── examples/              # Usage examples
+```
+
+## Code Style
+
+### Python Conventions
+
+- **Python 3.11+** with modern type hints
+- **UK English** in all text (colour, behaviour, summarisation, tokenisation)
+- **snake_case** for variables, functions, modules
+- **PascalCase** for classes
+- Absolute imports from package root: `from veritext.core.types import ...`
+
+### Quality Gates
+
+All must pass with zero issues before any commit:
+
+```bash
+uv run ruff check .              # Linting
+uv run ruff format --check .     # Formatting
+uv run mypy src/                 # Type checking
+uv run pytest                    # Tests
+```
+
+### Documentation
+
+- Docstrings for all public APIs (Google style)
+- Type hints on all function signatures
+- Keep docstrings concise; let types speak where possible
+
+## Architecture
+
+### Layer Dependencies
+
+```
+CLI / pytest_plugin  (presentation)
+        ↓
+validators / benchmark  (decision logic)
+        ↓
+metrics  (pure computation)
+        ↓
+core  (shared types, tokenisation)
+```
+
+Each layer depends only on layers below it.
+
+### Metrics vs Validators
+
+| Concept | Responsibility | Output |
+|---------|----------------|--------|
+| **Metric** | Compute a score | Typed result (e.g., `BleuResult`) |
+| **Validator** | Make a pass/fail decision | `ValidationResult` with diagnostics |
+
+### Edge Case Handling
+
+- Empty text: Metrics return zero scores; validators fail
+- Empty reference: Comparison metrics raise `ValueError`
+- Whitespace-only: Treated as empty after tokenisation
+- Unicode: NFC normalisation by default
+
+## Git Workflow
+
+### Commits
+
+- Format: `type(scope): description`
+- Types: feat, fix, chore, refactor, docs, test
+- Atomic: ≤3 new files, ≤150 LOC per commit
+- Update changelog.md before completing a task
+
+### Branches
+
+- `feat/kebab-case` — new features
+- `fix/kebab-case` — bug fixes
+- `chore/` — maintenance
+- `refactor/` — code restructure
+- `docs/` — documentation only
+
+## Testing
+
+- Test files mirror source structure: `tests/test_core/test_types.py`
+- Use pytest fixtures for common setup
+- Target ≥80% coverage
+- Include edge cases: empty input, Unicode, boundary values
+
+## Pre-Completion Checklist
+
+Before marking ANY task complete:
+
+- [ ] All linting/formatting/type checks pass
+- [ ] Tests pass with adequate coverage
+- [ ] changelog.md updated if there are user-facing changes
+- [ ] Filenames are lowercase (except CLAUDE.md)
+- [ ] Commit follows `type(scope): description` format
@@ -0,0 +1,17 @@
+# Changelog
+
+All notable changes to Veritext will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+
+- Project scaffold with pyproject.toml and development tooling
+- Core exception hierarchy (`VeritextError` and subclasses)
+- Core types: `ValidationContext`, `CheckResult`, `ValidationResult`
+- Word tokeniser with Unicode normalisation support
+- Configuration module with pydantic-settings
+- Structured logging with structlog
@@ -0,0 +1,121 @@
+[project]
+name = "veritext"
+version = "0.1.0.dev0"
+description = "Semantic text validation framework"
+readme = "readme.md"
+requires-python = ">=3.11"
+license = "MIT"
+authors = [{ name = "Kai Chappell", email = "git@kschappell.com" }]
+keywords = ["validation", "text", "nlp", "testing", "quality"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Software Development :: Testing",
+    "Topic :: Text Processing",
+    "Typing :: Typed",
+]
+dependencies = [
+    "pydantic>=2.0",
+    "pydantic-settings>=2.0",
+    "structlog>=23.0",
+    "typer>=0.9",
+    "rich>=13.0",
+]
+
+[project.optional-dependencies]
+semantic = ["sentence-transformers>=2.2"]
+dev = [
+    "pytest>=7.0",
+    "pytest-cov>=4.0",
+    "mypy>=1.0",
+    "ruff>=0.1",
+]
+all = ["veritext[semantic]"]
+
+[project.scripts]
+veritext = "veritext.cli.main:app"
+
+[project.entry-points.pytest11]
+veritext = "veritext.pytest_plugin"
+
+[project.urls]
+Repository = "https://gitea.kschappell.com/kschappell/veritext"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/veritext"]
+
+[tool.ruff]
+line-length = 88
+target-version = "py311"
+src = ["src", "tests"]
+
+[tool.ruff.lint]
+select = [
+    "E",      # pycodestyle errors
+    "W",      # pycodestyle warnings
+    "F",      # pyflakes
+    "I",      # isort
+    "B",      # flake8-bugbear
+    "C4",     # flake8-comprehensions
+    "UP",     # pyupgrade
+    "ARG",    # flake8-unused-arguments
+    "SIM",    # flake8-simplify
+    "TCH",    # flake8-type-checking
+    "PTH",    # flake8-use-pathlib
+    "RUF",    # ruff-specific
+]
+ignore = [
+    "E501",   # line too long (handled by formatter)
+]
+
+[tool.ruff.lint.isort]
+known-first-party = ["veritext"]
+
+[tool.mypy]
+python_version = "3.11"
+mypy_path = ["src"]
+strict = true
+warn_return_any = true
+warn_unused_ignores = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+disallow_untyped_decorators = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_configs = true
+show_error_codes = true
+files = ["src/veritext"]
+
+[[tool.mypy.overrides]]
+module = ["sentence_transformers.*"]
+ignore_missing_imports = true
+
+[[tool.mypy.overrides]]
+module = ["structlog", "structlog.*"]
+ignore_missing_imports = true
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-v --tb=short"
+pythonpath = ["src"]
+
+[tool.coverage.run]
+source = ["src/veritext"]
+branch = true
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "if TYPE_CHECKING:",
+    "raise NotImplementedError",
+]
@@ -0,0 +1,50 @@
+# Veritext
+
+Semantic text validation framework for Python.
+
+Validates text outputs against quality criteria using metrics like BLEU, ROUGE,
+and semantic similarity. Designed for developers building systems that produce
+text (chatbots, content generators, summarisation tools) who need automated
+quality assurance beyond simple string matching.
+
+## Status
+
+Under active development. See [changelog.md](changelog.md) for progress.
+
+## Installation
+
+```bash
+pip install veritext
+
+# With semantic similarity support
+pip install "veritext[semantic]"
+```
+
+## Quick Start
+
+```python
+from veritext import validators as v
+from veritext.core.types import ValidationContext
+
+# Create validators
+validator = v.all_of([
+    v.bleu(min_score=0.7),
+    v.length(max_chars=500),
+])
+
+# Validate text
+context = ValidationContext(reference="The cat sat on the mat.")
+result = validator.check("A cat is sitting on the mat.", context)
+
+if not result.passed:
+    print(result.failure_summary)
+```
+
+## Documentation
+
+- [Project Plan](docs/project-plan.md)
+- [Implementation Plan](docs/implementation-plan.md)
+
+## Licence
+
+MIT