# File: src/veritext/core/config.py
"""Configuration management using pydantic-settings."""

from functools import lru_cache
from pathlib import Path
from typing import Literal

from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict


class VeritextSettings(BaseSettings):
    """Configuration settings for Veritext.

    Values are loaded from environment variables carrying the ``VERITEXT_``
    prefix and from a UTF-8 encoded ``.env`` file. Keys that do not match a
    declared field are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_prefix="VERITEXT_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Logging settings
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = Field(
        default="INFO",
        description="Logging level",
    )
    log_format: Literal["console", "json"] = Field(
        default="console",
        description="Log output format",
    )

    # Benchmark settings
    benchmark_storage_path: Path = Field(
        default=Path("benchmarks"),
        description="Path to benchmark storage directory",
    )

    # Tokenisation defaults
    tokeniser_lowercase: bool = Field(
        default=True,
        description="Whether to lowercase tokens by default",
    )
    tokeniser_remove_punctuation: bool = Field(
        default=True,
        description="Whether to remove punctuation by default",
    )

    # Semantic similarity settings (when available)
    semantic_model: str = Field(
        default="all-MiniLM-L6-v2",
        description="Default sentence-transformers model",
    )
    semantic_cache_embeddings: bool = Field(
        default=True,
        description="Whether to cache embeddings",
    )

    @field_validator("log_level", mode="before")
    @classmethod
    def _normalise_log_level(cls, value: object) -> object:
        """Upper-case string input so e.g. ``info`` matches the ``INFO`` literal."""
        # Runs before the Literal check; non-string input is passed through
        # untouched so the normal validation error is still produced for it.
        return value.strip().upper() if isinstance(value, str) else value

    @field_validator("log_format", mode="before")
    @classmethod
    def _normalise_log_format(cls, value: object) -> object:
        """Lower-case string input so e.g. ``JSON`` matches the ``json`` literal."""
        return value.strip().lower() if isinstance(value, str) else value


@lru_cache
def get_settings() -> VeritextSettings:
    """Return the shared ``VeritextSettings`` instance.

    The instance is built on the first call and memoised by ``lru_cache``, so
    later calls return the same object without re-reading the environment.
    Call ``get_settings.cache_clear()`` to force a fresh load.
    """
    return VeritextSettings()


# File: src/veritext/core/logging.py
"""Structured logging configuration using structlog."""

import logging
import sys
from typing import Any

import structlog

from veritext.core.config import get_settings


def configure_logging(
    level: str | None = None,
    log_format: str | None = None,
) -> None:
    """
    Configure structlog for the application.

    Args:
        level: Log level (DEBUG, INFO, WARNING, ERROR), matched
            case-insensitively. Uses settings if not provided.
        log_format: Output format (console, json), matched
            case-insensitively. Uses settings if not provided. Any value
            other than "json" selects the console renderer.

    Raises:
        ValueError: If ``level`` does not name a numeric level constant of
            the standard ``logging`` module.
    """
    settings = get_settings()
    level_name = (level or settings.log_level).upper()
    format_name = (log_format or settings.log_format).lower()

    # Resolve the name once and make sure it really is a numeric level:
    # a plain getattr() would happily return e.g. the function
    # ``logging.debug`` for a lowercase name and fail later with an
    # unrelated-looking error.
    numeric_level = getattr(logging, level_name, None)
    if not isinstance(numeric_level, int):
        raise ValueError(f"Unknown log level: {level_name!r}")

    # Note: basicConfig() does nothing if the root logger already has
    # handlers configured.
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stderr,
        level=numeric_level,
    )

    shared_processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
    ]

    processors: list[Any]
    if format_name == "json":
        processors = [
            *shared_processors,
            # Render exc_info into the event dict before JSON serialisation.
            structlog.processors.format_exc_info,
            structlog.processors.JSONRenderer(),
        ]
    else:
        processors = [
            *shared_processors,
            structlog.dev.ConsoleRenderer(colors=True),
        ]

    # NOTE(review): PrintLoggerFactory() is created without a ``file``
    # argument, while basicConfig above targets stderr — confirm that the
    # two output streams are meant to differ.
    structlog.configure(
        processors=processors,
        wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )


def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
    """
    Return a structlog logger.

    Args:
        name: Passed as the positional argument to ``structlog.get_logger``;
            falls back to "veritext" when omitted or empty.

    Returns:
        The logger produced by ``structlog.get_logger``.
    """
    # NOTE(review): configure_logging installs a filtering bound logger as the
    # wrapper class, so the stdlib BoundLogger return annotation may not match
    # the object actually returned — confirm before relying on it.
    return structlog.get_logger(name or "veritext")