From a65249fa44736414e060c49c96931855a939f4e1 Mon Sep 17 00:00:00 2001
From: Kai Chappell
Date: Tue, 3 Feb 2026 16:16:13 +0000
Subject: [PATCH] feat(core): add config and structured logging

Implement pydantic-settings based configuration with environment
variable support and structlog integration for JSON/console output modes.
---
 src/veritext/core/config.py  |  73 ++++++++++++++++++++++
 src/veritext/core/logging.py | 116 +++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 src/veritext/core/config.py
 create mode 100644 src/veritext/core/logging.py

diff --git a/src/veritext/core/config.py b/src/veritext/core/config.py
new file mode 100644
index 0000000..f794acc
--- /dev/null
+++ b/src/veritext/core/config.py
@@ -0,0 +1,73 @@
+"""Configuration management using pydantic-settings."""
+
+from pathlib import Path
+from typing import Literal
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class VeritextSettings(BaseSettings):
+    """
+    Configuration settings for Veritext.
+
+    Values are loaded by pydantic-settings from environment variables
+    carrying the ``VERITEXT_`` prefix and from a UTF-8 encoded ``.env``
+    file; inputs that match no declared field are ignored.
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="VERITEXT_",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+
+    # Logging settings
+    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR"] = Field(
+        default="INFO",
+        description="Logging level",
+    )
+    log_format: Literal["console", "json"] = Field(
+        default="console",
+        description="Log output format",
+    )
+
+    # Benchmark settings
+    benchmark_storage_path: Path = Field(
+        default=Path("benchmarks"),
+        description="Path to benchmark storage directory",
+    )
+
+    # Tokenisation defaults
+    tokeniser_lowercase: bool = Field(
+        default=True,
+        description="Whether to lowercase tokens by default",
+    )
+    tokeniser_remove_punctuation: bool = Field(
+        default=True,
+        description="Whether to remove punctuation by default",
+    )
+
+    # Semantic similarity settings (when available)
+    semantic_model: str = Field(
+        default="all-MiniLM-L6-v2",
+        description="Default sentence-transformers model",
+    )
+    semantic_cache_embeddings: bool = Field(
+        default=True,
+        description="Whether to cache embeddings",
+    )
+
+
+def get_settings() -> VeritextSettings:
+    """
+    Get the current settings instance.
+
+    A new VeritextSettings object is constructed on every call; nothing
+    is cached here.
+
+    Returns:
+        A freshly constructed settings instance.
+    """
+    return VeritextSettings()
diff --git a/src/veritext/core/logging.py b/src/veritext/core/logging.py
new file mode 100644
index 0000000..7591b5d
--- /dev/null
+++ b/src/veritext/core/logging.py
@@ -0,0 +1,116 @@
+"""Structured logging configuration using structlog."""
+
+import logging
+import sys
+from typing import Any
+
+import structlog
+
+from veritext.core.config import get_settings
+
+
+def _resolve_level(level: str) -> int:
+    """
+    Translate a level name into the stdlib numeric level.
+
+    Args:
+        level: Level name in any letter case, e.g. "info" or "INFO".
+
+    Returns:
+        The matching integer constant from the logging module.
+
+    Raises:
+        ValueError: If level does not name a logging level.
+    """
+    # Upper-case first: getattr(logging, "info") would return the
+    # logging.info function rather than the logging.INFO constant.
+    numeric_level = getattr(logging, level.upper(), None)
+    if not isinstance(numeric_level, int):
+        raise ValueError(f"Unknown log level: {level!r}")
+    return numeric_level
+
+
+def configure_logging(
+    level: str | None = None,
+    log_format: str | None = None,
+) -> None:
+    """
+    Configure structlog for the application.
+
+    Args:
+        level: Log level (DEBUG, INFO, WARNING, ERROR), in any letter
+            case. Uses settings if not provided.
+        log_format: Output format (console, json), in any letter case.
+            Uses settings if not provided. Any value other than "json"
+            selects console output.
+
+    Raises:
+        ValueError: If level does not name a logging level.
+    """
+    # Load settings only when a value is missing, so fully explicit
+    # calls do not depend on the environment or a .env file.
+    if level and log_format:
+        level_name, format_name = level, log_format
+    else:
+        settings = get_settings()
+        level_name = level or settings.log_level
+        format_name = log_format or settings.log_format
+    numeric_level = _resolve_level(level_name)
+
+    # Configure standard library logging. basicConfig does nothing when
+    # the root logger already has handlers.
+    logging.basicConfig(
+        format="%(message)s",
+        stream=sys.stderr,
+        level=numeric_level,
+    )
+
+    # Shared processors
+    shared_processors: list[Any] = [
+        structlog.contextvars.merge_contextvars,
+        structlog.processors.add_log_level,
+        structlog.processors.TimeStamper(fmt="iso"),
+        structlog.stdlib.PositionalArgumentsFormatter(),
+        structlog.processors.StackInfoRenderer(),
+        structlog.processors.UnicodeDecoder(),
+    ]
+
+    if format_name.lower() == "json":
+        # JSON output for production/log aggregation
+        processors = [
+            *shared_processors,
+            structlog.processors.format_exc_info,
+            structlog.processors.JSONRenderer(),
+        ]
+    else:
+        # Console output for development
+        processors = [
+            *shared_processors,
+            structlog.dev.ConsoleRenderer(colors=True),
+        ]
+
+    structlog.configure(
+        processors=processors,
+        # Drop events below the requested level.
+        wrapper_class=structlog.make_filtering_bound_logger(numeric_level),
+        context_class=dict,
+        # NOTE(review): no file is passed here while basicConfig above
+        # targets sys.stderr - confirm both streams are intended.
+        logger_factory=structlog.PrintLoggerFactory(),
+        cache_logger_on_first_use=True,
+    )
+
+
+def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
+    """
+    Get a logger instance.
+
+    Args:
+        name: Logger name. Uses 'veritext' if not provided.
+
+    Returns:
+        Whatever structlog.get_logger returns for that name.
+    """
+    # NOTE(review): configure_logging installs a filtering bound logger
+    # as wrapper_class - confirm the return annotation still fits.
+    return structlog.get_logger(name or "veritext")