feat(core): add config and structured logging

Implement pydantic-settings based configuration with environment variable
support and structlog integration for JSON/console output modes.
This commit is contained in:
2026-02-03 16:16:13 +00:00
parent 697b1ddfeb
commit a65249fa44
2 changed files with 138 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
"""Configuration management using pydantic-settings."""
from pathlib import Path
from typing import Literal
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class VeritextSettings(BaseSettings):
    """Configuration settings for Veritext.

    Each field can be supplied through an environment variable carrying the
    ``VERITEXT_`` prefix or through a UTF-8 encoded ``.env`` file; extra
    inputs that do not match a declared field are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="VERITEXT_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Logging settings
    # CRITICAL is included so every standard named severity of the stdlib
    # ``logging`` module can be selected from configuration.
    log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(
        default="INFO",
        description="Logging level",
    )
    log_format: Literal["console", "json"] = Field(
        default="console",
        description="Log output format",
    )

    # Benchmark settings
    benchmark_storage_path: Path = Field(
        default=Path("benchmarks"),
        description="Path to benchmark storage directory",
    )

    # Tokenisation defaults
    tokeniser_lowercase: bool = Field(
        default=True,
        description="Whether to lowercase tokens by default",
    )
    tokeniser_remove_punctuation: bool = Field(
        default=True,
        description="Whether to remove punctuation by default",
    )

    # Semantic similarity settings (when available)
    semantic_model: str = Field(
        default="all-MiniLM-L6-v2",
        description="Default sentence-transformers model",
    )
    semantic_cache_embeddings: bool = Field(
        default=True,
        description="Whether to cache embeddings",
    )
def get_settings() -> VeritextSettings:
    """Build and return a settings object.

    A new ``VeritextSettings`` instance is constructed on every call; no
    instance is cached by this function.
    """
    current_settings = VeritextSettings()
    return current_settings

View File

@@ -0,0 +1,79 @@
"""Structured logging configuration using structlog."""
import logging
import sys
from typing import Any
import structlog
from veritext.core.config import get_settings
def configure_logging(
level: str | None = None,
log_format: str | None = None,
) -> None:
"""
Configure structlog for the application.
Args:
level: Log level (DEBUG, INFO, WARNING, ERROR). Uses settings if not provided.
log_format: Output format (console, json). Uses settings if not provided.
"""
settings = get_settings()
level = level or settings.log_level
log_format = log_format or settings.log_format
# Configure standard library logging
logging.basicConfig(
format="%(message)s",
stream=sys.stderr,
level=getattr(logging, level),
)
# Shared processors
shared_processors: list[Any] = [
structlog.contextvars.merge_contextvars,
structlog.processors.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.StackInfoRenderer(),
structlog.processors.UnicodeDecoder(),
]
if log_format == "json":
# JSON output for production/log aggregation
processors = [
*shared_processors,
structlog.processors.format_exc_info,
structlog.processors.JSONRenderer(),
]
else:
# Console output for development
processors = [
*shared_processors,
structlog.dev.ConsoleRenderer(colors=True),
]
structlog.configure(
processors=processors,
wrapper_class=structlog.make_filtering_bound_logger(
getattr(logging, level),
),
context_class=dict,
logger_factory=structlog.PrintLoggerFactory(),
cache_logger_on_first_use=True,
)
def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
    """
    Return a structlog logger.

    Args:
        name: Logger name. Falls back to 'veritext' when omitted or empty.

    Returns:
        The logger object produced by ``structlog.get_logger`` for that name.
    """
    logger_name = name if name else "veritext"
    return structlog.get_logger(logger_name)