#!/usr/bin/env python
"""Validate YAML content files for correctness.

The script checks three kinds of content below the ``data`` directory:

* ``categories/categories.yaml`` -- a ``categories`` list of name/slug entries
* ``patterns/patterns.yaml`` -- a ``patterns`` list of name/slug entries
* ``questions/*.yaml`` -- one question per file

Malformed content is reported as an error string; it never aborts the run
with a traceback, so a single broken file cannot hide problems in the others.
"""

import sys
from collections.abc import Hashable
from pathlib import Path
from typing import Any, Optional

import yaml

# A tuple (not a set) so membership tests work for unhashable YAML values and
# the order shown in error messages is the same on every run.
VALID_DIFFICULTIES = ("easy", "medium", "hard")

REQUIRED_QUESTION_FIELDS = ("title", "slug", "difficulty", "description")
REQUIRED_EXPLANATION_FIELDS = (
    "approach",
    "intuition",
    "time_complexity",
    "space_complexity",
)


def _load_mapping(
    path: Path, label: str, errors: list[str]
) -> Optional[dict[str, Any]]:
    """Parse *path* and return its top-level mapping.

    Args:
        path: YAML file to read.
        label: Prefix used in any error message that is recorded.
        errors: List that receives a message when loading fails.

    Returns:
        The parsed mapping, an empty dict for an empty document, or ``None``
        when the file could not be read, parsed, or is not a mapping (in
        which case one message has been appended to *errors*).
    """
    try:
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        errors.append(f"{label}: invalid YAML - {e}")
        return None
    except (OSError, UnicodeDecodeError) as e:
        errors.append(f"{label}: cannot read file - {e}")
        return None

    # safe_load() yields None for an empty document; treat it as an empty
    # mapping so the callers report the missing keys instead of crashing.
    if data is None:
        return {}
    if not isinstance(data, dict):
        errors.append(
            f"{label}: expected a mapping at top level, "
            f"got {type(data).__name__}"
        )
        return None
    return data


def _validate_slug_file(yaml_file: Path, key: str) -> tuple[set[str], list[str]]:
    """Validate a file holding a list of name/slug entries under *key*.

    Args:
        yaml_file: File to validate.
        key: Top-level key whose value is the list of entries.

    Returns:
        A ``(slugs, errors)`` pair: every slug found and every problem found.
    """
    errors: list[str] = []
    slugs: set[str] = set()
    label = yaml_file.name

    if not yaml_file.exists():
        errors.append(f"Missing file: {yaml_file}")
        return slugs, errors

    data = _load_mapping(yaml_file, label, errors)
    if data is None:
        return slugs, errors

    if key not in data:
        errors.append(f"{label}: missing '{key}' key")
        return slugs, errors

    entries = data[key]
    if not isinstance(entries, list):
        errors.append(f"{label}: '{key}' must be a list")
        return slugs, errors

    for i, entry in enumerate(entries):
        prefix = f"{label}[{i}]"
        if not isinstance(entry, dict):
            errors.append(f"{prefix}: expected a mapping")
            continue
        if "name" not in entry:
            errors.append(f"{prefix}: missing 'name'")
        if "slug" not in entry:
            errors.append(f"{prefix}: missing 'slug'")
            continue

        slug = entry["slug"]
        # A list or mapping cannot be stored in the set of seen slugs.
        if not isinstance(slug, Hashable):
            errors.append(f"{prefix}: 'slug' must be a scalar value")
            continue
        if slug in slugs:
            errors.append(f"{prefix}: duplicate slug '{slug}'")
        slugs.add(slug)

    return slugs, errors


def _optional_list(
    data: dict[str, Any], key: str, filename: str, errors: list[str]
) -> list[Any]:
    """Return ``data[key]`` as a list; absent or null counts as empty.

    A value that is present but not a list is recorded in *errors* and an
    empty list is returned so the caller can carry on.
    """
    value = data.get(key)
    if value is None:
        return []
    if not isinstance(value, list):
        errors.append(f"{filename}: '{key}' must be a list")
        return []
    return value


def validate_categories(data_dir: Path) -> tuple[set[str], list[str]]:
    """Validate categories.yaml and return valid slugs.

    Args:
        data_dir: Directory containing ``categories/categories.yaml``.

    Returns:
        A ``(slugs, errors)`` pair.
    """
    return _validate_slug_file(
        data_dir / "categories" / "categories.yaml", "categories"
    )


def validate_patterns(data_dir: Path) -> tuple[set[str], list[str]]:
    """Validate patterns.yaml and return valid slugs.

    Args:
        data_dir: Directory containing ``patterns/patterns.yaml``.

    Returns:
        A ``(slugs, errors)`` pair.
    """
    return _validate_slug_file(data_dir / "patterns" / "patterns.yaml", "patterns")


def validate_question(
    question_file: Path,
    valid_categories: set[str],
    valid_patterns: set[str],
    seen_slugs: set[str],
    seen_leetcode_ids: set[int],
) -> list[str]:
    """Validate a single question file.

    Args:
        question_file: The YAML file to validate.
        valid_categories: Category slugs a question may reference.
        valid_patterns: Pattern slugs a question may reference.
        seen_slugs: Slugs of questions validated so far; updated in place so
            duplicates across files are detected.
        seen_leetcode_ids: ``leetcode_id`` values seen so far; updated in
            place for the same reason.

    Returns:
        The problems found in this file (empty when the file is valid).
    """
    errors: list[str] = []
    filename = question_file.name

    data = _load_mapping(question_file, filename, errors)
    if data is None:
        return errors

    # Required fields
    for field in REQUIRED_QUESTION_FIELDS:
        if field not in data:
            errors.append(f"{filename}: missing required field '{field}'")

    # Validate slug
    if "slug" in data:
        slug = data["slug"]
        if not isinstance(slug, Hashable):
            errors.append(f"{filename}: 'slug' must be a scalar value")
        else:
            if slug in seen_slugs:
                errors.append(f"{filename}: duplicate slug '{slug}'")
            seen_slugs.add(slug)

            # Slug should match filename
            expected_filename = f"{slug}.yaml"
            if filename != expected_filename:
                errors.append(
                    f"{filename}: filename should be '{expected_filename}'"
                )

    # Validate difficulty
    if "difficulty" in data and data["difficulty"] not in VALID_DIFFICULTIES:
        errors.append(
            f"{filename}: invalid difficulty '{data['difficulty']}' "
            f"(must be one of {', '.join(VALID_DIFFICULTIES)})"
        )

    # Validate categories
    for cat in _optional_list(data, "categories", filename, errors):
        # An unhashable entry can never be a known slug; check that first so
        # the set lookup cannot raise.
        if not isinstance(cat, Hashable) or cat not in valid_categories:
            errors.append(f"{filename}: unknown category '{cat}'")

    # Validate patterns
    for pat in _optional_list(data, "patterns", filename, errors):
        if not isinstance(pat, Hashable) or pat not in valid_patterns:
            errors.append(f"{filename}: unknown pattern '{pat}'")

    # Validate leetcode_id uniqueness
    lid = data.get("leetcode_id")
    if lid is not None:
        if not isinstance(lid, Hashable):
            errors.append(f"{filename}: 'leetcode_id' must be a scalar value")
        else:
            if lid in seen_leetcode_ids:
                errors.append(f"{filename}: duplicate leetcode_id {lid}")
            seen_leetcode_ids.add(lid)

    # Validate explanation
    if "explanation" in data:
        exp = data["explanation"]
        if not isinstance(exp, dict):
            # Without this check a string value would be substring-matched
            # and a null value would raise.
            errors.append(f"{filename}: 'explanation' must be a mapping")
        else:
            for field in REQUIRED_EXPLANATION_FIELDS:
                if field not in exp:
                    errors.append(f"{filename}: explanation missing '{field}'")

    # Validate solutions
    if "solutions" not in data:
        errors.append(f"{filename}: missing 'solutions'")
    elif not isinstance(data["solutions"], list):
        errors.append(f"{filename}: 'solutions' must be a list")
    else:
        for i, sol in enumerate(data["solutions"]):
            if not isinstance(sol, dict):
                errors.append(f"{filename}: solutions[{i}] must be a mapping")
                continue
            if "approach_name" not in sol:
                errors.append(
                    f"{filename}: solutions[{i}] missing 'approach_name'"
                )
            if "code" not in sol:
                errors.append(f"{filename}: solutions[{i}] missing 'code'")

    return errors


def validate_questions(
    data_dir: Path,
    valid_categories: set[str],
    valid_patterns: set[str],
) -> list[str]:
    """Validate all question files.

    Args:
        data_dir: Directory containing the ``questions`` sub-directory.
        valid_categories: Category slugs a question may reference.
        valid_patterns: Pattern slugs a question may reference.

    Returns:
        Every problem found across ``questions/*.yaml``; a missing directory
        or an empty one is itself reported as an error.
    """
    errors: list[str] = []
    questions_dir = data_dir / "questions"

    if not questions_dir.exists():
        errors.append(f"Missing directory: {questions_dir}")
        return errors

    question_files = sorted(questions_dir.glob("*.yaml"))
    if not question_files:
        errors.append("No question files found")
        return errors

    # Shared across files so duplicates between questions are detected.
    seen_slugs: set[str] = set()
    seen_leetcode_ids: set[int] = set()

    for question_file in question_files:
        errors.extend(
            validate_question(
                question_file,
                valid_categories,
                valid_patterns,
                seen_slugs,
                seen_leetcode_ids,
            )
        )

    return errors


def main() -> int:
    """Validate all content files.

    Returns:
        ``0`` when no errors were found, ``1`` otherwise (used as the
        process exit status).
    """
    data_dir = Path(__file__).parent.parent / "data"

    print(f"Validating content in {data_dir}...\n")

    all_errors: list[str] = []

    # Validate categories
    valid_categories, cat_errors = validate_categories(data_dir)
    all_errors.extend(cat_errors)
    print(f"Categories: {len(valid_categories)} valid, {len(cat_errors)} errors")

    # Validate patterns
    valid_patterns, pat_errors = validate_patterns(data_dir)
    all_errors.extend(pat_errors)
    print(f"Patterns: {len(valid_patterns)} valid, {len(pat_errors)} errors")

    # Validate questions
    questions_dir = data_dir / "questions"
    question_count = (
        len(list(questions_dir.glob("*.yaml"))) if questions_dir.exists() else 0
    )
    question_errors = validate_questions(data_dir, valid_categories, valid_patterns)
    all_errors.extend(question_errors)
    print(f"Questions: {question_count} files, {len(question_errors)} errors")

    print()

    if all_errors:
        print("Validation errors:")
        for error in all_errors:
            print(f"  - {error}")
        return 1

    print("All content is valid!")
    return 0


if __name__ == "__main__":
    sys.exit(main())