228 lines
7.0 KiB
Python
228 lines
7.0 KiB
Python
#!/usr/bin/env python
|
|
"""Validate YAML content files for correctness."""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import yaml
|
|
|
|
|
|
def validate_categories(data_dir: Path) -> tuple[set[str], list[str]]:
    """Validate categories.yaml and return valid slugs.

    Args:
        data_dir: Root data directory; the file is read from
            ``data_dir / "categories" / "categories.yaml"``.

    Returns:
        A ``(slugs, errors)`` pair: every slug found in the file, and a list
        of human-readable error messages (empty when nothing is wrong).
        Malformed content is reported through ``errors`` rather than raised.
    """
    errors: list[str] = []
    slugs: set[str] = set()

    categories_file = data_dir / "categories" / "categories.yaml"
    if not categories_file.exists():
        errors.append(f"Missing file: {categories_file}")
        return slugs, errors

    try:
        with open(categories_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError) as e:
        # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
        errors.append(f"categories.yaml: unable to read file - {e}")
        return slugs, errors
    except yaml.YAMLError as e:
        errors.append(f"categories.yaml: invalid YAML - {e}")
        return slugs, errors

    # safe_load yields None for an empty document and may yield a list or
    # scalar, so confirm we have a mapping before looking up the key.
    if not isinstance(data, dict) or "categories" not in data:
        errors.append("categories.yaml: missing 'categories' key")
        return slugs, errors

    entries = data["categories"]
    if not isinstance(entries, list):
        errors.append("categories.yaml: 'categories' must be a list")
        return slugs, errors

    for i, cat in enumerate(entries):
        prefix = f"categories.yaml[{i}]"
        if not isinstance(cat, dict):
            errors.append(f"{prefix}: entry must be a mapping")
            continue
        if "name" not in cat:
            errors.append(f"{prefix}: missing 'name'")
        if "slug" not in cat:
            errors.append(f"{prefix}: missing 'slug'")
        else:
            if cat["slug"] in slugs:
                errors.append(f"{prefix}: duplicate slug '{cat['slug']}'")
            slugs.add(cat["slug"])

    return slugs, errors
|
|
|
|
|
|
def validate_patterns(data_dir: Path) -> tuple[set[str], list[str]]:
    """Validate patterns.yaml and return valid slugs.

    Args:
        data_dir: Root data directory; the file is read from
            ``data_dir / "patterns" / "patterns.yaml"``.

    Returns:
        A ``(slugs, errors)`` pair: every slug found in the file, and a list
        of human-readable error messages (empty when nothing is wrong).
        Malformed content is reported through ``errors`` rather than raised.
    """
    errors: list[str] = []
    slugs: set[str] = set()

    patterns_file = data_dir / "patterns" / "patterns.yaml"
    if not patterns_file.exists():
        errors.append(f"Missing file: {patterns_file}")
        return slugs, errors

    try:
        with open(patterns_file, encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError) as e:
        # e.g. the path is a directory, is unreadable, or is not UTF-8 text.
        errors.append(f"patterns.yaml: unable to read file - {e}")
        return slugs, errors
    except yaml.YAMLError as e:
        errors.append(f"patterns.yaml: invalid YAML - {e}")
        return slugs, errors

    # safe_load yields None for an empty document and may yield a list or
    # scalar, so confirm we have a mapping before looking up the key.
    if not isinstance(data, dict) or "patterns" not in data:
        errors.append("patterns.yaml: missing 'patterns' key")
        return slugs, errors

    entries = data["patterns"]
    if not isinstance(entries, list):
        errors.append("patterns.yaml: 'patterns' must be a list")
        return slugs, errors

    for i, pat in enumerate(entries):
        prefix = f"patterns.yaml[{i}]"
        if not isinstance(pat, dict):
            errors.append(f"{prefix}: entry must be a mapping")
            continue
        if "name" not in pat:
            errors.append(f"{prefix}: missing 'name'")
        if "slug" not in pat:
            errors.append(f"{prefix}: missing 'slug'")
        else:
            if pat["slug"] in slugs:
                errors.append(f"{prefix}: duplicate slug '{pat['slug']}'")
            slugs.add(pat["slug"])

    return slugs, errors
|
|
|
|
|
|
def validate_question(
    question_file: Path,
    valid_categories: set[str],
    valid_patterns: set[str],
    seen_slugs: set[str],
    seen_leetcode_ids: set[int],
) -> list[str]:
    """Validate a single question file.

    Args:
        question_file: Path of the YAML file to check.
        valid_categories: Category slugs a question may reference.
        valid_patterns: Pattern slugs a question may reference.
        seen_slugs: Slugs found in files validated so far; updated in place
            so duplicates across files can be detected.
        seen_leetcode_ids: Non-null ``leetcode_id`` values found so far;
            updated in place for the same reason.

    Returns:
        A list of error messages, each prefixed with the file name. The list
        is empty when the file passes every check. Unreadable files and
        structurally malformed content are reported here rather than raised.
    """
    errors: list[str] = []
    filename = question_file.name

    try:
        with open(question_file, encoding="utf-8") as f:
            data: Any = yaml.safe_load(f)
    except (OSError, UnicodeDecodeError) as e:
        # Report an unreadable file instead of aborting the whole run.
        errors.append(f"{filename}: unable to read file - {e}")
        return errors
    except yaml.YAMLError as e:
        errors.append(f"{filename}: invalid YAML - {e}")
        return errors

    # safe_load yields None for an empty document and may yield a list or
    # scalar; every check below needs a mapping.
    if not isinstance(data, dict):
        errors.append(f"{filename}: top-level content must be a mapping")
        return errors

    # Required fields
    required = ["title", "slug", "difficulty", "description"]
    for field in required:
        if field not in data:
            errors.append(f"{filename}: missing required field '{field}'")

    # Validate slug
    if "slug" in data:
        if data["slug"] in seen_slugs:
            errors.append(f"{filename}: duplicate slug '{data['slug']}'")
        seen_slugs.add(data["slug"])

        # Slug should match filename
        expected_filename = f"{data['slug']}.yaml"
        if question_file.name != expected_filename:
            errors.append(f"{filename}: filename should be '{expected_filename}'")

    # Validate difficulty
    if "difficulty" in data:
        valid_difficulties = {"easy", "medium", "hard"}
        if data["difficulty"] not in valid_difficulties:
            errors.append(
                f"{filename}: invalid difficulty '{data['difficulty']}' "
                f"(must be one of {valid_difficulties})"
            )

    # Validate categories, then patterns. A key that is present but empty
    # (``categories:``) loads as None, so fall back to an empty list.
    for key, allowed, label in (
        ("categories", valid_categories, "category"),
        ("patterns", valid_patterns, "pattern"),
    ):
        values = data.get(key) or []
        if not isinstance(values, list):
            errors.append(f"{filename}: '{key}' must be a list")
            continue
        for value in values:
            if value not in allowed:
                errors.append(f"{filename}: unknown {label} '{value}'")

    # Validate leetcode_id uniqueness
    if "leetcode_id" in data and data["leetcode_id"] is not None:
        lid = data["leetcode_id"]
        if lid in seen_leetcode_ids:
            errors.append(f"{filename}: duplicate leetcode_id {lid}")
        seen_leetcode_ids.add(lid)

    # Validate explanation
    if "explanation" in data:
        exp = data["explanation"]
        if not isinstance(exp, dict):
            errors.append(f"{filename}: 'explanation' must be a mapping")
        else:
            exp_required = ["approach", "intuition", "time_complexity", "space_complexity"]
            for field in exp_required:
                if field not in exp:
                    errors.append(f"{filename}: explanation missing '{field}'")

    # Validate solutions
    if "solutions" not in data:
        errors.append(f"{filename}: missing 'solutions'")
    else:
        solutions = data["solutions"]
        if not isinstance(solutions, list):
            errors.append(f"{filename}: 'solutions' must be a list")
        else:
            for i, sol in enumerate(solutions):
                if not isinstance(sol, dict):
                    errors.append(f"{filename}: solutions[{i}] must be a mapping")
                    continue
                if "approach_name" not in sol:
                    errors.append(f"{filename}: solutions[{i}] missing 'approach_name'")
                if "code" not in sol:
                    errors.append(f"{filename}: solutions[{i}] missing 'code'")

    return errors
|
|
|
|
|
|
def validate_questions(
    data_dir: Path,
    valid_categories: set[str],
    valid_patterns: set[str],
) -> list[str]:
    """Check every ``*.yaml`` file under ``data_dir / "questions"``.

    Files are processed in sorted path order. One slug set and one
    leetcode-id set are shared across all ``validate_question`` calls.

    Returns:
        The concatenated error messages from all files, or a single-item
        list when the directory is missing or contains no question files.
    """
    questions_dir = data_dir / "questions"
    if not questions_dir.exists():
        return [f"Missing directory: {questions_dir}"]

    ordered_files = sorted(questions_dir.glob("*.yaml"))
    if not ordered_files:
        return ["No question files found"]

    slug_registry: set[str] = set()
    leetcode_registry: set[int] = set()
    problems: list[str] = []
    for path in ordered_files:
        problems += validate_question(
            path,
            valid_categories,
            valid_patterns,
            slug_registry,
            leetcode_registry,
        )
    return problems
|
|
|
|
|
|
def main() -> int:
    """Run all content validators and print a summary.

    The data directory is resolved as ``data`` next to this script's parent
    directory. Categories, patterns and questions are validated in that
    order, with one summary line printed per stage.

    Returns:
        ``1`` if any validator reported an error, otherwise ``0``.
    """
    data_dir = Path(__file__).parent.parent / "data"
    print(f"Validating content in {data_dir}...\n")

    # Categories
    category_slugs, cat_errors = validate_categories(data_dir)
    print(f"Categories: {len(category_slugs)} valid, {len(cat_errors)} errors")

    # Patterns
    pattern_slugs, pat_errors = validate_patterns(data_dir)
    print(f"Patterns: {len(pattern_slugs)} valid, {len(pat_errors)} errors")

    # Questions: the file count is taken separately, for the summary line only.
    questions_dir = data_dir / "questions"
    if questions_dir.exists():
        question_count = sum(1 for _ in questions_dir.glob("*.yaml"))
    else:
        question_count = 0
    question_errors = validate_questions(data_dir, category_slugs, pattern_slugs)
    print(f"Questions: {question_count} files, {len(question_errors)} errors")

    print()

    collected = [*cat_errors, *pat_errors, *question_errors]
    if not collected:
        print("All content is valid!")
        return 0

    print("Validation errors:")
    for message in collected:
        print(f" - {message}")
    return 1
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|