feat(patterns): tutorial system

2025-08-07 00:41:51 +01:00
parent 1a1558cfad
commit 83bf313305
15 changed files with 1386 additions and 45 deletions
@@ -0,0 +1,202 @@
+name: Binary Search
+slug: binary-search
+difficulty_level: 2
+
+description: >
+  Efficiently search sorted data by repeatedly dividing the search space in half.
+  This transforms O(n) linear search into O(log n) by eliminating half the
+  remaining possibilities with each comparison.
+
+when_to_use: |
+  - Sorted arrays or search spaces
+  - Finding boundaries (first/last occurrence)
+  - Searching in rotated sorted arrays
+  - Finding peak elements
+  - Minimizing/maximizing with monotonic constraints
+
+metaphor: |
+  Imagine playing a number guessing game where someone says "higher" or "lower"
+  after each guess. The optimal strategy is always guessing the middle—you
+  eliminate half the possibilities each time regardless of the answer.
+
+  Another analogy: looking up a word in a physical dictionary. You don't read
+  page by page from the start. You open roughly to the middle, see if you're
+  before or after your word, then repeat in the appropriate half.
+
+core_concept: |
+  Binary search works because the data has **monotonic ordering**—if you find
+  something too small, everything before it is also too small. If something is
+  too big, everything after is also too big.
+
+  The key insight extends beyond simple arrays:
+
+  1. **Value search**: Find a specific target in sorted array
+  2. **Boundary search**: Find the first/last element satisfying a condition
+  3. **Search space**: Binary search over answers (e.g., "minimum capacity")
+
+  At each step, you make one comparison and eliminate half the space. After k
+  comparisons, you've narrowed n elements down to n/2^k. Solving n/2^k = 1
+  gives k = log₂(n).
+
+visualization: |
+  **Example: Find target = 7 in sorted array**
+
+  ```
+  Array: [1, 3, 5, 7, 9, 11, 13]
+          L        M          R
+
+  Step 1: mid = 7, target = 7 → Found!
+  ```
+
+  **Example: Find target = 9**
+
+  ```
+  [1, 3, 5, 7, 9, 11, 13]
+   L        M          R
+
+  Step 1: mid = 7 < 9 → Search right half (left = 4, right = 6)
+               L  M   R
+
+  Step 2: mid = 11 > 9 → Search left half (left = 4, right = 4)
+               L
+               M
+               R
+
+  Step 3: mid = 9 → Found at index 4!
+  ```
+
+  **Binary search on answer: Minimum capacity to ship packages in D days**
+
+  ```
+  Search space: [max(weights), sum(weights)]
+
+  mid = some capacity
+  Can ship in D days with capacity mid?
+    Yes → try smaller capacity (go left)
+    No  → need more capacity (go right)
+  ```
+
+code_template: |
+  def binary_search(arr: list, target: int) -> int:
+      """Classic binary search for an exact match.
+
+      Args:
+          arr: List sorted in ascending order; the comparisons below
+              discard the wrong half only under that ordering.
+          target: Value to locate.
+
+      Returns:
+          An index i where arr[i] == target, or -1 when no element
+          matches (including when arr is empty).
+      """
+      # Closed interval [left, right]: both ends are still candidates.
+      left, right = 0, len(arr) - 1
+
+      # `<=` so that a single remaining element (left == right) is checked.
+      while left <= right:
+          # Same value as (left + right) // 2, written so the sum cannot
+          # overflow in languages with fixed-size integers.
+          mid = left + (right - left) // 2  # Avoid overflow
+
+          if arr[mid] == target:
+              return mid
+          elif arr[mid] < target:
+              left = mid + 1  # mid is too small; drop it and everything before
+          else:
+              right = mid - 1  # mid is too big; drop it and everything after
+
+      return -1  # Not found
+
+
+  def lower_bound(arr: list, target: int) -> int:
+      """Find first position where arr[i] >= target.
+
+      Args:
+          arr: List sorted in ascending order.
+          target: Threshold value.
+
+      Returns:
+          The smallest index i with arr[i] >= target, or len(arr) when
+          every element is smaller than target (0 for an empty list).
+      """
+      # Half-open interval [left, right); right starts one past the end so
+      # that len(arr) is a possible answer.
+      left, right = 0, len(arr)
+
+      while left < right:
+          mid = left + (right - left) // 2
+
+          if arr[mid] < target:
+              left = mid + 1  # arr[mid] fails the condition; answer is after mid
+          else:
+              right = mid  # arr[mid] satisfies it; keep mid as a candidate
+
+      return left  # First valid position
+
+
+  def binary_search_answer(low: int, high: int, is_valid) -> int:
+      """Binary search on answer space.
+
+      Args:
+          low: Smallest candidate answer.
+          high: Largest candidate answer.
+          is_valid: Callable taking a candidate and returning a truthy value
+              when it is feasible. Assumed monotonic (once true, true for
+              every larger candidate); the search is only correct then.
+
+      Returns:
+          The smallest candidate in [low, high] accepted by is_valid. `high`
+          itself is never passed to is_valid (mid is always below high), so
+          if no smaller candidate is accepted, high is returned unchecked.
+      """
+      while low < high:
+          mid = low + (high - low) // 2
+
+          if is_valid(mid):
+              high = mid  # Try smaller
+          else:
+              low = mid + 1  # Need bigger
+
+      return low
+
+recognition_signals:
+  - "sorted array"
+  - "O(log n)"
+  - "find minimum/maximum"
+  - "find first/last occurrence"
+  - "rotated sorted array"
+  - "peak element"
+  - "minimum capacity"
+  - "search space"
+  - "lower bound"
+  - "upper bound"
+
+common_mistakes:
+  - title: Integer overflow in mid calculation
+    description: |
+      Using `(left + right) / 2` can overflow if left and right are both
+      large positive integers (in languages with fixed-size integers).
+    fix: |
+      Use `left + (right - left) // 2` instead. This is mathematically
+      equivalent but avoids overflow.
+
+  - title: Infinite loop with wrong boundary update
+    description: |
+      Using `right = mid` with `left <= right` condition, or `left = mid`
+      when mid could equal left, causes infinite loops.
+    fix: |
+      For `left <= right`, always use `left = mid + 1` and `right = mid - 1`.
+      For `left < right`, use `left = mid + 1` and `right = mid`.
+
+  - title: Off-by-one with boundary search
+    description: |
+      Returning `left` when you should return `left - 1` (or vice versa)
+      gives the wrong boundary element.
+    fix: |
+      Think carefully about loop invariants. What does `left` represent when
+      the loop ends? Test with edge cases.
+
+  - title: Not recognizing binary search applicability
+    description: |
+      Missing that a problem can use binary search because the "array" is
+      implicit (search space of possible answers).
+    fix: |
+      If you need to minimize/maximize something and can write a function
+      `is_valid(x)` that's monotonic, binary search applies.
+
+variations:
+  - name: Classic search
+    description: |
+      Find exact target in sorted array. Returns index or -1.
+    example: "Binary Search, Search Insert Position"
+
+  - name: Lower/Upper bound
+    description: |
+      Find first or last position satisfying a condition. Used for ranges
+      and counting occurrences.
+    example: "First Bad Version, Find First and Last Position"
+
+  - name: Rotated array
+    description: |
+      Sorted array rotated at some pivot. One half is always sorted—determine
+      which half and search appropriately.
+    example: "Search in Rotated Sorted Array, Find Minimum"
+
+  - name: Binary search on answer
+    description: |
+      Search the space of possible answers. Need a monotonic predicate function
+      to determine feasibility.
+    example: "Capacity To Ship Packages, Koko Eating Bananas, Split Array Largest Sum"
+
+  - name: Peak finding
+    description: |
+      Find local maximum in bitonic array. Compare mid with neighbors to
+      determine which side has the peak.
+    example: "Find Peak Element, Find in Mountain Array"
+
+related_patterns:
+  - two-pointers
+
+prerequisite_patterns: []
@@ -0,0 +1,224 @@
+name: Dynamic Programming
+slug: dynamic-programming
+difficulty_level: 4
+
+description: >
+  Break problems into overlapping subproblems, storing results to avoid
+  recomputation. This transforms exponential time complexity into polynomial
+  by trading space for time.
+
+when_to_use: |
+  - Optimization problems (min/max)
+  - Counting problems
+  - Problems with optimal substructure
+  - Sequence alignment
+  - Knapsack-type problems
+
+metaphor: |
+  Imagine building with LEGO bricks. Instead of reconstructing the same base
+  structure every time you try a new top, you save your work. Each completed
+  substructure becomes a building block for larger structures.
+
+  Another analogy: calculating Fibonacci numbers. To find fib(5), you need
+  fib(4) and fib(3). But fib(4) also needs fib(3). Rather than recalculating
+  fib(3) twice, save it the first time and reuse it.
+
+core_concept: |
+  Dynamic programming requires two properties:
+
+  1. **Optimal substructure**: The optimal solution contains optimal solutions
+     to its subproblems.
+
+  2. **Overlapping subproblems**: The same subproblems are solved multiple
+     times in a naive recursive approach.
+
+  The key insight is identifying the **state**—what information do you need
+  to solve a subproblem? And the **transition**—how do you combine smaller
+  subproblems into larger ones?
+
+  Two implementation approaches:
+  - **Top-down (memoization)**: Recursive with caching
+  - **Bottom-up (tabulation)**: Iterative, filling a table from base cases
+
+visualization: |
+  **Example: Fibonacci with memoization**
+
+  ```
+  Without memoization (exponential calls):
+                    fib(5)
+                   /      \
+              fib(4)      fib(3)
+             /    \       /    \
+         fib(3)  fib(2) fib(2) fib(1)
+         /   \
+     fib(2) fib(1)
+     ...
+
+  With memoization:
+  fib(5) → fib(4) → fib(3) → fib(2) → fib(1)
+           ↓          ↓
+           use cached use cached
+           fib(3)     fib(2)
+  ```
+
+  **Example: Coin Change (minimum coins for amount 11)**
+
+  ```
+  Coins: [1, 5, 6]   Amount: 11
+
+  dp[0] = 0 (base case: 0 coins for amount 0)
+
+  dp[1] = dp[0] + 1 = 1  (use coin 1)
+  dp[5] = min(dp[4]+1, dp[0]+1) = 1  (use coin 5)
+  dp[6] = min(dp[5]+1, dp[1]+1, dp[0]+1) = 1  (use coin 6)
+
+  dp[11] = min(dp[10]+1, dp[6]+1, dp[5]+1)
+         = min(3, 2, 2)
+         = 2  (6 + 5)
+  ```
+
+code_template: |
+  # Top-down (memoization)
+  from functools import lru_cache
+
+  def solve_top_down(n: int) -> int:
+      """Generic top-down DP skeleton; not runnable as written.
+
+      base_condition, base_value, initial_value, choices, next_state,
+      combine and initial_state are placeholders to be supplied for the
+      specific problem.
+
+      Args:
+          n: Problem input, converted to the starting state by
+              initial_state(n).
+
+      Returns:
+          The value dp computes for the starting state.
+      """
+      # lru_cache memoizes dp by its argument, so state must be hashable.
+      # The cache belongs to this inner function and is rebuilt on every
+      # call to solve_top_down.
+      @lru_cache(maxsize=None)
+      def dp(state):
+          # Base case
+          if base_condition(state):
+              return base_value
+
+          # Recursive case with memoization
+          result = initial_value
+          for choice in choices(state):
+              subproblem = dp(next_state(state, choice))
+              result = combine(result, subproblem)
+
+          return result
+
+      return dp(initial_state(n))
+
+
+  # Bottom-up (tabulation)
+  def solve_bottom_up(n: int) -> int:
+      """Generic bottom-up DP skeleton; not runnable as written.
+
+      initial_value, base_value, choices, valid, combine and prev_state are
+      placeholders to be supplied for the specific problem.
+
+      Args:
+          n: Index of the final state; the table holds entries 0..n.
+
+      Returns:
+          dp[n] after the table has been filled.
+      """
+      # Initialize DP table
+      dp = [initial_value] * (n + 1)
+
+      # Base case
+      dp[0] = base_value
+
+      # Fill table iteratively
+      # NOTE(review): presumably prev_state(i, choice) returns an index
+      # below i, so the entry read here is already final - confirm per problem.
+      for i in range(1, n + 1):
+          for choice in choices(i):
+              if valid(i, choice):
+                  dp[i] = combine(dp[i], dp[prev_state(i, choice)])
+
+      return dp[n]
+
+
+  # 2D DP example (Longest Common Subsequence)
+  def lcs(text1: str, text2: str) -> int:
+      """Return the length of the longest common subsequence of two strings.
+
+      dp[i][j] holds the answer for the prefixes text1[:i] and text2[:j];
+      row 0 and column 0 stand for an empty prefix and stay 0.
+      """
+      m, n = len(text1), len(text2)
+      dp = [[0] * (n + 1) for _ in range(m + 1)]
+
+      for i in range(1, m + 1):
+          for j in range(1, n + 1):
+              # dp is indexed by prefix length, the strings by 0-based position.
+              if text1[i-1] == text2[j-1]:
+                  dp[i][j] = dp[i-1][j-1] + 1  # Extend the shorter prefixes' result
+              else:
+                  # Drop the last character of one prefix or the other.
+                  dp[i][j] = max(dp[i-1][j], dp[i][j-1])
+
+      return dp[m][n]
+
+recognition_signals:
+  - "minimum/maximum"
+  - "count ways"
+  - "can you reach"
+  - "optimal"
+  - "longest/shortest"
+  - "number of ways"
+  - "subset sum"
+  - "partition"
+  - "knapsack"
+  - "sequence"
+  - "subsequence"
+
+common_mistakes:
+  - title: Incorrect state definition
+    description: |
+      Choosing a state that doesn't capture all necessary information leads
+      to incorrect transitions or missing cases.
+    fix: |
+      Ask: "What do I need to know to solve this subproblem?" The answer
+      defines your state. Test with small examples to verify.
+
+  - title: Wrong base case
+    description: |
+      Incorrect initialization causes wrong answers to propagate through
+      the entire DP table.
+    fix: |
+      Think about the smallest/simplest subproblem. What's the answer when
+      there's nothing left to consider? Start from there.
+
+  - title: Off-by-one in 2D DP
+    description: |
+      Confusion about whether dp[i][j] represents the first i and first j
+      elements or the elements at indices i and j causes index errors.
+    fix: |
+      Be consistent. Common convention: dp[i][j] = answer for the first i and
+      first j elements, so row 0 and column 0 are the empty case. Strings and
+      arrays are 0-based, so the i-th element is at index i-1.
+
+  - title: Forgetting to handle impossible cases
+    description: |
+      Not returning infinity for minimum problems or 0 for counting when
+      a state is unreachable gives wrong aggregations.
+    fix: |
+      Initialize dp with appropriate "impossible" values (infinity for min,
+      -infinity for max, 0 for counting). Return -1 if final answer is
+      still impossible.
+
+  - title: Space complexity not optimized
+    description: |
+      Using O(n*m) space when only the previous row/column is needed
+      wastes memory on large inputs.
+    fix: |
+      If dp[i] only depends on dp[i-1], use two arrays (current and previous)
+      or even a single array updated carefully.
+
+variations:
+  - name: 1D DP
+    description: |
+      Single dimension state, typically indexed by position or remaining
+      capacity. Common for linear sequences.
+    example: "Climbing Stairs, House Robber, Coin Change"
+
+  - name: 2D DP
+    description: |
+      Two-dimensional state, often for comparing two sequences or tracking
+      two variables (position and capacity).
+    example: "Longest Common Subsequence, Edit Distance, 0/1 Knapsack"
+
+  - name: Interval DP
+    description: |
+      State represents a range [i, j]. Solve for all subranges and combine.
+      Often O(n^3) time.
+    example: "Burst Balloons, Matrix Chain Multiplication"
+
+  - name: Bitmask DP
+    description: |
+      State includes a bitmask representing a subset. Used when order matters
+      among a small set of items.
+    example: "Traveling Salesman, Shortest Superstring"
+
+  - name: DP on Trees
+    description: |
+      State associated with tree nodes. Transition from children to parent
+      (or vice versa).
+    example: "House Robber III, Binary Tree Maximum Path Sum"
+
+related_patterns:
+  - greedy
+  - backtracking
+
+prerequisite_patterns:
+  - backtracking
@@ -0,0 +1,194 @@
+name: Sliding Window
+slug: sliding-window
+difficulty_level: 2
+
+description: >
+  Maintain a window of elements that slides through the data, tracking a
+  constraint or computing aggregates. This transforms O(n*k) brute force into
+  O(n) by incrementally updating the window instead of recalculating from scratch.
+
+when_to_use: |
+  - Finding subarrays/substrings with specific properties
+  - Maximum/minimum sum of fixed-size windows
+  - Longest substring with at most K distinct characters
+  - Problems mentioning "contiguous" elements
+
+metaphor: |
+  Imagine looking at a landscape through a train window. As the train moves
+  forward, the scenery at the back of your view disappears while new scenery
+  appears at the front. You don't need to memorize the entire journey—just
+  keep track of what's currently visible through your window.
+
+  Another analogy: a cashier's sliding tray at a bank. As new items are added
+  to one end, old items fall off the other. You only count what's on the tray
+  at any moment.
+
+core_concept: |
+  The **sliding window** technique avoids redundant computation by maintaining
+  state as the window moves. Instead of recalculating the entire window each
+  time, you *add* what enters and *remove* what leaves.
+
+  There are two main types:
+
+  1. **Fixed-size window**: Window size is constant (e.g., "find max sum of k elements")
+  2. **Variable-size window**: Window expands and contracts based on constraints
+     (e.g., "longest substring with at most 2 distinct characters")
+
+  The key insight is that consecutive windows share most of their elements.
+  Only the edges change, so only update those.
+
+visualization: |
+  **Example: Maximum sum of 3 consecutive elements**
+
+  ```
+  Array: [2, 1, 5, 1, 3, 2]  Window size: 3
+
+  Window 1: [2, 1, 5] = 8    (calculate full sum)
+             └─────┘
+
+  Window 2: [1, 5, 1] = 8-2+1 = 7  (remove 2, add 1)
+                └─────┘
+
+  Window 3: [5, 1, 3] = 7-1+3 = 9  (remove 1, add 3) ← Maximum!
+                   └─────┘
+
+  Window 4: [1, 3, 2] = 9-5+2 = 6  (remove 5, add 2)
+                      └─────┘
+  ```
+
+  **Variable window: Longest substring with at most 2 distinct chars**
+
+  ```
+  String: "eceba"
+
+  "e"     → 1 distinct, expand  → length 1
+  "ec"    → 2 distinct, expand  → length 2
+  "ece"   → 2 distinct, expand  → length 3 ← Answer!
+  "eceb"  → 3 distinct, shrink from left
+  "ceb"   → 3 distinct, shrink from left
+  "eb"    → 2 distinct, expand  → length 2
+  "eba"   → 3 distinct, shrink...
+  ```
+
+code_template: |
+  def fixed_window(arr: list, k: int) -> int:
+      """Fixed-size sliding window: maximum sum of k consecutive elements.
+
+      Args:
+          arr: List of numbers.
+          k: Window size.
+
+      Returns:
+          The largest sum over all windows of k adjacent elements, or 0 when
+          arr has fewer than k elements.
+      """
+      # NOTE(review): k <= 0 is not guarded; presumably callers pass k >= 1.
+      n = len(arr)
+      if n < k:
+          return 0
+
+      # Calculate initial window
+      window_sum = sum(arr[:k])
+      max_sum = window_sum
+
+      # Slide the window
+      for i in range(k, n):
+          # arr[i] enters on the right, arr[i - k] leaves on the left.
+          window_sum += arr[i] - arr[i - k]  # Add new, remove old
+          max_sum = max(max_sum, window_sum)
+
+      return max_sum
+
+
+  def variable_window(s: str, k: int) -> int:
+      """Variable-size sliding window.
+
+      Finds the longest substring of s that contains at most k distinct
+      characters.
+
+      Args:
+          s: Input string.
+          k: Maximum number of distinct characters allowed in the window.
+
+      Returns:
+          The length of the longest qualifying substring (0 for an empty s).
+      """
+      char_count = {}  # Character -> occurrences inside s[left:right + 1]
+      left = 0
+      max_length = 0
+
+      for right in range(len(s)):
+          # Expand: add character at right
+          char_count[s[right]] = char_count.get(s[right], 0) + 1
+
+          # Contract: shrink from left if constraint violated
+          while len(char_count) > k:
+              char_count[s[left]] -= 1
+              # Remove exhausted keys so len(char_count) stays the number of
+              # distinct characters in the window.
+              if char_count[s[left]] == 0:
+                  del char_count[s[left]]
+              left += 1
+
+          # Update answer
+          # The window is valid here because the loop above has finished.
+          max_length = max(max_length, right - left + 1)
+
+      return max_length
+
+recognition_signals:
+  - "contiguous subarray"
+  - "substring"
+  - "maximum sum of k elements"
+  - "window"
+  - "consecutive"
+  - "at most k distinct"
+  - "minimum window"
+  - "longest substring"
+  - "sliding"
+
+common_mistakes:
+  - title: Forgetting to handle window smaller than required
+    description: |
+      When array length is less than window size k, trying to create a window
+      causes index errors or incorrect results.
+    fix: |
+      Add an early check:
+      ```python
+      if len(arr) < k:
+          return 0  # or appropriate default
+      ```
+
+  - title: Off-by-one in variable window
+    description: |
+      When calculating window length, using `right - left` instead of
+      `right - left + 1` gives length off by one.
+    fix: |
+      Window length is always `right - left + 1` (inclusive on both ends).
+
+  - title: Not cleaning up empty entries in hash map
+    description: |
+      When shrinking a variable window, decrementing a counter to 0 but not
+      removing the key causes the distinct count to be wrong.
+    fix: |
+      Always delete keys when count reaches 0:
+      ```python
+      if char_count[s[left]] == 0:
+          del char_count[s[left]]
+      ```
+
+  - title: Updating answer at wrong time
+    description: |
+      For "maximum" problems (shrink while the window is invalid), updating
+      the answer inside the while loop captures invalid states. For "minimum"
+      problems (shrink while the window is valid), updating only after the
+      while loop misses the valid windows seen while shrinking.
+    fix: |
+      For maximum problems, update after the while loop, once the window is
+      valid again. For minimum problems, update inside the while loop, while
+      the constraint is still satisfied.
+
+variations:
+  - name: Fixed-size window
+    description: |
+      Window size stays constant throughout. Simple slide operation: add one
+      element, remove one element.
+    example: "Maximum Sum Subarray of Size K, Find All Anagrams"
+
+  - name: Variable-size (shrinkable)
+    description: |
+      Window expands freely but contracts when constraints are violated.
+      Uses a while loop to shrink until valid.
+    example: "Longest Substring Without Repeating, Minimum Window Substring"
+
+  - name: Two-pointer variant
+    description: |
+      Some problems use two pointers that feel like sliding window but track
+      different metrics. The mechanics are similar.
+    example: "Container With Most Water, Trapping Rain Water"
+
+  - name: Caterpillar method
+    description: |
+      Another name for the variable sliding window, emphasizing how the window
+      stretches and contracts like a caterpillar moving.
+    example: "Common in competitive programming contexts"
+
+related_patterns:
+  - two-pointers
+  - prefix-sum
+
+prerequisite_patterns:
+  - two-pointers
@@ -0,0 +1,166 @@
+name: Two Pointers
+slug: two-pointers
+difficulty_level: 2
+
+description: >
+  Use two pointers to traverse data from different positions, often moving
+  toward or away from each other. This technique transforms O(n²) brute force
+  into O(n) by eliminating redundant comparisons.
+
+when_to_use: |
+  - Sorted arrays where you need to find pairs
+  - Linked list cycle detection
+  - Removing duplicates in-place
+  - Partitioning arrays
+  - Palindrome checking
+
+metaphor: |
+  Imagine two people searching a sorted book from opposite ends, each moving
+  toward the middle. After every comparison only one of them moves: the person
+  at the back steps backward when the pair they are holding is too large, and
+  the person at the front steps forward when it is too small. They meet
+  somewhere in the middle, having searched the entire book without either
+  person reading the same page twice.
+
+  Another way to think about it: squeezing toothpaste from both ends of the
+  tube. You apply pressure from each side, working toward the center until
+  you've gotten everything out.
+
+core_concept: |
+  The **two pointers** technique eliminates the need for nested loops by
+  maintaining two positions that move through the data based on conditions.
+
+  The key insight is that when data has *structure* (like being sorted), you
+  can make intelligent decisions about which pointer to move. If the current
+  pair is too small, moving the left pointer right increases the sum. If it's
+  too large, moving the right pointer left decreases it.
+
+  This reduces O(n²) brute force (checking all pairs) to O(n) because each
+  element is visited at most twice—once by each pointer.
+
+visualization: |
+  **Example: Find pair with sum = 10 in sorted array**
+
+  ```
+  Array: [1, 2, 4, 6, 8, 10]  Target: 10
+          L              R
+
+  Step 1: 1 + 10 = 11 > 10  → Sum too large, move R left
+          L           R
+
+  Step 2: 1 + 8 = 9 < 10    → Sum too small, move L right
+             L        R
+
+  Step 3: 2 + 8 = 10 ✓      → Found! Return [1, 4]
+  ```
+
+  **Key insight**: Because the array is sorted, we know exactly which pointer
+  to move. Too big? Decrease the larger value. Too small? Increase the smaller.
+
+code_template: |
+  def two_pointers(arr: list, target: int) -> list:
+      """Two pointers converging from opposite ends.
+
+      Args:
+          arr: List of numbers sorted in ascending order; the pointer moves
+              below rely on that ordering.
+          target: Required sum of the pair.
+
+      Returns:
+          [left, right], the indices of two distinct positions whose values
+          add up to target, or [] when no such pair is found.
+      """
+      left, right = 0, len(arr) - 1
+
+      # Strict `<` so an element is never paired with itself.
+      while left < right:
+          current = arr[left] + arr[right]
+
+          if current == target:
+              return [left, right]  # Found!
+          elif current < target:
+              left += 1   # Need larger sum
+          else:
+              right -= 1  # Need smaller sum
+
+      return []  # No solution found
+
+
+  def two_pointers_same_direction(arr: list) -> int:
+      """Two pointers moving in same direction (slow/fast).
+
+      Copies the elements accepted by some_condition to the front of arr,
+      in place and in their original order. some_condition is a placeholder
+      predicate to be supplied for the specific problem.
+
+      Args:
+          arr: List to compact; it is modified in place.
+
+      Returns:
+          The number of kept elements. arr[:result] holds them; positions
+          from result onward are never written.
+      """
+      slow = 0  # Next write position
+
+      # fast reads every element once; slow only advances on a kept element.
+      for fast in range(len(arr)):
+          if some_condition(arr[fast]):
+              arr[slow] = arr[fast]
+              slow += 1
+
+      return slow  # New length
+
+recognition_signals:
+  - "sorted array"
+  - "find pair with sum"
+  - "two sum"
+  - "in-place modification"
+  - "remove duplicates"
+  - "partition array"
+  - "palindrome"
+  - "container with most water"
+  - "trapping rain water"
+  - "move zeros"
+
+common_mistakes:
+  - title: Off-by-one with boundaries
+    description: |
+      Using `<=` instead of `<` when pointers should not overlap causes
+      infinite loops or double-counting elements.
+    fix: |
+      For converging pointers, use `while left < right`. Only use `<=` when
+      the same element can be part of the answer twice.
+
+  - title: Not handling duplicates
+    description: |
+      When the problem asks for unique pairs, forgetting to skip duplicate
+      values leads to repeated answers.
+    fix: |
+      After finding a match, skip over duplicates:
+      ```python
+      while left < right and arr[left] == arr[left + 1]:
+          left += 1
+      ```
+
+  - title: Moving both pointers at once
+    description: |
+      Moving both pointers simultaneously, either on a mismatch or after a
+      match that has not been recorded yet, can skip valid solutions.
+    fix: |
+      On a mismatch, move one pointer at a time and let the next iteration
+      decide the other. After a match, move both only when you've recorded
+      the result.
+
+  - title: Forgetting the sorted requirement
+    description: |
+      Two pointers only works predictably on sorted data. Applying it to
+      unsorted arrays gives wrong results.
+    fix: |
+      Sort first if needed (adds O(n log n)), or use a hash map approach
+      instead if sorting changes the problem semantics.
+
+variations:
+  - name: Opposite-direction (converging)
+    description: |
+      Pointers start at opposite ends and move toward each other. Used for
+      pair problems in sorted arrays.
+    example: "Two Sum II, Container With Most Water, Valid Palindrome"
+
+  - name: Same-direction (fast-slow)
+    description: |
+      Both pointers start at the same end but move at different speeds or
+      based on different conditions. Used for in-place modifications.
+    example: "Remove Duplicates, Move Zeros, Remove Element"
+
+  - name: Sliding window variant
+    description: |
+      Two pointers defining a window that expands and contracts. Technically
+      a separate pattern but uses similar mechanics.
+    example: "Minimum Window Substring, Longest Substring Without Repeating"
+
+  - name: Three pointers
+    description: |
+      Extension with three pointers for problems involving triplets or
+      partitioning into three sections.
+    example: "3Sum, Sort Colors (Dutch National Flag)"
+
+related_patterns:
+  - sliding-window
+  - fast-slow-pointers
+  - binary-search
+
+prerequisite_patterns: []