feat(patterns): pattern taxonomy + is_optimal

2025-09-08 16:03:14 +01:00
parent 5b768f6a21
commit 13bab63618
28 changed files with 1434 additions and 26 deletions
@@ -0,0 +1,92 @@
+name: Counting / Bucket Sort
+slug: counting-sort
+difficulty_level: 2
+pattern_type: technique
+display_order: 20
+
+description: >
+  Exploit bounded value ranges to achieve linear time sorting or selection
+  by using values as array indices.
+
+when_to_use: |
+  - Finding the top K most frequent elements (a frequency can never exceed n)
+  - Sorting when values are in a known, limited range
+  - Problems involving frequency counting with bounded inputs
+  - Color sorting (Dutch National Flag)
+
+metaphor: |
+  Imagine sorting mail into numbered PO boxes. Instead of comparing letters
+  to each other, you simply look at the box number on each letter and drop
+  it in. With 100 boxes, sorting 1000 letters takes about 1000 drops plus one
+  pass over the boxes, not roughly 1000 x log(1000) comparisons.
+
+core_concept: |
+  When values are bounded within a known range [0, k], you can use the value
+  itself as an index into an array of "buckets." This replaces comparison-based
+  O(n log n) sorting with O(n + k) counting work, where n is the number of
+  items and k is the size of the value range.
+
+  The key insight: **bounded values = direct addressing is possible**.
+
+code_template: |
+  def bucket_sort_approach(nums: list[int], k: int) -> list[int]:
+      """Return up to k distinct values from nums, most frequent first.
+
+      Values are grouped into buckets indexed by how often they occur, so
+      no comparison sort is needed. Values with the same frequency keep
+      their first-occurrence order. Returns [] when k <= 0 or nums is empty.
+      """
+      # Guard: `len(result) == k` below can never be true for k <= 0, so
+      # without this every distinct value would be returned.
+      if k <= 0:
+          return []
+
+      n = len(nums)
+
+      # Count frequencies
+      count = {}
+      for num in nums:
+          count[num] = count.get(num, 0) + 1
+
+      # One bucket per possible frequency; n + 1 so index n exists
+      # (all elements identical). Bucket 0 is never filled.
+      buckets = [[] for _ in range(n + 1)]
+      for num, freq in count.items():
+          buckets[freq].append(num)
+
+      # Collect from highest frequency down, stopping once k are gathered
+      result = []
+      for freq in range(n, 0, -1):
+          for num in buckets[freq]:
+              result.append(num)
+              if len(result) == k:
+                  return result
+
+      # Fewer than k distinct values exist: return them all
+      return result
+
+recognition_signals:
+  - "top k frequent"
+  - "sort colors"
+  - "values in range [0, n]"
+  - "frequency bounded by array size"
+  - "O(n) time required"
+  - "counting occurrences"
+
+common_mistakes:
+  - title: Using Heap When Bucket Sort is Optimal
+    description: |
+      Heap gives O(n log k) but bucket sort gives O(n) when frequencies
+      are bounded. Always check if values/frequencies have a known upper bound.
+    fix: |
+      Ask: "What's the maximum possible value/frequency?" If bounded by n,
+      use bucket sort.
+
+  - title: Off-by-One in Bucket Array
+    description: |
+      Creating `n` buckets for frequencies 0 to n-1 misses frequency `n`
+      (when all elements are identical).
+    fix: |
+      Create `n + 1` buckets to handle frequencies from 0 to n inclusive.
+
+variations:
+  - name: Top K Frequent Elements
+    description: Use frequency as bucket index, collect from highest
+    example: "top-k-frequent-elements"
+  - name: Sort Colors (Dutch National Flag)
+    description: Three buckets for 0, 1, 2
+    example: "sort-colors"
+  - name: H-Index
+    description: Citation count buckets
+    example: "h-index"
+
+related_patterns:
+  - heap
+  - two-pointers
+
+prerequisite_patterns: []