diff --git a/backend/data/patterns/heap.yaml b/backend/data/patterns/heap.yaml new file mode 100644 index 0000000..2356dc0 --- /dev/null +++ b/backend/data/patterns/heap.yaml @@ -0,0 +1,307 @@ +name: Heap / Priority Queue +slug: heap +difficulty_level: 3 + +description: > + A data structure that efficiently maintains the minimum or maximum element, + supporting O(log n) insertion and extraction. Heaps are essential when you + repeatedly need to access the smallest or largest element from a changing set. + +when_to_use: | + - Finding K largest/smallest elements + - K-way merge of sorted lists + - Finding median from data stream + - Task scheduling by priority + - Dijkstra's shortest path algorithm + +metaphor: | + Imagine a hospital emergency room where patients are treated by urgency, not + arrival time. A priority queue (heap) lets you always know who's next without + sorting everyone whenever someone new arrives. The most urgent patient "bubbles + up" to the front automatically. + + Another analogy: a to-do list that always shows your most important task first. + When you add or complete tasks, the list reorganizes itself so the highest + priority is always accessible in O(1) time. + +core_concept: | + A **heap** is a complete binary tree where each parent is smaller (min-heap) or + larger (max-heap) than its children. This property guarantees: + + - **Peek min/max**: O(1) — it's always at the root + - **Insert**: O(log n) — bubble up to maintain heap property + - **Extract min/max**: O(log n) — remove root, bubble down to reheapify + + Key insight: heaps don't fully sort the data. They only guarantee the root is + the min/max. This partial ordering is enough for many problems and is more + efficient than maintaining full sorted order. 
+ + **When to use heaps:** + - Need repeated access to min/max element + - Data changes frequently (insertions/deletions) + - Full sorting is overkill (only need top K, not all elements sorted) + +visualization: | + **Min-Heap Structure:** + + ``` + Array: [1, 3, 2, 7, 6, 4, 5] + + As tree: + 1 (index 0) + / \ + 3 2 (indices 1, 2) + / \ / \ + 7 6 4 5 (indices 3, 4, 5, 6) + + Parent of index i: (i-1) // 2 + Left child: 2*i + 1 + Right child: 2*i + 2 + ``` + + **Inserting 0 into heap:** + + ``` + Add 0 at end: + 1 + / \ + 3 2 + / \ / \ + 7 6 4 5 + / + 0 + + Bubble up (0 < 7, swap): + 1 + / \ + 3 2 + / \ / \ + 0 6 4 5 + / + 7 + + Bubble up (0 < 3, swap): + 1 + / \ + 0 2 + / \ / \ + 3 6 4 5 + / + 7 + + Bubble up (0 < 1, swap): + 0 + / \ + 1 2 + / \ / \ + 3 6 4 5 + / + 7 + ``` + + **Top K Elements using Min-Heap:** + + ``` + Find 3 largest from [3, 1, 4, 1, 5, 9, 2, 6] + + Maintain min-heap of size 3: + + Process 3: heap = [3] + Process 1: heap = [1, 3] + Process 4: heap = [1, 3, 4] + Process 1: 1 <= heap[0]=1, skip + Process 5: 5 > 1, remove 1, add 5 → heap = [3, 5, 4] + Process 9: 9 > 3, remove 3, add 9 → heap = [4, 5, 9] + Process 2: 2 <= 4, skip + Process 6: 6 > 4, remove 4, add 6 → heap = [5, 6, 9] + + Result: [5, 6, 9] are the top 3 + ``` + +code_template: | + import heapq + + def find_k_largest(nums: list[int], k: int) -> list[int]: + """Find k largest elements using min-heap.""" + # Min-heap of size k keeps k largest + heap = [] + + for num in nums: + if len(heap) < k: + heapq.heappush(heap, num) + elif num > heap[0]: + heapq.heapreplace(heap, num) # Pop min, push new + + return heap + + + def find_k_smallest(nums: list[int], k: int) -> list[int]: + """Find k smallest elements using max-heap (negated values).""" + # Max-heap (negated) of size k keeps k smallest + heap = [] + + for num in nums: + if len(heap) < k: + heapq.heappush(heap, -num) + elif num < -heap[0]: + heapq.heapreplace(heap, -num) + + return [-x for x in heap] + + + def merge_k_sorted_lists(lists: 
list[list[int]]) -> list[int]: + """Merge k sorted lists using min-heap.""" + heap = [] + result = [] + + # Initialize heap with first element from each list + for i, lst in enumerate(lists): + if lst: + heapq.heappush(heap, (lst[0], i, 0)) + + while heap: + val, list_idx, elem_idx = heapq.heappop(heap) + result.append(val) + + # Add next element from same list + if elem_idx + 1 < len(lists[list_idx]): + next_val = lists[list_idx][elem_idx + 1] + heapq.heappush(heap, (next_val, list_idx, elem_idx + 1)) + + return result + + + class MedianFinder: + """Find median from data stream using two heaps.""" + + def __init__(self): + self.small = [] # Max-heap (negated) for smaller half + self.large = [] # Min-heap for larger half + + def add_num(self, num: int) -> None: + # Add to max-heap (smaller half) + heapq.heappush(self.small, -num) + + # Balance: largest of small should be <= smallest of large + if self.large and -self.small[0] > self.large[0]: + heapq.heappush(self.large, -heapq.heappop(self.small)) + + # Size balance: small can have at most 1 more element + if len(self.small) > len(self.large) + 1: + heapq.heappush(self.large, -heapq.heappop(self.small)) + elif len(self.large) > len(self.small): + heapq.heappush(self.small, -heapq.heappop(self.large)) + + def find_median(self) -> float: + if len(self.small) > len(self.large): + return -self.small[0] + return (-self.small[0] + self.large[0]) / 2 + + + def kth_smallest_in_matrix(matrix: list[list[int]], k: int) -> int: + """Find kth smallest in row-wise and column-wise sorted matrix.""" + n = len(matrix) + heap = [(matrix[0][0], 0, 0)] + visited = {(0, 0)} + + for _ in range(k - 1): + val, r, c = heapq.heappop(heap) + + # Add right neighbor + if c + 1 < n and (r, c + 1) not in visited: + visited.add((r, c + 1)) + heapq.heappush(heap, (matrix[r][c + 1], r, c + 1)) + + # Add bottom neighbor + if r + 1 < n and (r + 1, c) not in visited: + visited.add((r + 1, c)) + heapq.heappush(heap, (matrix[r + 1][c], r + 1, c)) + + 
return heap[0][0] + +recognition_signals: + - "kth largest" + - "kth smallest" + - "top k" + - "merge sorted" + - "median" + - "priority" + - "schedule" + - "Dijkstra" + - "frequency" + - "closest points" + +common_mistakes: + - title: Using max-heap when min-heap needed (or vice versa) + description: | + Python's heapq is a min-heap: heappop() always returns the smallest element. Treating it as a + max-heap (e.g. popping k times to get the "k largest") returns the k smallest instead. + fix: | + For max-heap behavior, negate values: + ```python + heapq.heappush(heap, -num) # Push negative + max_val = -heapq.heappop(heap) # Negate back + ``` + + - title: Wrong heap size for "top K" problems + description: | + For "k largest," keeping a max-heap of all elements and extracting k times + is O(n + k log n). Using min-heap of size k is O(n log k). + fix: | + For k largest: use min-heap of size k, remove smallest when full. + For k smallest: use max-heap of size k, remove largest when full. + + - title: Forgetting tuple comparison order + description: | + When heap contains tuples, Python compares by first element, then second, + etc. If first elements are equal, comparison moves to second element. + fix: | + Put the comparison key first in the tuple: + ```python + heapq.heappush(heap, (priority, item)) + ``` + If items aren't comparable, use a counter as tiebreaker. + + - title: Modifying heap elements directly + description: | + Changing an element's value after it's in the heap breaks heap property. + fix: | + Heaps don't support "decrease key" directly. Either: (1) use lazy deletion + (mark as invalid, skip when popped), or (2) re-heapify the entire heap. + +variations: + - name: Top K elements + description: | + Keep k largest using min-heap of size k, or k smallest using max-heap + of size k. + example: "Kth Largest Element, Top K Frequent Elements" + + - name: K-way merge + description: | + Merge k sorted lists efficiently by maintaining heap of current elements + from each list. 
+ example: "Merge K Sorted Lists, Smallest Range Covering K Lists" + + - name: Two heaps (median) + description: | + Maintain two heaps: max-heap for smaller half, min-heap for larger half. + Median is at the roots. + example: "Find Median from Data Stream, Sliding Window Median" + + - name: Dijkstra's algorithm + description: | + Min-heap tracks vertices by shortest known distance. Extract minimum, + relax edges, update heap. + example: "Network Delay Time, Cheapest Flights Within K Stops" + + - name: Task scheduling + description: | + Prioritize tasks by some criteria (deadline, duration). Process highest + priority first. + example: "Task Scheduler, Meeting Rooms III" + +related_patterns: + - binary-search + - two-pointers + +prerequisite_patterns: [] diff --git a/backend/data/patterns/monotonic-stack.yaml b/backend/data/patterns/monotonic-stack.yaml new file mode 100644 index 0000000..7baec82 --- /dev/null +++ b/backend/data/patterns/monotonic-stack.yaml @@ -0,0 +1,269 @@ +name: Monotonic Stack +slug: monotonic-stack +difficulty_level: 3 + +description: > + Maintain a stack where elements are always in sorted order (either increasing or + decreasing). This enables efficient solutions for "next greater element" problems + by leveraging the stack's ability to track candidates that might be the answer + for future elements. + +when_to_use: | + - Next greater/smaller element + - Previous greater/smaller element + - Largest rectangle in histogram + - Daily temperatures + - Stock span problems + +metaphor: | + Imagine standing in a line of people of varying heights, all facing forward. + You want to know who's the next taller person for each person in line. The + trick: as you walk backward through the line, keep track of "potentially + useful" tall people. When you encounter someone taller than people you're + tracking, those shorter people will never be the answer—remove them. The + remaining stack always contains candidates in decreasing height order. 
+ + Another analogy: a bouncer at a club with height requirements. As people line + up, anyone shorter than the person in front can be removed from consideration— + they'll never be visible from the front. + +core_concept: | + A **monotonic stack** maintains elements in sorted order by popping elements + that violate the ordering when pushing new ones: + + - **Monotonically decreasing**: Pop elements smaller than current before pushing + - **Monotonically increasing**: Pop elements larger than current before pushing + + The key insight is that when we pop an element, we've found its "next + greater/smaller"—it's the current element we're about to push. The stack + efficiently tracks candidates that might be answers for future elements. + + **Pattern recognition:** + - "Next greater" → decreasing stack (pop when current > top) + - "Next smaller" → increasing stack (pop when current < top) + - "Previous greater/smaller" → process elements and query stack before pushing + +visualization: | + **Next Greater Element:** + + ``` + Array: [4, 5, 2, 10, 8] + Find next greater element for each + + Process right to left (or left to right with index tracking): + + Process 8: stack=[] → no greater, push 8 + stack=[8] → answer[4] = -1 + + Process 10: stack=[8] → 10 > 8, pop 8 + stack=[] → no greater, push 10 + stack=[10] → answer[3] = -1 + + Process 2: stack=[10] → 2 < 10, don't pop + stack=[10,2] → answer[2] = 10 + + Process 5: stack=[10,2] → 5 > 2, pop 2 + stack=[10] → 5 < 10, don't pop + stack=[10,5] → answer[1] = 10 + + Process 4: stack=[10,5] → 4 < 5, don't pop + stack=[10,5,4] → answer[0] = 5 + + Result: [5, 10, 10, -1, -1] + ``` + + **Largest Rectangle in Histogram:** + + ``` + Heights: [2, 1, 5, 6, 2, 3] + + Use increasing stack (pop when current < top) + When popping, calculate rectangle with popped height as the smallest bar. 
+ + Process each bar: + - 2: push (0,2) + - 1: 1 < 2, pop (0,2) → width=1, area=2×1=2 + push (0,1) [take popped index] + - 5: push (2,5) + - 6: push (3,6) + - 2: 2 < 6, pop (3,6) → width=1, area=6×1=6 + 2 < 5, pop (2,5) → width=2, area=5×2=10 + push (2,2) + - 3: push (5,3) + - end: pop remaining, calculate areas + + Max area = 10 + ``` + +code_template: | + def next_greater_element(nums: list[int]) -> list[int]: + """Find next greater element for each position.""" + n = len(nums) + result = [-1] * n + stack = [] # Stack of indices + + for i in range(n): + # Pop elements smaller than current + while stack and nums[stack[-1]] < nums[i]: + idx = stack.pop() + result[idx] = nums[i] + + stack.append(i) + + return result + + + def next_smaller_element(nums: list[int]) -> list[int]: + """Find next smaller element for each position.""" + n = len(nums) + result = [-1] * n + stack = [] + + for i in range(n): + # Pop elements larger than current + while stack and nums[stack[-1]] > nums[i]: + idx = stack.pop() + result[idx] = nums[i] + + stack.append(i) + + return result + + + def daily_temperatures(temperatures: list[int]) -> list[int]: + """Days until warmer temperature.""" + n = len(temperatures) + result = [0] * n + stack = [] # Stack of indices + + for i in range(n): + while stack and temperatures[stack[-1]] < temperatures[i]: + idx = stack.pop() + result[idx] = i - idx # Days difference + + stack.append(i) + + return result + + + def largest_rectangle_histogram(heights: list[int]) -> int: + """Largest rectangle area in histogram.""" + stack = [] # Stack of (index, height) + max_area = 0 + + for i, h in enumerate(heights): + start = i + + while stack and stack[-1][1] > h: + idx, height = stack.pop() + max_area = max(max_area, height * (i - idx)) + start = idx # This index can extend back + + stack.append((start, h)) + + # Process remaining in stack + for idx, height in stack: + max_area = max(max_area, height * (len(heights) - idx)) + + return max_area + + + def 
stock_span(prices: list[int]) -> list[int]: + """Days since last higher price (inclusive of today).""" + n = len(prices) + result = [0] * n + stack = [] # Stack of indices + + for i in range(n): + while stack and prices[stack[-1]] <= prices[i]: + stack.pop() + + # Span = distance to previous higher (or from start) + result[i] = i - stack[-1] if stack else i + 1 + + stack.append(i) + + return result + +recognition_signals: + - "next greater element" + - "next smaller element" + - "previous greater" + - "daily temperatures" + - "stock span" + - "largest rectangle" + - "histogram" + - "trapping rain water" + - "132 pattern" + - "buildings with ocean view" + +common_mistakes: + - title: Wrong comparison direction + description: | + Using `<` when you should use `>` (or vice versa) results in the wrong + type of monotonic stack. + fix: | + Remember: "next greater" needs decreasing stack, so pop when `nums[top] < current`. + "Next smaller" needs increasing stack, so pop when `nums[top] > current`. + + - title: Storing values instead of indices + description: | + Storing just values makes it impossible to calculate distances (like + "how many days until..."). + fix: | + Store indices in the stack. You can always access `nums[stack[-1]]` for + the value when needed. + + - title: Not processing remaining stack elements + description: | + Elements left in the stack after processing all input have no "next + greater/smaller" in the array. + fix: | + After the main loop, process remaining elements. For histogram problems, + their rectangle extends to the end. For "next greater," their answer is -1. + + - title: Off-by-one with span calculations + description: | + Forgetting whether to include the current element in span calculations + gives wrong results. + fix: | + For span problems, if stack is empty, span = i + 1 (from beginning). + If stack has elements, span = i - stack[-1] (not +1 because previous + greater is exclusive). 
+ +variations: + - name: Next greater element + description: | + Find the first element to the right that is greater than current. + Decreasing monotonic stack. + example: "Next Greater Element I/II, Daily Temperatures" + + - name: Next smaller element + description: | + Find the first element to the right that is smaller than current. + Increasing monotonic stack. + example: "Next Smaller Element" + + - name: Previous greater/smaller + description: | + Query the stack before pushing to find the previous greater/smaller. + The top of stack is the answer. + example: "Stock Span, Buildings With Ocean View" + + - name: Largest rectangle + description: | + Use increasing stack. When popping, calculate area using popped height + and width from popped index to current index. + example: "Largest Rectangle in Histogram, Maximal Rectangle" + + - name: Trapping rain water + description: | + Can use monotonic stack to track left boundaries, calculating trapped + water when finding right boundary. (Alternative: two-pointer approach) + example: "Trapping Rain Water" + +related_patterns: + - two-pointers + - sliding-window + +prerequisite_patterns: [] diff --git a/backend/data/patterns/trie.yaml b/backend/data/patterns/trie.yaml new file mode 100644 index 0000000..2cec517 --- /dev/null +++ b/backend/data/patterns/trie.yaml @@ -0,0 +1,305 @@ +name: Trie +slug: trie +difficulty_level: 3 + +description: > + A tree-like data structure for efficient string prefix operations. Each node + represents a character, and paths from root to nodes spell out prefixes. Tries + enable O(m) search, insert, and prefix queries where m is the word length. + +when_to_use: | + - Autocomplete systems + - Spell checkers + - Word dictionary with prefix search + - Word break problems + - IP routing (longest prefix matching) + +metaphor: | + Imagine a filing cabinet where files are organized by name, one letter per + drawer. 
To find "apple," you open drawer 'a', then find sub-drawer 'p', then + 'p', then 'l', then 'e'. You don't search through all files—you navigate + directly to the right location. Finding "application" shares the same path + up to "appl" before diverging. + + Another analogy: a phone book organized as a tree. Instead of a flat + alphabetical list, common prefixes are grouped, making it fast to find all + names starting with "Joh" or check if "Johnson" exists. + +core_concept: | + A **Trie** (pronounced "try") stores strings character by character: + + - **Root**: Empty node representing the empty prefix + - **Edges**: Labeled with characters + - **Nodes**: Represent prefixes; may be marked as "end of word" + + Key insight: all words sharing a prefix share the same path from root. + This makes prefix operations extremely efficient: + + - **Insert word**: O(m) — create path from root + - **Search word**: O(m) — follow path, check end marker + - **Starts with prefix**: O(m) — just follow path, no end check needed + + **Trade-off**: Tries use more memory than hash sets (each character is a node), + but enable prefix queries that hash sets cannot support. + +visualization: | + **Trie containing: ["app", "apple", "apply", "apt", "bat"]** + + ``` + (root) + / \ + a b + | | + p a + / \ | + p* t* t* + | + l + / \ + e* y* + + * = end of word marker + + Paths: + - "app" → a-p-p* + - "apple" → a-p-p-l-e* + - "apply" → a-p-p-l-y* + - "apt" → a-p-t* + - "bat" → b-a-t* + ``` + + **Search for "apple":** + + ``` + Start at root + → 'a': found, move to 'a' node + → 'p': found, move to 'p' node + → 'p': found, move to second 'p' node + → 'l': found, move to 'l' node + → 'e': found, move to 'e' node + → end of word marker? Yes! + + "apple" exists ✓ + ``` + + **Search for "app":** + + ``` + Follow path a-p-p + → end of word marker on second 'p'? Yes! 
+ + "app" exists ✓ + ``` + + **Starts with "ap":** + + ``` + Follow path a-p + → reached end of prefix successfully + + Words with prefix "ap" exist ✓ + ``` + +code_template: | + class TrieNode: + def __init__(self): + self.children = {} + self.is_end = False + + + class Trie: + def __init__(self): + self.root = TrieNode() + + def insert(self, word: str) -> None: + """Insert a word into the trie.""" + node = self.root + for char in word: + if char not in node.children: + node.children[char] = TrieNode() + node = node.children[char] + node.is_end = True + + def search(self, word: str) -> bool: + """Check if word exists in trie.""" + node = self._traverse(word) + return node is not None and node.is_end + + def starts_with(self, prefix: str) -> bool: + """Check if any word starts with prefix.""" + return self._traverse(prefix) is not None + + def _traverse(self, s: str) -> TrieNode: + """Traverse trie following string s.""" + node = self.root + for char in s: + if char not in node.children: + return None + node = node.children[char] + return node + + + class WordDictionary: + """Trie with wildcard search support.""" + + def __init__(self): + self.root = TrieNode() + + def add_word(self, word: str) -> None: + node = self.root + for char in word: + if char not in node.children: + node.children[char] = TrieNode() + node = node.children[char] + node.is_end = True + + def search(self, word: str) -> bool: + """Search with '.' 
as wildcard for any character.""" + def dfs(node: TrieNode, i: int) -> bool: + if i == len(word): + return node.is_end + + char = word[i] + + if char == '.': + # Try all children + return any(dfs(child, i + 1) + for child in node.children.values()) + else: + if char not in node.children: + return False + return dfs(node.children[char], i + 1) + + return dfs(self.root, 0) + + + def word_break(s: str, word_dict: list[str]) -> bool: + """Check if string can be segmented into dictionary words.""" + trie = Trie() + for word in word_dict: + trie.insert(word) + + n = len(s) + dp = [False] * (n + 1) + dp[0] = True # Empty string can be segmented + + for i in range(n): + if not dp[i]: + continue + + node = trie.root + for j in range(i, n): + if s[j] not in node.children: + break + node = node.children[s[j]] + if node.is_end: + dp[j + 1] = True + + return dp[n] + + + def find_words_with_prefix(trie: Trie, prefix: str) -> list[str]: + """Find all words starting with prefix.""" + node = trie._traverse(prefix) + if not node: + return [] + + results = [] + + def dfs(node: TrieNode, path: str): + if node.is_end: + results.append(path) + for char, child in node.children.items(): + dfs(child, path + char) + + dfs(node, prefix) + return results + +recognition_signals: + - "prefix" + - "autocomplete" + - "word dictionary" + - "spell check" + - "word search" + - "word break" + - "longest common prefix" + - "starts with" + - "implement trie" + - "wildcard" + +common_mistakes: + - title: Confusing search vs starts_with + description: | + Search checks if the exact word exists (must have end marker). + Starts_with only checks if the prefix path exists. 
+ fix: | + For search, always check `node.is_end` at the end: + ```python + def search(self, word): + node = self._traverse(word) + return node is not None and node.is_end + ``` + + - title: Not handling empty string + description: | + Empty string is a valid prefix (everything starts with it) but may not + be a valid word in the dictionary. + fix: | + starts_with("") should return True if trie has any words (a bare traversal lands on the root and returns True even for an empty trie, so also check that the root has children). + search("") should return True only if empty string was explicitly inserted. + + - title: Using array instead of dict for children + description: | + Using `children = [None] * 26` assumes only lowercase letters. This fails + for other character sets. + fix: | + Use a dictionary for flexibility: + ```python + self.children = {} # Works for any characters + ``` + Or use array only when character set is known and fixed. + + - title: Memory leaks when deleting words + description: | + Simply unmarking is_end doesn't free memory for nodes that are no longer + part of any word. + fix: | + For deletion, either: (1) accept memory isn't freed (common), or + (2) implement proper deletion that removes orphaned nodes bottom-up. + +variations: + - name: Basic Trie + description: | + Standard insert, search, and prefix check operations. + example: "Implement Trie (Prefix Tree)" + + - name: Wildcard search + description: | + Support '.' as wildcard matching any single character. Requires DFS + to explore all possibilities when encountering wildcard. + example: "Design Add and Search Words Data Structure" + + - name: Word search in grid + description: | + Use Trie to efficiently search for multiple words in a 2D grid. + Prune branches that don't match any word prefix. + example: "Word Search II" + + - name: Autocomplete + description: | + Find all words starting with a given prefix. DFS from the prefix + endpoint to collect all words. 
+ example: "Design Search Autocomplete System" + + - name: Compressed Trie (Radix Tree) + description: | + Merge chains of single-child nodes into one node with a string label. + Saves space for sparse tries. + example: "Longest Common Prefix optimizations" + +related_patterns: + - dfs + - backtracking + - dynamic-programming + +prerequisite_patterns: [] diff --git a/backend/data/patterns/union-find.yaml b/backend/data/patterns/union-find.yaml new file mode 100644 index 0000000..4720029 --- /dev/null +++ b/backend/data/patterns/union-find.yaml @@ -0,0 +1,313 @@ +name: Union Find +slug: union-find +difficulty_level: 3 + +description: > + Track disjoint sets with efficient union and find operations. Union-Find + (also called Disjoint Set Union) excels at dynamically grouping elements and + answering "are these two elements in the same group?" queries. + +when_to_use: | + - Finding connected components + - Detecting cycles in undirected graphs + - Kruskal's minimum spanning tree + - Dynamic connectivity queries + - Grouping related items (accounts merge, friend circles) + +metaphor: | + Imagine a social network where you want to know if two people are connected + (directly or through friends of friends). Instead of searching the entire + network each time, everyone in a connected group points to a group leader. + To check if two people are connected, just check if they have the same leader. + When groups merge (someone bridges two groups), you just update one leader to + point to the other. + + Another analogy: corporate acquisitions. Each company has a parent company + (possibly itself). When companies merge, one becomes a subsidiary of the other. + To find the ultimate parent, you follow the chain of ownership. + +core_concept: | + Union-Find maintains a forest of trees where each tree represents a set. + Each element points to its parent, and the root of the tree is the set's + representative. 
+ + **Two key operations:** + - **Find(x)**: Return the root (representative) of x's set + - **Union(x, y)**: Merge the sets containing x and y + + **Two key optimizations:** + - **Path compression**: During Find, make each node point directly to root. + This flattens the tree for future queries. + - **Union by rank/size**: Always attach the smaller tree under the larger. + This keeps trees shallow. + + With both optimizations, operations run in nearly O(1) time—technically + O(α(n)) where α is the inverse Ackermann function, which is ≤ 4 for any + practical input size. + +visualization: | + **Initial state (each element is its own set):** + + ``` + parent: [0, 1, 2, 3, 4] (each points to itself) + + Sets: {0}, {1}, {2}, {3}, {4} + ``` + + **Union(0, 1):** + + ``` + parent: [0, 0, 2, 3, 4] (1 now points to 0) + + 0 + | + 1 + + Sets: {0, 1}, {2}, {3}, {4} + ``` + + **Union(2, 3) then Union(3, 4) — naive linking, no rank:** + + ``` + parent: [0, 0, 2, 2, 3] (4 hangs under 3 to form a chain; the code template would attach 4 to root 2) + + 0 2 + | / \ + 1 3 (direct) + | + 4 + + Sets: {0, 1}, {2, 3, 4} + ``` + + **Union(1, 4) — merges the two trees:** + + ``` + Find(1) = 0, Find(4) = 2 + Naive link: root 2 goes under root 0 (union by rank/size would instead put the shallower tree {0, 1} under 2) + + parent: [0, 0, 0, 2, 3] + + 0 + /| + 1 2 + / \ + 3 (direct) + | + 4 + + Sets: {0, 1, 2, 3, 4} + ``` + + **Path compression during Find(4):** + + ``` + Find(4): 4 → 3 → 2 → 0 (found root) + Compress: make 4, 3, 2 all point directly to 0 + + parent: [0, 0, 0, 0, 0] + + 0 + /|\ \ + 1 2 3 4 + + Now Find(4) is O(1)! + ``` + +code_template: | + class UnionFind: + """Union-Find with path compression and union by rank.""" + + def __init__(self, n: int): + self.parent = list(range(n)) + self.rank = [0] * n + self.count = n # Number of disjoint sets + + def find(self, x: int) -> int: + """Find root with path compression.""" + if self.parent[x] != x: + self.parent[x] = self.find(self.parent[x]) + return self.parent[x] + + def union(self, x: int, y: int) -> bool: + """Union by rank. 
Returns True if x and y were in different sets.""" + root_x, root_y = self.find(x), self.find(y) + + if root_x == root_y: + return False # Already in same set + + # Union by rank + if self.rank[root_x] < self.rank[root_y]: + root_x, root_y = root_y, root_x + + self.parent[root_y] = root_x + + if self.rank[root_x] == self.rank[root_y]: + self.rank[root_x] += 1 + + self.count -= 1 + return True + + def connected(self, x: int, y: int) -> bool: + """Check if x and y are in the same set.""" + return self.find(x) == self.find(y) + + + def count_components(n: int, edges: list[list[int]]) -> int: + """Count connected components in undirected graph.""" + uf = UnionFind(n) + + for u, v in edges: + uf.union(u, v) + + return uf.count + + + def has_cycle(n: int, edges: list[list[int]]) -> bool: + """Detect cycle in undirected graph.""" + uf = UnionFind(n) + + for u, v in edges: + if uf.connected(u, v): + return True # Adding edge creates cycle + uf.union(u, v) + + return False + + + def kruskal_mst(n: int, edges: list[tuple[int, int, int]]) -> int: + """Kruskal's MST algorithm using Union-Find.""" + # edges are (weight, u, v) + edges.sort() # Sort by weight + uf = UnionFind(n) + mst_weight = 0 + edges_used = 0 + + for weight, u, v in edges: + if uf.union(u, v): + mst_weight += weight + edges_used += 1 + if edges_used == n - 1: + break + + return mst_weight if edges_used == n - 1 else -1 + + + def accounts_merge(accounts: list[list[str]]) -> list[list[str]]: + """Merge accounts with common emails.""" + email_to_id = {} + email_to_name = {} + uf = UnionFind(len(accounts)) + + # Map emails to account indices + for i, account in enumerate(accounts): + name = account[0] + for email in account[1:]: + email_to_name[email] = name + if email in email_to_id: + uf.union(i, email_to_id[email]) + else: + email_to_id[email] = i + + # Group emails by root account + from collections import defaultdict + root_to_emails = defaultdict(set) + for email, idx in email_to_id.items(): + root = 
uf.find(idx) + root_to_emails[root].add(email) + + # Build result + return [[email_to_name[next(iter(emails))]] + sorted(emails) + for emails in root_to_emails.values()] + +recognition_signals: + - "connected components" + - "disjoint sets" + - "union" + - "groups" + - "merge accounts" + - "friend circles" + - "detect cycle undirected" + - "Kruskal" + - "minimum spanning tree" + - "redundant connection" + - "equivalence" + +common_mistakes: + - title: Forgetting path compression + description: | + Without path compression, repeated Find operations can be O(n) each, + degrading overall performance. + fix: | + Always compress paths during Find: + ```python + if self.parent[x] != x: + self.parent[x] = self.find(self.parent[x]) + ``` + + - title: Using Union-Find for directed graphs + description: | + Union-Find assumes undirected connections. For directed graphs, cycles + mean something different (back edges in DFS). + fix: | + Use DFS with coloring (WHITE/GRAY/BLACK) for cycle detection in directed + graphs. Union-Find is for undirected connectivity. + + - title: Not tracking component count + description: | + For problems asking "how many components," manually counting at the end + is inefficient. + fix: | + Decrement count in union when merging two different sets: + ```python + if root_x != root_y: + self.count -= 1 + ``` + + - title: Union returning wrong information + description: | + Some solutions need to know if a union actually merged two sets or if + they were already connected. + fix: | + Return boolean from union indicating if merge happened: + ```python + if root_x == root_y: + return False # Already same set + # ... do union ... + return True # Merged + ``` + +variations: + - name: Basic connectivity + description: | + Track whether elements are in the same connected component. 
+ example: "Number of Connected Components, Friend Circles" + + - name: Cycle detection + description: | + If union is called on two already-connected elements, adding that edge + would create a cycle. + example: "Redundant Connection, Graph Valid Tree" + + - name: Kruskal's MST + description: | + Sort edges by weight, greedily add edges that don't create cycles + (checked via Union-Find). + example: "Min Cost to Connect All Points, Connecting Cities With Minimum Cost" + + - name: Dynamic connectivity + description: | + Handle streaming edge insertions while answering connectivity queries. + example: "Evaluate Division, Accounts Merge" + + - name: Weighted Union-Find + description: | + Track relative weights/distances between elements and their roots. + Used in problems with equivalence relationships. + example: "Evaluate Division (weighted paths)" + +related_patterns: + - dfs + - bfs + +prerequisite_patterns: []