From f2e4149e5278d0464484126e5780894bdbf44858 Mon Sep 17 00:00:00 2001 From: Kai Chappell Date: Mon, 18 Aug 2025 22:15:43 +0100 Subject: [PATCH] feat(patterns): pointer/array tutorials --- backend/data/patterns/fast-slow-pointers.yaml | 298 ++++++++++++++++++ backend/data/patterns/intervals.yaml | 275 ++++++++++++++++ .../data/patterns/linkedlist-reversal.yaml | 279 ++++++++++++++++ backend/data/patterns/prefix-sum.yaml | 245 ++++++++++++++ 4 files changed, 1097 insertions(+) create mode 100644 backend/data/patterns/fast-slow-pointers.yaml create mode 100644 backend/data/patterns/intervals.yaml create mode 100644 backend/data/patterns/linkedlist-reversal.yaml create mode 100644 backend/data/patterns/prefix-sum.yaml diff --git a/backend/data/patterns/fast-slow-pointers.yaml b/backend/data/patterns/fast-slow-pointers.yaml new file mode 100644 index 0000000..4649fb5 --- /dev/null +++ b/backend/data/patterns/fast-slow-pointers.yaml @@ -0,0 +1,298 @@ +name: Fast & Slow Pointers +slug: fast-slow-pointers +difficulty_level: 2 + +description: > + Use two pointers moving at different speeds to detect cycles, find midpoints, + or identify patterns in sequences. The fast pointer advances twice as quickly, + allowing detection of structural properties without extra space. + +when_to_use: | + - Detecting cycles in linked lists or sequences + - Finding the middle of a linked list + - Finding the start of a cycle + - Happy number problem + - Palindrome linked list verification + +metaphor: | + Imagine two runners on a circular track. If one runs twice as fast as the other, + the fast runner will eventually lap the slow runner—they'll meet at some point + on the track. This proves the track is circular (has a cycle). + + Another analogy: finding the middle of a line of people. Have two people start + at the front—one takes one step at a time, the other takes two. When the fast + person reaches the end, the slow person is at the middle. 
+ +core_concept: | + The **fast & slow pointers** technique (also called Floyd's cycle detection or + "tortoise and hare") uses two pointers moving at different speeds: + + - **Slow pointer**: Moves 1 step at a time + - **Fast pointer**: Moves 2 steps at a time + + Key insights: + + 1. **Cycle detection**: In a cyclic structure, fast will eventually catch up to + slow (they'll meet inside the cycle). In a non-cyclic structure, fast will + reach the end. + + 2. **Finding middle**: When fast reaches the end, slow is at the middle (fast + traveled 2x the distance). + + 3. **Finding cycle start**: After detecting a cycle, reset one pointer to start. + Move both at the same speed—they meet at the cycle start. (Mathematical proof: + head→start distance equals meeting point→start distance, modulo full laps.) + +visualization: | + **Cycle Detection:** + + ``` + List: 1 → 2 → 3 → 4 → 5 + ↑ ↓ + 7 ← 6 + + Step 1: slow=1, fast=1 + Step 2: slow=2, fast=3 + Step 3: slow=3, fast=5 + Step 4: slow=4, fast=7 + Step 5: slow=5, fast=4 + Step 6: slow=6, fast=6 ← They meet! Cycle exists. + ``` + + **Finding Middle:** + + ``` + List: 1 → 2 → 3 → 4 → 5 → null + + Step 1: slow=1, fast=1 + Step 2: slow=2, fast=3 + Step 3: slow=3, fast=5 + Step 4: fast.next is null → loop stops + + slow is at middle (3) + ``` + + **Finding Cycle Start:** + + ``` + After detecting cycle at node X: + + 1. Reset slow to head, keep fast at meeting point + 2. Move both at same speed (1 step each) + 3. They meet at cycle start + + Why?
Math: Let's say: + - Distance from head to cycle start = A + - Distance from cycle start to meeting point = B + - Cycle length = C + + At meeting: slow traveled A + B + fast traveled A + B + nC (some complete cycles) + + Since fast travels 2x: 2(A + B) = A + B + nC + Therefore: A + B = nC, so A = nC - B = (n-1)C + (C-B) + + This means: distance from head to cycle start + = distance from meeting point to cycle start (going forward) + ``` + +code_template: | + class ListNode: + def __init__(self, val=0, next=None): + self.val = val + self.next = next + + + def has_cycle(head: ListNode) -> bool: + """Detect if linked list has a cycle.""" + if not head or not head.next: + return False + + slow = head + fast = head + + while fast and fast.next: + slow = slow.next + fast = fast.next.next + + if slow == fast: + return True + + return False + + + def find_cycle_start(head: ListNode) -> ListNode: + """Find the node where the cycle begins.""" + if not head or not head.next: + return None + + # Phase 1: Detect cycle + slow = fast = head + while fast and fast.next: + slow = slow.next + fast = fast.next.next + if slow == fast: + break + else: + return None # No cycle + + # Phase 2: Find cycle start + slow = head + while slow != fast: + slow = slow.next + fast = fast.next + + return slow + + + def find_middle(head: ListNode) -> ListNode: + """Find the middle node of linked list.""" + if not head: + return None + + slow = fast = head + + while fast and fast.next: + slow = slow.next + fast = fast.next.next + + return slow # Middle (or second middle if even length) + + + def is_happy_number(n: int) -> bool: + """Check if number is happy (sum of squared digits eventually = 1).""" + def get_next(num: int) -> int: + total = 0 + while num > 0: + digit = num % 10 + total += digit * digit + num //= 10 + return total + + slow = n + fast = get_next(n) + + while fast != 1 and slow != fast: + slow = get_next(slow) + fast = get_next(get_next(fast)) + + return fast == 1 + + + def 
is_palindrome_linked_list(head: ListNode) -> bool: + """Check if linked list is a palindrome.""" + if not head or not head.next: + return True + + # Find middle + slow = fast = head + while fast and fast.next: + slow = slow.next + fast = fast.next.next + + # Reverse second half + prev = None + while slow: + next_node = slow.next + slow.next = prev + prev = slow + slow = next_node + + # Compare halves + left, right = head, prev + while right: + if left.val != right.val: + return False + left = left.next + right = right.next + + return True + +recognition_signals: + - "linked list cycle" + - "detect cycle" + - "find middle" + - "happy number" + - "palindrome linked list" + - "circular array" + - "Floyd's" + - "tortoise and hare" + - "meeting point" + - "cycle start" + +common_mistakes: + - title: Not checking fast.next before advancing + description: | + Accessing `fast.next.next` without first checking `fast.next` causes + null pointer errors when fast lands on the last node (odd-length lists). + fix: | + Always check both `fast` and `fast.next`: + ```python + while fast and fast.next: + fast = fast.next.next + ``` + + - title: Wrong initialization for cycle detection + description: | + Starting slow and fast at different positions (e.g., slow=head, fast=head.next) + changes the math for finding the cycle start. + fix: | + Start both at head for consistency. The algorithms are designed assuming + both start at the same position. + + - title: Forgetting to handle empty or single-node lists + description: | + Accessing head.next or head.next.next on empty or single-node lists + causes errors. + fix: | + Add early returns: + ```python + if not head or not head.next: + return False # or appropriate value + ``` + + - title: Confusing meeting point with cycle start + description: | + Returning the meeting point instead of finding the actual cycle start + gives the wrong answer.
+ fix: | + After detecting a cycle (meeting point), reset one pointer to head and + advance both at the same speed to find the cycle start. + +variations: + - name: Cycle detection + description: | + Determine if a linked list or sequence has a cycle. If fast catches slow, + there's a cycle. + example: "Linked List Cycle, Circular Array Loop" + + - name: Finding cycle start + description: | + After detecting a cycle, find the node where the cycle begins using the + two-phase approach. + example: "Linked List Cycle II" + + - name: Finding middle + description: | + When fast reaches the end, slow is at the middle. Useful for divide and + conquer on linked lists. + example: "Middle of Linked List, Sort List (merge sort needs middle)" + + - name: Happy number + description: | + Treat the sequence of digit-square sums as a linked list. Either reaches 1 + (happy) or cycles (unhappy). + example: "Happy Number" + + - name: Palindrome check + description: | + Find middle, reverse second half, compare. Combines finding middle with + linked list reversal. + example: "Palindrome Linked List" + +related_patterns: + - two-pointers + - linkedlist-reversal + +prerequisite_patterns: + - two-pointers diff --git a/backend/data/patterns/intervals.yaml b/backend/data/patterns/intervals.yaml new file mode 100644 index 0000000..d593a65 --- /dev/null +++ b/backend/data/patterns/intervals.yaml @@ -0,0 +1,275 @@ +name: Overlapping Intervals +slug: intervals +difficulty_level: 2 + +description: > + Process and manipulate intervals (ranges) that may share common regions. + The key insight is that sorting intervals by start time allows efficient + detection and handling of overlaps through a single linear pass. + +when_to_use: | + - Merging overlapping intervals + - Inserting an interval into a sorted list + - Finding gaps between intervals + - Meeting room scheduling + - Finding interval intersections + +metaphor: | + Imagine scheduling meeting rooms. Each meeting is an interval of time. 
When + two meetings overlap, you need either two rooms or to merge them into one + longer booking. By sorting meetings by start time, you can easily spot + overlaps—if the next meeting starts before the current one ends, they overlap. + + Another analogy: merging overlapping highlighter marks on a page. Sort them + left to right, and if one mark starts before the previous ends, combine them + into one continuous highlight. + +core_concept: | + The **interval pattern** relies on sorting intervals by start time. Once sorted, + overlapping detection becomes simple: + + **Two intervals [a, b] and [c, d] overlap if c <= b** (assuming a <= c after sorting) + + Key operations: + 1. **Merge**: Extend the current interval's end to include overlapping intervals + 2. **Insert**: Find where the new interval overlaps and merge as needed + 3. **Count overlaps**: Track how many intervals are "active" at any point + + For problems needing simultaneous tracking (like minimum meeting rooms), use + a **sweep line** approach: sort all start and end points together, then sweep + through counting active intervals. + +visualization: | + **Merging overlapping intervals:** + + ``` + Input: [[1,3], [2,6], [8,10], [15,18]] + (sorted by start) + + Process [1,3]: result = [[1,3]] + + Process [2,6]: 2 <= 3? Yes, overlap! + Merge: [1, max(3,6)] = [1,6] + result = [[1,6]] + + Process [8,10]: 8 <= 6? No, no overlap + result = [[1,6], [8,10]] + + Process [15,18]: 15 <= 10? 
No, no overlap + result = [[1,6], [8,10], [15,18]] + ``` + + **Visualized on number line:** + + ``` + Before: [1---3] + [2------6] + [8--10] + [15--18] + + After: [1--------6] [8--10] [15--18] + ``` + + **Meeting rooms (sweep line):** + + ``` + Meetings: [[0,30], [5,10], [15,20]] + + Events (sorted): + time=0: +1 (start) active=1 + time=5: +1 (start) active=2 ← max + time=10: -1 (end) active=1 + time=15: +1 (start) active=2 ← max + time=20: -1 (end) active=1 + time=30: -1 (end) active=0 + + Max concurrent = 2 → need 2 meeting rooms + ``` + +code_template: | + def merge_intervals(intervals: list[list[int]]) -> list[list[int]]: + """Merge overlapping intervals.""" + if not intervals: + return [] + + # Sort by start time + intervals.sort(key=lambda x: x[0]) + + result = [intervals[0]] + + for start, end in intervals[1:]: + last_end = result[-1][1] + + if start <= last_end: # Overlap + result[-1][1] = max(last_end, end) # Extend + else: + result.append([start, end]) + + return result + + + def insert_interval(intervals: list[list[int]], + new: list[int]) -> list[list[int]]: + """Insert and merge a new interval into sorted list.""" + result = [] + i = 0 + n = len(intervals) + + # Add all intervals before new interval + while i < n and intervals[i][1] < new[0]: + result.append(intervals[i]) + i += 1 + + # Merge overlapping intervals with new + while i < n and intervals[i][0] <= new[1]: + new[0] = min(new[0], intervals[i][0]) + new[1] = max(new[1], intervals[i][1]) + i += 1 + + result.append(new) + + # Add remaining intervals + while i < n: + result.append(intervals[i]) + i += 1 + + return result + + + def min_meeting_rooms(intervals: list[list[int]]) -> int: + """Find minimum meeting rooms needed (sweep line).""" + events = [] + + for start, end in intervals: + events.append((start, 1)) # +1 for start + events.append((end, -1)) # -1 for end + + # Sort by time, with ends before starts at same time + events.sort(key=lambda x: (x[0], x[1])) + + max_rooms = 0 + current_rooms 
= 0 + + for _, delta in events: + current_rooms += delta + max_rooms = max(max_rooms, current_rooms) + + return max_rooms + + + def interval_intersection(A: list[list[int]], + B: list[list[int]]) -> list[list[int]]: + """Find intersection of two sorted interval lists.""" + result = [] + i = j = 0 + + while i < len(A) and j < len(B): + # Find overlap + start = max(A[i][0], B[j][0]) + end = min(A[i][1], B[j][1]) + + if start <= end: + result.append([start, end]) + + # Advance the interval that ends first + if A[i][1] < B[j][1]: + i += 1 + else: + j += 1 + + return result + + + def can_attend_all(intervals: list[list[int]]) -> bool: + """Check if a person can attend all meetings.""" + intervals.sort(key=lambda x: x[0]) + + for i in range(1, len(intervals)): + if intervals[i][0] < intervals[i-1][1]: + return False # Overlap found + + return True + +recognition_signals: + - "intervals" + - "merge" + - "overlapping" + - "meeting rooms" + - "schedule" + - "time slots" + - "range" + - "insert interval" + - "non-overlapping" + - "intersection" + +common_mistakes: + - title: Not sorting first + description: | + Trying to process unsorted intervals leads to incorrect results because + overlaps aren't detected properly. + fix: | + Always sort intervals by start time before processing: + ```python + intervals.sort(key=lambda x: x[0]) + ``` + + - title: Wrong overlap condition + description: | + Using `start < last_end` instead of `start <= last_end` misses adjacent + intervals that should be merged (like [1,2] and [2,3]). + fix: | + Use `<=` for touching intervals, `<` for strict overlap only. Check problem + requirements for whether touching counts as overlapping. + + - title: Not updating end correctly when merging + description: | + Setting `end = new_end` instead of `end = max(old_end, new_end)` fails when + a smaller interval is contained within a larger one. 
+ fix: | + Always take the maximum: + ```python + result[-1][1] = max(result[-1][1], end) + ``` + + - title: Off-by-one with closed vs open intervals + description: | + Confusion about whether interval endpoints are inclusive `[a, b]` or + exclusive `[a, b)` causes incorrect overlap detection. + fix: | + Clarify the convention from the problem. Most problems use closed intervals + where both endpoints are included. + +variations: + - name: Merge intervals + description: | + Combine all overlapping intervals into non-overlapping intervals. + example: "Merge Intervals" + + - name: Insert interval + description: | + Insert a new interval into a sorted list, merging with any overlapping + intervals. + example: "Insert Interval" + + - name: Meeting rooms + description: | + Find minimum resources needed for concurrent intervals using sweep line + or min-heap. + example: "Meeting Rooms II, Car Pooling" + + - name: Interval intersection + description: | + Find common regions between two lists of intervals using two pointers. + example: "Interval List Intersections" + + - name: Non-overlapping intervals + description: | + Find minimum removals to make all intervals non-overlapping. Greedy + approach: keep intervals that end earliest. + example: "Non-overlapping Intervals, Erase Overlap Intervals" + +related_patterns: + - greedy + - two-pointers + +prerequisite_patterns: [] diff --git a/backend/data/patterns/linkedlist-reversal.yaml b/backend/data/patterns/linkedlist-reversal.yaml new file mode 100644 index 0000000..941b114 --- /dev/null +++ b/backend/data/patterns/linkedlist-reversal.yaml @@ -0,0 +1,279 @@ +name: LinkedList In-Place Reversal +slug: linkedlist-reversal +difficulty_level: 2 + +description: > + Reverse linked list nodes in-place by manipulating pointers without allocating + extra space. This technique uses three pointers to track the previous, current, + and next nodes while systematically reversing the direction of links. 
+ +when_to_use: | + - Reversing an entire linked list + - Reversing a portion of a linked list + - Reversing in groups of K nodes + - Palindrome linked list verification + - Reordering list problems + +metaphor: | + Imagine a conga line where everyone faces forward. To reverse it, you don't + rearrange people—you have each person turn around and grab the shoulders of + whoever was behind them. You process one person at a time: they turn around, + the next person steps forward, and so on until everyone faces the opposite direction. + + Another analogy: reversing a chain of paper clips. You unclip each one from its + forward neighbor and clip it to its backward neighbor, working through the chain. + +core_concept: | + Linked list reversal uses **three pointers** moving through the list: + + - **prev**: Points to the already-reversed portion (starts as null) + - **curr**: The node currently being processed + - **next**: Temporarily stores the next node before we break the link + + At each step: + 1. Save `curr.next` in `next` (before we lose it) + 2. Reverse the link: `curr.next = prev` + 3. Advance: `prev = curr`, `curr = next` + + The key insight is that we're not moving nodes—we're redirecting pointers. + This achieves O(n) time with O(1) space. + + For **partial reversal** (reversing between positions m and n), we: + 1. Navigate to position m-1 (the node before reversal starts) + 2. Reverse nodes from m to n + 3. 
Reconnect the reversed portion to the rest of the list + +visualization: | + **Full list reversal:** + + ``` + Initial: 1 → 2 → 3 → 4 → null + prev=null, curr=1 + + Step 1: Save next=2, reverse 1's link + null ← 1 2 → 3 → 4 + prev curr + + Step 2: Save next=3, reverse 2's link + null ← 1 ← 2 3 → 4 + prev curr + + Step 3: Save next=4, reverse 3's link + null ← 1 ← 2 ← 3 4 + prev curr + + Step 4: Save next=null, reverse 4's link + null ← 1 ← 2 ← 3 ← 4 + prev curr=null + + Result: 4 → 3 → 2 → 1 → null + ``` + + **Partial reversal (positions 2 to 4):** + + ``` + Initial: 1 → 2 → 3 → 4 → 5 + positions: 1 2 3 4 5 + + Goal: 1 → 4 → 3 → 2 → 5 + + Step 1: Find node before position 2 + before = node 1 + + Step 2: Reverse nodes 2, 3, 4 + 1 null ← 2 ← 3 ← 4 5 + ↑ ↑ ↑ + before prev curr + + Step 3: Reconnect + before.next.next = curr (2 → 5) + before.next = prev (1 → 4) + + Result: 1 → 4 → 3 → 2 → 5 + ``` + +code_template: | + class ListNode: + def __init__(self, val=0, next=None): + self.val = val + self.next = next + + + def reverse_list(head: ListNode) -> ListNode: + """Reverse entire linked list.""" + prev = None + curr = head + + while curr: + next_node = curr.next # Save next + curr.next = prev # Reverse link + prev = curr # Advance prev + curr = next_node # Advance curr + + return prev # New head + + + def reverse_between(head: ListNode, m: int, n: int) -> ListNode: + """Reverse nodes from position m to n (1-indexed).""" + if not head or m == n: + return head + + dummy = ListNode(0) + dummy.next = head + before = dummy + + # Move to node before reversal starts + for _ in range(m - 1): + before = before.next + + # Reverse n - m + 1 nodes + prev = None + curr = before.next + for _ in range(n - m + 1): + next_node = curr.next + curr.next = prev + prev = curr + curr = next_node + + # Reconnect + before.next.next = curr # tail of reversed → rest of list + before.next = prev # before → new head of reversed + + return dummy.next + + + def reverse_k_group(head: ListNode, k: int) 
-> ListNode: + """Reverse nodes in groups of k.""" + # Count total nodes + count = 0 + node = head + while node: + count += 1 + node = node.next + + dummy = ListNode(0) + dummy.next = head + before = dummy + + while count >= k: + # Reverse k nodes + prev = None + curr = before.next + for _ in range(k): + next_node = curr.next + curr.next = prev + prev = curr + curr = next_node + + # Reconnect + tail = before.next + tail.next = curr + before.next = prev + + # Move to next group + before = tail + count -= k + + return dummy.next + + + def reverse_list_recursive(head: ListNode) -> ListNode: + """Reverse list using recursion.""" + if not head or not head.next: + return head + + new_head = reverse_list_recursive(head.next) + head.next.next = head # Reverse link + head.next = None # Prevent cycle + + return new_head + +recognition_signals: + - "reverse linked list" + - "reverse between" + - "reverse in groups" + - "reverse k-group" + - "palindrome linked list" + - "reorder list" + - "swap nodes" + - "rotate list" + +common_mistakes: + - title: Losing reference to next node + description: | + Reversing `curr.next` before saving it means you can't advance to the + next node. + fix: | + Always save the next node first: + ```python + next_node = curr.next # Save FIRST + curr.next = prev # Then reverse + ``` + + - title: Forgetting to update connections in partial reversal + description: | + Reversing the middle portion without reconnecting it to the beginning + and end of the list breaks the list. + fix: | + After reversing, reconnect both ends: + ```python + before.next.next = curr # tail → rest + before.next = prev # before → new head + ``` + + - title: Not using a dummy node + description: | + When the reversal might include the head, handling the head separately + adds complexity and edge cases. + fix: | + Use a dummy node pointing to head. This simplifies edge cases: + ```python + dummy = ListNode(0) + dummy.next = head + # ... reversal logic ... 
+ return dummy.next + ``` + + - title: Off-by-one with positions + description: | + Confusing 0-indexed vs 1-indexed positions causes reversal of wrong nodes. + fix: | + Clarify indexing convention. For 1-indexed positions, loop `m-1` times + to reach the node *before* position m. + +variations: + - name: Full reversal + description: | + Reverse the entire linked list. Simplest form—just walk through and + reverse each link. + example: "Reverse Linked List" + + - name: Partial reversal + description: | + Reverse only nodes between positions m and n. Need to track connection + points before and after the reversed section. + example: "Reverse Linked List II" + + - name: K-group reversal + description: | + Reverse every k consecutive nodes. Often requires counting total nodes + first to know when to stop. + example: "Reverse Nodes in k-Group" + + - name: Alternating reversal + description: | + Reverse every other group of k nodes. Combines k-group logic with + skip logic. + example: "Reverse Alternate K Nodes" + + - name: Recursive reversal + description: | + Elegant recursive solution that reverses by relying on the recursive + call to reverse the rest, then fixing up the current node. + example: "Reverse Linked List (recursive approach)" + +related_patterns: + - fast-slow-pointers + - two-pointers + +prerequisite_patterns: [] diff --git a/backend/data/patterns/prefix-sum.yaml b/backend/data/patterns/prefix-sum.yaml new file mode 100644 index 0000000..a0ee79b --- /dev/null +++ b/backend/data/patterns/prefix-sum.yaml @@ -0,0 +1,245 @@ +name: Prefix Sum +slug: prefix-sum +difficulty_level: 2 + +description: > + Precompute cumulative sums to answer range sum queries in O(1) time. This + transforms repeated O(n) range calculations into O(n) preprocessing plus + O(1) per query, making it essential for problems involving subarray sums. 
+ +when_to_use: | + - Range sum queries + - Subarray sum equals target + - Count subarrays with given sum + - Product of array except self + - 2D matrix region sums + +metaphor: | + Imagine tracking your total running distance over a year. Instead of adding up + daily distances each time someone asks "how far did you run from day 50 to day + 75?", you keep a running total. The cumulative distance on day 75 minus day 49 + instantly gives you the answer. + + Another analogy: a bank account balance. To find how much you spent between + two dates, you subtract the earlier balance from the later balance—no need to + sum individual transactions. + +core_concept: | + A **prefix sum array** stores cumulative sums where `prefix[i]` = sum of all + elements from index 0 to i-1. This enables O(1) range sum queries: + + **sum(i, j) = prefix[j+1] - prefix[i]** + + The key insight is that any range sum can be computed from two prefix sums. + Instead of iterating through the range (O(n) per query), we do O(n) preprocessing + once and answer unlimited queries in O(1) each. 
+ + This pattern extends to: + - **Prefix products**: For multiplication-based problems + - **Prefix counts**: For counting occurrences + - **2D prefix sums**: For matrix region queries + - **Prefix sum + hash map**: For "subarray sum equals K" + +visualization: | + **Building prefix sum array:** + + ``` + Array: [3, 1, 4, 1, 5, 9] + Index: 0 1 2 3 4 5 + + prefix[0] = 0 (empty prefix) + prefix[1] = 0 + 3 = 3 (sum of first 1 element) + prefix[2] = 3 + 1 = 4 (sum of first 2 elements) + prefix[3] = 4 + 4 = 8 + prefix[4] = 8 + 1 = 9 + prefix[5] = 9 + 5 = 14 + prefix[6] = 14 + 9 = 23 + + Prefix: [0, 3, 4, 8, 9, 14, 23] + Index: 0 1 2 3 4 5 6 + ``` + + **Range sum query: sum(2, 4) = elements at indices 2, 3, 4** + + ``` + sum(2, 4) = prefix[5] - prefix[2] + = 14 - 4 + = 10 + + Verification: arr[2] + arr[3] + arr[4] = 4 + 1 + 5 = 10 ✓ + ``` + + **Subarray sum equals K using hash map:** + + ``` + Array: [1, 2, 3, -2, 5] K = 4 + + As we iterate, track prefix sums and look for prefix_sum - K: + + i=0: prefix=1, need 1-4=-3, not found + i=1: prefix=3, need 3-4=-1, not found + i=2: prefix=6, need 6-4=2, not found + i=3: prefix=4, need 4-4=0, found! (empty prefix) + → subarray [0:4] sums to 4 + i=4: prefix=9, need 9-4=5, not found + + Result: count = 1. Prefix sums seen, in order: 0 (empty), 1, 3, 6, 4, 9. + The only later-minus-earlier pair that equals K=4 is 4 - 0: + subarray [0:4] → 1+2+3-2=4 ✓ + ``` + +code_template: | + def build_prefix_sum(arr: list[int]) -> list[int]: + """Build prefix sum array.
prefix[i] = sum of arr[0:i].""" + prefix = [0] * (len(arr) + 1) + for i in range(len(arr)): + prefix[i + 1] = prefix[i] + arr[i] + return prefix + + + def range_sum(prefix: list[int], i: int, j: int) -> int: + """Sum of elements from index i to j (inclusive).""" + return prefix[j + 1] - prefix[i] + + + def subarray_sum_equals_k(nums: list[int], k: int) -> int: + """Count subarrays with sum equal to k.""" + count = 0 + prefix_sum = 0 + # Map: prefix_sum -> count of occurrences + sum_count = {0: 1} # Empty prefix has sum 0 + + for num in nums: + prefix_sum += num + + # If (prefix_sum - k) exists, those prefixes form valid subarrays + if prefix_sum - k in sum_count: + count += sum_count[prefix_sum - k] + + # Record current prefix sum + sum_count[prefix_sum] = sum_count.get(prefix_sum, 0) + 1 + + return count + + + def product_except_self(nums: list[int]) -> list[int]: + """Product of array except self without division.""" + n = len(nums) + result = [1] * n + + # Prefix products (left to right) + prefix = 1 + for i in range(n): + result[i] = prefix + prefix *= nums[i] + + # Suffix products (right to left) + suffix = 1 + for i in range(n - 1, -1, -1): + result[i] *= suffix + suffix *= nums[i] + + return result + + + def matrix_region_sum(matrix: list[list[int]], + row1: int, col1: int, + row2: int, col2: int) -> int: + """2D prefix sum for matrix region queries.""" + # Build 2D prefix sum + m, n = len(matrix), len(matrix[0]) + prefix = [[0] * (n + 1) for _ in range(m + 1)] + + for i in range(1, m + 1): + for j in range(1, n + 1): + prefix[i][j] = (matrix[i-1][j-1] + + prefix[i-1][j] + + prefix[i][j-1] + - prefix[i-1][j-1]) + + # Query region sum using inclusion-exclusion + return (prefix[row2+1][col2+1] + - prefix[row1][col2+1] + - prefix[row2+1][col1] + + prefix[row1][col1]) + +recognition_signals: + - "range sum" + - "subarray sum" + - "cumulative" + - "sum equals k" + - "count subarrays" + - "product except self" + - "matrix region sum" + - "running total" + - 
"continuous subarray" + +common_mistakes: + - title: Off-by-one in prefix array indexing + description: | + Confusion about whether prefix[i] includes arr[i] or not leads to + incorrect range sums. + fix: | + Convention: `prefix[i]` = sum of first i elements = `arr[0:i]`. + So `prefix[0] = 0` (empty), and range sum is `prefix[j+1] - prefix[i]`. + + - title: Forgetting the empty prefix for "sum equals K" + description: | + Not initializing the hash map with `{0: 1}` misses subarrays starting + from index 0. + fix: | + Always initialize: `sum_count = {0: 1}`. This handles the case where the + subarray from the beginning has sum K. + + - title: Integer overflow with large sums + description: | + Prefix sums can grow very large when array elements are big, causing + overflow in some languages. + fix: | + In Python this isn't an issue. In Java/C++, use `long` for prefix sums + or check constraints carefully. + + - title: Not handling negative numbers + description: | + Prefix sum works with negative numbers, but some solutions assume only + positive values and apply the wrong optimization (like sliding window). + fix: | + Prefix sum handles negatives correctly. But a variable-size sliding window + (e.g., "Minimum Size Subarray Sum") is only valid for non-negative numbers. + +variations: + - name: Basic prefix sum + description: | + Precompute cumulative sums for O(1) range queries. Foundation for other + variations. + example: "Range Sum Query - Immutable, Running Sum of 1d Array" + + - name: Prefix sum with hash map + description: | + Track prefix sums in a hash map to find subarrays summing to a target. + Key insight: `prefix[j] - prefix[i] = target` means `arr[i:j]` (indices i..j-1) works. + example: "Subarray Sum Equals K, Contiguous Array" + + - name: Prefix product + description: | + Same idea but with multiplication. Watch for zeros and consider using + left/right products separately.
+ example: "Product of Array Except Self" + + - name: 2D prefix sum + description: | + Extend to matrices for O(1) region sum queries. Uses inclusion-exclusion + principle. + example: "Range Sum Query 2D, Matrix Block Sum" + + - name: Difference array + description: | + Inverse of prefix sum. Store differences to support O(1) range updates. + Prefix sum of difference array gives original. + example: "Range Addition, Corporate Flight Bookings" + +related_patterns: + - sliding-window + - two-pointers + +prerequisite_patterns: []