From f757e28b247bcd4a23d64c0994aa6de1e8812668 Mon Sep 17 00:00:00 2001 From: Kai Chappell Date: Sat, 24 May 2025 21:40:39 +0100 Subject: [PATCH] questions A (01-matrix - avoid-flood) --- backend/data/questions/01-matrix.yaml | 185 +++++++++++ backend/data/questions/132-pattern.yaml | 227 ++++++++++++++ backend/data/questions/2-keys-keyboard.yaml | 169 ++++++++++ backend/data/questions/24-game.yaml | 294 +++++++++++++++++ .../questions/3sum-with-multiplicity.yaml | 227 ++++++++++++++ backend/data/questions/4sum-ii.yaml | 166 ++++++++++ .../a-number-after-a-double-reversal.yaml | 151 +++++++++ backend/data/questions/accounts-merge.yaml | 295 ++++++++++++++++++ backend/data/questions/add-binary.yaml | 185 +++++++++++ backend/data/questions/add-digits.yaml | 203 ++++++++++++ .../add-minimum-number-of-rungs.yaml | 160 ++++++++++ .../data/questions/add-one-row-to-tree.yaml | 241 ++++++++++++++ backend/data/questions/add-strings.yaml | 197 ++++++++++++ .../add-to-array-form-of-integer.yaml | 181 +++++++++++ backend/data/questions/add-two-integers.yaml | 110 +++++++ .../data/questions/add-two-numbers-ii.yaml | 222 +++++++++++++ backend/data/questions/add-two-numbers.yaml | 185 +++++++++++ .../questions/adding-spaces-to-a-string.yaml | 183 +++++++++++ .../adding-two-negabinary-numbers.yaml | 224 +++++++++++++ backend/data/questions/additive-number.yaml | 223 +++++++++++++ backend/data/questions/advantage-shuffle.yaml | 199 ++++++++++++ .../airplane-seat-assignment-probability.yaml | 200 ++++++++++++ .../data/questions/alert-using-same-key.yaml | 200 ++++++++++++ ...ns-with-highest-score-of-binary-array.yaml | 186 +++++++++++ ...l-elements-in-two-binary-search-trees.yaml | 214 +++++++++++++ .../all-nodes-distance-k-in-binary-tree.yaml | 223 +++++++++++++ .../all-paths-from-source-to-target.yaml | 200 ++++++++++++ .../all-possible-full-binary-trees.yaml | 215 +++++++++++++ .../data/questions/allocate-mailboxes.yaml | 252 +++++++++++++++ 
.../data/questions/alphabet-board-path.yaml | 236 ++++++++++++++ .../data/questions/alternating-digit-sum.yaml | 171 ++++++++++ .../data/questions/ambiguous-coordinates.yaml | 232 ++++++++++++++ ...f-time-for-binary-tree-to-be-infected.yaml | 227 ++++++++++++++ .../angle-between-hands-of-a-clock.yaml | 155 +++++++++ ...racters-to-string-to-make-subsequence.yaml | 165 ++++++++++ .../append-k-integers-with-minimal-sum.yaml | 191 ++++++++++++ ...wise-operations-to-make-strings-equal.yaml | 158 ++++++++++ .../apply-operations-to-an-array.yaml | 176 +++++++++++ .../arithmetic-slices-ii-subsequence.yaml | 209 +++++++++++++ backend/data/questions/arithmetic-slices.yaml | 180 +++++++++++ .../data/questions/arithmetic-subarrays.yaml | 214 +++++++++++++ backend/data/questions/arranging-coins.yaml | 170 ++++++++++ backend/data/questions/array-nesting.yaml | 218 +++++++++++++ .../questions/array-of-doubled-pairs.yaml | 172 ++++++++++ backend/data/questions/array-partition.yaml | 169 ++++++++++ ...nts-not-equal-to-average-of-neighbors.yaml | 200 ++++++++++++ .../as-far-from-land-as-possible.yaml | 201 ++++++++++++ backend/data/questions/assign-cookies.yaml | 174 +++++++++++ .../data/questions/asteroid-collision.yaml | 206 ++++++++++++ .../available-captures-for-rook.yaml | 152 +++++++++ .../average-of-levels-in-binary-tree.yaml | 200 ++++++++++++ ...-salary-excluding-minimum-and-maximum.yaml | 173 ++++++++++ ...ue-of-even-numbers-divisible-by-three.yaml | 141 +++++++++ .../data/questions/average-waiting-time.yaml | 175 +++++++++++ .../questions/avoid-flood-in-the-city.yaml | 231 ++++++++++++++ 55 files changed, 10813 insertions(+) create mode 100644 backend/data/questions/01-matrix.yaml create mode 100644 backend/data/questions/132-pattern.yaml create mode 100644 backend/data/questions/2-keys-keyboard.yaml create mode 100644 backend/data/questions/24-game.yaml create mode 100644 backend/data/questions/3sum-with-multiplicity.yaml create mode 100644 
backend/data/questions/4sum-ii.yaml create mode 100644 backend/data/questions/a-number-after-a-double-reversal.yaml create mode 100644 backend/data/questions/accounts-merge.yaml create mode 100644 backend/data/questions/add-binary.yaml create mode 100644 backend/data/questions/add-digits.yaml create mode 100644 backend/data/questions/add-minimum-number-of-rungs.yaml create mode 100644 backend/data/questions/add-one-row-to-tree.yaml create mode 100644 backend/data/questions/add-strings.yaml create mode 100644 backend/data/questions/add-to-array-form-of-integer.yaml create mode 100644 backend/data/questions/add-two-integers.yaml create mode 100644 backend/data/questions/add-two-numbers-ii.yaml create mode 100644 backend/data/questions/add-two-numbers.yaml create mode 100644 backend/data/questions/adding-spaces-to-a-string.yaml create mode 100644 backend/data/questions/adding-two-negabinary-numbers.yaml create mode 100644 backend/data/questions/additive-number.yaml create mode 100644 backend/data/questions/advantage-shuffle.yaml create mode 100644 backend/data/questions/airplane-seat-assignment-probability.yaml create mode 100644 backend/data/questions/alert-using-same-key.yaml create mode 100644 backend/data/questions/all-divisions-with-highest-score-of-binary-array.yaml create mode 100644 backend/data/questions/all-elements-in-two-binary-search-trees.yaml create mode 100644 backend/data/questions/all-nodes-distance-k-in-binary-tree.yaml create mode 100644 backend/data/questions/all-paths-from-source-to-target.yaml create mode 100644 backend/data/questions/all-possible-full-binary-trees.yaml create mode 100644 backend/data/questions/allocate-mailboxes.yaml create mode 100644 backend/data/questions/alphabet-board-path.yaml create mode 100644 backend/data/questions/alternating-digit-sum.yaml create mode 100644 backend/data/questions/ambiguous-coordinates.yaml create mode 100644 backend/data/questions/amount-of-time-for-binary-tree-to-be-infected.yaml create mode 100644 
backend/data/questions/angle-between-hands-of-a-clock.yaml create mode 100644 backend/data/questions/append-characters-to-string-to-make-subsequence.yaml create mode 100644 backend/data/questions/append-k-integers-with-minimal-sum.yaml create mode 100644 backend/data/questions/apply-bitwise-operations-to-make-strings-equal.yaml create mode 100644 backend/data/questions/apply-operations-to-an-array.yaml create mode 100644 backend/data/questions/arithmetic-slices-ii-subsequence.yaml create mode 100644 backend/data/questions/arithmetic-slices.yaml create mode 100644 backend/data/questions/arithmetic-subarrays.yaml create mode 100644 backend/data/questions/arranging-coins.yaml create mode 100644 backend/data/questions/array-nesting.yaml create mode 100644 backend/data/questions/array-of-doubled-pairs.yaml create mode 100644 backend/data/questions/array-partition.yaml create mode 100644 backend/data/questions/array-with-elements-not-equal-to-average-of-neighbors.yaml create mode 100644 backend/data/questions/as-far-from-land-as-possible.yaml create mode 100644 backend/data/questions/assign-cookies.yaml create mode 100644 backend/data/questions/asteroid-collision.yaml create mode 100644 backend/data/questions/available-captures-for-rook.yaml create mode 100644 backend/data/questions/average-of-levels-in-binary-tree.yaml create mode 100644 backend/data/questions/average-salary-excluding-minimum-and-maximum.yaml create mode 100644 backend/data/questions/average-value-of-even-numbers-divisible-by-three.yaml create mode 100644 backend/data/questions/average-waiting-time.yaml create mode 100644 backend/data/questions/avoid-flood-in-the-city.yaml diff --git a/backend/data/questions/01-matrix.yaml b/backend/data/questions/01-matrix.yaml new file mode 100644 index 0000000..c5fb26d --- /dev/null +++ b/backend/data/questions/01-matrix.yaml @@ -0,0 +1,185 @@ +title: 01 Matrix +slug: 01-matrix +difficulty: medium +leetcode_id: 542 +leetcode_url: 
https://leetcode.com/problems/01-matrix/ +categories: + - arrays + - graphs +patterns: + - bfs + - matrix-traversal + - dynamic-programming + +description: | + Given an `m x n` binary matrix `mat`, return *the distance of the nearest* `0` *for each cell*. + + The distance between two cells sharing a common edge is `1`. + +constraints: | + - `m == mat.length` + - `n == mat[i].length` + - `1 <= m, n <= 10^4` + - `1 <= m * n <= 10^4` + - `mat[i][j]` is either `0` or `1` + - There is at least one `0` in `mat` + +examples: + - input: "mat = [[0,0,0],[0,1,0],[0,0,0]]" + output: "[[0,0,0],[0,1,0],[0,0,0]]" + explanation: "The center cell has value 1, and its nearest 0 is any of its four adjacent cells, so its distance is 1. All other cells are already 0." + - input: "mat = [[0,0,0],[0,1,0],[1,1,1]]" + output: "[[0,0,0],[0,1,0],[1,2,1]]" + explanation: "The bottom-middle cell (2,1) has distance 2 because its nearest 0 is two steps away (e.g., up then up, or up then left/right)." + +explanation: + intuition: | + Imagine you're standing at each `1` cell and need to find the shortest path to any `0` cell. This sounds like a shortest path problem, and in an unweighted grid, **BFS** finds shortest paths. + + The key insight is to **reverse the perspective**: instead of starting from each `1` and searching for `0`s (which would be inefficient), start from **all `0` cells simultaneously** and expand outward. Think of it like dropping stones into a pond at every `0` position — the ripples spread outward, and each `1` cell records when the first ripple reaches it. + + This is called **multi-source BFS**. By starting from all sources at once, each cell is visited exactly once, and the first time we reach any cell is guaranteed to be via the shortest path from some `0`. + + Alternatively, you can solve this with **dynamic programming** using two passes: one top-left to bottom-right, and one bottom-right to top-left. 
Each pass propagates minimum distances from the directions already processed. + + approach: | + We solve this using **Multi-source BFS**: + + **Step 1: Initialise the result matrix and queue** + + - Create a `dist` matrix of the same size as `mat` + - Set `dist[i][j] = 0` for all cells where `mat[i][j] == 0`, and add these to the BFS queue + - Set `dist[i][j] = infinity` for all cells where `mat[i][j] == 1` + +   + + **Step 2: Perform BFS from all zeros simultaneously** + + - While the queue is not empty, dequeue a cell `(r, c)` + - For each of its four neighbours `(nr, nc)`: + - If `dist[r][c] + 1 < dist[nr][nc]`, we found a shorter path + - Update `dist[nr][nc] = dist[r][c] + 1` + - Add `(nr, nc)` to the queue + +   + + **Step 3: Return the result** + + - Return the `dist` matrix after BFS completes + +   + + The BFS guarantees that when we first reach a cell, it's via the shortest path from some zero. Since all zeros start at distance 0 and expand level by level, distance 1 cells are processed before distance 2 cells, and so on. + + common_pitfalls: + - title: Running BFS from Each Cell + description: | + A naive approach is to run BFS from each `1` cell to find the nearest `0`. This results in **O((m*n)^2)** time complexity in the worst case. + + With the constraint `m * n <= 10^4`, this means up to 100 million operations — likely too slow. + + Multi-source BFS visits each cell exactly once, achieving **O(m*n)** time. + wrong_approach: "Separate BFS from each 1 cell" + correct_approach: "Multi-source BFS starting from all 0 cells" + + - title: Forgetting to Mark Visited Cells + description: | + If you don't track which cells have been processed, you might add the same cell to the queue multiple times, leading to incorrect results or infinite loops. + + Using the `dist` matrix itself handles this: a cell is "visited" when its distance is no longer infinity. Only unvisited cells with infinite distance are added to the queue. 
+ wrong_approach: "No visited tracking, cells re-added to queue" + correct_approach: "Use distance matrix to track visited status" + + - title: Incorrect Neighbour Bounds + description: | + When exploring neighbours, forgetting to check matrix bounds causes index errors. + + Always verify `0 <= nr < m` and `0 <= nc < n` before accessing `mat[nr][nc]`. + + key_takeaways: + - "**Multi-source BFS**: When finding shortest distances from multiple sources, start BFS from all sources simultaneously rather than running separate searches" + - "**Reverse the search direction**: Instead of searching from each target to sources, search from sources to targets — often more efficient" + - "**Level-by-level guarantee**: BFS processes cells in order of distance, so the first time you reach a cell is via the shortest path" + - "**Related problems**: This pattern applies to problems like 'Map of Highest Peak', 'Walls and Gates', and 'Rotting Oranges'" + + time_complexity: "O(m * n). Each cell is visited exactly once during BFS." + space_complexity: "O(m * n). We store the distance matrix and the BFS queue, which in the worst case contains all cells." 
+ +solutions: + - approach_name: Multi-source BFS + is_optimal: true + code: | + from collections import deque + + def update_matrix(mat: list[list[int]]) -> list[list[int]]: + m, n = len(mat), len(mat[0]) + # Initialise distances: 0 for zeros, infinity for ones + dist = [[0 if mat[i][j] == 0 else float('inf') + for j in range(n)] for i in range(m)] + + # Queue all zero cells as starting points + queue = deque() + for i in range(m): + for j in range(n): + if mat[i][j] == 0: + queue.append((i, j)) + + # Four directions: up, down, left, right + directions = [(-1, 0), (1, 0), (0, -1), (0, 1)] + + # BFS from all zeros simultaneously + while queue: + r, c = queue.popleft() + for dr, dc in directions: + nr, nc = r + dr, c + dc + # Check bounds and if we found a shorter path + if 0 <= nr < m and 0 <= nc < n: + if dist[r][c] + 1 < dist[nr][nc]: + dist[nr][nc] = dist[r][c] + 1 + queue.append((nr, nc)) + + return dist + explanation: | + **Time Complexity:** O(m * n) — Each cell is enqueued and processed exactly once. + + **Space Complexity:** O(m * n) — For the distance matrix and BFS queue. + + We start BFS from all zero cells simultaneously. Since BFS explores level by level, each cell is reached via the shortest path from some zero. The first time we update a cell's distance is the final answer for that cell. 
+ + - approach_name: Dynamic Programming (Two-Pass) + is_optimal: true + code: | + def update_matrix(mat: list[list[int]]) -> list[list[int]]: + m, n = len(mat), len(mat[0]) + # Use a large value instead of infinity for easier arithmetic + MAX_DIST = m + n + + # Initialise: 0 for zeros, large value for ones + dist = [[0 if mat[i][j] == 0 else MAX_DIST + for j in range(n)] for i in range(m)] + + # First pass: top-left to bottom-right + # Check cells above and to the left + for i in range(m): + for j in range(n): + if i > 0: + dist[i][j] = min(dist[i][j], dist[i-1][j] + 1) + if j > 0: + dist[i][j] = min(dist[i][j], dist[i][j-1] + 1) + + # Second pass: bottom-right to top-left + # Check cells below and to the right + for i in range(m - 1, -1, -1): + for j in range(n - 1, -1, -1): + if i < m - 1: + dist[i][j] = min(dist[i][j], dist[i+1][j] + 1) + if j < n - 1: + dist[i][j] = min(dist[i][j], dist[i][j+1] + 1) + + return dist + explanation: | + **Time Complexity:** O(m * n) — Two passes through the matrix. + + **Space Complexity:** O(m * n) — For the distance matrix (can be O(1) if modifying in-place is allowed). + + The DP approach works because the shortest path to any zero must come from one of four directions. The first pass propagates distances from top and left; the second pass propagates from bottom and right. After both passes, each cell has the minimum distance considering all four directions. 
diff --git a/backend/data/questions/132-pattern.yaml b/backend/data/questions/132-pattern.yaml new file mode 100644 index 0000000..905e108 --- /dev/null +++ b/backend/data/questions/132-pattern.yaml @@ -0,0 +1,227 @@ +title: 132 Pattern +slug: 132-pattern +difficulty: medium +leetcode_id: 456 +leetcode_url: https://leetcode.com/problems/132-pattern/ +categories: + - arrays + - stack +patterns: + - monotonic-stack + +description: | + Given an array of `n` integers `nums`, a **132 pattern** is a subsequence of three integers `nums[i]`, `nums[j]` and `nums[k]` such that `i < j < k` and `nums[i] < nums[k] < nums[j]`. + + Return `true` *if there is a **132 pattern** in* `nums`, *otherwise, return* `false`. + + The "132" name comes from the relative value ordering: the first element (`nums[i]`) is the smallest ("1"), the middle element (`nums[j]`) is the largest ("3"), and the last element (`nums[k]`) lies in between ("2"). + +constraints: | + - `n == nums.length` + - `1 <= n <= 2 * 10^5` + - `-10^9 <= nums[i] <= 10^9` + +examples: + - input: "nums = [1,2,3,4]" + output: "false" + explanation: "There is no 132 pattern in the sequence. The array is strictly increasing, so we can never find nums[k] < nums[j] where k > j." + - input: "nums = [3,1,4,2]" + output: "true" + explanation: "There is a 132 pattern in the sequence: [1, 4, 2]. Here i=1, j=2, k=3, and nums[1]=1 < nums[3]=2 < nums[2]=4." + - input: "nums = [-1,3,2,0]" + output: "true" + explanation: "There are three 132 patterns in the sequence: [-1, 3, 2], [-1, 3, 0] and [-1, 2, 0]." + +explanation: + intuition: | + Imagine you're looking for a specific "shape" in a sequence of numbers: a valley followed by a peak, with something in between on the right side. + + The pattern we seek is **"small, big, medium"** in terms of values, appearing at indices **i < j < k**. 
Think of it like finding a mountain range where: + - Position `i` is in a valley (smallest value) + - Position `j` is at the peak (largest value) + - Position `k` is partway up the mountain (between the two) + + The key insight is that if we scan from **right to left**, we can maintain candidates for the "2" (middle value, `nums[k]`) using a **monotonic stack**. As we pop elements from the stack, we're finding elements that could serve as valid `nums[k]` values — elements that are smaller than some `nums[j]` to their left. + + By tracking the largest popped value (our best candidate for `nums[k]`), we just need to find any `nums[i]` to the left that's smaller than this candidate. If we find one, we've completed the 132 pattern! + + approach: | + We solve this using a **Monotonic Stack (Right to Left)** approach: + + **Step 1: Initialise tracking variables** + + - `stack`: A monotonic decreasing stack to track potential `nums[j]` candidates + - `third`: The best candidate for `nums[k]` (the "2" in 132), initialised to negative infinity + +   + + **Step 2: Iterate from right to left** + + - For each element `num` at position `i`, we check if it could be `nums[i]` (the "1" in 132) + - If `num < third`, we found a valid pattern! The current `num` is smaller than our `third` candidate, and `third` came from being popped by some larger element (which serves as `nums[j]`) + - Otherwise, maintain the monotonic stack: + - While `num` is greater than the stack top, pop elements + - Each popped element becomes a candidate for `third` (keep the maximum) + - Push `num` onto the stack + +   + + **Step 3: Return the result** + + - If we found `num < third` at any point, return `true` + - If we finish the loop without finding a pattern, return `false` + +   + + Why does this work? When we pop an element from the stack, it means we found a larger element to its left (the current `num`). 
This popped element becomes a valid `third` because there exists some `nums[j] > nums[k]` with `j < k`. We then just need `nums[i] < nums[k]` with `i < j`. + + common_pitfalls: + - title: The Brute Force Trap + description: | + A natural first approach is three nested loops checking all combinations of `i < j < k`: + + ```python + for i in range(n): + for j in range(i+1, n): + for k in range(j+1, n): + if nums[i] < nums[k] < nums[j]: + return True + ``` + + This is **O(n³)** and with `n <= 2 * 10^5`, that's up to 8 × 10^15 operations — guaranteed **Time Limit Exceeded**. + wrong_approach: "Triple nested loops checking all triplets" + correct_approach: "Monotonic stack achieving O(n) time" + + - title: Confusing the Index Order vs Value Order + description: | + The "132" refers to the **value ordering**, not the index ordering: + - Index order: `i < j < k` (always left to right) + - Value order: `nums[i] < nums[k] < nums[j]` (small, medium, big → positions 1, 3, 2) + + A common mistake is looking for values in ascending order `nums[i] < nums[j] < nums[k]`, which would be a "123" pattern instead. + wrong_approach: "Looking for strictly increasing subsequence" + correct_approach: "Looking for small-big-medium pattern at indices i < j < k" + + - title: Forgetting to Track the Maximum Popped Element + description: | + When using the monotonic stack approach, some implementations only check the most recently popped element. However, we need to track the **maximum** of all popped elements. + + Consider `[1, 0, 1, -4, -3]`: we need to ensure our `third` candidate is the best possible one to maximise our chances of finding a valid `nums[i]`. + wrong_approach: "Only using the last popped element" + correct_approach: "Tracking the maximum of all popped elements as third" + + - title: Handling Duplicate Values + description: | + The pattern requires **strict inequalities**: `nums[i] < nums[k] < nums[j]`. Equal values don't count. 
+ + For `[1, 2, 2, 2]`, there's no valid 132 pattern because we can't have `nums[k] < nums[j]` when they're equal. + wrong_approach: "Using <= comparisons" + correct_approach: "Using strict < comparisons throughout" + + key_takeaways: + - "**Monotonic stack mastery**: This problem demonstrates how a monotonic stack can track relationships between elements in O(n) time" + - "**Right-to-left traversal**: Sometimes scanning backwards simplifies the logic — here it lets us build up `third` candidates before we need them" + - "**The popped element insight**: Elements popped from a monotonic stack have a guaranteed relationship with the element that caused the pop" + - "**Pattern recognition**: The 132 pattern appears in many variations (stock prices, sequences) — recognising this structure is valuable" + + time_complexity: "O(n). Each element is pushed and popped from the stack at most once, giving us linear time." + space_complexity: "O(n). In the worst case (strictly decreasing array), the stack holds all n elements." + +solutions: + - approach_name: Monotonic Stack (Right to Left) + is_optimal: true + code: | + def find132pattern(nums: list[int]) -> bool: + n = len(nums) + if n < 3: + return False + + stack = [] # Monotonic decreasing stack for nums[j] candidates + third = float('-inf') # Best candidate for nums[k] (the "2" in 132) + + # Scan from right to left + for i in range(n - 1, -1, -1): + # If current num is less than third, we found the pattern! 
+ # Current num is nums[i], third is nums[k], and nums[j] exists + # (it's the element that popped third from the stack) + if nums[i] < third: + return True + + # Maintain monotonic decreasing stack + # Pop smaller elements — they become candidates for third + while stack and nums[i] > stack[-1]: + third = stack.pop() # Update third to the largest popped value + + # Push current element as potential nums[j] + stack.append(nums[i]) + + return False + explanation: | + **Time Complexity:** O(n) — Each element is pushed and popped at most once. + + **Space Complexity:** O(n) — Stack can hold up to n elements. + + The key insight is that when we pop elements from the stack, those elements are smaller than the current element (potential `nums[j]`), making them valid candidates for `nums[k]`. We track the largest such candidate in `third`. Then any element smaller than `third` that we encounter later completes the pattern. + + - approach_name: Prefix Minimum with Stack + is_optimal: true + code: | + def find132pattern(nums: list[int]) -> bool: + n = len(nums) + if n < 3: + return False + + # Precompute minimum from the left for each position + min_left = [0] * n + min_left[0] = nums[0] + for i in range(1, n): + min_left[i] = min(min_left[i - 1], nums[i]) + + # Use stack to find valid j, k pairs where min_left[j] < nums[k] < nums[j] + stack = [] # Stack of indices for potential nums[k] + + # Scan from right to left + for j in range(n - 1, -1, -1): + # We need nums[i] < nums[k] < nums[j] + # min_left[j] gives us the minimum nums[i] to the left of j + if nums[j] > min_left[j]: + # Pop elements that are too small to be nums[k] + while stack and nums[stack[-1]] <= min_left[j]: + stack.pop() + + # Check if top of stack is a valid nums[k] + if stack and nums[stack[-1]] < nums[j]: + return True + + stack.append(j) + + return False + explanation: | + **Time Complexity:** O(n) — Two passes: one for prefix minimum, one with stack. 
+ + **Space Complexity:** O(n) — For the prefix minimum array and stack. + + This approach explicitly tracks the minimum to the left of each position. For each potential `nums[j]`, we know exactly what `nums[i]` would need to beat (`min_left[j]`). The stack maintains candidates for `nums[k]` that satisfy the constraints. + + - approach_name: Brute Force + is_optimal: false + code: | + def find132pattern(nums: list[int]) -> bool: + n = len(nums) + + for j in range(1, n - 1): + # Find minimum to the left of j + min_i = min(nums[:j]) + + # Check if any element to the right satisfies the pattern + for k in range(j + 1, n): + if min_i < nums[k] < nums[j]: + return True + + return False + explanation: | + **Time Complexity:** O(n²) — For each j, we scan left for min and right for valid k. + + **Space Complexity:** O(n) — The slice `nums[:j]` creates a temporary copy of up to n elements (tracking a running minimum instead would reduce this to O(1)). + + This optimised brute force fixes `j` as the middle element, finds the minimum to its left, then searches for a valid `nums[k]` to its right. While better than O(n³), it's still too slow for large inputs and will TLE on LeetCode. Included to show the progression toward the optimal solution. diff --git a/backend/data/questions/2-keys-keyboard.yaml b/backend/data/questions/2-keys-keyboard.yaml new file mode 100644 index 0000000..f78465b --- /dev/null +++ b/backend/data/questions/2-keys-keyboard.yaml @@ -0,0 +1,169 @@ +title: 2 Keys Keyboard +slug: 2-keys-keyboard +difficulty: medium +leetcode_id: 650 +leetcode_url: https://leetcode.com/problems/2-keys-keyboard/ +categories: + - dynamic-programming + - math +patterns: + - dynamic-programming + +description: | + There is only one character `'A'` on the screen of a notepad. You can perform one of two operations on this notepad for each step: + + - **Copy All**: You can copy all the characters present on the screen (a partial copy is not allowed). + - **Paste**: You can paste the characters which are copied last time. 
+ + Given an integer `n`, return *the minimum number of operations to get the character* `'A'` *exactly* `n` *times on the screen*. + +constraints: | + - `1 <= n <= 1000` + +examples: + - input: "n = 3" + output: "3" + explanation: "Initially, we have one character 'A'. In step 1, we use Copy All operation. In step 2, we use Paste operation to get 'AA'. In step 3, we use Paste operation to get 'AAA'." + - input: "n = 1" + output: "0" + explanation: "We already have one 'A' on the screen, so no operations are needed." + +explanation: + intuition: | + Imagine you're a factory manager trying to manufacture `n` identical items. You start with 1 item. Your only tools are a **copying machine** (which copies everything you have) and a **duplicator** (which produces copies of the last batch you made). + + The key insight is that this problem is fundamentally about **prime factorisation**. Here's why: + + If `n = 6`, you could: + - Copy 1, Paste 5 times → 6 operations (1 → 2 → 3 → 4 → 5 → 6) + - Or: Copy 1, Paste twice (get 3), Copy 3, Paste once (get 6) → 5 operations + + The second approach is better because we're **multiplying** rather than adding. When we copy at `k` A's and paste `p-1` times, we multiply our count by `p` using exactly `p` operations (1 copy + (p-1) pastes). + + This means: **to reach `n` A's optimally, we want to factor `n` into primes and "pay" for each prime factor**. If `n = p₁ × p₂ × ... × pₖ`, the minimum operations is `p₁ + p₂ + ... + pₖ`. + + Think of it like this: each prime factor represents a "multiplying stage" where we copy once and paste multiple times to multiply our current count. 
+ + approach: | + We solve this using **Prime Factorisation**: + + **Step 1: Handle the base case** + + - If `n == 1`, return `0` — we already have one 'A', no operations needed + +   + + **Step 2: Find prime factors and sum them** + + - Start with a divisor `d = 2` + - While `d * d <= n`: + - While `n` is divisible by `d`: + - Add `d` to our operation count + - Divide `n` by `d` + - Increment `d` + - If `n > 1` after the loop, `n` itself is a prime factor — add it to the count + +   + + **Step 3: Return the sum of prime factors** + + - The total sum equals the minimum number of operations + +   + + **Why this works**: To multiply your screen count by a factor `p`, you need exactly `p` operations: 1 Copy All + (p-1) Pastes. Since we must reach exactly `n` starting from 1, we're essentially "building up" through multiplication, and the optimal way is to use the prime factors. + + common_pitfalls: + - title: Treating This as a Standard DP Problem + description: | + While this *can* be solved with dynamic programming where `dp[i]` = minimum operations to get `i` A's, the mathematical insight about prime factorisation is more elegant and efficient. + + The DP approach would require O(n√n) or O(n²) time to fill the table, checking all divisors for each number. The prime factorisation approach runs in O(√n) time. + wrong_approach: "DP with O(n²) divisor checking" + correct_approach: "Prime factorisation in O(√n)" + + - title: Forgetting That n=1 Needs Zero Operations + description: | + When `n = 1`, we already have one 'A' on screen. No copy or paste is needed. Make sure to handle this edge case by returning `0`. + wrong_approach: "Returning 1 for n=1" + correct_approach: "Return 0 since we start with one 'A'" + + - title: Missing the Last Prime Factor + description: | + After the factorisation loop where `d * d <= n`, if `n > 1`, then `n` itself is a prime factor that must be added. 
+ + For example, with `n = 6`: after dividing by 2 (getting 3) and incrementing d to 3, we have `d * d = 9 > 3`, so the loop exits. But 3 is still a prime factor that needs to be counted. + wrong_approach: "Stopping when d*d > n without checking remaining n" + correct_approach: "Add remaining n to sum if n > 1 after loop" + + key_takeaways: + - "**Math insight transforms complexity**: Recognising the prime factorisation connection reduces this from a DP problem to a simple factorisation" + - "**Operations as multiplication**: Copy+Paste sequences multiply your count by factors, so optimal operations = sum of prime factors" + - "**Pattern recognition**: Many problems that seem to require DP have elegant mathematical solutions when you analyse the structure" + - "**Similar problems**: This pattern appears in problems involving minimum steps to reach a target through multiplication/division operations" + + time_complexity: "O(√n). We iterate up to √n to find all prime factors, and each division reduces n significantly." + space_complexity: "O(1). We only use a few variables to track the divisor and operation count." + +solutions: + - approach_name: Prime Factorisation + is_optimal: true + code: | + def min_steps(n: int) -> int: + # Base case: already have 1 'A', no operations needed + if n == 1: + return 0 + + operations = 0 + divisor = 2 + + # Find all prime factors and sum them + while divisor * divisor <= n: + # While current divisor is a factor, add it + while n % divisor == 0: + operations += divisor + n //= divisor + divisor += 1 + + # If n > 1, it's a prime factor itself + if n > 1: + operations += n + + return operations + explanation: | + **Time Complexity:** O(√n) — We only iterate up to √n to find prime factors. + + **Space Complexity:** O(1) — Only a few integer variables used. + + The algorithm finds all prime factors of n and sums them. 
Each prime factor p represents a sequence of 1 Copy + (p-1) Pastes, which multiplies the current count by p using exactly p operations. + + - approach_name: Dynamic Programming + is_optimal: false + code: | + def min_steps(n: int) -> int: + # dp[i] = minimum operations to get exactly i 'A's + dp = [0] * (n + 1) + + for i in range(2, n + 1): + # Worst case: copy 1 'A' and paste i-1 times + dp[i] = i + + # Try all possible divisors + # If j divides i, we can reach i from i//j + # by copying at i//j and pasting j-1 times + j = 2 + while j * j <= i: + if i % j == 0: + # j operations to multiply by j + dp[i] = min(dp[i], dp[i // j] + j) + # i//j operations to multiply by i//j + dp[i] = min(dp[i], dp[j] + i // j) + j += 1 + + return dp[n] + explanation: | + **Time Complexity:** O(n√n) — For each number up to n, we check divisors up to √i. + + **Space Complexity:** O(n) — We store the DP array of size n+1. + + This approach builds up the solution from smaller subproblems. For each count i, we find the minimum operations by checking all ways to reach i through copy-paste sequences. While correct, it's less efficient than the mathematical approach. diff --git a/backend/data/questions/24-game.yaml b/backend/data/questions/24-game.yaml new file mode 100644 index 0000000..2ba2002 --- /dev/null +++ b/backend/data/questions/24-game.yaml @@ -0,0 +1,294 @@ +title: 24 Game +slug: 24-game +difficulty: hard +leetcode_id: 679 +leetcode_url: https://leetcode.com/problems/24-game/ +categories: + - arrays + - math + - recursion +patterns: + - backtracking + +description: | + You are given an integer array `cards` of length `4`. You have four cards, each containing a number in the range `[1, 9]`. You should arrange the numbers on these cards in a mathematical expression using the operators `['+', '-', '*', '/']` and the parentheses `'('` and `')'` to get the value 24. 
+ + You are restricted with the following rules: + + - The division operator `'/'` represents **real division**, not integer division. + - For example, `4 / (1 - 2 / 3) = 4 / (1 / 3) = 12`. + - Every operation done is between two numbers. In particular, we cannot use `'-'` as a unary operator. + - For example, if `cards = [1, 1, 1, 1]`, the expression `"-1 - 1 - 1 - 1"` is **not allowed**. + - You cannot concatenate numbers together. + - For example, if `cards = [1, 2, 1, 2]`, the expression `"12 + 12"` is not valid. + + Return `true` if you can get such expression that evaluates to `24`, and `false` otherwise. + +constraints: | + - `cards.length == 4` + - `1 <= cards[i] <= 9` + +examples: + - input: "cards = [4,1,8,7]" + output: "true" + explanation: "(8-4) * (7-1) = 24" + - input: "cards = [1,2,1,2]" + output: "false" + explanation: "No valid combination of operations can produce 24 with these cards." + +explanation: + intuition: | + Imagine you have four number cards on a table, and you need to combine them using basic arithmetic to reach exactly 24. You can use any order, any operations, and any grouping with parentheses. + + The key insight is that this is a **combinatorial search problem**. With only 4 cards, the search space is bounded and small enough to explore exhaustively. We need to try all possible ways to: + 1. Pick two numbers to combine + 2. Apply one of the four operations + 3. Replace those two numbers with the result + 4. Repeat until one number remains + + Think of it like this: each operation reduces the count of numbers by one. Starting with 4 numbers, after one operation we have 3 numbers; after another we have 2; after the final operation we have 1 result. If that result equals 24, we've found a solution. + + The backtracking approach naturally handles parentheses. When we pick which two numbers to combine first, we're implicitly choosing the grouping. 
For example, computing `(a + b)` first and then using that result with `c` is equivalent to `(a + b) op c`. + + One subtle point: we use **floating-point arithmetic** because division can produce non-integers. We also need an **epsilon comparison** for the final check since floating-point operations accumulate small errors. + + approach: | + We solve this using **Recursive Backtracking**: + + **Step 1: Base case** + + - If only one number remains in our list, check if it equals 24 (within a small epsilon for floating-point tolerance) + - If yes, return `True`; otherwise return `False` + +   + + **Step 2: Pick two numbers** + + - Iterate through all pairs of indices `(i, j)` where `i < j` + - Extract the two numbers at these positions + +   + + **Step 3: Apply all operations** + + - For each pair, try all four operations: `+`, `-`, `*`, `/` + - For non-commutative operations (`-` and `/`), try both orders: `a op b` and `b op a` + - Skip division by zero cases + +   + + **Step 4: Create new list and recurse** + + - Remove the two chosen numbers from the list + - Add the result of the operation + - Recursively call the function with the new smaller list + +   + + **Step 5: Backtrack on success** + + - If any recursive path returns `True`, propagate it up immediately + - If all paths fail, return `False` + +   + + This exhaustive search guarantees we find a solution if one exists. The small input size (exactly 4 cards) makes brute force feasible. + + common_pitfalls: + - title: Forgetting Non-Commutative Operations + description: | + Subtraction and division are not commutative: `a - b != b - a` and `a / b != b / a`. + + For example, with cards `[2, 3, 4, 6]`: + - `6 / (3 - 2) * 4 = 24` works + - But `(2 - 3) = -1` leads nowhere useful + + You must try **both orders** for `-` and `/` operations. 
+ wrong_approach: "Only trying a - b and a / b" + correct_approach: "Try both a op b and b op a for non-commutative operations" + + - title: Integer Division vs Real Division + description: | + The problem specifies **real division**, not integer division. Using integer division will produce wrong results. + + For example, `1 / 3 = 0.333...` not `0`. The expression `4 / (1 - 2/3)` requires real division to work correctly. + + Use floating-point numbers throughout the computation. + wrong_approach: "Using integer division (// in Python)" + correct_approach: "Use float division and convert cards to floats" + + - title: Floating-Point Comparison + description: | + Due to floating-point precision errors, comparing `result == 24` directly can fail. + + For example, `8 / 3 * 9` might yield `23.999999999999996` instead of exactly `24`. + + Use an epsilon comparison: `abs(result - 24) < 1e-9`. + wrong_approach: "Checking result == 24 exactly" + correct_approach: "Checking abs(result - 24) < epsilon" + + - title: Division by Zero + description: | + When trying division, you must skip cases where the divisor is zero (or very close to zero). + + For example, if a subtraction produces `0`, dividing by it would cause an error or infinity. + + Check `abs(divisor) > epsilon` before attempting division. + + key_takeaways: + - "**Backtracking for exhaustive search**: When the search space is small and bounded, brute force with backtracking is both simple and effective" + - "**Reduce and recurse**: Combining two numbers into one reduces the problem size, leading naturally to recursion" + - "**Handle operator asymmetry**: Non-commutative operations (`-`, `/`) require trying both orderings" + - "**Floating-point awareness**: Real division requires float arithmetic and epsilon comparisons to handle precision errors" + + time_complexity: "O(1). 
With exactly 4 cards, the number of states is bounded: the number of unordered pairs to pick shrinks from C(4,2) = 6 to C(3,2) = 3 to C(2,2) = 1 across the 3 levels, and each pair admits 6 operations (4 ops, with 2 orderings for the non-commutative ones). This gives at most (6 * 6) * (3 * 6) * (1 * 6) = 3888 leaf evaluations." + space_complexity: "O(1). The recursion depth is at most 3 (reducing from 4 numbers to 1), and each level uses a constant-size list. Since input size is fixed at 4, space is constant." + +solutions: + - approach_name: Recursive Backtracking + is_optimal: true + code: | + def judgePoint24(cards: list[int]) -> bool: + # Tolerance for floating-point comparison + EPSILON = 1e-9 + TARGET = 24.0 + + def solve(nums: list[float]) -> bool: + # Base case: one number left, check if it equals 24 + if len(nums) == 1: + return abs(nums[0] - TARGET) < EPSILON + + # Try all pairs of numbers + for i in range(len(nums)): + for j in range(len(nums)): + if i == j: + continue + + # Create new list without the two chosen numbers + remaining = [] + for k in range(len(nums)): + if k != i and k != j: + remaining.append(nums[k]) + + a, b = nums[i], nums[j] + + # Try all operations + # Addition (commutative, only need one order) + if i < j: # Avoid duplicate for commutative ops + if solve(remaining + [a + b]): + return True + if solve(remaining + [a * b]): + return True + + # Subtraction (non-commutative, need both orders) + if solve(remaining + [a - b]): + return True + + # Division (non-commutative, need both orders) + if abs(b) > EPSILON: + if solve(remaining + [a / b]): + return True + + return False + + # Convert to floats for real division + return solve([float(c) for c in cards]) + explanation: | + **Time Complexity:** O(1) — The input is always exactly 4 cards, giving a bounded number of combinations to try. + + **Space Complexity:** O(1) — Recursion depth is at most 3, and each level uses constant space. + + The algorithm exhaustively tries all ways to combine pairs of numbers with all operations. 
By reducing the problem size with each combination, we naturally explore all possible expression trees. The epsilon comparison handles floating-point precision issues. + + - approach_name: Iterative with All Permutations + is_optimal: false + code: | + from itertools import permutations, product + + def judgePoint24(cards: list[int]) -> bool: + EPSILON = 1e-9 + TARGET = 24.0 + + def compute(a: float, b: float, op: int) -> float | None: + """Apply operation, return None for invalid (div by zero).""" + if op == 0: + return a + b + elif op == 1: + return a - b + elif op == 2: + return a * b + elif op == 3: + return a / b if abs(b) > EPSILON else None + return None + + # Try all permutations of cards + for perm in permutations(cards): + a, b, c, d = [float(x) for x in perm] + + # Try all combinations of 3 operations + for ops in product(range(4), repeat=3): + # There are 5 ways to parenthesise 4 numbers: + # 1. ((a op b) op c) op d + # 2. (a op (b op c)) op d + # 3. (a op b) op (c op d) + # 4. a op ((b op c) op d) + # 5. 
a op (b op (c op d)) + + results = [] + + # Structure 1: ((a op b) op c) op d + r1 = compute(a, b, ops[0]) + if r1 is not None: + r2 = compute(r1, c, ops[1]) + if r2 is not None: + r3 = compute(r2, d, ops[2]) + if r3 is not None: + results.append(r3) + + # Structure 2: (a op (b op c)) op d + r1 = compute(b, c, ops[1]) + if r1 is not None: + r2 = compute(a, r1, ops[0]) + if r2 is not None: + r3 = compute(r2, d, ops[2]) + if r3 is not None: + results.append(r3) + + # Structure 3: (a op b) op (c op d) + r1 = compute(a, b, ops[0]) + r2 = compute(c, d, ops[2]) + if r1 is not None and r2 is not None: + r3 = compute(r1, r2, ops[1]) + if r3 is not None: + results.append(r3) + + # Structure 4: a op ((b op c) op d) + r1 = compute(b, c, ops[1]) + if r1 is not None: + r2 = compute(r1, d, ops[2]) + if r2 is not None: + r3 = compute(a, r2, ops[0]) + if r3 is not None: + results.append(r3) + + # Structure 5: a op (b op (c op d)) + r1 = compute(c, d, ops[2]) + if r1 is not None: + r2 = compute(b, r1, ops[1]) + if r2 is not None: + r3 = compute(a, r2, ops[0]) + if r3 is not None: + results.append(r3) + + # Check if any result equals 24 + for result in results: + if abs(result - TARGET) < EPSILON: + return True + + return False + explanation: | + **Time Complexity:** O(1) — 24 permutations * 64 operation combinations * 5 structures = 7680 cases to check. + + **Space Complexity:** O(1) — Only uses a fixed number of variables. + + This approach explicitly enumerates all 5 possible tree structures (parenthesisations) for 4 numbers. While more verbose than the recursive solution, it makes the search space explicit. Both approaches are equivalent for this fixed-size problem. 
diff --git a/backend/data/questions/3sum-with-multiplicity.yaml b/backend/data/questions/3sum-with-multiplicity.yaml new file mode 100644 index 0000000..ebca770 --- /dev/null +++ b/backend/data/questions/3sum-with-multiplicity.yaml @@ -0,0 +1,227 @@ +title: 3Sum With Multiplicity +slug: 3sum-with-multiplicity +difficulty: medium +leetcode_id: 923 +leetcode_url: https://leetcode.com/problems/3sum-with-multiplicity/ +categories: + - arrays + - hash-tables + - two-pointers +patterns: + - two-pointers + +description: | + Given an integer array `arr`, and an integer `target`, return the number of tuples `i, j, k` such that `i < j < k` and `arr[i] + arr[j] + arr[k] == target`. + + As the answer can be very large, return it **modulo** `10^9 + 7`. + +constraints: | + - `3 <= arr.length <= 3000` + - `0 <= arr[i] <= 100` + - `0 <= target <= 300` + +examples: + - input: "arr = [1,1,2,2,3,3,4,4,5,5], target = 8" + output: "20" + explanation: "Enumerating by values: (1, 2, 5) occurs 8 times; (1, 3, 4) occurs 8 times; (2, 2, 4) occurs 2 times; (2, 3, 3) occurs 2 times." + - input: "arr = [1,1,2,2,2,2], target = 5" + output: "12" + explanation: "arr[i] = 1, arr[j] = arr[k] = 2 occurs 12 times: We choose one 1 from [1,1] in 2 ways, and two 2s from [2,2,2,2] in 6 ways (C(4,2) = 6)." + - input: "arr = [2,1,3], target = 6" + output: "1" + explanation: "(1, 2, 3) occurred one time in the array so we return 1." + +explanation: + intuition: | + This problem is a twist on the classic 3Sum problem. Instead of finding unique triplets that sum to a target, we need to **count** all valid index combinations `(i, j, k)` where `i < j < k`. + + The key insight is that the array values are **bounded** (`0 <= arr[i] <= 100`). This small range means there are at most 101 distinct values, which opens up a counting-based approach. + + Think of it like this: instead of focusing on indices, we can focus on **values**. 
If we know the count of each value in the array, we can mathematically compute how many ways to pick indices that form a valid triplet. + + For three values `x`, `y`, `z` where `x + y + z == target`: + - If all three are **distinct**: multiply their counts (`count[x] * count[y] * count[z]`) + - If **two are equal** (e.g., `x == y`): use combinations formula (`C(count[x], 2) * count[z]`) + - If **all three are equal** (`x == y == z`): use combinations formula (`C(count[x], 3)`) + + This transforms the problem from O(n³) index iteration to O(101³) value iteration — a massive improvement when `n` is large. + + approach: | + We solve this using a **Counting Approach** that leverages the bounded value range: + + **Step 1: Count the frequency of each value** + + - Create a frequency array `count` of size 101 (since values range from 0 to 100) + - Iterate through `arr` and increment `count[val]` for each value + +   + + **Step 2: Iterate through all possible value triplets** + + - Use three nested loops for values `i`, `j`, `k` where `i <= j <= k` + - This ensures we don't double-count triplets like (1, 2, 3) and (2, 1, 3) + - Check if `i + j + k == target` + +   + + **Step 3: Calculate combinations based on value equality** + + - **Case 1: All distinct** (`i < j < k`): Add `count[i] * count[j] * count[k]` + - **Case 2: First two equal** (`i == j < k`): Add `C(count[i], 2) * count[k]` = `count[i] * (count[i] - 1) / 2 * count[k]` + - **Case 3: Last two equal** (`i < j == k`): Add `count[i] * C(count[j], 2)` = `count[i] * count[j] * (count[j] - 1) / 2` + - **Case 4: All equal** (`i == j == k`): Add `C(count[i], 3)` = `count[i] * (count[i] - 1) * (count[i] - 2) / 6` + +   + + **Step 4: Apply modulo and return** + + - Keep result modulo `10^9 + 7` throughout to prevent overflow + - Return the final count + + common_pitfalls: + - title: The Brute Force Index Trap + description: | + A naive approach iterates through all index triplets `(i, j, k)` with three nested loops. 
+ + With `n = 3000`, this means `3000³ = 27 billion` operations — far too slow. The problem constraints specifically allow values up to 100, hinting that we should count values rather than iterate indices. + wrong_approach: "Triple nested loops over indices O(n³)" + correct_approach: "Count frequencies and iterate over value combinations O(101³)" + + - title: Forgetting the Modulo + description: | + The answer can be astronomically large. With many duplicate values, the number of combinations grows cubically with the array length (up to `C(n, 3)`). + + For example, an array of 3000 identical values that sum to the target would have `C(3000, 3) ≈ 4.5 billion` combinations. Always apply `% (10^9 + 7)` to intermediate results. + wrong_approach: "Accumulating without modulo, causing integer overflow" + correct_approach: "Apply modulo after each addition" + + - title: Double-Counting Triplets + description: | + When iterating values, you might accidentally count the triplet `(1, 2, 3)` multiple times if you don't enforce an ordering constraint. + + By ensuring `i <= j <= k` in your loops, each unique value triplet is considered exactly once. + wrong_approach: "Unordered iteration over all value pairs" + correct_approach: "Enforce i <= j <= k ordering" + + - title: Incorrect Combination Formulas + description: | + When two or more values are equal, you need the combinations formula, not simple multiplication. + + Picking 2 items from `n` identical items: `C(n, 2) = n * (n - 1) / 2` + Picking 3 items from `n` identical items: `C(n, 3) = n * (n - 1) * (n - 2) / 6` + + Integer division must happen after the multiplication to avoid truncation errors. 
+ wrong_approach: "Using count² or count³ for repeated values" + correct_approach: "Use proper combinations C(n, k) formulas" + + key_takeaways: + - "**Counting over bounded domains**: When values are constrained to a small range, counting frequencies can reduce complexity dramatically" + - "**Combinations for duplicates**: When selecting multiple items of the same type, use `C(n, k)` formulas rather than simple multiplication" + - "**Modular arithmetic**: For large counts, apply modulo at each step to prevent overflow" + - "**Order constraints prevent double-counting**: Enforcing `i <= j <= k` ensures each unique combination is counted exactly once" + + time_complexity: "O(n + V²) where V is the value range (101). We count frequencies in O(n), then iterate over value pairs (i, j) and derive k = target - i - j directly, so at most 101² ≈ 10,000 pairs are examined (looping over k as well would cost O(V³))." + space_complexity: "O(V) where V is 101. We store a frequency count for each possible value in the range [0, 100]." + +solutions: + - approach_name: Counting with Combinatorics + is_optimal: true + code: | + def three_sum_multi(arr: list[int], target: int) -> int: + MOD = 10**9 + 7 + + # Count frequency of each value (values range 0-100) + count = [0] * 101 + for val in arr: + count[val] += 1 + + result = 0 + + # Iterate through all value triplets where i <= j <= k + for i in range(101): + for j in range(i, 101): + k = target - i - j + + # k must be in valid range and >= j (to avoid double counting) + if k < j or k > 100: + continue + + if count[i] == 0 or count[j] == 0 or count[k] == 0: + continue + + if i == j == k: + # All three values equal: C(count[i], 3) + result += count[i] * (count[i] - 1) * (count[i] - 2) // 6 + elif i == j: + # First two equal: C(count[i], 2) * count[k] + result += count[i] * (count[i] - 1) // 2 * count[k] + elif j == k: + # Last two equal: count[i] * C(count[j], 2) + result += count[i] * count[j] * (count[j] - 1) // 2 + else: + # All distinct: simple multiplication + result += count[i] * count[j] * count[k] + + result %= MOD + + 
return result + explanation: | + **Time Complexity:** O(n + V²) where V = 101 — We count in O(n), then iterate i and j while computing k directly. + + **Space Complexity:** O(V) — Frequency array of size 101. + + By counting value frequencies first, we transform index-based enumeration into value-based enumeration. The combinations formulas handle duplicate values correctly, ensuring each valid index triplet is counted exactly once. + + - approach_name: Two Pointers with Sorting + is_optimal: false + code: | + def three_sum_multi(arr: list[int], target: int) -> int: + MOD = 10**9 + 7 + arr.sort() + n = len(arr) + result = 0 + + for i in range(n - 2): + # Two pointers for remaining sum + left, right = i + 1, n - 1 + remaining = target - arr[i] + + while left < right: + current_sum = arr[left] + arr[right] + + if current_sum < remaining: + left += 1 + elif current_sum > remaining: + right -= 1 + else: + # Found a valid triplet sum + if arr[left] == arr[right]: + # All values between left and right are equal + count = right - left + 1 + result += count * (count - 1) // 2 + break + else: + # Count duplicates on left and right + left_count = 1 + while left + 1 < right and arr[left] == arr[left + 1]: + left += 1 + left_count += 1 + + right_count = 1 + while right - 1 > left and arr[right] == arr[right - 1]: + right -= 1 + right_count += 1 + + result += left_count * right_count + left += 1 + right -= 1 + + result %= MOD + + return result + explanation: | + **Time Complexity:** O(n² + n log n) — Sorting takes O(n log n), then we have O(n) iterations of the outer loop, each with O(n) two-pointer work. + + **Space Complexity:** O(1) or O(n) depending on sorting implementation. + + This approach sorts the array first, then uses the classic two-pointer technique for 3Sum. When duplicates are found, we count them and multiply. While this is less optimal than the counting approach for small value ranges, it works for any value range. 
diff --git a/backend/data/questions/4sum-ii.yaml b/backend/data/questions/4sum-ii.yaml new file mode 100644 index 0000000..827e8b4 --- /dev/null +++ b/backend/data/questions/4sum-ii.yaml @@ -0,0 +1,166 @@ +title: 4Sum II +slug: 4sum-ii +difficulty: medium +leetcode_id: 454 +leetcode_url: https://leetcode.com/problems/4sum-ii/ +categories: + - arrays + - hash-tables +patterns: + - two-pointers + +description: | + Given four integer arrays `nums1`, `nums2`, `nums3`, and `nums4` all of length `n`, return the number of tuples `(i, j, k, l)` such that: + + - `0 <= i, j, k, l < n` + - `nums1[i] + nums2[j] + nums3[k] + nums4[l] == 0` + +constraints: | + - `n == nums1.length == nums2.length == nums3.length == nums4.length` + - `1 <= n <= 200` + - `-2^28 <= nums1[i], nums2[i], nums3[i], nums4[i] <= 2^28` + +examples: + - input: "nums1 = [1,2], nums2 = [-2,-1], nums3 = [-1,2], nums4 = [0,2]" + output: "2" + explanation: "The two tuples are: (0, 0, 0, 1) -> 1 + (-2) + (-1) + 2 = 0, and (1, 1, 0, 0) -> 2 + (-1) + (-1) + 0 = 0." + - input: "nums1 = [0], nums2 = [0], nums3 = [0], nums4 = [0]" + output: "1" + explanation: "The only tuple is (0, 0, 0, 0) -> 0 + 0 + 0 + 0 = 0." + +explanation: + intuition: | + At first glance, this looks like a problem requiring four nested loops to check every combination — but that would be O(n^4), far too slow. + + The key insight is to **split the problem in half**. Think of it like this: instead of finding four numbers that sum to zero, find two numbers from the first half (arrays 1 and 2) and two numbers from the second half (arrays 3 and 4) that cancel each other out. + + If `a + b + c + d = 0`, then `a + b = -(c + d)`. + + This transforms the problem into a **Two Sum variant**: for every possible sum from the first two arrays, check how many times its negation appears among sums from the last two arrays. 
+ + By precomputing all possible sums from arrays 1 and 2 into a hash map, we can then iterate through arrays 3 and 4 and look up complements in O(1) time. This reduces the complexity from O(n^4) to O(n^2). + + approach: | + We solve this using a **Hash Map with Split Arrays** approach: + + **Step 1: Build a hash map of sums from the first two arrays** + + - Create an empty hash map `sum_count` to store `{sum: frequency}` + - Iterate through all pairs `(a, b)` from `nums1` and `nums2` + - For each pair, calculate `a + b` and increment its count in the hash map + +   + + **Step 2: Count complements from the last two arrays** + + - Initialise `count = 0` to track the total number of valid tuples + - Iterate through all pairs `(c, d)` from `nums3` and `nums4` + - For each pair, calculate `target = -(c + d)` + - If `target` exists in `sum_count`, add `sum_count[target]` to our count + +   + + **Step 3: Return the result** + + - Return `count` as the total number of tuples that sum to zero + +   + + This approach works because each time we find a matching complement, we're counting all valid combinations: if `a + b` appears 3 times and `-(c + d)` matches it, that's 3 valid tuples for this specific `(c, d)` pair. + + common_pitfalls: + - title: The Brute Force Trap + description: | + The naive approach uses four nested loops to check every possible `(i, j, k, l)` combination: + + ```python + for i in nums1: + for j in nums2: + for k in nums3: + for l in nums4: + if i + j + k + l == 0: + count += 1 + ``` + + This results in **O(n^4) time complexity**. With `n = 200`, that's 1.6 billion operations — guaranteed TLE. + wrong_approach: "Four nested loops checking all combinations" + correct_approach: "Split into two groups and use hash map for O(n^2)" + + - title: Forgetting to Count Duplicates + description: | + A common mistake is using a set instead of a counting map for the first two arrays. 
If the same sum `a + b` can be formed in multiple ways (e.g., `1 + 2` and `0 + 3` both equal 3), each occurrence represents a different valid tuple. + + Using a set would only count one match per sum value, missing valid combinations. + wrong_approach: "Using a set to store sums from first two arrays" + correct_approach: "Using a hash map with frequency counts" + + - title: Looking Up Wrong Complement + description: | + When searching for complements, ensure you're looking for `-(c + d)`, not `(c + d)`. We need the sum from the first half to be the **negation** of the sum from the second half so they cancel to zero. + wrong_approach: "Looking up (c + d) in the hash map" + correct_approach: "Looking up -(c + d) to find values that sum to zero" + + key_takeaways: + - "**Divide and conquer with hashing**: When dealing with multiple arrays, consider splitting them and using a hash map to bridge the halves" + - "**Two Sum generalisation**: This is essentially Two Sum where each 'number' is a sum of elements from two arrays" + - "**Time-space tradeoff**: We use O(n^2) extra space to reduce time from O(n^4) to O(n^2)" + - "**Count, don't just detect**: When duplicates matter, use a frequency map instead of a set" + + time_complexity: "O(n^2). We iterate through n^2 pairs for the first two arrays to build the map, then another n^2 pairs for the last two arrays to find complements." + space_complexity: "O(n^2). The hash map can store up to n^2 different sums from the first two arrays." 
+ +solutions: + - approach_name: Hash Map with Split Arrays + is_optimal: true + code: | + from collections import defaultdict + + def four_sum_count(nums1: list[int], nums2: list[int], + nums3: list[int], nums4: list[int]) -> int: + # Store all possible sums from first two arrays with their frequencies + sum_count = defaultdict(int) + + # Build the hash map: O(n^2) + for a in nums1: + for b in nums2: + sum_count[a + b] += 1 + + # Count complements from last two arrays: O(n^2) + count = 0 + for c in nums3: + for d in nums4: + # We need a + b + c + d = 0, so a + b = -(c + d) + target = -(c + d) + # Add the number of ways to form this sum from first two arrays + count += sum_count[target] + + return count + explanation: | + **Time Complexity:** O(n^2) — Two passes of n^2 iterations each. + + **Space Complexity:** O(n^2) — Hash map stores up to n^2 sums. + + We split the four arrays into two groups. First, we precompute all sums from arrays 1 and 2, storing their frequencies. Then, for each sum from arrays 3 and 4, we look up how many complementary sums exist. The `defaultdict(int)` returns 0 for missing keys, simplifying the lookup. + + - approach_name: Brute Force + is_optimal: false + code: | + def four_sum_count(nums1: list[int], nums2: list[int], + nums3: list[int], nums4: list[int]) -> int: + count = 0 + + # Check every possible combination of indices + for a in nums1: + for b in nums2: + for c in nums3: + for d in nums4: + if a + b + c + d == 0: + count += 1 + + return count + explanation: | + **Time Complexity:** O(n^4) — Four nested loops. + + **Space Complexity:** O(1) — Only a counter variable. + + This approach checks every possible tuple directly. While correct, it's prohibitively slow for the given constraints (n up to 200). Included to illustrate why the hash map optimisation is essential. 
diff --git a/backend/data/questions/a-number-after-a-double-reversal.yaml b/backend/data/questions/a-number-after-a-double-reversal.yaml new file mode 100644 index 0000000..acd6739 --- /dev/null +++ b/backend/data/questions/a-number-after-a-double-reversal.yaml @@ -0,0 +1,151 @@ +title: A Number After a Double Reversal +slug: a-number-after-a-double-reversal +difficulty: easy +leetcode_id: 2119 +leetcode_url: https://leetcode.com/problems/a-number-after-a-double-reversal/ +categories: + - math +patterns: + - greedy + +description: | + **Reversing** an integer means to reverse all its digits. + + - For example, reversing `2021` gives `1202`. Reversing `12300` gives `321` as the **leading zeros are not retained**. + + Given an integer `num`, **reverse** `num` to get `reversed1`, then **reverse** `reversed1` to get `reversed2`. Return `true` *if* `reversed2` *equals* `num`. Otherwise return `false`. + +constraints: | + - `0 <= num <= 10^6` + +examples: + - input: "num = 526" + output: "true" + explanation: "Reverse num to get 625, then reverse 625 to get 526, which equals num." + - input: "num = 1800" + output: "false" + explanation: "Reverse num to get 81, then reverse 81 to get 18, which does not equal num." + - input: "num = 0" + output: "true" + explanation: "Reverse num to get 0, then reverse 0 to get 0, which equals num." + +explanation: + intuition: | + At first glance, this problem seems to require implementing a digit reversal function and applying it twice. But let's think deeper about what actually happens during reversal. + + The key insight is understanding **when information is lost**. When you reverse a number, the only way you lose information is if the original number has **trailing zeros**. These trailing zeros become leading zeros after the first reversal, and leading zeros are dropped (since `0123` is just `123`). + + Think of it like this: imagine writing a number on a piece of paper and flipping it horizontally. 
If the number ends in zeros, those zeros move to the front — and we don't write leading zeros! So `1800` becomes `0081` which we write as `81`. Flipping again gives `18`, not `1800`. + + The only exception is zero itself (`0`), which remains `0` no matter how many times you reverse it. + + So the question reduces to: **does `num` have trailing zeros?** If yes (and it's not zero), the double reversal won't recover the original. If no, it will. + + approach: | + We solve this with a **simple mathematical observation**: + + **Step 1: Handle the special case** + + - If `num` is `0`, return `true` immediately + - Zero reversed is zero, so double reversal preserves it + +   + + **Step 2: Check for trailing zeros** + + - A number has trailing zeros if and only if it is divisible by 10 + - Check: `num % 10 == 0` + - If true, the number has trailing zeros and will lose them on reversal + +   + + **Step 3: Return the result** + + - Return `true` if `num` has no trailing zeros (or is zero) + - Return `false` if `num` has trailing zeros + +   + + This can be simplified to a single expression: `num == 0 or num % 10 != 0`. + + common_pitfalls: + - title: Actually Implementing Reversal + description: | + A common first instinct is to implement a digit-reversal function and apply it twice. While this works, it's unnecessarily complex: + + ```python + def reverse(n): + result = 0 + while n > 0: + result = result * 10 + n % 10 + n //= 10 + return result + ``` + + This is O(log n) time and requires careful handling. The mathematical insight reduces this to O(1). + wrong_approach: "Implement reverse function and call twice" + correct_approach: "Check if number has trailing zeros" + + - title: Forgetting Zero is a Special Case + description: | + Zero ends in zero (`0 % 10 == 0`), but zero reversed is still zero. If you only check for trailing zeros without handling this case, you'll incorrectly return `false` for `num = 0`. 
+ + The condition `num == 0 or num % 10 != 0` handles this elegantly. + wrong_approach: "Only check num % 10 != 0" + correct_approach: "Check num == 0 OR num % 10 != 0" + + - title: Confusing Trailing vs Leading Zeros + description: | + Leading zeros don't affect a number's value (`007` is just `7`), so they're not the issue. **Trailing zeros** are what get lost because they become leading zeros after reversal. + + For example: `120` → reverse → `021` = `21` → reverse → `12` ≠ `120`. + + key_takeaways: + - "**Look for the invariant**: Instead of simulating the operation, ask 'when does this operation lose information?'" + - "**Trailing zeros are the only information loss**: Reversing a number only loses data when trailing zeros become leading zeros" + - "**Mathematical insight beats simulation**: Recognising the pattern gives O(1) instead of O(log n)" + - "**Edge cases matter**: Zero is a special case that satisfies `num % 10 == 0` but should return `true`" + + time_complexity: "O(1). We perform a single modulo operation and comparison." + space_complexity: "O(1). We use no additional space beyond the input." + +solutions: + - approach_name: Mathematical Insight + is_optimal: true + code: | + def is_same_after_reversals(num: int) -> bool: + # Zero is a special case - reversing 0 gives 0 + # Otherwise, check if number has trailing zeros + # Trailing zeros become leading zeros after first reversal + # and are lost, making double reversal different from original + return num == 0 or num % 10 != 0 + explanation: | + **Time Complexity:** O(1) — Single modulo operation. + + **Space Complexity:** O(1) — No additional space used. + + We recognise that the only way a double reversal changes a number is if it has trailing zeros (which become leading zeros and get dropped). Zero itself is the exception since `0` reversed is `0`. 
+ + - approach_name: Simulation + is_optimal: false + code: | + def is_same_after_reversals(num: int) -> bool: + def reverse(n: int) -> int: + """Reverse the digits of a non-negative integer.""" + result = 0 + while n > 0: + # Extract last digit and append to result + result = result * 10 + n % 10 + n //= 10 + return result + + # Apply reversal twice and compare + reversed1 = reverse(num) + reversed2 = reverse(reversed1) + return reversed2 == num + explanation: | + **Time Complexity:** O(log n) — Each reversal iterates through all digits. + + **Space Complexity:** O(1) — Only integer variables used. + + This approach directly simulates the problem statement. While correct, it's more complex than necessary. Understanding why the mathematical approach works is more valuable for interviews. diff --git a/backend/data/questions/accounts-merge.yaml b/backend/data/questions/accounts-merge.yaml new file mode 100644 index 0000000..a5ad6cf --- /dev/null +++ b/backend/data/questions/accounts-merge.yaml @@ -0,0 +1,295 @@ +title: Accounts Merge +slug: accounts-merge +difficulty: medium +leetcode_id: 721 +leetcode_url: https://leetcode.com/problems/accounts-merge/ +categories: + - graphs + - hash-tables + - strings +patterns: + - union-find + - dfs + +description: | + Given a list of `accounts` where each element `accounts[i]` is a list of strings, where the first element `accounts[i][0]` is a name, and the rest of the elements are **emails** representing emails of the account. + + Now, we would like to merge these accounts. Two accounts definitely belong to the same person if there is some common email to both accounts. Note that even if two accounts have the same name, they may belong to different people as people could have the same name. A person can have any number of accounts initially, but all of their accounts definitely have the same name. 
+ + After merging the accounts, return the accounts in the following format: the first element of each account is the name, and the rest of the elements are emails **in sorted order**. The accounts themselves can be returned in **any order**. + +constraints: | + - `1 <= accounts.length <= 1000` + - `2 <= accounts[i].length <= 10` + - `1 <= accounts[i][j].length <= 30` + - `accounts[i][0]` consists of English letters + - `accounts[i][j]` (for `j > 0`) is a valid email + +examples: + - input: 'accounts = [["John","johnsmith@mail.com","john_newyork@mail.com"],["John","johnsmith@mail.com","john00@mail.com"],["Mary","mary@mail.com"],["John","johnnybravo@mail.com"]]' + output: '[["John","john00@mail.com","john_newyork@mail.com","johnsmith@mail.com"],["Mary","mary@mail.com"],["John","johnnybravo@mail.com"]]' + explanation: "The first and second John's are the same person as they have the common email 'johnsmith@mail.com'. The third John and Mary are different people as none of their email addresses are used by other accounts." + - input: 'accounts = [["Gabe","Gabe0@m.co","Gabe3@m.co","Gabe1@m.co"],["Kevin","Kevin3@m.co","Kevin5@m.co","Kevin0@m.co"],["Ethan","Ethan5@m.co","Ethan4@m.co","Ethan0@m.co"],["Hanzo","Hanzo3@m.co","Hanzo1@m.co","Hanzo0@m.co"],["Fern","Fern5@m.co","Fern1@m.co","Fern0@m.co"]]' + output: '[["Ethan","Ethan0@m.co","Ethan4@m.co","Ethan5@m.co"],["Gabe","Gabe0@m.co","Gabe1@m.co","Gabe3@m.co"],["Hanzo","Hanzo0@m.co","Hanzo1@m.co","Hanzo3@m.co"],["Kevin","Kevin0@m.co","Kevin3@m.co","Kevin5@m.co"],["Fern","Fern0@m.co","Fern1@m.co","Fern5@m.co"]]' + explanation: "No accounts share common emails, so each account remains separate. The emails within each account are sorted alphabetically." + +explanation: + intuition: | + Think of this problem as a **social network** where emails are people and accounts are group chats. If two accounts share even one email, they must belong to the same person — meaning all emails from both accounts should be merged together. 
+ + The key insight is that this is a **connected components** problem in disguise. Imagine each email as a node in a graph. When two emails appear in the same account, they're connected by an edge. Your task is to find all connected components (groups of emails that belong to the same person) and then associate each component with the correct name. + + Think of it like this: if email A and email B are in one account, and email B and email C are in another account, then A, B, and C all belong to the same person — they form a connected chain. + + Two classic approaches work well here: + - **Union-Find (Disjoint Set Union)**: Efficiently groups emails by treating each email as an element and unioning emails that appear together + - **DFS/BFS**: Build an adjacency graph of emails and traverse to find all connected emails + + approach: | + We'll use the **Union-Find** approach, which is particularly elegant for this problem: + + **Step 1: Build email-to-index mapping and union structure** + + - Create a mapping from each email to a unique index + - Initialise the Union-Find structure where each email starts as its own parent + - Also track which name is associated with each email + +   + + **Step 2: Union emails within each account** + + - For each account, union all emails together + - Use the first email in the account as the "anchor" — union every other email with it + - This ensures all emails in an account end up in the same connected component + +   + + **Step 3: Group emails by their root parent** + + - For each email, find its root parent using path compression + - Group all emails that share the same root together + - Use a dictionary mapping root → list of emails + +   + + **Step 4: Build the final result** + + - For each group of emails, get the associated name from any email in the group + - Sort the emails alphabetically + - Construct the result as `[name, email1, email2, ...]` + +   + + The Union-Find approach is efficient because the union and find 
operations are nearly O(1) with path compression and union by rank. + + common_pitfalls: + - title: Assuming Same Name Means Same Person + description: | + A critical mistake is merging accounts just because they share the same name. + + For example, `["John", "a@mail.com"]` and `["John", "b@mail.com"]` are **different people** unless they share a common email. The problem explicitly states: "even if two accounts have the same name, they may belong to different people." + + Only merge accounts when they share at least one email address. + wrong_approach: "Grouping accounts by name" + correct_approach: "Group by shared emails using Union-Find or graph traversal" + + - title: Missing Transitive Connections + description: | + If account 1 has emails `[a, b]` and account 2 has emails `[b, c]`, then a, b, and c ALL belong to the same person through the transitive connection via `b`. + + A naive approach might only merge direct pairs, missing that `a` and `c` are connected through `b`. Union-Find naturally handles transitivity — when you union `a-b` and `b-c`, calling `find(a)` and `find(c)` will return the same root. + wrong_approach: "Only checking direct email matches between accounts" + correct_approach: "Use Union-Find or DFS to capture all transitive connections" + + - title: Forgetting to Sort Emails + description: | + The problem requires emails within each merged account to be in **sorted order**. It's easy to forget this step after successfully grouping the emails. + + Always sort the email list before constructing the final result. + wrong_approach: "Returning emails in arbitrary order" + correct_approach: "Sort emails alphabetically before adding to result" + + - title: Inefficient Union-Find Without Optimizations + description: | + A basic Union-Find implementation without path compression or union by rank can degrade to O(n) per operation, making the overall solution O(n²). 
+ + With path compression (flattening the tree during `find`) and union by rank (attaching smaller trees under larger ones), operations become nearly O(1) amortized. + wrong_approach: "Basic Union-Find without optimizations" + correct_approach: "Use path compression and union by rank for O(α(n)) operations" + + key_takeaways: + - "**Connected components pattern**: When you need to group items by shared relationships, think Union-Find or graph traversal" + - "**Union-Find efficiency**: Path compression and union by rank make Union-Find nearly O(1) per operation — ideal for grouping problems" + - "**Don't trust names**: In real-world data, names are not unique identifiers — only concrete links (like shared emails) can establish identity" + - "**Transitive relationships**: Union-Find elegantly handles chains of relationships without explicit graph construction" + + time_complexity: "O(n × k × log(n × k)) where `n` is the number of accounts and `k` is the average number of emails per account. The Union-Find operations total O(n × k × α(n × k)), where the `α` (inverse Ackermann) function grows so slowly it's effectively constant, so sorting the merged emails dominates." + space_complexity: "O(n × k). We store each email once in the parent dictionary and once in the grouping phase."
+ +solutions: + - approach_name: Union-Find + is_optimal: true + code: | + def accounts_merge(accounts: list[list[str]]) -> list[list[str]]: + # Union-Find helper functions with path compression + def find(x: str) -> str: + # Find root with path compression + if parent[x] != x: + parent[x] = find(parent[x]) # Flatten the tree + return parent[x] + + def union(x: str, y: str) -> None: + # Union two emails by connecting their roots + root_x, root_y = find(x), find(y) + if root_x != root_y: + # Union by rank for efficiency + if rank[root_x] < rank[root_y]: + parent[root_x] = root_y + elif rank[root_x] > rank[root_y]: + parent[root_y] = root_x + else: + parent[root_y] = root_x + rank[root_x] += 1 + + # Initialise Union-Find structures + parent = {} # Maps email -> parent email + rank = {} # Tracks tree depth for union by rank + email_to_name = {} # Maps email -> account name + + # Process each account + for account in accounts: + name = account[0] + first_email = account[1] + + for email in account[1:]: + # Initialise email if not seen before + if email not in parent: + parent[email] = email + rank[email] = 0 + email_to_name[email] = name + + # Union this email with the first email in the account + union(first_email, email) + + # Group emails by their root parent + from collections import defaultdict + groups = defaultdict(list) + for email in parent: + root = find(email) + groups[root].append(email) + + # Build result: [name, sorted emails...] + result = [] + for root, emails in groups.items(): + name = email_to_name[root] + # Sort emails alphabetically as required + result.append([name] + sorted(emails)) + + return result + explanation: | + **Time Complexity:** O(n × k × log(n × k)) — The union/find operations total O(n × k × α(n × k)), nearly linear with path compression and union by rank, but sorting the emails of each merged group dominates. + + **Space Complexity:** O(n × k) — We store each unique email in the parent dictionary, rank dictionary, and email-to-name mapping.
+ + The Union-Find approach efficiently groups emails by maintaining a forest of trees where each tree represents a connected component. Path compression ensures trees stay flat, and union by rank prevents worst-case linear trees. + + - approach_name: DFS Graph Traversal + is_optimal: true + code: | + def accounts_merge(accounts: list[list[str]]) -> list[list[str]]: + from collections import defaultdict + + # Build adjacency list: email -> set of connected emails + graph = defaultdict(set) + email_to_name = {} + + for account in accounts: + name = account[0] + first_email = account[1] + + for email in account[1:]: + # Connect all emails in this account to the first email + graph[first_email].add(email) + graph[email].add(first_email) + email_to_name[email] = name + + # DFS to find all connected emails + def dfs(email: str, component: list[str]) -> None: + visited.add(email) + component.append(email) + for neighbor in graph[email]: + if neighbor not in visited: + dfs(neighbor, component) + + visited = set() + result = [] + + # Find all connected components + for email in graph: + if email not in visited: + component = [] + dfs(email, component) + # Get name from any email in the component + name = email_to_name[component[0]] + # Sort emails and build result + result.append([name] + sorted(component)) + + return result + explanation: | + **Time Complexity:** O(n × k × log(n × k)) — Building the graph is O(n × k), DFS visits each email once O(n × k), and sorting each component adds the log factor. + + **Space Complexity:** O(n × k) — The graph stores edges between emails, and the visited set tracks processed emails. + + This approach explicitly builds a graph where edges connect emails that appear together in an account. DFS then finds all connected components. While conceptually clearer than Union-Find, it requires more memory for the adjacency list. 
+ + - approach_name: BFS Graph Traversal + is_optimal: false + code: | + def accounts_merge(accounts: list[list[str]]) -> list[list[str]]: + from collections import defaultdict, deque + + # Build adjacency list + graph = defaultdict(set) + email_to_name = {} + + for account in accounts: + name = account[0] + first_email = account[1] + + for email in account[1:]: + graph[first_email].add(email) + graph[email].add(first_email) + email_to_name[email] = name + + # BFS to find connected component + def bfs(start: str) -> list[str]: + component = [] + queue = deque([start]) + visited.add(start) + + while queue: + email = queue.popleft() + component.append(email) + for neighbor in graph[email]: + if neighbor not in visited: + visited.add(neighbor) + queue.append(neighbor) + + return component + + visited = set() + result = [] + + for email in graph: + if email not in visited: + component = bfs(email) + name = email_to_name[component[0]] + result.append([name] + sorted(component)) + + return result + explanation: | + **Time Complexity:** O(n × k × log(n × k)) — Same as DFS: graph construction, traversal, and sorting. + + **Space Complexity:** O(n × k) — Graph storage plus the BFS queue in the worst case. + + BFS achieves the same result as DFS but uses a queue instead of recursion. This is preferable when recursion depth is a concern: even within this problem's constraints (≤1000 accounts), accounts that chain together can form a path of more than 1000 emails, which exceeds Python's default recursion limit in a recursive DFS.
diff --git a/backend/data/questions/add-binary.yaml b/backend/data/questions/add-binary.yaml new file mode 100644 index 0000000..ae1e40f --- /dev/null +++ b/backend/data/questions/add-binary.yaml @@ -0,0 +1,185 @@ +title: Add Binary +slug: add-binary +difficulty: easy +leetcode_id: 67 +leetcode_url: https://leetcode.com/problems/add-binary/ +categories: + - strings + - math +patterns: + - two-pointers + +function_signature: "def add_binary(a: str, b: str) -> str:" + +test_cases: + visible: + - input: { a: "11", b: "1" } + expected: "100" + - input: { a: "1010", b: "1011" } + expected: "10101" + - input: { a: "0", b: "0" } + expected: "0" + hidden: + - input: { a: "1", b: "1" } + expected: "10" + - input: { a: "111", b: "1" } + expected: "1000" + - input: { a: "1111", b: "1111" } + expected: "11110" + +description: | + Given two binary strings `a` and `b`, return *their sum as a binary string*. + +constraints: | + - `1 <= a.length, b.length <= 10^4` + - `a` and `b` consist only of `'0'` or `'1'` characters + - Each string does not contain leading zeros except for the zero itself + +examples: + - input: 'a = "11", b = "1"' + output: '"100"' + explanation: "In binary: 11 + 1 = 100 (which is 3 + 1 = 4 in decimal)." + - input: 'a = "1010", b = "1011"' + output: '"10101"' + explanation: "In binary: 1010 + 1011 = 10101 (which is 10 + 11 = 21 in decimal)." + +explanation: + intuition: | + Think of this problem just like adding two numbers by hand in elementary school, but instead of base-10, we're working in base-2 (binary). + + When you add two decimal numbers on paper, you start from the rightmost digit, add the digits together along with any carry from the previous column, write down the result's last digit, and carry over anything that exceeds 9. Binary addition works exactly the same way, except we carry over when the sum exceeds 1. 
+ + In binary: + - `0 + 0 = 0` (no carry) + - `0 + 1 = 1` (no carry) + - `1 + 0 = 1` (no carry) + - `1 + 1 = 10` (write `0`, carry `1`) + - `1 + 1 + 1 = 11` (write `1`, carry `1`) + + The key insight is that we need to process both strings from right to left, handling cases where strings have different lengths and managing the carry throughout. + + approach: | + We use a **Two Pointers from End** approach to simulate binary addition: + + **Step 1: Initialise pointers and carry** + + - `i`: Pointer starting at the last index of string `a` + - `j`: Pointer starting at the last index of string `b` + - `carry`: Set to `0` initially to track overflow from each addition + - `result`: An empty list to collect result digits (we'll reverse at the end) + +   + + **Step 2: Process digits from right to left** + + - Continue while `i >= 0` OR `j >= 0` OR `carry > 0` + - Get the current digit from `a` if `i >= 0`, otherwise use `0` + - Get the current digit from `b` if `j >= 0`, otherwise use `0` + - Calculate `total = digit_a + digit_b + carry` + - Append `total % 2` to result (this is the current bit: `0` or `1`) + - Update `carry = total // 2` (this is `1` if total >= 2, else `0`) + - Decrement both pointers + +   + + **Step 3: Build and return the result** + + - Reverse the result list (since we built it backwards) + - Join the digits into a string and return + + common_pitfalls: + - title: Converting to Integers Directly + description: | + A tempting approach is to convert both strings to integers, add them, and convert back: + ```python + return bin(int(a, 2) + int(b, 2))[2:] + ``` + While this works in Python, whose integers have arbitrary precision, it does not carry over to languages with fixed-width integers. With lengths up to `10^4`, the resulting integer could have over 10,000 bits, which overflows 32-bit and 64-bit integer types and defeats the purpose of the exercise. + + The problem expects you to implement the addition algorithm manually.
+ wrong_approach: "int(a, 2) + int(b, 2) conversion" + correct_approach: "Manual digit-by-digit addition with carry" + + - title: Forgetting the Final Carry + description: | + After processing all digits, there may still be a carry left over. For example: + - `a = "1"`, `b = "1"` should produce `"10"`, not `"0"` + + If you exit the loop only when both pointers are exhausted, you'll miss the final carry. The loop condition must include `carry > 0` to handle this case. + wrong_approach: "while i >= 0 or j >= 0 (missing carry check)" + correct_approach: "while i >= 0 or j >= 0 or carry > 0" + + - title: String Concatenation in Loop + description: | + Building the result by prepending characters to a string (`result = bit + result`) is O(n) per operation, leading to O(n^2) overall. + + Instead, append to a list and reverse at the end for O(n) total time. + wrong_approach: "result = str(bit) + result in loop" + correct_approach: "result.append(bit), then reverse" + + key_takeaways: + - "**Two-pointer pattern**: When processing two sequences together, use two independent pointers that can move at their own pace" + - "**Right-to-left processing**: For addition problems (decimal, binary, linked lists), always process from the least significant digit" + - "**Carry handling**: The loop condition should include the carry to avoid missing the final overflow bit" + - "**Foundation for related problems**: This technique applies to Add Strings (base-10), Add Two Numbers (linked lists), and Multiply Strings" + + time_complexity: "O(max(n, m)). We process each digit of both strings exactly once, where `n` and `m` are the lengths of `a` and `b`." + space_complexity: "O(max(n, m)). The result string has at most `max(n, m) + 1` characters (the extra one for a possible final carry)." 
+ +solutions: + - approach_name: Two Pointers from End + is_optimal: true + code: | + def add_binary(a: str, b: str) -> str: + # Start from the rightmost digit of each string + i, j = len(a) - 1, len(b) - 1 + carry = 0 + result = [] + + # Process while there are digits left or carry remains + while i >= 0 or j >= 0 or carry: + # Get current digit from a (or 0 if exhausted) + digit_a = int(a[i]) if i >= 0 else 0 + # Get current digit from b (or 0 if exhausted) + digit_b = int(b[j]) if j >= 0 else 0 + + # Add digits plus carry + total = digit_a + digit_b + carry + # Current bit is total mod 2 (0 or 1) + result.append(str(total % 2)) + # Carry is 1 if total >= 2, else 0 + carry = total // 2 + + # Move to next digit (left) + i -= 1 + j -= 1 + + # We built the result backwards, so reverse it + return ''.join(reversed(result)) + explanation: | + **Time Complexity:** O(max(n, m)) — Single pass through both strings. + + **Space Complexity:** O(max(n, m)) — Result string storage. + + We simulate manual binary addition using two pointers starting from the end of each string. At each step, we add the current digits plus any carry, compute the result bit and new carry, then move left. The key is including `carry` in the loop condition to handle the final overflow. + + - approach_name: Bit Manipulation + is_optimal: false + code: | + def add_binary(a: str, b: str) -> str: + # Convert binary strings to integers + x, y = int(a, 2), int(b, 2) + + # Use bit manipulation to add without + operator + while y: + # XOR gives sum without carry + # AND shifted left gives the carry + x, y = x ^ y, (x & y) << 1 + + return bin(x)[2:] # Convert back, strip '0b' prefix + explanation: | + **Time Complexity:** O(max(n, m)^2) in the worst case — A carry can ripple one position per iteration (e.g. `111...1 + 1`), and each iteration runs XOR, AND and shift over integers of up to max(n, m) bits. + + **Space Complexity:** O(max(n, m)) — The intermediate integers hold up to max(n, m) + 1 bits. + + This approach uses bit manipulation: XOR computes the sum ignoring carries, while AND shifted left computes the carries.
We repeat until no carries remain. Note: While elegant, this converts to integers first, which may not be the intended solution for very large inputs. Included here to demonstrate bit manipulation techniques. diff --git a/backend/data/questions/add-digits.yaml b/backend/data/questions/add-digits.yaml new file mode 100644 index 0000000..d2b438d --- /dev/null +++ b/backend/data/questions/add-digits.yaml @@ -0,0 +1,203 @@ +title: Add Digits +slug: add-digits +difficulty: easy +leetcode_id: 258 +leetcode_url: https://leetcode.com/problems/add-digits/ +categories: + - math +patterns: + - greedy + +function_signature: "def add_digits(num: int) -> int:" + +test_cases: + visible: + - input: { num: 38 } + expected: 2 + - input: { num: 0 } + expected: 0 + - input: { num: 9 } + expected: 9 + hidden: + - input: { num: 18 } + expected: 9 + - input: { num: 123 } + expected: 6 + - input: { num: 199 } + expected: 1 + +description: | + Given an integer `num`, repeatedly add all its digits until the result has only one digit, and return it. + + This process of summing digits repeatedly is known as finding the **digital root** of a number. + +constraints: | + - `0 <= num <= 2^31 - 1` + +examples: + - input: "num = 38" + output: "2" + explanation: "The process is: 38 → 3 + 8 → 11 → 1 + 1 → 2. Since 2 has only one digit, return it." + - input: "num = 0" + output: "0" + explanation: "0 is already a single digit, so return it." + +explanation: + intuition: | + Imagine you have a number like `38`. You sum its digits: `3 + 8 = 11`. Still two digits, so you repeat: `1 + 1 = 2`. Now you have a single digit — that's your answer. + + While a loop-based simulation works, there's a **beautiful mathematical shortcut** hiding in plain sight. + + Think about what happens when you sum a number's digits. Take `38`: + - `38 = 3 × 10 + 8 = 3 × (9 + 1) + 8 = 3 × 9 + 3 + 8` + + Notice that `3 × 9` is divisible by 9, so it contributes nothing to the remainder when dividing by 9. 
The digit sum `3 + 8 = 11` has the **same remainder when divided by 9** as the original number `38`. + + This pattern holds recursively! The final single digit (called the **digital root**) is simply the number's remainder when divided by 9 — with one small adjustment for multiples of 9. + + approach: | + We can solve this two ways: the straightforward simulation and the elegant O(1) math formula. + + **Simulation Approach** + + **Step 1: Handle the base case** + + - If `num` is already a single digit (`num < 10`), return it directly + +   + + **Step 2: Sum the digits** + + - Extract each digit using `num % 10` and add to a running sum + - Remove the last digit with `num // 10` + - Repeat until `num` becomes 0 + +   + + **Step 3: Recurse or loop** + + - If the sum is still multi-digit, repeat the process + - Continue until you reach a single digit + +   + + **O(1) Mathematical Approach (Digital Root Formula)** + + **Step 1: Handle zero** + + - If `num == 0`, return `0` (special case) + +   + + **Step 2: Apply the digital root formula** + + - For any positive number, the digital root is `1 + (num - 1) % 9` + - This formula works because: + - If `num % 9 == 0` and `num > 0`, the digital root is `9` + - Otherwise, the digital root is `num % 9` + - The formula `1 + (num - 1) % 9` elegantly handles both cases + +   + + The mathematical approach avoids all loops and runs in constant time regardless of the input size. + + common_pitfalls: + - title: Forgetting the Zero Case + description: | + When using the mathematical formula, `0` requires special handling. + + The formula `1 + (num - 1) % 9` would give: + - `1 + (-1) % 9 = 1 + 8 = 9` in Python, where `%` takes the sign of the divisor — wrong, because the digital root of `0` is `0` + + In languages where `%` truncates toward zero (such as C, Java and JavaScript), `-1 % 9` is `-1` and the formula happens to return `0`, but relying on that is fragile. Always check for `num == 0` explicitly for portability.
+ wrong_approach: "Applying the formula without checking for zero" + correct_approach: "Handle num == 0 as a special case first" + + - title: Misunderstanding the Formula + description: | + A common mistake is using `num % 9` directly. This fails for multiples of 9: + - `9 % 9 = 0`, but the digital root of 9 is `9`, not `0` + - `18 % 9 = 0`, but `1 + 8 = 9`, not `0` + + The formula `1 + (num - 1) % 9` shifts the range from `[0, 8]` to `[1, 9]`, correctly mapping multiples of 9 to `9` instead of `0`. + wrong_approach: "num % 9 (returns 0 for multiples of 9)" + correct_approach: "1 + (num - 1) % 9 (returns 9 for multiples of 9)" + + - title: Inefficient Simulation for Large Numbers + description: | + The simulation approach is O(log n) per iteration, and the number of iterations is O(log log n) in practice. While this is acceptable, it's far less elegant than the O(1) mathematical solution. + + For very large numbers (up to 2^31 - 1), the simulation still works but is unnecessarily complex when a direct formula exists. + wrong_approach: "Always using simulation without knowing the math" + correct_approach: "Use the O(1) formula for optimal efficiency" + + key_takeaways: + - "**Digital root concept**: The digital root of a number is the single digit obtained by repeatedly summing digits — it equals `1 + (n-1) % 9` for positive n" + - "**Modular arithmetic insight**: Digit sums preserve remainders mod 9 because 10 ≡ 1 (mod 9), so each place value contributes its digit directly" + - "**O(1) vs simulation tradeoff**: While simulation is intuitive, recognising mathematical patterns can reduce complexity dramatically" + - "**Foundation for number theory problems**: This technique appears in problems involving divisibility rules, casting out nines, and checksum algorithms" + + time_complexity: "O(1). The mathematical formula computes the result directly with a single modulo operation." + space_complexity: "O(1). 
Only a constant amount of space is used regardless of input size." + +solutions: + - approach_name: Digital Root Formula + is_optimal: true + code: | + def add_digits(num: int) -> int: + # Special case: 0 is already a single digit + if num == 0: + return 0 + + # Digital root formula: maps to range [1, 9] + # Works because digit sums preserve remainder mod 9 + return 1 + (num - 1) % 9 + explanation: | + **Time Complexity:** O(1) — Single arithmetic operation. + + **Space Complexity:** O(1) — No additional space used. + + The digital root formula leverages the mathematical property that summing digits preserves the remainder when dividing by 9. By using `1 + (num - 1) % 9`, we correctly handle multiples of 9 (which should return 9, not 0). + + - approach_name: Iterative Simulation + is_optimal: false + code: | + def add_digits(num: int) -> int: + # Keep summing digits until we have a single digit + while num >= 10: + digit_sum = 0 + # Extract and sum each digit + while num > 0: + digit_sum += num % 10 # Get last digit + num //= 10 # Remove last digit + num = digit_sum + + return num + explanation: | + **Time Complexity:** O(log n) — We process each digit, and digit count is proportional to log(n). + + **Space Complexity:** O(1) — Only tracking the running sum. + + This approach simulates the exact process described in the problem: repeatedly sum digits until reaching a single digit. While correct and intuitive, it's less efficient than the mathematical formula. + + - approach_name: Recursive Simulation + is_optimal: false + code: | + def add_digits(num: int) -> int: + # Base case: single digit + if num < 10: + return num + + # Recursive case: sum digits and recurse + digit_sum = 0 + while num > 0: + digit_sum += num % 10 + num //= 10 + + return add_digits(digit_sum) + explanation: | + **Time Complexity:** O(log n) — Same as iterative, processing each digit. + + **Space Complexity:** O(log log n) — Recursion depth is the number of iterations needed. 
+ + A recursive approach that mirrors the problem description exactly. Each call sums the digits and recurses until reaching a single digit. The recursion depth is very shallow (typically 2-3 calls for any 32-bit integer). diff --git a/backend/data/questions/add-minimum-number-of-rungs.yaml b/backend/data/questions/add-minimum-number-of-rungs.yaml new file mode 100644 index 0000000..308e581 --- /dev/null +++ b/backend/data/questions/add-minimum-number-of-rungs.yaml @@ -0,0 +1,160 @@ +title: Add Minimum Number of Rungs +slug: add-minimum-number-of-rungs +difficulty: medium +leetcode_id: 1936 +leetcode_url: https://leetcode.com/problems/add-minimum-number-of-rungs/ +categories: + - arrays +patterns: + - greedy + +description: | + You are given a **strictly increasing** integer array `rungs` that represents the **height** of rungs on a ladder. You are currently on the **floor** at height `0`, and you want to reach the last rung. + + You are also given an integer `dist`. You can only climb to the next highest rung if the distance between where you are currently at (the floor or on a rung) and the next rung is **at most** `dist`. You are able to insert rungs at any positive **integer** height if a rung is not already there. + + Return *the **minimum** number of rungs that must be added to the ladder in order for you to climb to the last rung*. + +constraints: | + - `1 <= rungs.length <= 10^5` + - `1 <= rungs[i] <= 10^9` + - `1 <= dist <= 10^9` + - `rungs` is **strictly increasing** + +examples: + - input: "rungs = [1,3,5,10], dist = 2" + output: "2" + explanation: "You currently cannot reach the last rung. Add rungs at heights 7 and 8 to climb this ladder. The ladder will now have rungs at [1,3,5,7,8,10]." + - input: "rungs = [3,6,8,10], dist = 3" + output: "0" + explanation: "This ladder can be climbed without adding additional rungs." + - input: "rungs = [3,4,6,7], dist = 2" + output: "1" + explanation: "You currently cannot reach the first rung from the ground. 
Add a rung at height 1 to climb this ladder. The ladder will now have rungs at [1,3,4,6,7]." + +explanation: + intuition: | + Imagine you're climbing a ladder where some rungs are missing. At each step, you can only reach up by a maximum distance of `dist`. If the next rung is too far away, you need to add intermediate rungs to bridge the gap. + + The key insight is that this problem can be solved **greedily** by processing each gap independently. For any gap between your current position and the next rung, you need to figure out the minimum number of rungs to insert so that no two consecutive rungs (including the ones you add) are more than `dist` apart. + + Think of it like this: if the gap between two rungs is `g`, and you can step at most `dist` at a time, how many intermediate steps do you need? The answer is `ceil(g / dist) - 1`. We subtract 1 because the final step lands on the existing rung, not a new one. + + This can be computed as `(g - 1) // dist` using integer arithmetic, which gives us the number of additional rungs needed for that gap. + + approach: | + We solve this using a **Single Pass Greedy** approach: + + **Step 1: Initialise variables** + + - `prev`: Set to `0` representing our starting position (the floor) + - `rungs_needed`: Set to `0` to count total rungs we need to add + +   + + **Step 2: Iterate through each rung** + + - For each rung at height `h`, calculate the gap: `gap = h - prev` + - If `gap > dist`, we need to add rungs to bridge this gap + - The number of rungs to add is `(gap - 1) // dist` — this computes how many intermediate steps are needed + - Add this count to `rungs_needed` + - Update `prev = h` to move to the current rung + +   + + **Step 3: Return the result** + + - Return `rungs_needed` after processing all rungs + +   + + This greedy approach works because each gap is independent — the number of rungs needed to bridge one gap doesn't affect any other gap. We simply sum up the rungs needed for each gap. 
+ + common_pitfalls: + - title: Off-By-One Error in Rung Calculation + description: | + A common mistake is using `gap // dist` instead of `(gap - 1) // dist`. + + Consider a gap of 6 with `dist = 3`. Using `gap // dist` gives `6 // 3 = 2` rungs. But actually: + - From position 0, you can reach height 3 (one new rung at 3) + - From position 3, you can reach height 6 (the existing rung) + + So you only need **1** rung, not 2. The formula `(gap - 1) // dist` correctly gives `(6 - 1) // 3 = 1`. + wrong_approach: "Using gap // dist" + correct_approach: "Using (gap - 1) // dist" + + - title: Forgetting the Floor + description: | + Don't forget that you start at height `0` (the floor), not at the first rung. The gap between the floor and the first rung might also require additional rungs. + + For example, with `rungs = [5]` and `dist = 2`, you need rungs at heights 2 and 4 to reach height 5. That's 2 additional rungs. + wrong_approach: "Starting iteration from rungs[1]" + correct_approach: "Initialize prev = 0 to account for the floor" + + - title: Integer Overflow Concerns + description: | + With `rungs[i]` up to `10^9`, gaps can be very large. In some languages, you might need to be careful about integer overflow when computing gaps. In Python, integers have arbitrary precision, so this isn't a concern. + + key_takeaways: + - "**Greedy independence**: When subproblems don't affect each other (each gap is independent), solve them separately and sum the results" + - "**Integer division trick**: To find how many steps of size `dist` fit in a gap, use `(gap - 1) // dist` to avoid off-by-one errors" + - "**Don't forget boundaries**: Always consider edge cases like the starting position (floor at height 0)" + - "**Linear scan sufficiency**: When you only need to make local decisions, a single pass through the data is often enough" + + time_complexity: "O(n). We iterate through each rung exactly once, where `n` is the length of the `rungs` array." + space_complexity: "O(1). 
# NOTE: the two definitions below are independent alternative solutions to
# the same problem; each one is meant to be used on its own.


# Approach: Single Pass Greedy (optimal)
def add_rungs(rungs: list[int], dist: int) -> int:
    """Return the minimum number of rungs to insert so the ladder is climbable.

    The climb starts on the floor (height 0). Each gap is handled
    independently in one pass: O(n) time, O(1) extra space.
    """
    rungs_needed = 0
    prev = 0  # current standing height, starting on the floor

    for height in rungs:
        gap = height - prev
        prev = height
        if gap <= dist:
            # Reachable in a single step; nothing to insert for this gap.
            continue
        # The last step lands on the existing rung, hence the "- 1".
        rungs_needed += (gap - 1) // dist

    return rungs_needed


# Approach: Simulation
def add_rungs(rungs: list[int], dist: int) -> int:
    """Return the minimum number of rungs to insert, adding them one by one.

    Mirrors the physical climb: whenever the next rung is out of reach, a
    new rung is placed at the highest reachable height. The running time
    grows with the number of inserted rungs, so very large gaps with a
    small ``dist`` are slow.
    """
    rungs_needed = 0
    prev = 0

    for height in rungs:
        # Keep stepping up by the maximum distance until the rung is in reach.
        while prev + dist < height:
            rungs_needed += 1
            prev += dist
        prev = height

    return rungs_needed
diff --git a/backend/data/questions/add-one-row-to-tree.yaml b/backend/data/questions/add-one-row-to-tree.yaml new file mode 100644 index 0000000..f2fc73a --- /dev/null +++ b/backend/data/questions/add-one-row-to-tree.yaml @@ -0,0 +1,241 @@ +title: Add One Row to Tree +slug: add-one-row-to-tree +difficulty: medium +leetcode_id: 623 +leetcode_url: https://leetcode.com/problems/add-one-row-to-tree/ +categories: + - trees + - queue + - recursion +patterns: + - bfs + - dfs + - tree-traversal + +description: | + Given the `root` of a binary tree and two integers `val` and `depth`, add a row of nodes with value `val` at the given depth `depth`. + + Note that the `root` node is at depth `1`. + + The adding rule is: + + - Given the integer `depth`, for each not null tree node `cur` at the depth `depth - 1`, create two tree nodes with value `val` as `cur`'s left subtree root and right subtree root. + - `cur`'s original left subtree should be the left subtree of the new left subtree root. + - `cur`'s original right subtree should be the right subtree of the new right subtree root. + - If `depth == 1` that means there is no depth `depth - 1` at all, then create a tree node with value `val` as the new root of the whole original tree, and the original tree is the new root's left subtree. + +constraints: | + - The number of nodes in the tree is in the range `[1, 10^4]` + - The depth of the tree is in the range `[1, 10^4]` + - `-100 <= Node.val <= 100` + - `-10^5 <= val <= 10^5` + - `1 <= depth <= the depth of tree + 1` + +examples: + - input: "root = [4,2,6,3,1,5], val = 1, depth = 2" + output: "[4,1,1,2,null,null,6,3,1,5]" + explanation: "At depth 2, we insert two new nodes with value 1. The original left subtree (rooted at 2) becomes the left child of the new left node. The original right subtree (rooted at 6) becomes the right child of the new right node." 
+ - input: "root = [4,2,null,3,1], val = 1, depth = 3" + output: "[4,2,null,1,1,3,null,null,1]" + explanation: "At depth 3, we insert new nodes below the node with value 2. Its original children (3 and 1) become children of the newly inserted nodes." + +explanation: + intuition: | + Imagine you're inserting a new floor into an existing building. Every room on the floor above the insertion point needs to be "lifted up" and reconnected through the new floor. + + Think of it like this: you need to find all nodes at depth `depth - 1` (the "parent floor"). For each of these nodes, you insert two new nodes with value `val` between the parent and its children. The parent's original left child becomes the left child of the new left node, and similarly for the right side. + + The key insight is that this is a **level-based operation**. You need to reach a specific depth and perform surgery there. This naturally suggests either: + - **BFS**: Traverse level by level until you reach depth `depth - 1`, then modify all nodes at that level + - **DFS**: Track your current depth as you recurse, and perform the insertion when you reach the target + + The special case of `depth == 1` is elegant: since there's no "floor above" the root, you simply create a new root and make the entire original tree its left subtree. 
+ + approach: | + We solve this using **BFS with Level Tracking**: + + **Step 1: Handle the special case depth == 1** + + - If `depth == 1`, create a new root node with value `val` + - Set the original tree as the new root's left child + - Return the new root immediately + +   + + **Step 2: Use BFS to reach depth - 1** + + - Start with the root in a queue + - Track the current depth, starting at `1` + - Process level by level until we reach depth `depth - 1` + +   + + **Step 3: Insert new nodes at the target level** + + - For each node at depth `depth - 1`: + - Save its current left and right children + - Create a new left node with value `val`, connecting the original left child as its left child + - Create a new right node with value `val`, connecting the original right child as its right child + - Update the parent's left and right pointers to the new nodes + +   + + **Step 4: Return the modified tree** + + - Return the original root (or new root if depth was 1) + +   + + The BFS approach is intuitive because we naturally "stop" at the right level. DFS works equally well by passing the current depth through recursion. + + common_pitfalls: + - title: Forgetting the depth == 1 Special Case + description: | + When `depth == 1`, there are no nodes at depth `0` to serve as parents. The problem explicitly states we should create a new root with the original tree as its left subtree. + + If you forget this case, your BFS/DFS will never find parents to attach to, or you might crash trying to access non-existent nodes. + wrong_approach: "Assume depth is always >= 2" + correct_approach: "Check depth == 1 first and create new root" + + - title: Losing Original Children References + description: | + When inserting new nodes, you must save the original children **before** overwriting the parent's pointers. If you write: + ``` + node.left = TreeNode(val) + node.left.left = node.left # Oops, this is now the new node! 
+ ``` + You've lost the reference to the original left subtree. + + Always save first: `old_left = node.left`, then rewire. + wrong_approach: "Overwrite pointers before saving originals" + correct_approach: "Save old_left and old_right before creating new nodes" + + - title: Off-by-One Depth Errors + description: | + The problem states the root is at depth `1`, not `0`. If you use 0-indexed depth, you'll insert the row at the wrong level. + + With root at depth 1: to insert at depth 2, you find nodes at depth 1 (the root) and add children to them. + wrong_approach: "Treating root as depth 0" + correct_approach: "Root is depth 1, find parents at depth - 1" + + - title: Only Inserting One New Node Per Parent + description: | + Each parent at depth `depth - 1` gets **two** new children (left and right), regardless of whether the parent originally had children there. + + Even if `node.left` was `None`, you still create `new_left = TreeNode(val)` and set `new_left.left = None`. The new node exists; it just has no children. + wrong_approach: "Only insert where original children existed" + correct_approach: "Always insert both left and right new nodes" + + key_takeaways: + - "**Level-based tree modifications**: When operations target a specific depth, BFS level-by-level traversal is natural and intuitive" + - "**Save before overwrite**: When rewiring tree pointers, always save original references before modifying parent pointers" + - "**Special case the root**: Operations at depth 1 often require special handling since there's no parent above the root" + - "**DFS with depth tracking**: Passing depth as a parameter in DFS is an effective alternative to BFS for depth-specific operations" + + time_complexity: "O(n). In the worst case (depth equals tree depth), we visit all nodes. BFS processes each node at most once." + space_complexity: "O(w) where w is the maximum width of the tree. The queue holds at most one level of nodes. 
In a complete binary tree, this is O(n/2) = O(n)." + +solutions: + - approach_name: BFS with Level Tracking + is_optimal: true + code: | + from collections import deque + + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def add_one_row(root: TreeNode | None, val: int, depth: int) -> TreeNode | None: + # Special case: inserting at depth 1 means new root + if depth == 1: + new_root = TreeNode(val) + new_root.left = root + return new_root + + # BFS to find all nodes at depth - 1 + queue = deque([root]) + current_depth = 1 + + # Traverse until we reach the level just above target + while current_depth < depth - 1: + level_size = len(queue) + for _ in range(level_size): + node = queue.popleft() + if node.left: + queue.append(node.left) + if node.right: + queue.append(node.right) + current_depth += 1 + + # Now queue contains all nodes at depth - 1 + # Insert new nodes between each parent and its children + while queue: + node = queue.popleft() + + # Save original children + old_left = node.left + old_right = node.right + + # Create new nodes and rewire + node.left = TreeNode(val) + node.right = TreeNode(val) + + # Connect original children to new nodes + node.left.left = old_left + node.right.right = old_right + + return root + explanation: | + **Time Complexity:** O(n) — We may traverse all nodes to reach the target depth. + + **Space Complexity:** O(w) — Queue holds at most one level. Maximum width is n/2 for complete tree. + + BFS naturally processes level by level. We traverse until reaching depth - 1, then for each node at that level, we insert two new nodes with value `val`. The original subtrees become children of the new nodes. 
+ + - approach_name: DFS with Depth Tracking + is_optimal: true + code: | + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def add_one_row(root: TreeNode | None, val: int, depth: int) -> TreeNode | None: + # Special case: inserting at depth 1 means new root + if depth == 1: + new_root = TreeNode(val) + new_root.left = root + return new_root + + def dfs(node: TreeNode | None, current_depth: int) -> None: + if not node: + return + + # We're at depth - 1: insert new row here + if current_depth == depth - 1: + # Save original children + old_left = node.left + old_right = node.right + + # Create and wire new nodes + node.left = TreeNode(val) + node.right = TreeNode(val) + node.left.left = old_left + node.right.right = old_right + return # No need to go deeper + + # Haven't reached target yet, keep going + dfs(node.left, current_depth + 1) + dfs(node.right, current_depth + 1) + + dfs(root, 1) + return root + explanation: | + **Time Complexity:** O(n) — Visit nodes until reaching target depth. In worst case, visits all nodes. + + **Space Complexity:** O(h) — Recursion stack depth equals tree height. O(log n) for balanced, O(n) for skewed tree. + + DFS tracks depth via parameter. When reaching depth - 1, we perform the insertion. The recursion stops after insertion since we don't need to process deeper levels. Both BFS and DFS are optimal; DFS may use less space for wide trees, BFS for deep trees. 
diff --git a/backend/data/questions/add-strings.yaml b/backend/data/questions/add-strings.yaml new file mode 100644 index 0000000..b20e3da --- /dev/null +++ b/backend/data/questions/add-strings.yaml @@ -0,0 +1,197 @@ +title: Add Strings +slug: add-strings +difficulty: easy +leetcode_id: 415 +leetcode_url: https://leetcode.com/problems/add-strings/ +categories: + - strings + - math +patterns: + - two-pointers + +function_signature: "def add_strings(num1: str, num2: str) -> str:" + +test_cases: + visible: + - input: { num1: "11", num2: "123" } + expected: "134" + - input: { num1: "456", num2: "77" } + expected: "533" + - input: { num1: "0", num2: "0" } + expected: "0" + hidden: + - input: { num1: "99", num2: "1" } + expected: "100" + - input: { num1: "1", num2: "9" } + expected: "10" + - input: { num1: "999", num2: "999" } + expected: "1998" + +description: | + Given two non-negative integers, `num1` and `num2` represented as strings, return *the sum of* `num1` *and* `num2` *as a string*. + + You must solve the problem without using any built-in library for handling large integers (such as `BigInteger`). You must also not convert the inputs to integers directly. + +constraints: | + - `1 <= num1.length, num2.length <= 10^4` + - `num1` and `num2` consist of only digits. + - `num1` and `num2` don't have any leading zeros except for the zero itself. + +examples: + - input: 'num1 = "11", num2 = "123"' + output: '"134"' + explanation: "11 + 123 = 134" + - input: 'num1 = "456", num2 = "77"' + output: '"533"' + explanation: "456 + 77 = 533" + - input: 'num1 = "0", num2 = "0"' + output: '"0"' + explanation: "0 + 0 = 0" + +explanation: + intuition: | + Think back to how you learned to add numbers by hand in primary school. You start from the **rightmost digits**, add them together, write down the result, and carry over any overflow to the next column. + + This problem asks us to simulate exactly that process. 
Since we can't convert the strings to integers directly (which would fail for very large numbers anyway), we process the strings **digit by digit from right to left**, just like manual addition. + + The key insight is that each digit position can be handled independently: add the two digits plus any carry from the previous position, then determine the new carry and the digit to write. This mechanical process works regardless of how long the numbers are. + + approach: | + We simulate elementary school addition using **two pointers** starting from the end of each string: + + **Step 1: Initialise variables** + + - `i`: Pointer to the last character of `num1` + - `j`: Pointer to the last character of `num2` + - `carry`: Set to `0` to track overflow between digit additions + - `result`: Empty list to collect the result digits (we'll reverse at the end) + +   + + **Step 2: Process digits from right to left** + + - While either pointer is valid OR there's a remaining carry: + - Get digit from `num1[i]` if `i >= 0`, otherwise use `0` + - Get digit from `num2[j]` if `j >= 0`, otherwise use `0` + - Calculate `total = digit1 + digit2 + carry` + - Append `total % 10` to result (the digit to keep) + - Update `carry = total // 10` (either `0` or `1`) + - Decrement both pointers + +   + + **Step 3: Build the final result** + + - Reverse the result list (we built it backwards) + - Join into a string and return + +   + + The two-pointer approach handles strings of different lengths naturally by treating missing digits as zeros. + + common_pitfalls: + - title: Converting to Integer + description: | + The most obvious approach is to convert both strings to integers, add them, and convert back: + + ```python + return str(int(num1) + int(num2)) # Violates the rules! + ``` + + This violates the problem constraints. More importantly, it would fail for numbers larger than what the language's integer type can handle. The digit-by-digit approach works for arbitrarily large numbers. 
+ wrong_approach: "int(num1) + int(num2)" + correct_approach: "Digit-by-digit simulation" + + - title: Processing Left to Right + description: | + If you try to add digits from left to right (index 0 onwards), you'll have problems: + - The carry propagates in the wrong direction + - Strings of different lengths don't align properly + + For example, adding `"99"` and `"1"`: starting from the left, you'd try to add `9 + 1 = 10`, but the carry needs to affect digits to the *left*, not right. + + Always process from **right to left**, then reverse the result. + wrong_approach: "Iterate from index 0" + correct_approach: "Iterate from the last index backwards" + + - title: Forgetting the Final Carry + description: | + After processing all digits, there might still be a carry left over. For example, `"99" + "1"` produces digits `0, 0` with a final carry of `1`. + + If you don't check for this remaining carry, you'd return `"00"` instead of `"100"`. + + The loop condition `while i >= 0 or j >= 0 or carry` handles this by continuing while there's a carry to process. + + key_takeaways: + - "**Simulate manual processes**: When built-in operations are forbidden, think about how you'd solve it by hand" + - "**Two-pointer from the end**: For digit-by-digit arithmetic, start from the least significant digit (rightmost)" + - "**Handle different lengths gracefully**: Treat missing digits as zero rather than special-casing length differences" + - "**Same pattern for multiplication**: This digit-by-digit approach extends to string multiplication (LeetCode 43)" + + time_complexity: "O(max(n, m)). We process each digit exactly once, where `n` and `m` are the lengths of `num1` and `num2`." + space_complexity: "O(max(n, m)). The result string has at most `max(n, m) + 1` digits (one extra for a potential leading carry)." 
# NOTE: the two definitions below are independent alternative solutions to
# the same problem; each one is meant to be used on its own.


# Approach: Two Pointers with Carry (optimal)
def add_strings(num1: str, num2: str) -> str:
    """Return the decimal sum of two non-negative integers given as strings.

    Simulates column addition from the rightmost digit, treating an
    exhausted string as contributing zeros.
    O(max(n, m)) time and O(max(n, m)) space for the result.
    """
    i, j = len(num1) - 1, len(num2) - 1
    carry = 0
    result = []

    # Keep going while either string has digits left or a carry remains.
    while i >= 0 or j >= 0 or carry:
        digit1 = int(num1[i]) if i >= 0 else 0
        digit2 = int(num2[j]) if j >= 0 else 0

        total = digit1 + digit2 + carry

        # Keep the ones digit, carry the tens digit.
        carry, ones = divmod(total, 10)
        result.append(str(ones))

        i -= 1
        j -= 1

    # Digits were collected least-significant first, so reverse them.
    return ''.join(reversed(result))


# Approach: Recursive Approach
def add_strings(num1: str, num2: str) -> str:
    """Return the decimal sum of two digit strings using recursion.

    One call is made per digit position, working from right to left, and
    the result is assembled by string concatenation on the way back.

    The inputs may be up to 10^4 digits long, which is deeper than Python's
    default recursion limit, so the limit is raised for the duration of the
    call and restored afterwards.
    """
    import sys  # local import keeps this snippet self-contained

    def add_helper(i: int, j: int, carry: int) -> str:
        # Base case: no more digits and no carry.
        if i < 0 and j < 0 and carry == 0:
            return ""

        digit1 = int(num1[i]) if i >= 0 else 0
        digit2 = int(num2[j]) if j >= 0 else 0

        total = digit1 + digit2 + carry
        current_digit = str(total % 10)
        new_carry = total // 10

        # Resolve the more significant digits first, then append this one.
        return add_helper(i - 1, j - 1, new_carry) + current_digit

    # One frame per digit, plus one for a final carry and one for the base
    # case; a little slack is added on top.
    frames_needed = max(len(num1), len(num2)) + 8
    original_limit = sys.getrecursionlimit()
    sys.setrecursionlimit(original_limit + frames_needed)
    try:
        return add_helper(len(num1) - 1, len(num2) - 1, 0)
    finally:
        sys.setrecursionlimit(original_limit)
+ + This recursive version processes digits from right to left using the call stack. While elegant, it's less efficient due to string concatenation and recursion overhead. The iterative approach is preferred for interviews. diff --git a/backend/data/questions/add-to-array-form-of-integer.yaml b/backend/data/questions/add-to-array-form-of-integer.yaml new file mode 100644 index 0000000..ccd70fd --- /dev/null +++ b/backend/data/questions/add-to-array-form-of-integer.yaml @@ -0,0 +1,181 @@ +title: Add to Array-Form of Integer +slug: add-to-array-form-of-integer +difficulty: easy +leetcode_id: 989 +leetcode_url: https://leetcode.com/problems/add-to-array-form-of-integer/ +categories: + - arrays + - math +patterns: + - two-pointers + +description: | + The **array-form** of an integer `num` is an array representing its digits in left to right order. + + - For example, for `num = 1321`, the array form is `[1,3,2,1]`. + + Given `num`, the **array-form** of an integer, and an integer `k`, return *the **array-form** of the integer* `num + k`. + +constraints: | + - `1 <= num.length <= 10^4` + - `0 <= num[i] <= 9` + - `num` does not contain any leading zeros except for the zero itself. + - `1 <= k <= 10^4` + +examples: + - input: "num = [1,2,0,0], k = 34" + output: "[1,2,3,4]" + explanation: "1200 + 34 = 1234" + - input: "num = [2,7,4], k = 181" + output: "[4,5,5]" + explanation: "274 + 181 = 455" + - input: "num = [2,1,5], k = 806" + output: "[1,0,2,1]" + explanation: "215 + 806 = 1021" + +explanation: + intuition: | + Think of this problem like performing **grade-school addition by hand**. When you add two numbers on paper, you start from the rightmost digits, add them together, carry over any excess to the next column, and work your way left. + + The twist here is that one number is given as an array of digits (`num`), while the other is a regular integer (`k`). But we can treat `k` the same way — extracting its digits one at a time using modulo and division operations. 
+ + Imagine you're adding `[1,2,0,0]` (which represents 1200) and `34`: + + ``` + 1 2 0 0 + + 3 4 + ----------- + 1 2 3 4 + ``` + + Starting from the right: `0 + 4 = 4`, then `0 + 3 = 3`, and the remaining digits carry through unchanged. The key insight is that we process both numbers from right to left, handling carries as we go, and `k` naturally shrinks as we extract its digits. + + approach: | + We solve this using a **Right-to-Left Addition with Carry** approach: + + **Step 1: Set up the iteration** + + - Start from the rightmost digit of `num` (index `len(num) - 1`) + - Use `k` itself to hold both the number to add and the carry value + +   + + **Step 2: Process digits from right to left** + + - For each position from right to left: + - If we still have digits in `num`, add the current digit to `k` + - The rightmost digit of `k` (obtained via `k % 10`) becomes the result digit + - The remaining digits of `k` (obtained via `k // 10`) become the new carry + - Append each result digit to our answer (digits are collected least-significant first) + +   + + **Step 3: Handle remaining carry** + + - If `k > 0` after processing all digits of `num`, continue extracting digits from `k` + - This handles cases where the sum has more digits than the original number (e.g., `215 + 806 = 1021`) + +   + + **Step 4: Return the result** + + - Reverse the collected digits so the result array is in the correct left-to-right order + +   + + This approach elegantly treats `k` as both the addend and the running carry, simplifying the logic. + + common_pitfalls: + - title: Converting to Integer First + description: | + A tempting approach is to convert `num` to an integer, add `k`, then convert back to an array: + + ```python + n = int(''.join(map(str, num))) + return [int(d) for d in str(n + k)] + ``` + + While this works for small inputs, it fails when `num` has up to `10^4` digits. 
Such a number far exceeds the range of standard integer types in most languages. Python integers have arbitrary precision, but since Python 3.11 `int()` and `str()` raise `ValueError` by default for conversions longer than 4300 digits, so this shortcut fails there too — and it is not the intended solution. + wrong_approach: "Convert array to integer, add, convert back" + correct_approach: "Simulate digit-by-digit addition with carry" + + - title: Forgetting to Handle Extra Digits from k + description: | + When `k` is larger than you'd expect for the number of digits being processed, the carry can extend beyond the original array length. + + For example, `num = [2,1,5]` and `k = 806`: after processing all three digits, we still have a carry that produces the leading `1` in `1021`. + + Always continue the loop while `k > 0`, not just while there are digits in `num`. + wrong_approach: "Only loop through num's length" + correct_approach: "Continue while k > 0 or digits remain" + + - title: Building Result in Wrong Order + description: | + Since we process digits right-to-left but need the result left-to-right, you must either: + - Insert at the front of a list (less efficient in some languages) + - Append to the end and reverse at the end (more efficient) + + Appending without reversing gives digits in the wrong order. + wrong_approach: "Append digits and forget to reverse" + correct_approach: "Append and reverse, or insert at front" + + key_takeaways: + - "**Digit-by-digit simulation**: Many problems involving large numbers represented as arrays require simulating arithmetic operations digit by digit" + - "**Use modulo for extraction**: `k % 10` extracts the last digit, `k // 10` removes it — a pattern useful for any digit manipulation" + - "**Carry propagation**: The carry can extend beyond the original number's length; always handle this case" + - "**Related problems**: This pattern applies to Add Two Numbers (linked lists), Multiply Strings, and Plus One" + + time_complexity: "O(max(n, log k)). 
# NOTE: the two definitions below are independent alternative solutions to
# the same problem; each one is meant to be used on its own.


# Approach: Right-to-Left Addition (optimal)
def add_to_array_form(num: list[int], k: int) -> list[int]:
    """Return the array-form of ``num + k``.

    ``k`` doubles as the running carry: each digit of ``num`` is added
    into it, the last digit of ``k`` is emitted, and the rest carries on.
    O(max(n, log k)) time and space.
    """
    result = []

    # Walk num from its least significant digit.
    for digit in reversed(num):
        k, low = divmod(k + digit, 10)
        result.append(low)

    # Whatever is left in k spills into additional leading digits.
    while k > 0:
        k, low = divmod(k, 10)
        result.append(low)

    # Digits were collected right-to-left; flip them into reading order.
    result.reverse()
    return result


# Approach: Append to Front (Alternative)
def add_to_array_form(num: list[int], k: int) -> list[int]:
    """Return the array-form of ``num + k``, building the answer front-first.

    Same arithmetic as the append-and-reverse version, but each digit is
    inserted at index 0, which shifts the whole list every time and makes
    this variant slower on large inputs.
    """
    result = []

    for digit in reversed(num):
        k, low = divmod(k + digit, 10)
        # Front insertion keeps the list in reading order at all times.
        result.insert(0, low)

    while k > 0:
        k, low = divmod(k, 10)
        result.insert(0, low)

    return result
The append-and-reverse approach is preferred in Python. diff --git a/backend/data/questions/add-two-integers.yaml b/backend/data/questions/add-two-integers.yaml new file mode 100644 index 0000000..2dfcf38 --- /dev/null +++ b/backend/data/questions/add-two-integers.yaml @@ -0,0 +1,110 @@ +title: Add Two Integers +slug: add-two-integers +difficulty: easy +leetcode_id: 2235 +leetcode_url: https://leetcode.com/problems/add-two-integers/ +categories: + - math +patterns: [] + +description: | + Given two integers `num1` and `num2`, return *the **sum** of the two integers*. + +constraints: | + - `-100 <= num1, num2 <= 100` + +examples: + - input: "num1 = 12, num2 = 5" + output: "17" + explanation: "num1 is 12, num2 is 5, and their sum is 12 + 5 = 17, so 17 is returned." + - input: "num1 = -10, num2 = 4" + output: "-6" + explanation: "num1 + num2 = -6, so -6 is returned." + +explanation: + intuition: | + This problem is a fundamental introduction to arithmetic operations in programming. + + While adding two numbers might seem trivial, it establishes an important foundation: understanding how programming languages handle basic mathematical operations, including **negative numbers** and **integer overflow** considerations. + + Think of it as verifying that you understand the most basic building block of computation. Every complex algorithm ultimately breaks down into simple operations like addition. + + approach: | + **Step 1: Return the sum** + + - Simply use the `+` operator to add `num1` and `num2` + - Return the result directly + +   + + This problem requires no special data structures or algorithms. The built-in addition operator handles all cases including: + - Two positive numbers + - Two negative numbers + - One positive and one negative number + - Zero as either operand + + common_pitfalls: + - title: Overthinking the Problem + description: | + This problem is intentionally simple. 
Some developers might overthink it, looking for hidden complexity or edge cases that don't exist. + + The constraints (`-100 <= num1, num2 <= 100`) ensure that the sum will always fit within standard integer bounds, so overflow is not a concern here. + wrong_approach: "Complex bit manipulation or handling overflow" + correct_approach: "Simple addition with the + operator" + + - title: Forgetting Negative Numbers + description: | + While Python handles negative numbers seamlessly, it's worth understanding that the `+` operator works correctly with negative operands. + + For example, `num1 = -10` and `num2 = 4` correctly produces `-6` because `-10 + 4 = -6`. + wrong_approach: "Assuming inputs are always positive" + correct_approach: "Trust the + operator to handle all integer cases" + + key_takeaways: + - "**Foundation for complexity**: Even complex algorithms are built from simple operations like addition" + - "**Operator behaviour**: The `+` operator correctly handles positive, negative, and zero values" + - "**Constraint awareness**: Always check constraints — here, overflow is not a concern due to small input bounds" + - "**Simplicity is valid**: Sometimes the simplest solution is the correct one; don't overcomplicate" + + time_complexity: "O(1). Addition is a constant-time operation regardless of the values." + space_complexity: "O(1). No additional data structures are used." + +solutions: + - approach_name: Direct Addition + is_optimal: true + code: | + def sum(num1: int, num2: int) -> int: + # Simply return the sum of the two integers + return num1 + num2 + explanation: | + **Time Complexity:** O(1) — Single arithmetic operation. + + **Space Complexity:** O(1) — No additional memory used. + + This solution directly returns the sum using Python's built-in addition operator. It handles all cases including negative numbers and zero. 
+ + - approach_name: Bit Manipulation (Without + Operator) + is_optimal: false + code: | + def sum(num1: int, num2: int) -> int: + # Handle negative numbers with 32-bit masking + MASK = 0xFFFFFFFF + MAX_INT = 0x7FFFFFFF + + while num2 != 0: + # XOR gives sum without carry + temp = (num1 ^ num2) & MASK + # AND shifted left gives carry + num2 = ((num1 & num2) << 1) & MASK + num1 = temp + + # Handle negative results + return num1 if num1 <= MAX_INT else ~(num1 ^ MASK) + explanation: | + **Time Complexity:** O(1) — Maximum 32 iterations for 32-bit integers. + + **Space Complexity:** O(1) — Only uses a few variables. + + This approach adds two numbers without using the `+` operator by simulating binary addition. XOR computes the sum without carry, while AND followed by left shift computes the carry. We repeat until there's no carry left. + + This is included for educational purposes — it demonstrates how addition works at the bit level. In practice, the direct addition approach is preferred. diff --git a/backend/data/questions/add-two-numbers-ii.yaml b/backend/data/questions/add-two-numbers-ii.yaml new file mode 100644 index 0000000..0ee95e1 --- /dev/null +++ b/backend/data/questions/add-two-numbers-ii.yaml @@ -0,0 +1,222 @@ +title: Add Two Numbers II +slug: add-two-numbers-ii +difficulty: medium +leetcode_id: 445 +leetcode_url: https://leetcode.com/problems/add-two-numbers-ii/ +categories: + - linked-lists + - stack + - math +patterns: + - linkedlist-reversal + +description: | + You are given two **non-empty** linked lists representing two non-negative integers. The most significant digit comes first and each of their nodes contains a single digit. Add the two numbers and return the sum as a linked list. + + You may assume the two numbers do not contain any leading zero, except the number `0` itself. 
+ +constraints: | + - `1 <= number of nodes in each list <= 100` + - `0 <= Node.val <= 9` + - The list represents a number without leading zeros + +examples: + - input: "l1 = [7,2,4,3], l2 = [5,6,4]" + output: "[7,8,0,7]" + explanation: "7243 + 564 = 7807" + - input: "l1 = [2,4,3], l2 = [5,6,4]" + output: "[8,0,7]" + explanation: "243 + 564 = 807" + - input: "l1 = [0], l2 = [0]" + output: "[0]" + explanation: "0 + 0 = 0" + +explanation: + intuition: | + Think of this problem like adding two numbers by hand on paper. When you add numbers manually, you start from the **rightmost digit** (the least significant) and work your way left, carrying over when a column sums to 10 or more. + + The challenge here is that linked lists give us digits in the **opposite order** — most significant digit first. It's like being handed two numbers written left-to-right and being asked to add them without being able to peek at the end. + + There are two main ways to handle this: + + 1. **Reverse the lists** — Convert the problem into "Add Two Numbers" (LeetCode 2), where digits are already in least-significant-first order. Add them, then reverse the result. + + 2. **Use stacks** — Push all digits onto stacks, then pop them off (which gives us least-significant-first order naturally). This avoids modifying the input lists. + + Both approaches transform the problem into one where we process digits from right to left, just like manual addition. 
+ + approach: | + We'll use the **Stack Approach** to solve this without modifying the input lists: + + **Step 1: Push all digits onto stacks** + + - Create two stacks, one for each linked list + - Traverse each list and push every value onto its respective stack + - After this step, the top of each stack holds the least significant digit + +   + + **Step 2: Add digits with carry** + + - Initialise `carry` to `0` and `result` pointer to `None` + - While either stack has elements OR there's a carry: + - Pop from stack 1 if not empty, otherwise use `0` + - Pop from stack 2 if not empty, otherwise use `0` + - Calculate `total = val1 + val2 + carry` + - The digit for this position is `total % 10` + - The new carry is `total // 10` + - Create a new node with this digit and prepend it to the result + +   + + **Step 3: Build result by prepending** + + - Each new node becomes the new head of our result list + - This naturally builds the list in most-significant-first order + - Return the head of the result list + +   + + This approach mirrors how we add numbers by hand, processing from right to left while propagating carries. + + common_pitfalls: + - title: Forgetting the Final Carry + description: | + After processing all digits from both numbers, there might still be a carry left over. For example, adding `[9,9]` and `[1]` gives `99 + 1 = 100`. + + If you exit the loop when both stacks are empty but don't check for a remaining carry, you'll return `[0,0]` instead of `[1,0,0]`. + + Always continue the loop while `carry > 0`, or handle the final carry after the main loop. + wrong_approach: "Exit loop when both stacks are empty" + correct_approach: "Continue while stacks have elements OR carry is non-zero" + + - title: Building the Result List Backwards + description: | + Since we process digits from least significant to most significant, we need to build the result list in reverse order. If you append nodes to the end, you'll get the digits reversed. 
+ + For example, adding `[2,4,3]` and `[5,6,4]`: + - Process: 3+4=7, 4+6=10 (carry 1), 2+5+1=8 + - Appending gives: `[7,0,8]` (wrong!) + - Prepending gives: `[8,0,7]` (correct!) + wrong_approach: "Append new nodes to the tail of result" + correct_approach: "Prepend new nodes to the head of result" + + - title: Modifying Input Lists + description: | + While reversing the input lists works, the problem may expect you to leave them unchanged (especially in production code where the lists might be used elsewhere). + + The stack approach naturally avoids this issue since we only read from the lists, never modify them. + + If you do use the reversal approach, consider reversing the lists back to their original state before returning. + wrong_approach: "Reverse input lists in place without restoring" + correct_approach: "Use stacks, or reverse and restore the input lists" + + key_takeaways: + - "**Stacks reverse order naturally**: When processing order matters, stacks can transform first-to-last into last-to-first without modifying the original data" + - "**Prepending builds lists in reverse**: To construct a list from least to most significant, prepend each new node as the head" + - "**Handle carries beyond input length**: The result can have more digits than either input (e.g., `999 + 1 = 1000`)" + - "**Related problem**: 'Add Two Numbers' (LeetCode 2) is the reverse version where least significant digit comes first" + + time_complexity: "O(n + m). We traverse both lists once to fill the stacks, then process at most `max(n, m) + 1` digits during addition." + space_complexity: "O(n + m). The stacks store all digits from both input lists, plus the result list of similar size." 
+ +solutions: + - approach_name: Stack-Based Addition + is_optimal: true + code: | + class ListNode: + def __init__(self, val=0, next=None): + self.val = val + self.next = next + + def add_two_numbers(l1: ListNode, l2: ListNode) -> ListNode: + # Push all digits onto stacks + stack1, stack2 = [], [] + + while l1: + stack1.append(l1.val) + l1 = l1.next + + while l2: + stack2.append(l2.val) + l2 = l2.next + + carry = 0 + result = None + + # Process digits from least to most significant + while stack1 or stack2 or carry: + # Get digits (0 if stack is empty) + val1 = stack1.pop() if stack1 else 0 + val2 = stack2.pop() if stack2 else 0 + + # Add with carry + total = val1 + val2 + carry + digit = total % 10 + carry = total // 10 + + # Prepend new node to result + new_node = ListNode(digit) + new_node.next = result + result = new_node + + return result + explanation: | + **Time Complexity:** O(n + m) — We traverse each list once to build stacks, then process each digit once. + + **Space Complexity:** O(n + m) — Stacks hold all digits from both lists. + + The stack approach elegantly handles the digit ordering problem without modifying input lists. By popping from stacks, we naturally get digits in right-to-left order for addition. 
+ + - approach_name: Reverse Lists + is_optimal: false + code: | + class ListNode: + def __init__(self, val=0, next=None): + self.val = val + self.next = next + + def add_two_numbers(l1: ListNode, l2: ListNode) -> ListNode: + def reverse(head): + """Reverse a linked list in place.""" + prev = None + while head: + next_node = head.next + head.next = prev + prev = head + head = next_node + return prev + + # Reverse both lists to get least significant digit first + l1 = reverse(l1) + l2 = reverse(l2) + + carry = 0 + dummy = ListNode(0) + current = dummy + + # Add digits from least to most significant + while l1 or l2 or carry: + val1 = l1.val if l1 else 0 + val2 = l2.val if l2 else 0 + + total = val1 + val2 + carry + digit = total % 10 + carry = total // 10 + + current.next = ListNode(digit) + current = current.next + + if l1: + l1 = l1.next + if l2: + l2 = l2.next + + # Reverse result to get most significant digit first + return reverse(dummy.next) + explanation: | + **Time Complexity:** O(n + m) — Three passes: reverse l1, reverse l2, reverse result. + + **Space Complexity:** O(1) extra — Only uses pointers (result list doesn't count as extra space). + + This approach converts the problem to the simpler "Add Two Numbers" variant by reversing the inputs. The tradeoff is that it modifies the input lists (which may or may not be acceptable depending on requirements). diff --git a/backend/data/questions/add-two-numbers.yaml b/backend/data/questions/add-two-numbers.yaml new file mode 100644 index 0000000..645a082 --- /dev/null +++ b/backend/data/questions/add-two-numbers.yaml @@ -0,0 +1,185 @@ +title: Add Two Numbers +slug: add-two-numbers +difficulty: medium +leetcode_id: 2 +leetcode_url: https://leetcode.com/problems/add-two-numbers/ +categories: + - linked-lists + - math +patterns: + - two-pointers + +description: | + You are given two **non-empty** linked lists representing two non-negative integers. 
The digits are stored in **reverse order**, and each of their nodes contains a single digit. Add the two numbers and return the sum as a linked list. + + You may assume the two numbers do not contain any leading zero, except the number 0 itself. + +constraints: | + - The number of nodes in each linked list is in the range `[1, 100]` + - `0 <= Node.val <= 9` + - It is guaranteed that the list represents a number that does not have leading zeros + +examples: + - input: "l1 = [2,4,3], l2 = [5,6,4]" + output: "[7,0,8]" + explanation: "342 + 465 = 807. Digits are stored in reverse order." + - input: "l1 = [0], l2 = [0]" + output: "[0]" + explanation: "0 + 0 = 0" + - input: "l1 = [9,9,9,9,9,9,9], l2 = [9,9,9,9]" + output: "[8,9,9,9,0,0,0,1]" + explanation: "9999999 + 9999 = 10009998" + +explanation: + intuition: | + The digits are stored in **reverse order**, which is actually perfect for addition! Think about how you add numbers by hand: you start from the rightmost digit (ones place), add, carry the overflow, and move left. Here, the "rightmost" digit is conveniently at the head of the list. + + Think of it like this: we're simulating elementary school addition, digit by digit: + 1. Add corresponding digits from both numbers + 2. If the sum is ≥ 10, keep the ones digit and carry 1 to the next position + 3. 
Move to the next digit and repeat + + The key insight is that we process both lists in parallel, handling cases where: + - One list is longer than the other (treat missing digits as 0) + - There's a carry after processing both lists (add an extra digit) + + approach: | + We solve this using **Iterative Digit-by-Digit Addition**: + + **Step 1: Initialise the result list and carry** + + - Create a `dummy` head node (simplifies result construction) + - Set `current` to point to `dummy` + - Set `carry = 0` + +   + + **Step 2: Process digits from both lists** + + - While `l1` is not None OR `l2` is not None OR `carry` is non-zero: + - Get `val1 = l1.val if l1 else 0` + - Get `val2 = l2.val if l2 else 0` + - Compute `total = val1 + val2 + carry` + - Create new node with `total % 10` (ones digit) + - Update `carry = total // 10` (tens digit, if any) + - Advance `l1` and `l2` if they have more nodes + +   + + **Step 3: Return the result** + + - Return `dummy.next` (skip the dummy head) + +   + + The condition `l1 or l2 or carry` ensures we handle all cases: unequal lengths and final carry. + + common_pitfalls: + - title: Forgetting the Final Carry + description: | + After both lists are exhausted, there might still be a carry. For example, 99 + 1 = 100 — the final carry creates an extra digit. + + The loop condition `while l1 or l2 or carry` handles this by continuing as long as there's a carry to process. + wrong_approach: "while l1 or l2 — stops even if carry is 1" + correct_approach: "while l1 or l2 or carry" + + - title: Assuming Equal Length Lists + description: | + The two lists can have different lengths. When one list is exhausted, treat its "digits" as 0. + + Use `val1 = l1.val if l1 else 0` to handle this gracefully. 
+ wrong_approach: "while l1 and l2 — stops when either is exhausted" + correct_approach: "Use 0 for missing digits: val = node.val if node else 0" + + - title: Converting to Integers + description: | + Don't convert the lists to integers, add them, and convert back. The numbers can have up to 100 digits, far exceeding standard integer limits in many languages. + + Process digit by digit to avoid overflow issues. + wrong_approach: "num1 = int(digits1); result = num1 + num2" + correct_approach: "Add digit by digit with carry" + + key_takeaways: + - "**Dummy head simplifies linked list construction**: No special case for the first node" + - "**Digit-by-digit avoids overflow**: Works with arbitrarily large numbers" + - "**Handle unequal lengths gracefully**: Treat missing nodes as 0" + - "**Don't forget the final carry**: The loop condition should include `or carry`" + + time_complexity: "O(max(m, n)). We traverse both lists once, processing max(m, n) digits where m and n are the list lengths." + space_complexity: "O(max(m, n)). The result list has at most max(m, n) + 1 nodes (extra node for carry)." 
+ +solutions: + - approach_name: Iterative with Carry + is_optimal: true + code: | + class ListNode: + def __init__(self, val=0, next=None): + self.val = val + self.next = next + + def add_two_numbers(l1: ListNode | None, l2: ListNode | None) -> ListNode | None: + dummy = ListNode() # Dummy head for easy result construction + current = dummy + carry = 0 + + # Continue while there are digits to process or carry to add + while l1 or l2 or carry: + # Get current digits (0 if list is exhausted) + val1 = l1.val if l1 else 0 + val2 = l2.val if l2 else 0 + + # Add digits and carry + total = val1 + val2 + carry + carry = total // 10 # Carry for next position + digit = total % 10 # Current digit + + # Create new node and advance + current.next = ListNode(digit) + current = current.next + + # Advance input pointers if possible + l1 = l1.next if l1 else None + l2 = l2.next if l2 else None + + return dummy.next + explanation: | + **Time Complexity:** O(max(m, n)) — Single pass through both lists. + + **Space Complexity:** O(max(m, n)) — Result list size. + + We simulate manual addition: add corresponding digits plus carry, store the ones digit, propagate the carry. The dummy head eliminates special handling for the first node. The loop continues until both lists are exhausted AND there's no remaining carry. + + - approach_name: Recursive + is_optimal: false + code: | + def add_two_numbers( + l1: ListNode | None, + l2: ListNode | None, + carry: int = 0 + ) -> ListNode | None: + # Base case: both lists empty and no carry + if not l1 and not l2 and not carry: + return None + + # Get current digit values + val1 = l1.val if l1 else 0 + val2 = l2.val if l2 else 0 + + # Calculate sum and create node + total = val1 + val2 + carry + node = ListNode(total % 10) + + # Recurse for remaining digits + node.next = add_two_numbers( + l1.next if l1 else None, + l2.next if l2 else None, + total // 10 + ) + + return node + explanation: | + **Time Complexity:** O(max(m, n)) — Same as iterative. 
+ + **Space Complexity:** O(max(m, n)) — Recursion stack depth. + + Recursive version processes one digit per call. Each call creates a node and recursively handles the rest. Elegant but uses stack space proportional to input length. diff --git a/backend/data/questions/adding-spaces-to-a-string.yaml b/backend/data/questions/adding-spaces-to-a-string.yaml new file mode 100644 index 0000000..62212b8 --- /dev/null +++ b/backend/data/questions/adding-spaces-to-a-string.yaml @@ -0,0 +1,183 @@ +title: Adding Spaces to a String +slug: adding-spaces-to-a-string +difficulty: medium +leetcode_id: 2109 +leetcode_url: https://leetcode.com/problems/adding-spaces-to-a-string/ +categories: + - strings + - arrays + - two-pointers +patterns: + - two-pointers + +description: | + You are given a **0-indexed** string `s` and a **0-indexed** integer array `spaces` that describes the indices in the original string where spaces will be added. Each space should be inserted **before** the character at the given index. + + For example, given `s = "EnjoyYourCoffee"` and `spaces = [5, 9]`, we place spaces before `'Y'` and `'C'`, which are at indices `5` and `9` respectively. Thus, we obtain `"Enjoy Your Coffee"`. + + Return *the modified string **after** the spaces have been added*. + +constraints: | + - `1 <= s.length <= 3 * 10^5` + - `s` consists only of lowercase and uppercase English letters. + - `1 <= spaces.length <= 3 * 10^5` + - `0 <= spaces[i] <= s.length - 1` + - All the values of `spaces` are **strictly increasing**. + +examples: + - input: 's = "LeetcodeHelpsMeLearn", spaces = [8,13,15]' + output: '"Leetcode Helps Me Learn"' + explanation: "The indices 8, 13, and 15 correspond to the characters 'H', 'M', and 'L' in the original string. We place spaces before those characters." + - input: 's = "icodeinpython", spaces = [1,5,7,9]' + output: '"i code in py thon"' + explanation: "The indices 1, 5, 7, and 9 correspond to the characters 'c', 'i', 'p', and 't'. 
We place spaces before those characters." + - input: 's = "spacing", spaces = [0,1,2,3,4,5,6]' + output: '" s p a c i n g"' + explanation: "We can also place spaces before the first character of the string, resulting in a leading space." + +explanation: + intuition: | + Imagine you're a typist going through a document character by character, and you have a list of positions where you need to insert spaces. Rather than repeatedly inserting spaces into the middle of the string (which would require shifting all subsequent characters), you build a new string from scratch. + + The key insight is that the `spaces` array is **sorted in increasing order**. This means as you traverse the original string from left to right, you can simultaneously traverse the `spaces` array from left to right. When your current position in the string matches the next space position, you add a space to your result before adding the character. + + Think of it like reading a book while holding a bookmark list. As you read each word (character), you glance at your list to see if this is a spot where you need to pause (insert a space). Since both your reading and your list progress in the same direction, you never need to go backwards. + + This **two-pointer technique** allows you to process both the string and the spaces array in a single pass, achieving optimal efficiency. 
+ + approach: | + We solve this using a **Two Pointers** approach: + + **Step 1: Initialise variables** + + - `result`: A list to collect characters (using a list is more efficient than string concatenation in Python) + - `space_idx`: A pointer starting at `0` to track our position in the `spaces` array + +   + + **Step 2: Iterate through each character in the string** + + - For each index `i` and character `char` in the string: + - Check if `space_idx` is within bounds AND `spaces[space_idx] == i` + - If true, append a space `' '` to `result` and increment `space_idx` + - Append the current character `char` to `result` + +   + + **Step 3: Build and return the final string** + + - Join all characters in the `result` list into a single string + - Return the joined string + +   + + This approach works because the `spaces` array is strictly increasing, guaranteeing that each space position is visited exactly once as we traverse the string left to right. + + common_pitfalls: + - title: String Concatenation in a Loop + description: | + A naive approach might use string concatenation (`result += ' '`) inside the loop. In Python, strings are immutable, so each concatenation creates a new string object and copies all previous characters. + + With `n` characters and `m` spaces, this leads to **O((n+m)^2)** time complexity in the worst case. For the given constraints (`n, m <= 3 * 10^5`), this will cause **Time Limit Exceeded (TLE)**. + + Instead, append characters to a list and use `''.join()` at the end, which is O(n+m). + wrong_approach: "String concatenation with += in a loop" + correct_approach: "Append to a list, then join at the end" + + - title: Inserting Spaces In-Place + description: | + Another tempting approach is to insert spaces directly into the string at each position. However, inserting into a string (or even a list) at arbitrary positions requires shifting all subsequent elements. 
+ + With `m` insertions into a string of length `n`, this results in **O(n * m)** time complexity, which is too slow for the given constraints. + + Building a new string by iterating once avoids this overhead entirely. + wrong_approach: "Re-slicing the string (s[:i] + ' ' + s[i:]) or calling list.insert() for each space" + correct_approach: "Build the result by appending characters sequentially" + + - title: Off-by-One Errors with Indices + description: | + The problem states spaces are inserted **before** the character at each index. If you insert **after** the character or misinterpret 0-indexing, your output will be incorrect. + + For example, with `s = "abc"` and `spaces = [1]`, the correct output is `"a bc"` (space before index 1, which is 'b'), not `"ab c"`. + + Always verify your logic with the examples, especially edge cases like `spaces = [0]` which inserts a space at the very beginning. + + key_takeaways: + - "**Two pointers with sorted data**: When both inputs are sorted or one is naturally ordered (like string indices), consider traversing them together with two pointers." + - "**Avoid string concatenation in loops**: In languages with immutable strings (Python, Java, JavaScript), use a list/array/StringBuilder and join at the end for O(n) performance." + - "**Build vs. modify**: When inserting multiple elements into a sequence, building a new sequence is often more efficient than modifying in place." + - "**Leverage problem constraints**: The guarantee that `spaces` is strictly increasing is what makes the single-pass solution possible." + + time_complexity: "O(n + m). We traverse the string of length `n` once, and we traverse the `spaces` array of length `m` once." + space_complexity: "O(n + m). We build a result list containing all `n` original characters plus `m` spaces."
+ +solutions: + - approach_name: Two Pointers + is_optimal: true + code: | + def addSpaces(s: str, spaces: list[int]) -> str: + result = [] + space_idx = 0 + + for i, char in enumerate(s): + # Check if we need to insert a space before this character + if space_idx < len(spaces) and spaces[space_idx] == i: + result.append(' ') + space_idx += 1 + + # Add the current character + result.append(char) + + # Join all parts into the final string + return ''.join(result) + explanation: | + **Time Complexity:** O(n + m) — We iterate through the string once (n characters) and through the spaces array once (m elements). + + **Space Complexity:** O(n + m) — The result list contains n characters plus m spaces. + + We use two pointers: one implicit (the loop variable `i`) for the string, and one explicit (`space_idx`) for the spaces array. Since both traverse their respective sequences in order, we achieve linear time complexity. + + - approach_name: Pre-calculate and Join + is_optimal: true + code: | + def addSpaces(s: str, spaces: list[int]) -> str: + # Convert spaces to a set for O(1) lookup + space_set = set(spaces) + result = [] + + for i, char in enumerate(s): + if i in space_set: + result.append(' ') + result.append(char) + + return ''.join(result) + explanation: | + **Time Complexity:** O(n + m) — Building the set is O(m), and iterating through the string with O(1) lookups is O(n). + + **Space Complexity:** O(n + m) — The set uses O(m) space, and the result list uses O(n + m) space. + + This alternative uses a hash set for O(1) lookups instead of maintaining a pointer. While asymptotically the same, the two-pointer approach is slightly more efficient in practice due to lower constant factors (no hashing overhead). However, this approach may be easier to understand for some. 
+ + - approach_name: String Slicing + is_optimal: false + code: | + def addSpaces(s: str, spaces: list[int]) -> str: + parts = [] + prev = 0 + + for space_pos in spaces: + # Add the substring from previous position to current space position + parts.append(s[prev:space_pos]) + prev = space_pos + + # Add the remaining part of the string + parts.append(s[prev:]) + + # Join all parts with spaces + return ' '.join(parts) + explanation: | + **Time Complexity:** O(n + m) — Slicing operations are O(k) where k is the slice length, and the total slice lengths sum to n. + + **Space Complexity:** O(n + m) — Storing all the substrings plus the joined result. + + This approach treats the problem as splitting the string at space positions and rejoining with spaces. While elegant, it creates multiple substring objects which may have slightly higher memory overhead than the character-by-character approach. diff --git a/backend/data/questions/adding-two-negabinary-numbers.yaml b/backend/data/questions/adding-two-negabinary-numbers.yaml new file mode 100644 index 0000000..7a4dc5b --- /dev/null +++ b/backend/data/questions/adding-two-negabinary-numbers.yaml @@ -0,0 +1,224 @@ +title: Adding Two Negabinary Numbers +slug: adding-two-negabinary-numbers +difficulty: medium +leetcode_id: 1073 +leetcode_url: https://leetcode.com/problems/adding-two-negabinary-numbers/ +categories: + - arrays + - math +patterns: + - two-pointers + +description: | + Given two numbers `arr1` and `arr2` in base **-2** (negabinary), return the result of adding them together. + + Each number is given in *array format*: as an array of `0`s and `1`s, from most significant bit to least significant bit. For example, `arr = [1,1,0,1]` represents the number `(-2)^3 + (-2)^2 + (-2)^0 = -3`. + + A number `arr` in array format is also guaranteed to have no leading zeros: either `arr == [0]` or `arr[0] == 1`. 
+ + Return *the result of adding `arr1` and `arr2` in the same format*: as an array of `0`s and `1`s with no leading zeros. + +constraints: | + - `1 <= arr1.length, arr2.length <= 1000` + - `arr1[i]` and `arr2[i]` are `0` or `1` + - `arr1` and `arr2` have no leading zeros + +examples: + - input: "arr1 = [1,1,1,1,1], arr2 = [1,0,1]" + output: "[1,0,0,0,0]" + explanation: "arr1 represents 11 in decimal, arr2 represents 5, and the output represents 16." + - input: "arr1 = [0], arr2 = [0]" + output: "[0]" + explanation: "Both inputs are zero, so the sum is zero." + - input: "arr1 = [0], arr2 = [1]" + output: "[1]" + explanation: "0 + 1 = 1." + +explanation: + intuition: | + Think of this like elementary school addition, but with a twist: the base is **-2** instead of 10. + + In standard binary (base 2), when you add two `1`s, you get `2`, which means you write `0` and carry `1` to the next position. But in negabinary (base **-2**), the rules are different because each position alternates between positive and negative powers. + + The key insight is understanding what happens with carries. In base -2: + - Position 0 has weight `(-2)^0 = 1` + - Position 1 has weight `(-2)^1 = -2` + - Position 2 has weight `(-2)^2 = 4` + - Position 3 has weight `(-2)^3 = -8` + + When the sum at a position is `2`, we need to carry. But since the next position has weight `-2`, carrying `1` there adds `-2` to our total. To compensate for the `2` we're removing from the current position, we need `2 = 1 × (-2) + ?`, which gives us `-2 + ? = 2`, so `? = 4`. This means we also need to carry to the position *after* that. Similarly, if the sum is `3`, we write `1` and handle the same carry logic. Carrying `1` into each of the next two positions is equivalent to carrying a single `-1` into the next position (since `2 = 0 + (-2) × (-1)`), and that single negative carry is the form the algorithm below uses. + + If the sum is `-1` (from a negative carry), we write `1` and carry `1` to the next position, because `-1 = 1 + (-2) × 1`.
+ + approach: | + We solve this using a **Right-to-Left Simulation** approach, similar to grade school addition: + + **Step 1: Initialise variables** + + - `result`: An empty list to store the output bits (in reverse order) + - `carry`: Set to `0` — this can be `-1`, `0`, or `1` during processing + - `i`, `j`: Pointers starting at the rightmost (least significant) positions of both arrays + +   + + **Step 2: Process from right to left** + + - While there are digits remaining in either array OR there's a non-zero carry: + - Get the current digit from `arr1` (or `0` if exhausted) + - Get the current digit from `arr2` (or `0` if exhausted) + - Compute `sum = digit1 + digit2 + carry` + - Handle the sum based on its value: + - If `sum == 0` or `sum == 1`: write `sum`, carry becomes `0` + - If `sum == 2`: write `0`, carry becomes `-1` (since `2 = 0 + (-2) × (-1)`) + - If `sum == 3`: write `1`, carry becomes `-1` (since `3 = 1 + (-2) × (-1)`) + - If `sum == -1`: write `1`, carry becomes `1` (since `-1 = 1 + (-2) × 1`) + - Move pointers left + +   + + **Step 3: Clean up and return** + + - Reverse the result list (we built it backwards) + - Remove leading zeros, but keep at least one digit + - Return the final array + +   + + The mathematical basis: `sum = result_bit + (-2) × carry`. Rearranging gives us the carry logic for each case. + + common_pitfalls: + - title: Treating It Like Regular Binary + description: | + In regular binary (base 2), when you have a carry of `1` and it propagates, you handle it the same way at each position. But in negabinary, the alternating signs mean the carry behaves differently. + + For example, if `sum = 2`: + - In base 2: write `0`, carry `1` + - In base -2: write `0`, carry `-1` + + The negative carry compensates for the fact that the next position has a negative weight. 
+ wrong_approach: "Using standard binary carry logic" + correct_approach: "Use negabinary-specific carry rules" + + - title: Forgetting the -1 Carry Case + description: | + When a previous carry was `-1` and the current digits are both `0`, the sum becomes `-1`. This is a valid scenario that requires writing `1` and carrying `1` to the next position. + + If you only handle sums of `0`, `1`, `2`, `3`, your algorithm will fail on certain inputs. + wrong_approach: "Only handling sums 0-3" + correct_approach: "Handle sum = -1 by writing 1 with carry 1" + + - title: Not Removing Leading Zeros Properly + description: | + The result might have leading zeros after the addition is complete. For instance, adding `[1,1]` and `[1]` in base -2 (that is, `-1 + 1 = 0`) produces the digits `[0,0]` before cleanup: the rightmost column sums to `2` (write `0`, carry `-1`), and the next column sums to `1 + (-1) = 0`. + + However, you must keep at least one digit — if the result is all zeros, return `[0]`, not an empty array. + wrong_approach: "Stripping all zeros" + correct_approach: "Strip leading zeros but keep [0] for zero result" + + key_takeaways: + - "**Negabinary carry rules**: Unlike standard binary, carries in base -2 can be negative and require different handling for sums of 2, 3, and -1" + - "**Formula-based approach**: Use `sum = result_bit + (-2) × carry` to derive the correct bit and carry values mathematically" + - "**Generalises to any base**: This simulation pattern works for addition in any positional numeral system — just adjust the carry logic for the base" + - "**Edge cases matter**: Handle the `-1` sum case and ensure proper leading zero removal" + + time_complexity: "O(max(n, m)). We process each digit of both arrays exactly once, where `n` and `m` are the lengths of `arr1` and `arr2`." + space_complexity: "O(max(n, m)). The result array can be at most a few digits longer than the longer input." 
# Solution entry: "Simulation with Carry" (is_optimal: true)
def add_negabinary(arr1: list[int], arr2: list[int]) -> list[int]:
    """Add two base -2 numbers given as most-significant-first bit arrays.

    The columns are summed from the least significant end. Every column
    total ``t`` is split so that ``t = bit + (-2) * carry`` with ``bit`` in
    {0, 1}; that gives ``bit = t & 1`` and ``carry = -(t >> 1)``. For 0/1
    input digits the total is always in -1..3, so the carry stays in
    {-1, 0, 1}: totals 2 and 3 carry -1, and a total of -1 carries +1.

    Args:
        arr1: Bits of the first operand, most significant bit first.
        arr2: Bits of the second operand, most significant bit first.

    Returns:
        Bits of the sum, most significant bit first, without leading zeros.
        A zero sum of non-empty operands is returned as ``[0]``.

    Time complexity is O(max(n, m)); space complexity is O(max(n, m)).
    """
    digits = []  # result bits, least significant first
    carry = 0
    i, j = len(arr1) - 1, len(arr2) - 1

    # Keep going while either operand has digits left or a carry is pending.
    while i >= 0 or j >= 0 or carry:
        total = carry
        if i >= 0:
            total += arr1[i]
            i -= 1
        if j >= 0:
            total += arr2[j]
            j -= 1

        # Python's & and >> use floor semantics on negative ints, so this
        # also covers total == -1 (bit 1, carry +1).
        digits.append(total & 1)
        carry = -(total >> 1)

    # Leading zeros of the final answer sit at the END of the LSB-first
    # list, so each one is removed in O(1) instead of O(n) via pop(0).
    while len(digits) > 1 and digits[-1] == 0:
        digits.pop()

    digits.reverse()
    return digits
+ + - approach_name: Convert to Decimal and Back + is_optimal: false + code: | + def add_negabinary(arr1: list[int], arr2: list[int]) -> list[int]: + def to_decimal(arr: list[int]) -> int: + """Convert negabinary array to decimal.""" + result = 0 + for i, bit in enumerate(reversed(arr)): + if bit == 1: + result += (-2) ** i + return result + + def to_negabinary(n: int) -> list[int]: + """Convert decimal to negabinary array.""" + if n == 0: + return [0] + + result = [] + while n != 0: + # Get remainder when dividing by -2 + remainder = n % (-2) + n //= -2 + + # Adjust for negative remainder + if remainder < 0: + remainder += 2 + n += 1 + + result.append(remainder) + + return result[::-1] + + # Convert both to decimal, add, convert back + decimal_sum = to_decimal(arr1) + to_decimal(arr2) + return to_negabinary(decimal_sum) + explanation: | + **Time Complexity:** O(n + m) — Convert both arrays, add, convert back. + + **Space Complexity:** O(max(n, m)) — For the result array. + + This approach converts both negabinary numbers to decimal, performs standard addition, then converts back to negabinary. While conceptually simpler, it requires understanding the conversion algorithms and may have issues with very large numbers in languages without arbitrary precision integers. diff --git a/backend/data/questions/additive-number.yaml b/backend/data/questions/additive-number.yaml new file mode 100644 index 0000000..a4be580 --- /dev/null +++ b/backend/data/questions/additive-number.yaml @@ -0,0 +1,223 @@ +title: Additive Number +slug: additive-number +difficulty: medium +leetcode_id: 306 +leetcode_url: https://leetcode.com/problems/additive-number/ +categories: + - strings + - recursion +patterns: + - backtracking + +description: | + An **additive number** is a string whose digits can form an **additive sequence**. + + A valid **additive sequence** should contain **at least three numbers**. 
Except for the first two numbers, each subsequent number in the sequence must be the sum of the preceding two. + + Given a string containing only digits, return `true` if it is an **additive number** or `false` otherwise. + + **Note:** Numbers in the additive sequence **cannot** have leading zeros, so sequence `1, 2, 03` or `1, 02, 3` is invalid. + +constraints: | + - `1 <= num.length <= 35` + - `num` consists only of digits. + +examples: + - input: 'num = "112358"' + output: "true" + explanation: "The digits can form an additive sequence: 1, 1, 2, 3, 5, 8. We have 1 + 1 = 2, 1 + 2 = 3, 2 + 3 = 5, 3 + 5 = 8." + - input: 'num = "199100199"' + output: "true" + explanation: "The additive sequence is: 1, 99, 100, 199. We have 1 + 99 = 100, 99 + 100 = 199." + - input: 'num = "1023"' + output: "false" + explanation: "No valid additive sequence can be formed. The sequence 1, 02, 3 is invalid because 02 has a leading zero." + +explanation: + intuition: | + Think of this problem like solving a puzzle where you need to split a string of digits into a sequence that follows the Fibonacci-like property: each number equals the sum of the two before it. + + The key insight is that **once you fix the first two numbers, the entire sequence is determined**. If we pick `num1` and `num2`, then the third number *must* be `num1 + num2`, the fourth *must* be `num2 + (num1 + num2)`, and so on. There's no choice involved after the first two numbers. + + This means we can **enumerate all possible starting pairs** (the first and second numbers), then verify whether the remaining string follows the additive sequence rule. If the remaining digits perfectly match the expected sums all the way to the end, we have a valid additive number. + + The constraint that numbers cannot have leading zeros (except for `"0"` itself) prunes many invalid cases early, making the search efficient despite the nested loops. 
+ + approach: | + We solve this using **Backtracking with String Enumeration**: + + **Step 1: Enumerate all possible first numbers** + + - The first number can be any prefix of the string from index `0` to `i` + - Skip prefixes with leading zeros (except `"0"` itself) + - The first number can be at most half the string length (we need room for at least two more numbers) + +   + + **Step 2: Enumerate all possible second numbers** + + - For each first number ending at index `i`, try all possible second numbers from index `i+1` to `j` + - Apply the same leading zero rule + - The combined length of the first two numbers must leave room for at least one more number + +   + + **Step 3: Validate the remaining sequence** + + - With `num1` and `num2` fixed, compute their sum as a string + - Check if the remaining string starts with this sum + - If yes, continue with `num2` as the new first number and the sum as the new second number + - Repeat until we consume the entire string (success) or find a mismatch (failure) + +   + + **Step 4: Handle large numbers** + + - Since numbers can be up to 35 digits, they may exceed integer limits + - Use string arithmetic or Python's native big integer support + +   + + If any starting pair leads to consuming the entire string, return `true`. If all pairs fail, return `false`. + + common_pitfalls: + - title: Missing the Leading Zero Rule + description: | + A number like `"03"` is invalid because it has a leading zero. However, `"0"` by itself is valid. + + You must check: if a number has more than one digit and starts with `'0'`, skip it immediately. This applies to the first number, second number, and every computed sum in the sequence. + wrong_approach: "Allowing 01, 02, 03 as valid numbers" + correct_approach: "Skip any multi-digit number starting with '0'" + + - title: Integer Overflow + description: | + With strings up to 35 digits, the numbers can far exceed the range of 64-bit integers (`2^63 - 1` has only 19 digits). 
+ + In Python, this isn't an issue due to native arbitrary-precision integers. In other languages, you'd need to implement string addition or use a big integer library. + wrong_approach: "Using int/long for arithmetic in languages with fixed-width integers" + correct_approach: "Use Python's native big integers or implement string addition" + + - title: Not Trying All Starting Pairs + description: | + It's tempting to assume the first and second numbers are single digits, but consider `"199100199"` where the sequence is `1, 99, 100, 199`. The second number has two digits. + + You must enumerate *all* valid lengths for both the first and second numbers. + wrong_approach: "Assuming single-digit starts or fixed-length prefixes" + correct_approach: "Try all valid prefix lengths for both first and second numbers" + + - title: Forgetting the Three-Number Minimum + description: | + An additive sequence must have at least three numbers. A string like `"12"` cannot form a valid sequence even though `1 + 2 = 3` — there's no third number in the string to verify. + + The validation step must consume the entire string *and* have produced at least three numbers. + wrong_approach: "Accepting two-number 'sequences'" + correct_approach: "Ensure the sequence contains at least three numbers" + + key_takeaways: + - "**Constrained search space**: Fixing the first two numbers determines the entire sequence, reducing the problem to enumeration + validation" + - "**Early pruning with leading zeros**: Rejecting invalid prefixes immediately keeps the search efficient" + - "**String comparison for sums**: Instead of computing and converting, check if the string *starts with* the expected sum" + - "**Pattern recognition**: This is a classic backtracking template — enumerate choices, validate, and backtrack on failure" + + time_complexity: "O(n^3). We have O(n^2) choices for the first two numbers, and validating each sequence takes O(n) time for string operations." + space_complexity: "O(n). 
The recursion depth is at most O(n), and we store intermediate strings of length up to O(n)." + +solutions: + - approach_name: Backtracking with String Enumeration + is_optimal: true + code: | + def is_additive_number(num: str) -> bool: + n = len(num) + + def is_valid(num1: str, num2: str, remaining: str) -> bool: + """Check if remaining string follows additive sequence.""" + if not remaining: + return True # Successfully consumed entire string + + # Compute the expected next number + total = str(int(num1) + int(num2)) + + # Check if remaining starts with this sum + if not remaining.startswith(total): + return False + + # Leading zero check for the sum (already handled by str(int(...))) + # Continue validation with num2 and total as the new pair + return is_valid(num2, total, remaining[len(total):]) + + # Try all possible first numbers (at least 1 digit, at most n-2 to leave room) + for i in range(1, n): + num1 = num[:i] + + # Skip numbers with leading zeros (except "0" itself) + if len(num1) > 1 and num1[0] == '0': + break # All longer prefixes will also have leading zero + + # Try all possible second numbers + for j in range(i + 1, n): + num2 = num[i:j] + + # Skip numbers with leading zeros + if len(num2) > 1 and num2[0] == '0': + break # All longer second numbers starting here have leading zero + + remaining = num[j:] + + # Validate the sequence from this starting pair + if is_valid(num1, num2, remaining): + return True + + return False + explanation: | + **Time Complexity:** O(n^3) — O(n^2) pairs of starting numbers, each validation takes O(n). + + **Space Complexity:** O(n) — Recursion depth and string slicing. + + We enumerate all valid (num1, num2) pairs and recursively verify that the rest of the string follows the additive property. Python's arbitrary-precision integers handle the 35-digit constraint naturally. 
+ + - approach_name: Iterative Validation + is_optimal: false + code: | + def is_additive_number(num: str) -> bool: + n = len(num) + + def check_sequence(i: int, j: int) -> bool: + """Check if starting with num[0:i] and num[i:j] forms valid sequence.""" + num1 = num[:i] + num2 = num[i:j] + + # Leading zero checks + if (len(num1) > 1 and num1[0] == '0') or \ + (len(num2) > 1 and num2[0] == '0'): + return False + + # Iterate through the rest of the string + start = j + while start < n: + # Compute expected sum + total = str(int(num1) + int(num2)) + + # Check if remaining string starts with sum + if not num[start:].startswith(total): + return False + + # Move to next pair + start += len(total) + num1, num2 = num2, total + + return True + + # Enumerate all possible (i, j) pairs + # i = end of first number, j = end of second number + for i in range(1, n - 1): + for j in range(i + 1, n): + if check_sequence(i, j): + return True + + return False + explanation: | + **Time Complexity:** O(n^3) — Same as recursive approach. + + **Space Complexity:** O(n) — String slicing creates new strings. + + This iterative version replaces recursion with a while loop. The logic is identical: enumerate starting pairs, then validate the sequence step by step. Some find this easier to follow than the recursive version. diff --git a/backend/data/questions/advantage-shuffle.yaml b/backend/data/questions/advantage-shuffle.yaml new file mode 100644 index 0000000..2aac7ee --- /dev/null +++ b/backend/data/questions/advantage-shuffle.yaml @@ -0,0 +1,199 @@ +title: Advantage Shuffle +slug: advantage-shuffle +difficulty: medium +leetcode_id: 870 +leetcode_url: https://leetcode.com/problems/advantage-shuffle/ +categories: + - arrays + - sorting +patterns: + - greedy + - two-pointers + +description: | + You are given two integer arrays `nums1` and `nums2` both of the same length. The **advantage** of `nums1` with respect to `nums2` is the number of indices `i` for which `nums1[i] > nums2[i]`. 
+ + Return *any permutation of* `nums1` *that maximises its **advantage** with respect to* `nums2`. + +constraints: | + - `1 <= nums1.length <= 10^5` + - `nums2.length == nums1.length` + - `0 <= nums1[i], nums2[i] <= 10^9` + +examples: + - input: "nums1 = [2,7,11,15], nums2 = [1,10,4,11]" + output: "[2,11,7,15]" + explanation: "With this arrangement: 2 > 1 (advantage), 11 > 10 (advantage), 7 > 4 (advantage), 15 > 11 (advantage). Total advantage = 4." + - input: "nums1 = [12,24,8,32], nums2 = [13,25,32,11]" + output: "[24,32,8,12]" + explanation: "With this arrangement: 24 > 13 (advantage), 32 > 25 (advantage), 8 < 32 (no advantage), 12 > 11 (advantage). Total advantage = 3, which is optimal." + +explanation: + intuition: | + This problem is a classic example of the **greedy assignment strategy**, famously known as "Tian Ji's Horse Racing" from ancient Chinese history. + + Imagine you're playing a card game where you must beat your opponent's cards. For each of their cards, you want to use your *smallest card that can still win*. If none of your remaining cards can win, you sacrifice your weakest card to preserve stronger ones for future rounds. + + Think of it like this: if your opponent plays a 10, and you have cards [8, 11, 15], you should play 11 (the smallest winning card), not 15. This preserves 15 for potentially beating a higher card later. If you can't beat their card at all, throw away your weakest card (8) since it's useless anyway. + + The key insight is that **greedily using the smallest winning card** maximises your overall advantage. Sorting both arrays allows us to efficiently find these optimal pairings. 
+ + approach: | + We solve this using a **Greedy Assignment with Sorting** approach: + + **Step 1: Sort nums1 and create indexed pairs for nums2** + + - Sort `nums1` in ascending order so that our weakest and strongest unused cards are always at the two ends + - Create pairs of `(value, original_index)` for `nums2` and sort by value in descending order + - This lets us process opponents from strongest to weakest while remembering original positions + +   + + **Step 2: Use two pointers on sorted nums1** + + - Maintain `left` pointer at start (smallest unused card) + - Maintain `right` pointer at end (largest unused card) + - For each opponent value (from largest to smallest): + - If our largest card (`nums1[right]`) can beat it, use it and move `right` one step left + - Otherwise, no remaining card can win here, so sacrifice our smallest card (`nums1[left]`) and advance `left` + +   + + **Step 3: Build the result array** + + - Each time a card is chosen in Step 2, write it to the opponent's *original* index in the result + - Every position therefore receives either a winning card or a sacrificed card + - Once all opponents are processed, the result is a permutation of `nums1` aligned with the original order of `nums2` + +   + + This greedy approach works because the strongest remaining opponent is the hardest to beat: if our largest card cannot beat it, no card can, so discarding our smallest card there costs nothing. If our largest card can beat it, using it is safe, since any smaller card that would also have won here can still beat every weaker opponent. + + common_pitfalls: + - title: Forgetting Original Indices + description: | + The output must be a permutation of `nums1` that corresponds to the *original positions* of `nums2`. Simply sorting both arrays and matching them loses track of where each value in `nums2` originally was. + + For example, if `nums2 = [1, 10, 4, 11]`, after sorting we get `[1, 4, 10, 11]`. If we assign values to this sorted order, we need to map them back: position 0 in sorted → position 0 in original, position 1 in sorted → position 2 in original, etc. 
+ wrong_approach: "Sort both arrays and match directly" + correct_approach: "Track original indices when sorting nums2" + + - title: Using Largest Card Instead of Smallest Winner + description: | + A tempting but suboptimal strategy is to always use your largest card to guarantee a win. This wastes strong cards on weak opponents. + + For example, if `nums1 = [5, 10, 15]` and `nums2 = [3, 8, 14]`: + - Using largest: 15 beats 3, 10 beats 8, 5 < 14 → 2 wins + - Using smallest winner: 5 beats 3, 10 beats 8, 15 beats 14 → 3 wins + + Always use the *smallest* card that can still win. + wrong_approach: "Match largest available to each opponent" + correct_approach: "Use smallest card that can beat each opponent" + + - title: Sacrificing Wrong Cards + description: | + When you can't beat an opponent, you should sacrifice your *weakest* card, not a random one. Weak cards have the least potential for future wins, so throwing them away preserves your winning potential. + + If you can't beat any remaining opponent with your smallest card, that card is essentially worthless — use it against the opponent's strongest value to get it out of the way. + wrong_approach: "Randomly assign losing cards" + correct_approach: "Sacrifice smallest card when no win is possible" + + key_takeaways: + - "**Greedy assignment pattern**: When matching elements from two arrays, use the smallest value that satisfies the condition" + - "**Sacrifice strategy**: When you can't win, sacrifice your weakest asset to preserve stronger ones" + - "**Index tracking**: When the output order matters, pair values with their original indices before sorting" + - "**Classic problem**: This is Tian Ji's Horse Racing strategy — a foundational greedy pattern that appears in many variations" + + time_complexity: "O(n log n). Sorting both arrays dominates the complexity. The two-pointer assignment is O(n)." + space_complexity: "O(n). We store the sorted `nums1`, indexed pairs for `nums2`, and the result array." 
# Solution entry: "Greedy with Two Pointers" (is_optimal: true)
def advantage_count(nums1: list[int], nums2: list[int]) -> list[int]:
    """Return a permutation of ``nums1`` maximising its advantage over ``nums2``.

    Opponents (values of ``nums2``) are visited from strongest to weakest.
    If the largest unused card beats the current opponent it is played
    there; otherwise nothing can win at that position and the smallest
    unused card is spent on it instead.

    Args:
        nums1: The cards that may be rearranged.
        nums2: The opponent values, whose order is fixed.

    Returns:
        A list where index ``i`` holds the card played against ``nums2[i]``.

    Time complexity is O(n log n) for the two sorts; space is O(n).
    """
    cards = sorted(nums1)
    lo, hi = 0, len(cards) - 1
    answer = [0] * len(cards)

    # Indices of nums2 ordered by descending value. reverse=True keeps the
    # sort stable, so equal values are visited in their original order.
    strongest_first = sorted(range(len(nums2)), key=nums2.__getitem__, reverse=True)

    for pos in strongest_first:
        if cards[hi] > nums2[pos]:
            # Our best remaining card wins here.
            answer[pos] = cards[hi]
            hi -= 1
        else:
            # Unwinnable position: give up the weakest remaining card.
            answer[pos] = cards[lo]
            lo += 1

    return answer
+ + - approach_name: Greedy with Multiset + is_optimal: true + code: | + from sortedcontainers import SortedList + + def advantage_count(nums1: list[int], nums2: list[int]) -> list[int]: + # Use sorted container for efficient removal and search + available = SortedList(nums1) + result = [] + + for target in nums2: + # Find index of smallest value greater than target + idx = available.bisect_right(target) + + if idx < len(available): + # Found a winning card - use smallest one that wins + result.append(available.pop(idx)) + else: + # No card can win - sacrifice the smallest + result.append(available.pop(0)) + + return result + explanation: | + **Time Complexity:** O(n log n) — Each of the n iterations does O(log n) search and removal. + + **Space Complexity:** O(n) — For the sorted container. + + This approach processes `nums2` in its original order, finding the smallest winning card for each position. Using a sorted container allows O(log n) operations for finding and removing elements. This is conceptually cleaner but requires an external library in Python. + + - approach_name: Brute Force with Backtracking + is_optimal: false + code: | + def advantage_count(nums1: list[int], nums2: list[int]) -> list[int]: + from itertools import permutations + + def count_advantage(perm): + return sum(1 for a, b in zip(perm, nums2) if a > b) + + best_perm = nums1[:] + best_count = count_advantage(best_perm) + + # Try all permutations (only feasible for tiny inputs) + for perm in permutations(nums1): + count = count_advantage(perm) + if count > best_count: + best_count = count + best_perm = list(perm) + + return best_perm + explanation: | + **Time Complexity:** O(n! * n) — Checking all n! permutations, each taking O(n) to evaluate. + + **Space Complexity:** O(n) — For storing permutations. + + This brute force approach tries every possible arrangement of `nums1` and picks the one with maximum advantage. 
While correct, it's completely impractical for n > 10 due to factorial growth. Included to illustrate why greedy optimisation is essential. diff --git a/backend/data/questions/airplane-seat-assignment-probability.yaml b/backend/data/questions/airplane-seat-assignment-probability.yaml new file mode 100644 index 0000000..235c9cd --- /dev/null +++ b/backend/data/questions/airplane-seat-assignment-probability.yaml @@ -0,0 +1,200 @@ +title: Airplane Seat Assignment Probability +slug: airplane-seat-assignment-probability +difficulty: medium +leetcode_id: 1227 +leetcode_url: https://leetcode.com/problems/airplane-seat-assignment-probability/ +categories: + - math + - dynamic-programming +patterns: + - dynamic-programming + +description: | + `n` passengers board an airplane with exactly `n` seats. The first passenger has lost their ticket and picks a seat randomly. After that, the rest of the passengers will: + + - Take their own seat if it is still available, and + - Pick other seats randomly when they find their seat occupied + + Return *the probability that the* `n`th *person gets their own seat*. + +constraints: | + - `1 <= n <= 10^5` + +examples: + - input: "n = 1" + output: "1.00000" + explanation: "The first person can only get the first seat (which is also their own seat)." + - input: "n = 2" + output: "0.50000" + explanation: "The second person has a probability of 0.5 to get the second seat (when the first person randomly picks seat 1 instead of seat 2)." + +explanation: + intuition: | + This problem appears complex at first — with randomness cascading through passengers, it seems like we'd need to track many probability branches. But there's a beautiful mathematical insight that simplifies everything. + + **The Key Insight:** Focus on *when the chaos ends*. The randomness only continues until someone sits in either **seat 1** (the first passenger's assigned seat) or **seat n** (the last passenger's seat). 
Every other seat assignment just passes the problem along. + + Think of it like this: imagine you're passenger `n` waiting to board. The only outcomes that matter to you are: + - Someone eventually sits in seat 1 → the chain reaction ends, and your seat remains free + - Someone eventually sits in seat `n` → you lose your seat + + Here's the magic: at every step where a passenger must choose randomly, seats 1 and `n` are **equally likely** to be chosen (directly or indirectly). This symmetry means the probability is always **50/50** — regardless of how many passengers there are! + + The only exception is `n = 1`, where the first passenger *is* the last passenger, so they get their own seat with probability 1. + + approach: | + We can solve this with pure **mathematical reasoning**: + + **Step 1: Understand the recursion** + + - Let `f(n)` = probability that passenger `n` gets their seat + - The first passenger picks randomly from `n` seats + +   + + **Step 2: Analyse the three cases when passenger 1 picks** + + - **Picks seat 1** (probability `1/n`): Everyone else gets their own seat. Passenger `n` gets seat `n`. ✓ + - **Picks seat n** (probability `1/n`): Passenger `n` loses their seat immediately. ✗ + - **Picks seat k** where `1 < k < n` (probability `(n-2)/n`): Passengers 2 through `k-1` get their seats. Passenger `k` faces the same problem with `n-k+1` remaining "uncertain" seats. 
+ +   + + **Step 3: Recognise the symmetry** + + - In case 3, passenger `k` becomes a "new first passenger" for the subproblem + - The recursive structure shows that seats 1 and `n` always have equal probability of being taken + - This gives us: `f(n) = 1/n + (n-2)/n × f(smaller subproblem)` + +   + + **Step 4: Solve the recurrence** + + - Working through the math (or computing small cases), we find `f(n) = 0.5` for all `n >= 2` + - For `n = 1`: the only passenger gets their own seat, so `f(1) = 1` + +   + + The solution becomes trivially simple: return `1.0` if `n == 1`, else return `0.5`. + + common_pitfalls: + - title: Overcomplicating with Simulation + description: | + A natural instinct is to simulate the boarding process with random number generation and run many trials to estimate the probability. + + While this "Monte Carlo" approach works conceptually, it's: + - Slow and imprecise (needs millions of trials for accuracy) + - Unnecessary once you understand the mathematical pattern + - Missing the elegant insight that makes this problem beautiful + + The closed-form solution runs in O(1) time and is exact. + wrong_approach: "Simulate boarding millions of times" + correct_approach: "Use mathematical insight for O(1) solution" + + - title: Building a Full DP Table + description: | + You might try to build a DP solution computing `f(2)`, `f(3)`, ..., `f(n)` iteratively. + + While this works and gives the right answer, it's O(n) time and O(1) or O(n) space — far more than needed. + + Once you prove mathematically that `f(n) = 0.5` for all `n >= 2`, you can skip all computation. + wrong_approach: "Build DP table from 2 to n" + correct_approach: "Return 0.5 directly (after proving the pattern)" + + - title: Forgetting the n = 1 Edge Case + description: | + When `n = 1`, the first passenger is also the last passenger. They pick their own seat (the only seat available), so probability is `1.0`, not `0.5`. 
+ + This is the only case that breaks the "always 0.5" pattern. + + key_takeaways: + - "**Look for symmetry**: When two outcomes seem equally likely at every decision point, the final probabilities are often equal" + - "**Recursive problems can have closed-form solutions**: Don't stop at a working recurrence — ask if there's a pattern" + - "**Brainteasers reward insight over brute force**: This problem tests mathematical reasoning, not coding skill" + - "**The answer 0.5 is counterintuitive**: With 100 passengers, you'd expect the last person's chances to be tiny — but symmetry saves them" + + time_complexity: "O(1). We return a constant value based on a simple condition." + space_complexity: "O(1). No additional data structures are used." + +solutions: + - approach_name: Mathematical Insight + is_optimal: true + code: | + def nth_person_gets_nth_seat(n: int) -> float: + # Edge case: only one passenger, they get their own seat + if n == 1: + return 1.0 + + # For n >= 2, symmetry guarantees 50% probability + # Seats 1 and n are equally likely to be taken at any decision point + return 0.5 + explanation: | + **Time Complexity:** O(1) — Single comparison and return. + + **Space Complexity:** O(1) — No additional memory used. + + This solution leverages the mathematical proof that for any `n >= 2`, the probability is exactly 0.5. The symmetry between seat 1 and seat n at every random choice guarantees this elegant result. 
# Solution entry: "Dynamic Programming" (is_optimal: false)
def nth_person_gets_nth_seat(n: int) -> float:
    """Return the probability that passenger ``n`` ends up in seat ``n``.

    Let ``f(k)`` be the answer for ``k`` passengers. The first passenger
    picks each seat with probability ``1/k``:

    * seat 1 -> everyone sits correctly (contributes ``1/k``);
    * seat k -> the last passenger loses (contributes 0);
    * seat j, 2 <= j < k -> passenger j restarts the same process with
      ``k - j + 1`` seats, and that size ranges over 2..k-1.

    Hence ``f(k) = (1 + f(2) + ... + f(k-1)) / k``. The sum is carried
    forward as a running total rather than recomputed for each ``k``, so
    the whole recurrence is evaluated in one pass.

    Args:
        n: Number of passengers and seats; expected to be at least 1.

    Returns:
        ``1.0`` for ``n == 1``, otherwise the value of the recurrence
        (which evaluates to ``0.5`` for every ``n >= 2``).

    Time complexity is O(n); space complexity is O(1).
    """
    # Base case: a single passenger can only take their own seat.
    if n == 1:
        return 1.0

    prob = 1.0      # f(k) for the most recently processed k (starts at f(1))
    tail_sum = 0.0  # f(2) + ... + f(k-1) for the k about to be processed

    for k in range(2, n + 1):
        prob = (1.0 + tail_sum) / k
        tail_sum += prob

    return prob
+ + - approach_name: Recursive with Memoisation + is_optimal: false + code: | + from functools import lru_cache + + def nth_person_gets_nth_seat(n: int) -> float: + @lru_cache(maxsize=None) + def probability(k: int) -> float: + # Base case: single passenger always gets their seat + if k == 1: + return 1.0 + + # First passenger picks seat 1: everyone gets their seat (prob 1/k) + # First passenger picks seat k: last person loses (prob 1/k, contributes 0) + # First passenger picks seat j (2 <= j < k): subproblem of size k-j+1 + result = 1.0 / k # Picks seat 1 + + for j in range(2, k): + # Picks seat j, passenger j becomes "new first passenger" + result += (1.0 / k) * probability(k - j + 1) + + return result + + return probability(n) + explanation: | + **Time Complexity:** O(n²) — Each subproblem computed once, but summing takes O(n) per state. + + **Space Complexity:** O(n) — Recursion stack and memoisation cache. + + This recursive approach directly models the problem's structure. Memoisation prevents recomputation. Like the DP solution, it confirms the 0.5 pattern but is unnecessarily complex for the final answer. diff --git a/backend/data/questions/alert-using-same-key.yaml b/backend/data/questions/alert-using-same-key.yaml new file mode 100644 index 0000000..e07b82c --- /dev/null +++ b/backend/data/questions/alert-using-same-key.yaml @@ -0,0 +1,200 @@ +title: Alert Using Same Key-Card Three or More Times in a One Hour Period +slug: alert-using-same-key +difficulty: medium +leetcode_id: 1604 +leetcode_url: https://leetcode.com/problems/alert-using-same-key-card-three-or-more-times-in-a-one-hour-period/ +categories: + - arrays + - hash-tables + - strings + - sorting +patterns: + - sliding-window + +description: | + LeetCode company workers use key-cards to unlock office doors. Each time a worker uses their key-card, the security system saves the worker's name and the time when it was used. 
The system emits an **alert** if any worker uses the key-card **three or more times** in a one-hour period. + + You are given a list of strings `keyName` and `keyTime` where `[keyName[i], keyTime[i]]` corresponds to a person's name and the time when their key-card was used **in a single day**. + + Access times are given in the **24-hour time format "HH:MM"**, such as `"23:51"` and `"09:49"`. + + Return *a list of unique worker names who received an alert for frequent keycard use*. Sort the names in **ascending order alphabetically**. + + Notice that `"10:00"` - `"11:00"` is considered to be within a one-hour period, while `"22:51"` - `"23:52"` is not considered to be within a one-hour period. + +constraints: | + - `1 <= keyName.length, keyTime.length <= 10^5` + - `keyName.length == keyTime.length` + - `keyTime[i]` is in the format **"HH:MM"** + - `[keyName[i], keyTime[i]]` is **unique** + - `1 <= keyName[i].length <= 10` + - `keyName[i]` contains only lowercase English letters + +examples: + - input: 'keyName = ["daniel","daniel","daniel","luis","luis","luis","luis"], keyTime = ["10:00","10:40","11:00","09:00","11:00","13:00","15:00"]' + output: '["daniel"]' + explanation: '"daniel" used the keycard 3 times in a one-hour period ("10:00", "10:40", "11:00").' + - input: 'keyName = ["alice","alice","alice","bob","bob","bob","bob"], keyTime = ["12:01","12:00","18:00","21:00","21:20","21:30","23:00"]' + output: '["bob"]' + explanation: '"bob" used the keycard 3 times in a one-hour period ("21:00", "21:20", "21:30").' + +explanation: + intuition: | + Imagine you're a security guard monitoring a list of badge swipes throughout the day. Your task is to flag anyone who swiped their badge three or more times within any 60-minute window. + + The key insight is that checking *every possible* one-hour window for each person would be inefficient. 
Instead, if we **sort each person's access times chronologically**, we can use a clever observation: for any three consecutive swipes, if the 1st and 3rd are within 60 minutes of each other, then all three are within a one-hour period. + + Think of it like this: if you line up someone's swipes in time order and check every group of three consecutive swipes, you only need to verify that the earliest and latest in each group are at most 60 minutes apart. This works because sorting guarantees the middle swipe is between them. + + This transforms a potentially complex sliding window problem into a simple linear scan after sorting. + + approach: | + We solve this using a **Hash Map + Sorting** approach: + + **Step 1: Group access times by person** + + - Create a hash map where each key is a worker's name + - Each value is a list of their access times converted to minutes (for easy comparison) + - Convert "HH:MM" to total minutes: `hours * 60 + minutes` + +   + + **Step 2: Sort each person's access times** + + - For each worker, sort their list of access times in ascending order + - Sorting enables the consecutive-triplet checking strategy + +   + + **Step 3: Check consecutive triplets** + + - For each worker with 3 or more swipes, iterate through their sorted times + - For each index `i` from `0` to `n-3`, check if `times[i+2] - times[i] <= 60` + - If any triplet satisfies this condition, add the worker to the alert list + +   + + **Step 4: Return sorted result** + + - Sort the list of flagged workers alphabetically + - Return the result + + common_pitfalls: + - title: Not Sorting Access Times + description: | + Without sorting, you cannot use the consecutive-triplet approach. You'd need to check all possible combinations of three swipes, leading to O(n^3) per person. + + For example, if Alice's swipes are recorded as `["12:01", "12:00", "18:00"]`, checking consecutive elements without sorting would miss that `"12:00"` and `"12:01"` are adjacent in time. 
+ wrong_approach: "Checking triplets in the original unsorted order" + correct_approach: "Sort times first, then check consecutive triplets" + + - title: Incorrect One-Hour Window Definition + description: | + The problem states `"10:00"` to `"11:00"` is within one hour (inclusive), meaning the difference should be `<= 60` minutes, not `< 60`. + + However, `"22:51"` to `"23:52"` (61 minutes apart) is NOT within one hour. Be careful with the boundary condition. + wrong_approach: "Using strict less than (< 60)" + correct_approach: "Use less than or equal (<= 60)" + + - title: Adding Unnecessary Midnight Wraparound Handling + description: | + The problem specifies all times are within a **single day**, so there's no midnight wraparound to handle. If you try to account for times spanning midnight (like `"23:30"` to `"00:30"`), you'll introduce bugs. + + Trust the problem constraints and keep the logic simple. + wrong_approach: "Adding complex midnight wraparound logic" + correct_approach: "Treat all times as within a single day (no wraparound)" + + - title: Time Conversion Errors + description: | + When converting `"HH:MM"` to minutes, ensure you parse the string correctly. A common mistake is slicing the wrong characters (for example, including the colon when parsing `"09:05"`) or forgetting to multiply hours by 60. + + The correct formula is: `int(time[:2]) * 60 + int(time[3:])` or split by `":"` and convert.
+ wrong_approach: "Incorrect string parsing" + correct_approach: "Split by colon and compute hours * 60 + minutes" + + key_takeaways: + - "**Grouping + sorting pattern**: When analysing events per entity over time, group by entity and sort by timestamp first" + - "**Consecutive element insight**: After sorting, checking windows of size `k` only requires comparing element `i` with element `i+k-1`" + - "**Time conversion simplifies comparison**: Converting `HH:MM` to total minutes makes arithmetic comparison straightforward" + - "**This pattern applies to**: Log analysis, session tracking, rate limiting, and any problem involving time-based event grouping" + + time_complexity: "O(n log n). We iterate through all `n` entries once to group them, then sort each person's times. In the worst case (one person with all entries), sorting dominates at O(n log n)." + space_complexity: "O(n). We store all access times in the hash map, and the result list can contain up to O(n) unique names in the worst case." 
+ +solutions: + - approach_name: Hash Map with Sorting + is_optimal: true + code: | + def alertNames(keyName: list[str], keyTime: list[str]) -> list[str]: + from collections import defaultdict + + # Group access times by person, converting to minutes + access_times = defaultdict(list) + for name, time in zip(keyName, keyTime): + # Convert "HH:MM" to total minutes for easy comparison + hours, minutes = int(time[:2]), int(time[3:]) + access_times[name].append(hours * 60 + minutes) + + result = [] + + for name, times in access_times.items(): + # Sort times chronologically + times.sort() + + # Check if any 3 consecutive swipes are within 60 minutes + for i in range(len(times) - 2): + # If the gap between 1st and 3rd is <= 60, all 3 are in one hour + if times[i + 2] - times[i] <= 60: + result.append(name) + break # No need to check further for this person + + # Return names sorted alphabetically + return sorted(result) + explanation: | + **Time Complexity:** O(n log n) — Grouping is O(n), sorting all times is O(n log n) in aggregate, and checking triplets is O(n). + + **Space Complexity:** O(n) — Hash map stores all access times. + + We group by name, sort each person's times, and check consecutive triplets. The key insight is that after sorting, if `times[i+2] - times[i] <= 60`, then the three swipes at indices `i`, `i+1`, and `i+2` all occurred within a one-hour window. 
+ + - approach_name: Brute Force (Check All Triplets) + is_optimal: false + code: | + def alertNames(keyName: list[str], keyTime: list[str]) -> list[str]: + from collections import defaultdict + + # Group access times by person + access_times = defaultdict(list) + for name, time in zip(keyName, keyTime): + hours, minutes = int(time[:2]), int(time[3:]) + access_times[name].append(hours * 60 + minutes) + + result = [] + + for name, times in access_times.items(): + n = len(times) + found = False + + # Check all combinations of 3 times + for i in range(n): + if found: + break + for j in range(i + 1, n): + if found: + break + for k in range(j + 1, n): + # Check if all three are within 60 minutes + max_time = max(times[i], times[j], times[k]) + min_time = min(times[i], times[j], times[k]) + if max_time - min_time <= 60: + result.append(name) + found = True + break + + return sorted(result) + explanation: | + **Time Complexity:** O(n^3) — For each person, we check all combinations of 3 times. + + **Space Complexity:** O(n) — Hash map stores all access times. + + This brute force approach checks every possible triplet of access times for each person. While correct, it's extremely slow for large inputs. With `n = 10^5` entries for a single person, this would require checking up to 10^15 combinations — far too slow. This illustrates why the sorted consecutive-triplet approach is necessary. 
diff --git a/backend/data/questions/all-divisions-with-highest-score-of-binary-array.yaml b/backend/data/questions/all-divisions-with-highest-score-of-binary-array.yaml new file mode 100644 index 0000000..b1f8764 --- /dev/null +++ b/backend/data/questions/all-divisions-with-highest-score-of-binary-array.yaml @@ -0,0 +1,186 @@ +title: All Divisions With the Highest Score of a Binary Array +slug: all-divisions-with-highest-score-of-binary-array +difficulty: medium +leetcode_id: 2155 +leetcode_url: https://leetcode.com/problems/all-divisions-with-the-highest-score-of-a-binary-array/ +categories: + - arrays +patterns: + - prefix-sum + +description: | + You are given a **0-indexed** binary array `nums` of length `n`. `nums` can be divided at index `i` (where `0 <= i <= n`) into two arrays (possibly empty) `nums_left` and `nums_right`: + + - `nums_left` has all the elements of `nums` between index `0` and `i - 1` **(inclusive)**, while `nums_right` has all the elements of `nums` between index `i` and `n - 1` **(inclusive)**. + - If `i == 0`, `nums_left` is **empty**, while `nums_right` has all the elements of `nums`. + - If `i == n`, `nums_left` has all the elements of `nums`, while `nums_right` is **empty**. + + The **division score** of an index `i` is the **sum** of the number of `0`'s in `nums_left` and the number of `1`'s in `nums_right`. + + Return *all distinct indices that have the **highest** possible **division score***. You may return the answer in **any order**. + +constraints: | + - `n == nums.length` + - `1 <= n <= 10^5` + - `nums[i]` is either `0` or `1` + +examples: + - input: "nums = [0,0,1,0]" + output: "[2,4]" + explanation: "Division at index 2: nums_left is [0,0], nums_right is [1,0]. Score is 2 + 1 = 3. Division at index 4: nums_left is [0,0,1,0], nums_right is []. Score is 3 + 0 = 3. Both indices achieve the highest score of 3." + - input: "nums = [0,0,0]" + output: "[3]" + explanation: "Division at index 3: nums_left is [0,0,0], nums_right is []. 
Score is 3 + 0 = 3. Only index 3 achieves the highest score." + - input: "nums = [1,1]" + output: "[0]" + explanation: "Division at index 0: nums_left is [], nums_right is [1,1]. Score is 0 + 2 = 2. Only index 0 achieves the highest score." + +explanation: + intuition: | + Imagine you're drawing a vertical line through an array, dividing it into left and right portions. You want to maximise a score where you **count zeros on the left** and **ones on the right**. + + The key insight is that as you move the division point from left to right: + - When you encounter a `0`, moving it from right to left **increases** your score (one more zero on the left) + - When you encounter a `1`, moving it from right to left **decreases** your score (one fewer one on the right) + + Think of it like this: start with the division at index `0` (everything on the right). Your initial score is the total count of `1`s in the array. As you slide the division point rightward, each `0` you pass adds `1` to your score, and each `1` you pass subtracts `1` from your score. + + This means you don't need to recalculate counts from scratch at each position — you can compute the score incrementally in a single pass using prefix sum logic.
+ + approach: | + We solve this using a **Single Pass with Running Score**: + + **Step 1: Calculate the initial score** + + - Count all `1`s in the array — this is the score when the division is at index `0` (empty left, full right) + - `ones_right`: Total count of `1`s in the array + - `zeros_left`: Initially `0` since left portion is empty + +   + + **Step 2: Track the maximum and collect indices** + + - Initialise `max_score` to the initial score (at index `0`) + - Initialise `result` list with `[0]` since index `0` starts with the max score + +   + + **Step 3: Iterate through possible division points** + + - For each index `i` from `1` to `n`: + - Look at the element that just moved from right to left: `nums[i-1]` + - If `nums[i-1] == 0`: increment `zeros_left` (score increases by 1) + - If `nums[i-1] == 1`: decrement `ones_right` (score decreases by 1) + - Calculate `current_score = zeros_left + ones_right` + - If `current_score > max_score`: update `max_score` and reset `result` to `[i]` + - If `current_score == max_score`: append `i` to `result` + +   + + **Step 4: Return the result** + + - Return the list of all indices that achieved `max_score` + + common_pitfalls: + - title: Recalculating Counts at Each Position + description: | + A naive approach recounts zeros and ones for each division point: + - For each `i`, count zeros in `nums[0:i]` and ones in `nums[i:n]` + + This results in **O(n^2) time complexity**. With `n <= 10^5`, this means up to 10 billion operations — too slow. + + Instead, recognise that moving the division by one position only changes the count by one element. Use running counters that update in O(1) time. + wrong_approach: "Nested loops counting zeros and ones for each division" + correct_approach: "Single pass updating running counters incrementally" + + - title: Off-by-One Errors with Division Points + description: | + There are `n + 1` valid division points (indices `0` through `n`), not `n`. 
+ + - Index `0`: empty left, full right + - Index `n`: full left, empty right + + When iterating, make sure to include the final division point at index `n`. A common mistake is iterating only to `n - 1`. + wrong_approach: "Iterating from 0 to n-1" + correct_approach: "Iterating from 0 to n (inclusive)" + + - title: Forgetting to Handle Ties + description: | + The problem asks for **all** indices with the highest score, not just one. + + When you find a score equal to the current maximum, you must append the index to your result list rather than replace it. Use separate logic for "new maximum found" versus "tied with maximum". + + key_takeaways: + - "**Prefix sum pattern**: When a score depends on counts of elements on either side of a moving boundary, track running totals instead of recounting" + - "**Incremental updates**: Moving a boundary by one position changes the score by exactly one element — exploit this for O(1) updates" + - "**Tracking multiple maxima**: When collecting all indices that achieve a maximum, distinguish between finding a new max (reset list) and tying (append to list)" + - "**Division point indexing**: Remember that dividing an array of length `n` creates `n + 1` possible division points" + + time_complexity: "O(n). We make one pass to count ones, then one pass to compute scores at each division point." + space_complexity: "O(1) auxiliary space (excluding the output list). We only use a few integer variables for tracking counts and scores." 
+ +solutions: + - approach_name: Single Pass with Running Score + is_optimal: true + code: | + def max_score_indices(nums: list[int]) -> list[int]: + n = len(nums) + + # Initial score: division at index 0 (empty left, full right) + # Score = zeros on left (0) + ones on right (total ones) + ones_right = sum(nums) # Count all 1s + zeros_left = 0 + + max_score = ones_right # Score at division index 0 + result = [0] # Index 0 starts with the max + + # Try each division point from 1 to n + for i in range(1, n + 1): + # Element nums[i-1] moves from right to left + if nums[i - 1] == 0: + zeros_left += 1 # One more zero on the left + else: + ones_right -= 1 # One fewer one on the right + + current_score = zeros_left + ones_right + + if current_score > max_score: + # Found a new maximum - reset the result list + max_score = current_score + result = [i] + elif current_score == max_score: + # Tied with maximum - add to result list + result.append(i) + + return result + explanation: | + **Time Complexity:** O(n) — One pass to count ones, one pass to compute scores. + + **Space Complexity:** O(1) auxiliary — Only integer variables for counters (output list not counted). + + We start with the division at index 0 and incrementally update our counters as we move the boundary rightward. Each element that crosses from right to left either adds 1 (if it's a zero) or subtracts 1 (if it's a one) from the score. 
+ + - approach_name: Brute Force + is_optimal: false + code: | + def max_score_indices(nums: list[int]) -> list[int]: + n = len(nums) + scores = [] + + # Calculate score for each division point + for i in range(n + 1): + # Count zeros in left portion [0, i) + zeros_left = nums[:i].count(0) + # Count ones in right portion [i, n) + ones_right = nums[i:].count(1) + scores.append(zeros_left + ones_right) + + # Find maximum score and all indices that achieve it + max_score = max(scores) + return [i for i, score in enumerate(scores) if score == max_score] + explanation: | + **Time Complexity:** O(n^2) — For each of n+1 division points, we count elements in O(n) time. + + **Space Complexity:** O(n) — We store all n+1 scores. + + This approach directly implements the problem definition but is too slow for large inputs. Each `count()` call scans a portion of the array, leading to quadratic time. Included to illustrate why incremental counting is necessary. diff --git a/backend/data/questions/all-elements-in-two-binary-search-trees.yaml b/backend/data/questions/all-elements-in-two-binary-search-trees.yaml new file mode 100644 index 0000000..62c64d4 --- /dev/null +++ b/backend/data/questions/all-elements-in-two-binary-search-trees.yaml @@ -0,0 +1,214 @@ +title: All Elements in Two Binary Search Trees +slug: all-elements-in-two-binary-search-trees +difficulty: medium +leetcode_id: 1305 +leetcode_url: https://leetcode.com/problems/all-elements-in-two-binary-search-trees/ +categories: + - trees + - sorting +patterns: + - tree-traversal + - two-pointers + +description: | + Given two binary search trees `root1` and `root2`, return *a list containing all the integers from both trees sorted in **ascending** order*. + +constraints: | + - The number of nodes in each tree is in the range `[0, 5000]` + - `-10^5 <= Node.val <= 10^5` + +examples: + - input: "root1 = [2,1,4], root2 = [1,0,3]" + output: "[0,1,1,2,3,4]" + explanation: "Tree 1 contains [1,2,4] and tree 2 contains [0,1,3]. 
Merged in sorted order: [0,1,1,2,3,4]." + - input: "root1 = [1,null,8], root2 = [8,1]" + output: "[1,1,8,8]" + explanation: "Tree 1 contains [1,8] and tree 2 contains [1,8]. Merged in sorted order: [1,1,8,8]." + +explanation: + intuition: | + The key insight is recognising that **binary search trees have a special property**: an in-order traversal (left → node → right) visits nodes in sorted ascending order. + + Think of it like this: each BST is already a "sorted container" in disguise. If you perform an in-order traversal, you get a sorted list for free. So the problem transforms into: **merge two sorted lists into one sorted list**. + + This is exactly the merge step from merge sort! You compare the front elements of both lists and take the smaller one, repeating until both lists are exhausted. + + The elegant solution combines these two insights: + 1. Use BST's in-order property to extract sorted sequences + 2. Use two-pointer merge to combine them efficiently + + approach: | + We solve this using **In-Order Traversal + Two-Pointer Merge**: + + **Step 1: Perform in-order traversal on both trees** + + - Traverse each BST using in-order DFS (left → node → right) + - This produces two sorted lists: `list1` from `root1` and `list2` from `root2` + - Each traversal is O(n) time and produces elements in ascending order + +   + + **Step 2: Merge the two sorted lists** + + - Use two pointers, `i` for `list1` and `j` for `list2` + - Compare `list1[i]` and `list2[j]`, append the smaller to the result + - Advance the pointer of whichever list contributed the element + - Continue until one list is exhausted + +   + + **Step 3: Handle remaining elements** + + - If `list1` has remaining elements, append them all + - If `list2` has remaining elements, append them all + - One list may be longer or one tree may be empty + +   + + **Step 4: Return the merged result** + + - The result list contains all elements from both trees in sorted order + + common_pitfalls: + - title: 
Forgetting the BST Property + description: | + A common mistake is to collect all values from both trees and then sort the combined list. + + While this works, it's inefficient: sorting takes O((m+n) log(m+n)) time. By leveraging the BST's in-order property, we get sorted lists in O(m+n) time, and merging is also O(m+n). This is asymptotically better. + wrong_approach: "Collect all values, then sort with sorted() or list.sort()" + correct_approach: "In-order traversal gives sorted lists, then merge in O(m+n)" + + - title: Not Handling Empty Trees + description: | + Either `root1` or `root2` (or both) could be empty trees. Your in-order traversal should handle `None` roots gracefully by returning an empty list. + + The merge step naturally handles this since merging with an empty list just returns the other list. + wrong_approach: "Assuming both trees have at least one node" + correct_approach: "Check for None roots, return empty list from traversal" + + - title: Inefficient Merge with List Concatenation + description: | + Using `result = result + [value]` inside a loop creates a new list each iteration, leading to O(n²) time complexity. + + Use `result.append(value)` which is O(1) amortized, keeping the merge at O(m+n). + wrong_approach: "result = result + [smaller_value] in loop" + correct_approach: "result.append(smaller_value)" + + key_takeaways: + - "**BST in-order property**: In-order traversal of a BST always produces elements in sorted ascending order" + - "**Problem transformation**: Recognise when a problem can be reduced to a simpler, well-known problem (merge two sorted lists)" + - "**Two-pointer merge**: The merge step from merge sort is a fundamental pattern for combining sorted sequences" + - "**Foundation for harder problems**: This pattern extends to problems like merge k sorted lists, external sorting, and stream merging" + + time_complexity: "O(m + n). 
We traverse each tree once (O(m) + O(n)) and merge the two lists once (O(m + n)), where m and n are the number of nodes in each tree."
+  space_complexity: "O(m + n). We store all elements from both trees in lists, plus O(h1 + h2) explicit stack space for the iterative traversals, where h1 and h2 are the tree heights."
+
+solutions:
+  - approach_name: In-Order Traversal + Merge
+    is_optimal: true
+    code: |
+      class TreeNode:
+          def __init__(self, val=0, left=None, right=None):
+              self.val = val
+              self.left = left
+              self.right = right
+
+      def get_all_elements(root1: TreeNode, root2: TreeNode) -> list[int]:
+          def inorder(root: TreeNode) -> list[int]:
+              """Return the BST's values in sorted order (iterative in-order).
+
+              An explicit stack with append() keeps the traversal O(n):
+              concatenating lists at every node would copy elements
+              repeatedly (O(n^2) on a skewed tree), and recursion could
+              exceed Python's recursion limit on a 5000-node chain.
+              """
+              values = []
+              stack = []
+              node = root
+              while node or stack:
+                  # Descend as far left as possible, remembering the path
+                  while node:
+                      stack.append(node)
+                      node = node.left
+                  # Visit the node, then continue with its right subtree
+                  node = stack.pop()
+                  values.append(node.val)
+                  node = node.right
+              return values
+
+          # Get sorted lists from both BSTs (an empty tree yields [])
+          list1 = inorder(root1)
+          list2 = inorder(root2)
+
+          # Merge two sorted lists using two pointers
+          result = []
+          i, j = 0, 0
+
+          while i < len(list1) and j < len(list2):
+              if list1[i] <= list2[j]:
+                  result.append(list1[i])
+                  i += 1
+              else:
+                  result.append(list2[j])
+                  j += 1
+
+          # Append remaining elements from either list
+          result.extend(list1[i:])
+          result.extend(list2[j:])
+
+          return result
+    explanation: |
+      **Time Complexity:** O(m + n) — In-order traversal is O(m) + O(n), merge is O(m + n).
+
+      **Space Complexity:** O(m + n) — We store all elements plus the traversal stacks.
+
+      We leverage the BST property that in-order traversal produces sorted output. Then we apply the classic two-pointer merge from merge sort to combine the two sorted lists efficiently.
+ + - approach_name: Iterative In-Order with Stack + is_optimal: true + code: | + def get_all_elements(root1: TreeNode, root2: TreeNode) -> list[int]: + result = [] + stack1, stack2 = [], [] + + # Helper to push all left children onto stack + def push_left(node, stack): + while node: + stack.append(node) + node = node.left + + # Initialise stacks with leftmost paths + push_left(root1, stack1) + push_left(root2, stack2) + + while stack1 or stack2: + # Choose which stack to pop from + if not stack2 or (stack1 and stack1[-1].val <= stack2[-1].val): + # Pop from stack1 + node = stack1.pop() + result.append(node.val) + # Push left path of right child + push_left(node.right, stack1) + else: + # Pop from stack2 + node = stack2.pop() + result.append(node.val) + push_left(node.right, stack2) + + return result + explanation: | + **Time Complexity:** O(m + n) — Each node is pushed and popped exactly once. + + **Space Complexity:** O(h1 + h2) — Only stores nodes on the current paths (tree heights). + + This approach interleaves the in-order traversals, avoiding the need to materialise both full lists. We maintain two stacks representing the "frontier" of each traversal, always taking the smaller current element. + + - approach_name: Collect and Sort + is_optimal: false + code: | + def get_all_elements(root1: TreeNode, root2: TreeNode) -> list[int]: + def collect(root: TreeNode, values: list[int]): + """Collect all values from tree (any order).""" + if not root: + return + values.append(root.val) + collect(root.left, values) + collect(root.right, values) + + values = [] + collect(root1, values) + collect(root2, values) + + # Sort all collected values + return sorted(values) + explanation: | + **Time Complexity:** O((m + n) log(m + n)) — Dominated by the sorting step. + + **Space Complexity:** O(m + n) — Stores all elements. + + This approach ignores the BST property and simply collects all values, then sorts. 
While correct and simple, it's less efficient than leveraging the inherent ordering of BSTs. Included to illustrate why understanding data structure properties matters. diff --git a/backend/data/questions/all-nodes-distance-k-in-binary-tree.yaml b/backend/data/questions/all-nodes-distance-k-in-binary-tree.yaml new file mode 100644 index 0000000..4799947 --- /dev/null +++ b/backend/data/questions/all-nodes-distance-k-in-binary-tree.yaml @@ -0,0 +1,223 @@ +title: All Nodes Distance K in Binary Tree +slug: all-nodes-distance-k-in-binary-tree +difficulty: medium +leetcode_id: 863 +leetcode_url: https://leetcode.com/problems/all-nodes-distance-k-in-binary-tree/ +categories: + - trees + - graphs + - hash-tables +patterns: + - bfs + - dfs + +description: | + Given the `root` of a binary tree, the value of a target node `target`, and an integer `k`, return *an array of the values of all nodes that have a distance* `k` *from the target node*. + + You can return the answer in **any order**. + +constraints: | + - `1 <= number of nodes <= 500` + - `0 <= Node.val <= 500` + - All the values `Node.val` are **unique** + - `target` is the value of one of the nodes in the tree + - `0 <= k <= 1000` + +examples: + - input: "root = [3,5,1,6,2,0,8,null,null,7,4], target = 5, k = 2" + output: "[7,4,1]" + explanation: "The nodes that are a distance 2 from the target node (with value 5) have values 7, 4, and 1." + - input: "root = [1], target = 1, k = 3" + output: "[]" + explanation: "There are no nodes at distance 3 from the only node in the tree." + +explanation: + intuition: | + Imagine you're standing at a node in the tree and want to find all nodes exactly `k` steps away. In a typical tree traversal, you can only move *downward* to children. But this problem requires moving in **all directions** — down to children and *up* to the parent. + + Think of it like this: if you could magically add "parent pointers" to each node, the tree would become an **undirected graph**. 
Then finding all nodes at distance `k` becomes a standard graph traversal problem — just start at the target and do a BFS (breadth-first search) expanding outward level by level until you reach distance `k`. + + The key insight is to **convert the tree into a graph** by first building a mapping from each node to its parent. Once we have parent pointers, we can traverse in all three directions (left child, right child, parent) and use BFS to find all nodes at the exact distance `k`. + + approach: | + We solve this using a **Two-Phase Approach**: first build parent pointers, then BFS from the target. + + **Step 1: Build parent pointers using DFS** + + - Create a hash map `parent` that maps each node to its parent node + - Use DFS to traverse the entire tree, recording each node's parent + - Also locate the actual target node (we're given the value, but need the node reference) + +   + + **Step 2: BFS from the target node** + + - Start BFS from the target node at distance 0 + - Use a `visited` set to avoid revisiting nodes (critical since we can now move in all directions) + - For each node, explore three neighbours: left child, right child, and parent + - Expand level by level until we reach distance `k` + +   + + **Step 3: Collect results** + + - When the BFS reaches distance `k`, all nodes in the current queue are at exactly distance `k` from the target + - Return their values as the result + +   + + This approach transforms the constrained tree structure into a flexible graph, enabling bidirectional traversal. + + common_pitfalls: + - title: Only Searching Downward + description: | + A common mistake is to only search the subtree rooted at the target node. This misses nodes that are "above" the target in the tree. + + For example, if target is a leaf node and `k = 2`, the answer might include the target's grandparent or uncle nodes — you can't find these by only searching downward. 
+ wrong_approach: "DFS only through target's subtree" + correct_approach: "Build parent pointers to enable upward traversal" + + - title: Forgetting to Track Visited Nodes + description: | + Once you add parent pointers, the tree becomes a graph with cycles (child → parent → child). Without a `visited` set, BFS will loop infinitely between parent and child nodes. + + Always mark nodes as visited when adding them to the queue, not when processing them — this prevents duplicate queue entries. + wrong_approach: "BFS without visited tracking" + correct_approach: "Use a visited set to prevent revisiting nodes" + + - title: Confusing Node Value with Node Reference + description: | + The problem gives `target` as a node value, but you need the actual node reference to start BFS. Make sure to find and store the target node during the parent-building DFS. + + If you try to compare `node.val == target` during BFS, you might accidentally match a different node with the same value (though values are unique in this problem, it's good practice to work with references). + wrong_approach: "Using target value directly in BFS" + correct_approach: "Find target node reference during DFS setup" + + key_takeaways: + - "**Tree to graph conversion**: When you need to traverse a tree in multiple directions, add parent pointers to treat it as an undirected graph" + - "**BFS for distance queries**: BFS naturally finds all nodes at a specific distance — each level of BFS expansion increases distance by 1" + - "**Visited set is essential**: When converting a tree to a graph, cycles emerge; always track visited nodes" + - "**Two-phase pattern**: Preprocessing (build parent map) followed by the main algorithm (BFS) is a powerful pattern for tree problems" + + time_complexity: "O(n). We visit each node twice — once during DFS to build parent pointers, once during BFS to find nodes at distance `k`." + space_complexity: "O(n). 
We store parent pointers for all `n` nodes, plus the BFS queue and visited set can hold up to `n` nodes." + +solutions: + - approach_name: Parent Pointers + BFS + is_optimal: true + code: | + from collections import deque + + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def distance_k(root: TreeNode, target: TreeNode, k: int) -> list[int]: + # Phase 1: Build parent pointers using DFS + parent = {} + + def build_parent(node: TreeNode, par: TreeNode | None) -> None: + if not node: + return + parent[node] = par + build_parent(node.left, node) + build_parent(node.right, node) + + build_parent(root, None) + + # Phase 2: BFS from target to find all nodes at distance k + queue = deque([(target, 0)]) # (node, distance) + visited = {target} + + while queue: + node, dist = queue.popleft() + + # Found all nodes at distance k + if dist == k: + # Return all nodes currently at this distance + return [node.val] + [n.val for n, d in queue] + + # Explore all three directions: left, right, parent + for neighbour in (node.left, node.right, parent[node]): + if neighbour and neighbour not in visited: + visited.add(neighbour) + queue.append((neighbour, dist + 1)) + + # No nodes found at distance k + return [] + explanation: | + **Time Complexity:** O(n) — DFS visits all nodes once, BFS visits all nodes at most once. + + **Space Complexity:** O(n) — Parent map stores n entries, queue and visited set can hold up to n nodes. + + We first build a parent pointer map using DFS, then run BFS from the target node. The BFS expands outward level by level, exploring left child, right child, and parent at each step. When we reach distance `k`, all nodes in the queue are our answer. 
+ + - approach_name: DFS with Distance Tracking + is_optimal: false + code: | + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def distance_k(root: TreeNode, target: TreeNode, k: int) -> list[int]: + result = [] + + def find_target_and_collect(node: TreeNode) -> int: + """ + Returns distance from node to target if target is in subtree, + otherwise returns -1. + """ + if not node: + return -1 + + if node == target: + # Collect all nodes at distance k in target's subtree + collect_downward(node, k) + return 0 + + # Search left subtree + left_dist = find_target_and_collect(node.left) + if left_dist >= 0: + # Target is in left subtree + if left_dist + 1 == k: + result.append(node.val) + else: + # Search right subtree for nodes at remaining distance + collect_downward(node.right, k - left_dist - 2) + return left_dist + 1 + + # Search right subtree + right_dist = find_target_and_collect(node.right) + if right_dist >= 0: + # Target is in right subtree + if right_dist + 1 == k: + result.append(node.val) + else: + # Search left subtree for nodes at remaining distance + collect_downward(node.left, k - right_dist - 2) + return right_dist + 1 + + return -1 + + def collect_downward(node: TreeNode, dist: int) -> None: + """Collect all nodes at exactly dist distance going downward.""" + if not node or dist < 0: + return + if dist == 0: + result.append(node.val) + return + collect_downward(node.left, dist - 1) + collect_downward(node.right, dist - 1) + + find_target_and_collect(root) + return result + explanation: | + **Time Complexity:** O(n) — Each node is visited at most twice. + + **Space Complexity:** O(h) — Recursion stack depth equals tree height, plus O(k) for collecting downward. + + This approach uses pure DFS without explicitly building parent pointers. When we find the target, we collect all nodes at distance `k` in its subtree. 
As we return up the recursion, we track our distance from the target and collect nodes from the "other" subtree at the appropriate remaining distance. More complex but uses less space for balanced trees. diff --git a/backend/data/questions/all-paths-from-source-to-target.yaml b/backend/data/questions/all-paths-from-source-to-target.yaml new file mode 100644 index 0000000..f828b42 --- /dev/null +++ b/backend/data/questions/all-paths-from-source-to-target.yaml @@ -0,0 +1,200 @@ +title: All Paths From Source to Target +slug: all-paths-from-source-to-target +difficulty: medium +leetcode_id: 797 +leetcode_url: https://leetcode.com/problems/all-paths-from-source-to-target/ +categories: + - graphs + - recursion +patterns: + - dfs + - backtracking + +description: | + Given a directed acyclic graph (**DAG**) of `n` nodes labeled from `0` to `n - 1`, find all possible paths from node `0` to node `n - 1` and return them in **any order**. + + The graph is given as follows: `graph[i]` is a list of all nodes you can visit from node `i` (i.e., there is a directed edge from node `i` to node `graph[i][j]`). + +constraints: | + - `n == graph.length` + - `2 <= n <= 15` + - `0 <= graph[i][j] < n` + - `graph[i][j] != i` (no self-loops) + - All elements of `graph[i]` are **unique** + - The input graph is **guaranteed** to be a **DAG** + +examples: + - input: "graph = [[1,2],[3],[3],[]]" + output: "[[0,1,3],[0,2,3]]" + explanation: "There are two paths: 0 -> 1 -> 3 and 0 -> 2 -> 3." + - input: "graph = [[4,3,1],[3,2,4],[3],[4],[]]" + output: "[[0,4],[0,3,4],[0,1,3,4],[0,1,2,3,4],[0,1,4]]" + explanation: "There are five different paths from node 0 to node 4." + +explanation: + intuition: | + Imagine you're standing at the entrance of a maze (node `0`) and need to find **every possible route** to the exit (node `n-1`). Unlike finding the shortest path, where you'd stop after reaching the destination once, here you need to explore *all* branches systematically. 
+ + The key insight is that the graph is a **DAG** (Directed Acyclic Graph) — there are no cycles. This is crucial because it means: + 1. You can never get stuck in an infinite loop + 2. Every path you start will eventually either reach the target or hit a dead end + 3. You don't need to track "visited" nodes globally (a node can appear in multiple valid paths) + + Think of it like exploring a family tree from an ancestor to all descendants named "Target". You start at the root, follow each branch completely, record the path when you find a Target, then *backtrack* to explore other branches. + + The **backtracking** pattern is perfect here: build a path incrementally, and when you reach the destination (or a dead end), undo your last choice and try a different branch. + + approach: | + We solve this using **Depth-First Search with Backtracking**: + + **Step 1: Set up the recursive DFS function** + + - Create a helper function `dfs(node, path)` that explores from the current node + - `path` is a list tracking the nodes visited so far in the current exploration + +   + + **Step 2: Handle the base case** + + - If `node == n - 1` (we've reached the target), we found a complete path + - Add a *copy* of the current path to our results list + - Important: We must copy because the same list object will be modified during backtracking + +   + + **Step 3: Explore all neighbors** + + - For each neighbor in `graph[node]`: + - Add the neighbor to our current path + - Recursively call `dfs(neighbor, path)` to continue exploring + - **Backtrack**: Remove the neighbor from the path after the recursive call returns + +   + + **Step 4: Start the traversal** + + - Begin DFS from node `0` with the initial path `[0]` + - Return the collected results after all paths are explored + +   + + The backtracking step (removing the neighbor after recursion) is what allows us to explore *all* branches — we "undo" our choice so we can try other neighbors. 
+ + common_pitfalls: + - title: Forgetting to Copy the Path + description: | + When you find a valid path and add it to results, you must add a **copy** of the list, not the list itself. + + ```python + # Wrong - every entry in results aliases the same list (just [0] at the end) + results.append(path) + + # Correct - creates an independent copy + results.append(path[:]) # or list(path) + ``` + + Since we're backtracking and modifying `path` in place, if you don't copy, all entries in `results` will reference the same list object — which has been popped back to just the start node (`[0]`) by the time all backtracking completes. + wrong_approach: "results.append(path)" + correct_approach: "results.append(path[:]) or results.append(list(path))" + + - title: Tracking Visited Nodes Globally + description: | + In many graph problems, you track visited nodes to avoid infinite loops. Here, that would be **incorrect**. + + Because this is a DAG, we're guaranteed no cycles, so infinite loops aren't possible. More importantly, the same node can legitimately appear in *multiple different paths*. If you mark it as "visited" globally, you'd miss valid paths. + + For example, in `graph = [[1,2],[3],[3],[]]`, node `3` is reachable via both `0->1->3` and `0->2->3`. A global visited set would prevent finding the second path. + wrong_approach: "Global visited set preventing revisits" + correct_approach: "No visited tracking needed — DAG guarantees no cycles" + + - title: Not Backtracking After Recursion + description: | + If you add a node to the path but forget to remove it after the recursive call, your paths will contain nodes from other branches.
+ + ```python + # Wrong - path keeps growing, contains nodes from all branches + for neighbor in graph[node]: + path.append(neighbor) + dfs(neighbor) + # Missing: path.pop() + + # Correct - remove after exploring + for neighbor in graph[node]: + path.append(neighbor) + dfs(neighbor) + path.pop() # Backtrack + ``` + wrong_approach: "Append without pop" + correct_approach: "Always pop after recursive call returns" + + key_takeaways: + - "**Backtracking pattern**: Build solution incrementally, undo choices after exploring to try alternatives" + - "**DAG property**: No cycles means no need for visited tracking — the same node can appear in multiple valid paths" + - "**Copy on record**: When storing a path, always copy the list to avoid reference issues during backtracking" + - "**Foundation for path enumeration**: This technique extends to finding all paths in trees, counting paths, or finding paths with specific properties" + + time_complexity: "O(2^n * n). In the worst case (complete DAG), there can be `2^(n-2)` paths, and each path can have up to `n` nodes to copy." + space_complexity: "O(n). The recursion stack depth is at most `n` (longest path), and the current path stores at most `n` nodes. Output space is not counted." 
+ +solutions: + - approach_name: DFS with Backtracking + is_optimal: true + code: | + def all_paths_source_target(graph: list[list[int]]) -> list[list[int]]: + target = len(graph) - 1 + results = [] + + def dfs(node: int, path: list[int]) -> None: + # Base case: reached the target node + if node == target: + # Important: append a COPY of the path + results.append(path[:]) + return + + # Explore all neighbors + for neighbor in graph[node]: + path.append(neighbor) # Choose + dfs(neighbor, path) # Explore + path.pop() # Unchoose (backtrack) + + # Start DFS from node 0 + dfs(0, [0]) + return results + explanation: | + **Time Complexity:** O(2^n * n) — In a complete DAG, there can be exponentially many paths, and we copy each path (length up to n) when recording. + + **Space Complexity:** O(n) — Recursion stack depth and current path length are both bounded by n. + + The classic backtracking template: choose (add to path), explore (recurse), unchoose (remove from path). The DAG property guarantees termination without needing a visited set. + + - approach_name: BFS with Path Tracking + is_optimal: false + code: | + from collections import deque + + def all_paths_source_target(graph: list[list[int]]) -> list[list[int]]: + target = len(graph) - 1 + results = [] + + # Queue holds (current_node, path_so_far) + queue = deque([(0, [0])]) + + while queue: + node, path = queue.popleft() + + if node == target: + results.append(path) + continue + + # Add all neighbors with extended paths + for neighbor in graph[node]: + # Create new path for each branch + queue.append((neighbor, path + [neighbor])) + + return results + explanation: | + **Time Complexity:** O(2^n * n) — Same as DFS; we still explore all paths. + + **Space Complexity:** O(2^n * n) — Queue can hold many partial paths simultaneously, each up to length n. + + BFS explores level by level. We store the entire path with each queue entry, creating new lists for each branch. 
This uses more memory than DFS backtracking but avoids recursion. Less elegant for this problem but useful when you need shortest paths first. diff --git a/backend/data/questions/all-possible-full-binary-trees.yaml b/backend/data/questions/all-possible-full-binary-trees.yaml new file mode 100644 index 0000000..4086940 --- /dev/null +++ b/backend/data/questions/all-possible-full-binary-trees.yaml @@ -0,0 +1,215 @@ +title: All Possible Full Binary Trees +slug: all-possible-full-binary-trees +difficulty: medium +leetcode_id: 894 +leetcode_url: https://leetcode.com/problems/all-possible-full-binary-trees/ +categories: + - trees + - recursion + - dynamic-programming +patterns: + - backtracking + - dynamic-programming + +description: | + Given an integer `n`, return *a list of all possible **full binary trees** with* `n` *nodes*. Each node of each tree in the answer must have `Node.val == 0`. + + Each element of the answer is the root node of one possible tree. You may return the final list of trees in **any order**. + + A **full binary tree** is a binary tree where each node has exactly `0` or `2` children. + +constraints: | + - `1 <= n <= 20` + +examples: + - input: "n = 7" + output: "[[0,0,0,null,null,0,0,null,null,0,0],[0,0,0,null,null,0,0,0,0],[0,0,0,0,0,0,0],[0,0,0,0,0,null,null,null,null,0,0],[0,0,0,0,0,null,null,0,0]]" + explanation: "With 7 nodes, there are 5 possible full binary trees. Each tree has nodes with exactly 0 or 2 children." + - input: "n = 3" + output: "[[0,0,0]]" + explanation: "With 3 nodes, there is only one possible full binary tree: a root with two children." + +explanation: + intuition: | + A **full binary tree** has a special property: every node has either 0 children (leaf) or exactly 2 children. This constraint immediately tells us something crucial — a full binary tree can only exist when `n` is **odd**. + + Why? Think about it: we start with a root (1 node). Every time we add children, we must add exactly 2 nodes (not 1). 
So the count goes 1 → 3 → 5 → 7... always odd. If `n` is even, no valid full binary tree exists. + + Now, how do we construct all possible trees with `n` nodes? Imagine you're the root node. You must have exactly two subtrees: a left and a right. If you use `i` nodes for the left subtree, you have `n - 1 - i` nodes remaining for the right subtree (subtracting 1 for yourself, the root). + + The key insight is that this is a **divide and conquer** problem. For each valid split of nodes between left and right subtrees, we recursively find all possible left subtrees and all possible right subtrees, then combine every left-right pair with a new root. + + This naturally leads to a recursive structure where the answer for `n` nodes depends on answers for smaller values of `n`. + + approach: | + We solve this using **Recursion with Memoisation** — breaking the problem into smaller subproblems and caching results. + + **Step 1: Handle base cases** + + - If `n` is even, return an empty list (no full binary tree possible) + - If `n == 1`, return a list containing a single leaf node + +   + + **Step 2: Recursively build trees** + + - For a tree with `n` nodes, try all ways to split remaining `n - 1` nodes between left and right subtrees + - Left subtree gets `i` nodes, right subtree gets `n - 1 - i` nodes + - Since both subtrees must be full binary trees, `i` must be odd and between 1 and `n - 2` + - Iterate `i` from 1 to `n - 1` in steps of 2 (only odd values) + +   + + **Step 3: Combine subtrees** + + - Recursively get all possible left subtrees with `i` nodes + - Recursively get all possible right subtrees with `n - 1 - i` nodes + - For each combination of left and right subtree, create a new root node connecting them + - Add each complete tree to the result list + +   + + **Step 4: Memoise results** + + - Cache results for each value of `n` to avoid recomputation + - When building trees for `n = 7`, we might need trees for `n = 3` multiple times + - Memoisation ensures we 
compute each subproblem only once + +   + + The total number of full binary trees with `n` nodes follows the **Catalan number** sequence, which grows exponentially but is well within bounds for `n <= 20`. + + common_pitfalls: + - title: Forgetting the Odd Constraint + description: | + A full binary tree can only have an odd number of nodes. If you don't check this upfront, you'll waste computation or return incorrect results for even `n`. + + For `n = 4`, the answer should be an empty list, not an error or an invalid tree. + wrong_approach: "Try to build trees for any n without checking parity" + correct_approach: "Return empty list immediately when n is even" + + - title: Skipping Memoisation + description: | + Without memoisation, the same subproblems get solved repeatedly. For `n = 15`, computing all trees for `n = 7` happens multiple times during different splits. + + This leads to exponential time complexity instead of the manageable complexity with caching. The difference can be dramatic — solving `n = 20` goes from impractical to instant. + wrong_approach: "Pure recursion without caching" + correct_approach: "Use a dictionary or array to cache results for each n" + + - title: Incorrect Node Splitting + description: | + When splitting `n` nodes between left and right subtrees, remember that the root takes 1 node. So if the total is `n`, and the root takes 1, you have `n - 1` nodes to distribute. + + Also, both subtrees must have an odd number of nodes. Iterating through all values of `i` instead of just odd values wastes time on impossible cases. 
+ wrong_approach: "Split n nodes instead of n-1, or try even values for subtree sizes" + correct_approach: "Iterate i from 1 to n-2 in steps of 2 (odd values only)" + + key_takeaways: + - "**Structural recursion**: When a data structure is defined recursively (like trees), solutions often follow the same recursive structure" + - "**Divide and conquer with combinatorics**: Generate all combinations by recursively generating sub-solutions and combining them" + - "**Catalan numbers**: Full binary tree counts follow the Catalan sequence — this appears in many tree-related combinatorial problems" + - "**Memoisation is essential**: Overlapping subproblems make caching crucial for efficiency in tree-generation problems" + + time_complexity: "O(2^n). The number of full binary trees with n nodes is the Catalan number C((n-1)/2), which grows exponentially. We generate each tree exactly once." + space_complexity: "O(n × 2^n). We store all generated trees, and each tree has O(n) nodes. The recursion stack adds O(n) depth." 
+ +solutions: + - approach_name: Recursion with Memoisation + is_optimal: true + code: | + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + + def all_possible_fbt(n: int) -> list[TreeNode]: + # Cache to store results for each n + memo = {} + + def build(num_nodes: int) -> list[TreeNode]: + # Check cache first + if num_nodes in memo: + return memo[num_nodes] + + # Full binary trees only exist for odd n + if num_nodes % 2 == 0: + return [] + + # Base case: single node is a valid full binary tree + if num_nodes == 1: + return [TreeNode(0)] + + result = [] + + # Try all ways to split n-1 nodes between left and right + # Both subtrees need odd number of nodes, so step by 2 + for left_count in range(1, num_nodes, 2): + right_count = num_nodes - 1 - left_count + + # Get all possible left and right subtrees + left_trees = build(left_count) + right_trees = build(right_count) + + # Combine every left-right pair with a new root + for left in left_trees: + for right in right_trees: + root = TreeNode(0) + root.left = left + root.right = right + result.append(root) + + # Cache and return + memo[num_nodes] = result + return result + + return build(n) + explanation: | + **Time Complexity:** O(2^n) — We generate all Catalan((n-1)/2) trees, each requiring O(n) construction time. + + **Space Complexity:** O(n × 2^n) — Storing all trees, each with O(n) nodes, plus O(n) recursion depth. + + The memoisation dictionary stores all trees for each odd value from 1 to n. This prevents recomputation when the same subtree size is needed for different parent splits. The nested loops combine all valid left-right subtree pairs. 
+ + - approach_name: Bottom-Up Dynamic Programming + is_optimal: false + code: | + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + + def all_possible_fbt(n: int) -> list[TreeNode]: + # Full binary trees only exist for odd n + if n % 2 == 0: + return [] + + # dp[i] = list of all full binary trees with i nodes + dp = {1: [TreeNode(0)]} + + # Build up from 3 nodes to n nodes (odd values only) + for num_nodes in range(3, n + 1, 2): + dp[num_nodes] = [] + + # Try all ways to split num_nodes-1 between left and right + for left_count in range(1, num_nodes, 2): + right_count = num_nodes - 1 - left_count + + # Combine all left-right pairs + for left in dp[left_count]: + for right in dp[right_count]: + root = TreeNode(0) + root.left = left + root.right = right + dp[num_nodes].append(root) + + return dp.get(n, []) + explanation: | + **Time Complexity:** O(2^n) — Same as recursive, we build all valid trees. + + **Space Complexity:** O(n × 2^n) — We store trees for all odd values up to n. + + This iterative approach builds solutions bottom-up, starting from the base case of 1 node and working up to n nodes. It's equivalent to the recursive solution but makes the memoisation explicit as a dictionary indexed by node count. Some prefer this style as it avoids recursion overhead. 
diff --git a/backend/data/questions/allocate-mailboxes.yaml b/backend/data/questions/allocate-mailboxes.yaml new file mode 100644 index 0000000..33578e3 --- /dev/null +++ b/backend/data/questions/allocate-mailboxes.yaml @@ -0,0 +1,252 @@ +title: Allocate Mailboxes +slug: allocate-mailboxes +difficulty: hard +leetcode_id: 1478 +leetcode_url: https://leetcode.com/problems/allocate-mailboxes/ +categories: + - arrays + - dynamic-programming + - sorting + - math +patterns: + - dynamic-programming + +description: | + Given the array `houses` where `houses[i]` is the location of the ith house along a street and an integer `k`, allocate `k` mailboxes in the street. + + Return *the **minimum** total distance between each house and its nearest mailbox*. + + The test cases are generated so that the answer fits in a 32-bit integer. + +constraints: | + - `1 <= k <= houses.length <= 100` + - `1 <= houses[i] <= 10^4` + - All the integers of `houses` are **unique** + +examples: + - input: "houses = [1,4,8,10,20], k = 3" + output: "5" + explanation: "Allocate mailboxes in position 3, 9 and 20. Minimum total distance from each house to nearest mailbox is |3-1| + |4-3| + |9-8| + |10-9| + |20-20| = 5." + - input: "houses = [2,3,5,12,18], k = 2" + output: "9" + explanation: "Allocate mailboxes in position 3 and 14. Minimum total distance from each house to nearest mailbox is |2-3| + |3-3| + |5-3| + |12-14| + |18-14| = 9." + +explanation: + intuition: | + Imagine you're a postal service manager trying to place mailboxes along a street to minimise the total walking distance for all residents. Each house will use the nearest mailbox, so you need to strategically partition houses into groups and place one mailbox optimally for each group. 
+ + The **core insight** is recognising two key mathematical facts: + + **Fact 1: Optimal placement for one mailbox serving multiple houses is at the median.** + + If you have a single mailbox serving houses at positions `[2, 5, 8]`, where should you place it? The answer is the **median** position (5 in this case). The median minimises the sum of absolute deviations — this is a well-known result from statistics. Placing it at position 5 gives total distance `|2-5| + |5-5| + |8-5| = 3 + 0 + 3 = 6`, which is optimal. + + **Fact 2: Houses served by the same mailbox must be contiguous (after sorting).** + + Think about it: if house A and house C use mailbox M, but house B (between them) uses a different mailbox M', then B would be closer to M than to M' — a contradiction. So we can **sort the houses first** and then partition them into `k` contiguous groups. + + With these insights, the problem transforms into: *"Partition `n` sorted houses into `k` contiguous groups to minimise the total cost, where the cost of a group is the sum of distances to the median."* + + This is a classic **interval DP** problem where we try all ways to split houses into groups. 
+ + approach: | + We solve this using **Dynamic Programming with Precomputed Costs**: + + **Step 1: Sort the houses** + + - Sorting ensures that houses served by the same mailbox are contiguous + - This is crucial for the DP to work correctly + +   + + **Step 2: Precompute the cost matrix** + + - `cost[i][j]`: The minimum total distance when one mailbox serves houses from index `i` to index `j` + - For each pair `(i, j)`, the optimal mailbox position is at the median house + - Calculate the sum of distances from all houses in `[i, j]` to the median + +   + + **Step 3: Define the DP state** + + - `dp[i][m]`: The minimum total distance to serve houses `0` to `i-1` using exactly `m` mailboxes + - Base case: `dp[0][0] = 0` (no houses, no mailboxes, zero cost) + - Goal: `dp[n][k]` where `n` is the number of houses + +   + + **Step 4: Fill the DP table** + + - For each number of houses `i` from `1` to `n`: + - For each number of mailboxes `m` from `1` to `min(i, k)`: + - Try all ways to assign the last group: houses `j` to `i-1` served by mailbox `m` + - `dp[i][m] = min(dp[j][m-1] + cost[j][i-1])` for all valid `j` + +   + + **Step 5: Return the result** + + - Return `dp[n][k]`, the minimum cost to serve all `n` houses with `k` mailboxes + + common_pitfalls: + - title: Forgetting to Sort + description: | + The houses are not necessarily given in sorted order. Without sorting, the assumption that each mailbox serves a contiguous segment breaks down. + + For example, with `houses = [10, 1, 5]` and `k = 2`, if we don't sort, we might incorrectly partition as `[10, 1]` and `[5]`, but after sorting it becomes `[1, 5, 10]` where valid partitions are `[1]` and `[5, 10]` or `[1, 5]` and `[10]`. + wrong_approach: "Process houses in given order" + correct_approach: "Sort houses first, then apply DP" + + - title: Using Mean Instead of Median + description: | + A common mathematical error is placing the mailbox at the **mean** (average) position instead of the **median**. 
+ + The mean minimises the sum of *squared* distances, but we need to minimise the sum of *absolute* distances. For `houses = [1, 2, 10]`: + - Mean = 4.33: Total distance = `|1-4.33| + |2-4.33| + |10-4.33|` ≈ 11.33 + - Median = 2: Total distance = `|1-2| + |2-2| + |10-2|` = 9 + + Always use the median for minimising absolute deviations. + wrong_approach: "Place mailbox at average position" + correct_approach: "Place mailbox at median position" + + - title: Inefficient Cost Calculation + description: | + Recalculating the cost for each interval `[i, j]` during DP adds an O(n) factor to every transition, pushing the DP from O(n^2 * k) to O(n^3 * k). + + **Precompute all costs** in a matrix first. For each interval, the cost can be computed in O(j - i) time, giving O(n^3) total precomputation across the O(n^2) intervals (an incremental recurrence brings this down to O(n^2)). Then DP lookups are O(1). + wrong_approach: "Calculate interval cost inside DP loops" + correct_approach: "Precompute cost[i][j] matrix before DP" + + - title: Off-by-One Errors in DP Indices + description: | + The DP has multiple indices (`i` for houses, `m` for mailboxes, `j` for partition points). It's easy to confuse 0-indexed vs 1-indexed or inclusive vs exclusive bounds. + + Be consistent: if `dp[i][m]` represents the first `i` houses with `m` mailboxes, then `dp[0][0] = 0` is the base case, and `cost[j][i-1]` covers houses from index `j` to `i-1` inclusive. + + key_takeaways: + - "**Median minimises absolute distance**: When placing one point to minimise sum of absolute distances to multiple points, use the median" + - "**Sorting enables contiguity**: After sorting, optimal groups are always contiguous — this transforms the problem into interval DP" + - "**Precomputation optimisation**: Compute all `cost[i][j]` values upfront to avoid redundant calculations in DP" + - "**Interval DP pattern**: Problems asking to partition an array into `k` groups with a cost function often use this `dp[i][m]` formulation" + + time_complexity: "O(n^2 * k).
The DP table has O(n * k) states, and each state considers O(n) possible partitions." + space_complexity: "O(n^2 + n * k). We use O(n^2) for the precomputed cost matrix and O(n * k) for the DP table." + +solutions: + - approach_name: Dynamic Programming with Precomputed Costs + is_optimal: true + code: | + def min_distance(houses: list[int], k: int) -> int: + # Sort houses so each mailbox serves a contiguous segment + houses.sort() + n = len(houses) + + # Precompute cost[i][j]: min distance for one mailbox serving houses[i:j+1] + # Optimal position is at the median house + cost = [[0] * n for _ in range(n)] + for i in range(n): + for j in range(i, n): + # Median is at index (i + j) // 2 + median = houses[(i + j) // 2] + # Sum distances from all houses in range to the median + for h in range(i, j + 1): + cost[i][j] += abs(houses[h] - median) + + # dp[i][m] = min cost to serve first i houses with m mailboxes + # Initialize with infinity + INF = float('inf') + dp = [[INF] * (k + 1) for _ in range(n + 1)] + dp[0][0] = 0 # Base case: 0 houses, 0 mailboxes, 0 cost + + # Fill DP table + for i in range(1, n + 1): # Number of houses to serve + for m in range(1, min(i, k) + 1): # Number of mailboxes used + # Try all ways to assign last group + # Houses j to i-1 (0-indexed) served by mailbox m + for j in range(m - 1, i): + dp[i][m] = min(dp[i][m], dp[j][m - 1] + cost[j][i - 1]) + + return dp[n][k] + explanation: | + **Time Complexity:** O(n^3 + n^2 * k) — O(n^3) to precompute costs with the triple loop, O(n^2 * k) for the DP. + + **Space Complexity:** O(n^2 + n * k) — Cost matrix plus DP table. + + We first sort the houses, then precompute the cost of serving any contiguous segment with one optimally-placed mailbox. The DP finds the optimal way to partition houses into `k` groups, minimising total cost.
+ + - approach_name: Optimised Cost Calculation + is_optimal: true + code: | + def min_distance(houses: list[int], k: int) -> int: + houses.sort() + n = len(houses) + + # Optimised cost calculation using the property: + # cost[i][j] = cost[i][j-1] + houses[j] - houses[(i+j)//2] + # so each entry is filled in O(1) from the entry to its left + cost = [[0] * n for _ in range(n)] + for i in range(n): + for j in range(i + 1, n): + # Extending [i, j-1] by house j adds the distance from + # houses[j] to the median of [i, j]. The houses already in + # the range cost the same as before: the median either stays + # put or moves between the two middle houses of [i, j-1] + cost[i][j] = cost[i][j - 1] + houses[j] - houses[(i + j) // 2] + + # DP with space optimization: only need previous row + INF = float('inf') + dp = [INF] * (n + 1) + dp[0] = 0 + + for m in range(1, k + 1): + # Build a fresh row for m mailboxes; dp still holds the row for m - 1 + new_dp = [INF] * (n + 1) + for i in range(m, n + 1): + for j in range(m - 1, i): + new_dp[i] = min(new_dp[i], dp[j] + cost[j][i - 1]) + dp = new_dp + + return dp[n] + explanation: | + **Time Complexity:** O(n^2 * k) — Costs are precomputed in O(n^2) via the recurrence, so the DP dominates. + + **Space Complexity:** O(n^2) — Cost matrix dominates; DP uses O(n) with space optimisation. + + This version uses the recurrence relation for cost calculation: when extending a range by one house, the new cost equals the old cost plus the distance from the new house to the (possibly shifted) median. The DP is space-optimised to use only O(n) for the current and previous rows.
+ + - approach_name: Brute Force (Exponential) + is_optimal: false + code: | + def min_distance(houses: list[int], k: int) -> int: + houses.sort() + n = len(houses) + + def cost(i: int, j: int) -> int: + """Cost for one mailbox serving houses[i:j+1]""" + median = houses[(i + j) // 2] + return sum(abs(houses[h] - median) for h in range(i, j + 1)) + + def solve(start: int, remaining: int) -> int: + """Min cost to serve houses[start:] with remaining mailboxes""" + # Base case: no more houses + if start == n: + return 0 if remaining == 0 else float('inf') + # Base case: no more mailboxes but houses remain + if remaining == 0: + return float('inf') + + min_cost = float('inf') + # Try assigning houses[start:end+1] to one mailbox + for end in range(start, n - remaining + 1): + current = cost(start, end) + solve(end + 1, remaining - 1) + min_cost = min(min_cost, current) + + return min_cost + + return solve(0, k) + explanation: | + **Time Complexity:** O(n^k * n) — Exponential due to trying all partitions without memoisation. + + **Space Complexity:** O(k) — Recursion depth. + + This brute force approach tries all ways to partition houses into `k` groups. Without memoisation, it explores many overlapping subproblems redundantly. Included to illustrate the recursive structure before optimisation. Adding memoisation would give the same O(n^2 * k) complexity as the DP solution. diff --git a/backend/data/questions/alphabet-board-path.yaml b/backend/data/questions/alphabet-board-path.yaml new file mode 100644 index 0000000..2e5c883 --- /dev/null +++ b/backend/data/questions/alphabet-board-path.yaml @@ -0,0 +1,236 @@ +title: Alphabet Board Path +slug: alphabet-board-path +difficulty: medium +leetcode_id: 1138 +leetcode_url: https://leetcode.com/problems/alphabet-board-path/ +categories: + - strings + - hash-tables +patterns: + - matrix-traversal + +description: | + On an alphabet board, we start at position `(0, 0)`, corresponding to character `board[0][0]`. 
+ + The board is defined as: + + ``` + board = ["abcde", "fghij", "klmno", "pqrst", "uvwxy", "z"] + ``` + + We may make the following moves: + + - `'U'` moves our position up one row, if the position exists on the board + - `'D'` moves our position down one row, if the position exists on the board + - `'L'` moves our position left one column, if the position exists on the board + - `'R'` moves our position right one column, if the position exists on the board + - `'!'` adds the character at our current position to the answer + + Return a sequence of moves that makes our answer equal to `target` in the **minimum number of moves**. You may return any valid path. + +constraints: | + - `1 <= target.length <= 100` + - `target` consists only of English lowercase letters + +examples: + - input: 'target = "leet"' + output: '"DDR!UURRR!!DDD!"' + explanation: "Starting at 'a' (0,0), move down twice and right once to reach 'l', press '!'. Then move up twice and right three times to reach 'e', press '!' twice (since 'e' appears twice). Finally move down three times to reach 't' and press '!'." + - input: 'target = "code"' + output: '"RR!DDRR!UUL!R!"' + explanation: "Navigate from 'a' to 'c' (right twice), then to 'o', then to 'd', then to 'e', pressing '!' after reaching each character." + +explanation: + intuition: | + Imagine the alphabet board as a **coordinate grid** where each letter has a fixed position. Since the letters are arranged sequentially (`a` at `(0,0)`, `b` at `(0,1)`, etc.), we can calculate any letter's position using simple arithmetic: + + - **Row** = `(char - 'a') // 5` (integer division by 5, since each row has 5 letters) + - **Column** = `(char - 'a') % 5` (remainder when divided by 5) + + The problem becomes: given a current position and a target position, what's the shortest path between them? On a grid with no obstacles, this is simply moving the required number of steps in each direction. 
+ + However, there's a **critical trap**: the letter `'z'` sits alone at position `(5, 0)`. The last row only has one cell! If you try to move right while on row 5, you'll fall off the board. Similarly, if you're at column > 0 and try to move down to row 5, you'll be stuck. + + The solution is to **control the order of moves**: when moving *toward* `'z'`, move left/up first before moving down. When moving *away from* `'z'`, move up first before moving right. This ensures we never land on an invalid position. + + approach: | + We solve this using a **Coordinate Calculation with Ordered Moves** approach: + + **Step 1: Create a position lookup** + + - For each letter `'a'` to `'z'`, calculate its `(row, col)` position + - Row = `(ord(char) - ord('a')) // 5` + - Column = `(ord(char) - ord('a')) % 5` + - This can be done on-the-fly or precomputed + +   + + **Step 2: Initialise tracking variables** + + - `current_row`: Set to `0` (starting position) + - `current_col`: Set to `0` (starting position) + - `result`: Empty list to collect move characters + +   + + **Step 3: Process each character in target** + + - Calculate the target position `(target_row, target_col)` + - Determine the row and column differences: + - `row_diff = target_row - current_row` + - `col_diff = target_col - current_col` + +   + + **Step 4: Generate moves in the correct order** + + - **Move up first** (if `row_diff < 0`): Add `'U'` × `|row_diff|` times + - **Move left next** (if `col_diff < 0`): Add `'L'` × `|col_diff|` times + - **Move down** (if `row_diff > 0`): Add `'D'` × `row_diff` times + - **Move right last** (if `col_diff > 0`): Add `'R'` × `col_diff` times + - This order (U → L → D → R) ensures we never get stuck at `'z'` + +   + + **Step 5: Add the selection and update position** + + - Append `'!'` to select the current character + - Update `current_row` and `current_col` to the target position + +   + + **Step 6: Return the result** + + - Join all moves into a single string + + 
common_pitfalls: + - title: The 'z' Trap + description: | + The most common bug is ignoring that `'z'` is the only letter in its row. Consider moving from `'z'` (5, 0) to `'e'` (0, 4): + + If you move right first: you try to go to `(5, 1)` — but that position doesn't exist! The board only has `'z'` at `(5, 0)`. + + The fix is to **always move up/left before down/right**. This ensures you leave row 5 before moving horizontally, and you move to column 0 before entering row 5. + wrong_approach: "Move in any order (e.g., right before up)" + correct_approach: "Always process moves in order: Up, Left, Down, Right" + + - title: Forgetting to Handle Staying in Place + description: | + If the same character appears consecutively in `target` (e.g., `"ee"`), you don't need any moves — just add `'!'`. + + Some implementations incorrectly add empty move strings or fail to handle zero differences. The solution handles this naturally since `'U' * 0` produces an empty string. + + - title: Off-by-One in Position Calculation + description: | + Remember that `ord('a')` is the baseline. The formula `(ord(char) - ord('a'))` gives values 0-25 for 'a'-'z'. + + - `'a'` → 0 → row 0, col 0 + - `'f'` → 5 → row 1, col 0 + - `'z'` → 25 → row 5, col 0 + + Double-check your arithmetic, especially for edge characters like `'e'` (row 0, col 4) and `'z'` (row 5, col 0). 
+ + key_takeaways: + - "**Coordinate mapping**: Converting characters to grid positions using arithmetic (`// 5` for row, `% 5` for column) is a common technique for board problems" + - "**Order matters**: When navigating grids with irregular shapes, the sequence of moves can determine validity — this pattern appears in many path-finding problems" + - "**Handle edge cases explicitly**: The `'z'` special case is the crux of this problem; always analyse board boundaries carefully" + - "**Greedy works here**: Since we're on a grid with no obstacles (except the shape constraint), the shortest path is simply the Manhattan distance — no need for BFS/DFS" + + time_complexity: "O(n × k) where `n` is the length of `target` and `k` is the average number of moves per character. Since the board is 6×5, `k` is at most 9 (5 rows + 4 columns), so this simplifies to **O(n)**." + space_complexity: "O(n × k) for storing the result string. In the worst case, each character requires up to 9 moves plus '!', so the output can be up to 10× the input length. This simplifies to **O(n)**." 
+ +solutions: + - approach_name: Coordinate Calculation with Ordered Moves + is_optimal: true + code: | + def alphabet_board_path(target: str) -> str: + result = [] + # Start at 'a' which is at position (0, 0) + curr_row, curr_col = 0, 0 + + for char in target: + # Calculate target position from character + # 'a' is 0, 'b' is 1, ..., 'z' is 25 + char_index = ord(char) - ord('a') + target_row = char_index // 5 # 5 letters per row + target_col = char_index % 5 + + # Calculate how many steps in each direction + row_diff = target_row - curr_row + col_diff = target_col - curr_col + + # CRITICAL: Order matters to avoid falling off at 'z' + # Move UP first (away from row 5) + if row_diff < 0: + result.append('U' * (-row_diff)) + + # Move LEFT next (toward column 0, needed before going to 'z') + if col_diff < 0: + result.append('L' * (-col_diff)) + + # Move DOWN (toward row 5, but only after we're at column 0) + if row_diff > 0: + result.append('D' * row_diff) + + # Move RIGHT last (only valid after leaving row 5) + if col_diff > 0: + result.append('R' * col_diff) + + # Select the character + result.append('!') + + # Update current position + curr_row, curr_col = target_row, target_col + + return ''.join(result) + explanation: | + **Time Complexity:** O(n) — We process each character once, and each character requires at most O(1) position calculations and O(k) string operations where k ≤ 9. + + **Space Complexity:** O(n) — The result string grows linearly with input length. + + The key insight is processing moves in a specific order (U → L → D → R) to handle the irregular board shape where 'z' is alone on the last row. 
+ + - approach_name: Using Position Dictionary + is_optimal: true + code: | + def alphabet_board_path(target: str) -> str: + # Precompute positions for all letters + positions = {} + for i, char in enumerate("abcdefghijklmnopqrstuvwxyz"): + positions[char] = (i // 5, i % 5) + + result = [] + curr_row, curr_col = 0, 0 + + for char in target: + target_row, target_col = positions[char] + + # Move up before moving right (escape from 'z' row) + while curr_row > target_row: + result.append('U') + curr_row -= 1 + + # Move left before moving down (prepare for 'z') + while curr_col > target_col: + result.append('L') + curr_col -= 1 + + # Move down after horizontal adjustment + while curr_row < target_row: + result.append('D') + curr_row += 1 + + # Move right after leaving 'z' row + while curr_col < target_col: + result.append('R') + curr_col += 1 + + result.append('!') + + return ''.join(result) + explanation: | + **Time Complexity:** O(n) — Same as the first approach, with O(26) preprocessing. + + **Space Complexity:** O(n) — For the result, plus O(26) = O(1) for the positions dictionary. + + This version uses a precomputed dictionary and while loops instead of string multiplication. Both approaches are equally optimal; this one may be slightly more readable for some. diff --git a/backend/data/questions/alternating-digit-sum.yaml b/backend/data/questions/alternating-digit-sum.yaml new file mode 100644 index 0000000..b64a4ac --- /dev/null +++ b/backend/data/questions/alternating-digit-sum.yaml @@ -0,0 +1,171 @@ +title: Alternating Digit Sum +slug: alternating-digit-sum +difficulty: easy +leetcode_id: 2544 +leetcode_url: https://leetcode.com/problems/alternating-digit-sum/ +categories: + - math +patterns: + - greedy + +description: | + You are given a positive integer `n`. Each digit of `n` has a sign according to the following rules: + + - The **most significant digit** is assigned a **positive** sign. + - Each other digit has an opposite sign to its adjacent digits. 
+ + Return *the sum of all digits with their corresponding sign*. + +constraints: | + - `1 <= n <= 10^9` + +examples: + - input: "n = 521" + output: "4" + explanation: "(+5) + (-2) + (+1) = 4." + - input: "n = 111" + output: "1" + explanation: "(+1) + (-1) + (+1) = 1." + - input: "n = 886996" + output: "0" + explanation: "(+8) + (-8) + (+6) + (-9) + (+9) + (-6) = 0." + +explanation: + intuition: | + Imagine you have a row of people standing in a line, and they alternate between facing forward (+) and backward (-). The first person always faces forward. + + The **core insight** is that the first digit (most significant) is always positive, and the sign alternates from there. Think of it like a zigzag pattern: `+`, `-`, `+`, `-`, and so on. + + The challenge is that we receive the number as an integer, not as a string of digits. We have two main approaches: + + 1. **Convert to string**: Easy to iterate left-to-right, directly accessing each digit + 2. **Use modulo arithmetic**: Extract digits from right-to-left using `% 10` and `// 10` + + The string approach is more intuitive since we process digits in the natural reading order (left-to-right), making it easy to assign the correct sign starting from positive. 
+ + approach: | + We solve this using a **String Conversion Approach**: + + **Step 1: Convert the number to a string** + + - Convert `n` to a string to easily access each digit + - This allows left-to-right traversal, matching the problem's sign assignment order + +   + + **Step 2: Initialise tracking variables** + + - `total`: Set to `0` to accumulate our alternating sum + - `sign`: Set to `1` (positive) since the first digit is always positive + +   + + **Step 3: Iterate through each digit** + + - For each character in the string: + - Convert the character back to an integer + - Multiply by the current sign and add to total + - Flip the sign: multiply by `-1` + +   + + **Step 4: Return the result** + + - Return `total` after processing all digits + +   + + This approach works because we process digits in their natural order (most significant first), allowing us to easily assign alternating signs starting with positive. + + common_pitfalls: + - title: Processing Digits Right-to-Left + description: | + Using modulo (`% 10`) extracts digits from right-to-left (least significant first). This means you'd process `521` as `1, 2, 5`. + + The problem states the **most significant digit** (leftmost) is positive. If you process right-to-left, you need to track whether the total number of digits is odd or even to determine the correct starting sign. + + For example, `521` has 3 digits (odd), so the rightmost digit gets `+` sign when processed right-to-left. But `5210` has 4 digits (even), so the rightmost would get `-` sign. + + Converting to a string and processing left-to-right avoids this complexity entirely. + wrong_approach: "Using % 10 without adjusting for digit count" + correct_approach: "Convert to string for natural left-to-right processing" + + - title: Forgetting to Flip the Sign + description: | + A common mistake is incrementing or using a counter to determine the sign instead of simply flipping it. 
+ + Using `sign = -sign` or `sign *= -1` on each iteration is cleaner and less error-prone than checking if the index is odd or even. + wrong_approach: "Using if-else or modulo on index" + correct_approach: "Flip sign each iteration with sign *= -1" + + - title: Integer Overflow (Not Applicable in Python) + description: | + In languages like C++ or Java, you might worry about overflow when handling numbers up to `10^9`. However, in Python, integers have arbitrary precision, so this isn't a concern. + + Even the sum of alternating digits for the maximum input can't exceed the input value itself, so overflow isn't an issue in any language for this problem. + + key_takeaways: + - "**String conversion simplifies digit access**: When you need left-to-right digit processing, converting to string is often cleaner than modulo arithmetic" + - "**Sign flipping pattern**: Alternating signs can be elegantly handled by multiplying by `-1` each iteration" + - "**Right-to-left vs left-to-right**: Modulo gives digits in reverse order; consider which direction your problem needs" + - "**Similar problems**: This pattern appears in problems involving alternating operations or checkerboard-like patterns" + + time_complexity: "O(d) where d is the number of digits in `n`. We process each digit exactly once. Since `n <= 10^9`, we have at most 10 digits, making this effectively O(1)." + space_complexity: "O(d) for the string representation of the number. With at most 10 digits, this is effectively O(1)." 
+ +solutions: + - approach_name: String Conversion + is_optimal: true + code: | + def alternate_digit_sum(n: int) -> int: + # Convert to string for easy left-to-right access + digits = str(n) + total = 0 + # First digit is always positive + sign = 1 + + for char in digits: + # Convert character to integer and apply sign + total += sign * int(char) + # Flip sign for next digit + sign *= -1 + + return total + explanation: | + **Time Complexity:** O(d) where d is the number of digits (at most 10 for n <= 10^9). + + **Space Complexity:** O(d) for the string representation. + + We convert the number to a string and iterate left-to-right, applying alternating signs starting with positive. The sign flips after each digit. + + - approach_name: Mathematical (Right-to-Left) + is_optimal: false + code: | + def alternate_digit_sum(n: int) -> int: + total = 0 + # Sign for the LAST digit (will be adjusted based on digit count) + sign = 1 + + while n > 0: + # Extract the last digit + digit = n % 10 + # Add with current sign + total += sign * digit + # Flip sign for next digit (going right-to-left) + sign *= -1 + # Remove the last digit + n //= 10 + + # After the loop, sign has been flipped one extra time + # If sign is now -1, we had an odd number of digits (correct) + # If sign is now +1, we had an even number of digits (need to negate) + # Equivalently: multiply by -sign to correct the orientation + return total * -sign + explanation: | + **Time Complexity:** O(d) where d is the number of digits. + + **Space Complexity:** O(1) - no string allocation needed. + + This approach extracts digits right-to-left using modulo. The tricky part is that we process in reverse order, so we need to adjust the final sign based on whether we had an odd or even number of digits. The `* -sign` at the end corrects for this. + + While this uses O(1) space, it's less intuitive than the string approach due to the sign correction logic. 
diff --git a/backend/data/questions/ambiguous-coordinates.yaml b/backend/data/questions/ambiguous-coordinates.yaml new file mode 100644 index 0000000..4887556 --- /dev/null +++ b/backend/data/questions/ambiguous-coordinates.yaml @@ -0,0 +1,232 @@ +title: Ambiguous Coordinates +slug: ambiguous-coordinates +difficulty: medium +leetcode_id: 816 +leetcode_url: https://leetcode.com/problems/ambiguous-coordinates/ +categories: + - strings + - recursion +patterns: + - backtracking + +description: | + We had some 2-dimensional coordinates, like `"(1, 3)"` or `"(2, 0.5)"`. Then, we removed all commas, decimal points, and spaces and ended up with the string `s`. + + For example, `"(1, 3)"` becomes `s = "(13)"` and `"(2, 0.5)"` becomes `s = "(205)"`. + + Return *a list of strings representing all possibilities for what our original coordinates could have been*. + + Our original representation never had extraneous zeroes, so we never started with numbers like `"00"`, `"0.0"`, `"0.00"`, `"1.0"`, `"001"`, `"00.01"`, or any other number that can be represented with fewer digits. Also, a decimal point within a number never occurs without at least one digit occurring before it, so we never started with numbers like `".1"`. + + The final answer list can be returned in any order. All coordinates in the final answer have exactly one space between them (occurring after the comma). + +constraints: | + - `4 <= s.length <= 12` + - `s[0] == '('` and `s[s.length - 1] == ')'` + - The rest of `s` are digits + +examples: + - input: 's = "(123)"' + output: '["(1, 2.3)","(1, 23)","(1.2, 3)","(12, 3)"]' + explanation: "We can split the digits '123' into x and y coordinates in multiple ways, and each coordinate can optionally have a decimal point." + - input: 's = "(0123)"' + output: '["(0, 1.23)","(0, 12.3)","(0, 123)","(0.1, 2.3)","(0.1, 23)","(0.12, 3)"]' + explanation: "0.0, 00, 0001, or 00.01 are not allowed due to extraneous zeros." 
+ - input: 's = "(00011)"' + output: '["(0, 0.011)","(0.001, 1)"]' + explanation: "Leading zeros severely limit valid representations. '00' as an integer or '00.x' as a decimal are invalid." + +explanation: + intuition: | + Imagine you're trying to decode a compressed message. Someone took a coordinate like `(1.5, 2.3)` and stripped away all the formatting — the comma, spaces, and decimal points — leaving just `(1523)`. Your job is to figure out all the ways this could be "unpacked" back into valid coordinates. + + The key insight is to break this into **two independent subproblems**: + + 1. **Split the string** into two parts: one for the x-coordinate and one for the y-coordinate + 2. **Generate all valid numbers** from each part by optionally inserting a decimal point + + For each split position, you independently generate all valid representations for the left part (x) and the right part (y), then combine them. This is essentially a **Cartesian product** of possibilities. + + The tricky part is knowing which number representations are *valid*. A number is invalid if it has: + - Leading zeros (like `"01"` or `"001"`) — unless it's just `"0"` + - Trailing zeros after a decimal point (like `"1.0"` or `"2.30"`) + + Think of it like reconstructing a puzzle where you try every valid piece combination systematically. 
+ + approach: | + We solve this using **systematic enumeration** with careful validation: + + **Step 1: Extract the digit string** + + - Remove the parentheses from `s` to get the raw digits + - Example: `"(123)"` → `"123"` + +   + + **Step 2: Split into two coordinate parts** + + - Try every possible split point: left part gets 1 to n-1 digits, right part gets the rest + - For `"123"`: splits are `("1", "23")`, `("12", "3")` + - Each split represents a potential (x, y) pairing + +   + + **Step 3: Generate valid numbers from a digit string** + + For each part, generate all valid number representations: + + - **Integer form**: Valid only if it doesn't have leading zeros (except for `"0"` itself) + - **Decimal form**: Try placing a decimal point at each position. Valid only if: + - The integer part doesn't have leading zeros (unless it's `"0"`) + - The fractional part doesn't have trailing zeros + +   + + **Step 4: Combine results** + + - For each split, compute the Cartesian product of valid x-numbers and valid y-numbers + - Format each combination as `"(x, y)"` + +   + + **Step 5: Return all valid coordinates** + + - Collect all formatted coordinate strings across all splits + + common_pitfalls: + - title: Missing Edge Cases for Zero + description: | + The rules around zeros are subtle: + - `"0"` is valid as an integer + - `"00"` is NOT valid (leading zero) + - `"0.5"` is valid (zero before decimal is allowed) + - `"0.0"` is NOT valid (trailing zero after decimal) + - `"10"` is valid, but `"01"` is NOT (leading zero) + + Many solutions fail by not handling these cases correctly. Always test with inputs containing zeros like `"(0123)"` and `"(00011)"`. 
+ wrong_approach: "Treating all zero-containing strings the same" + correct_approach: "Separate checks for leading zeros in integers and trailing zeros in decimals" + + - title: Forgetting Single-Digit Numbers + description: | + When generating decimal representations, don't forget that a single digit like `"5"` can only be an integer — you can't place a decimal point in a single character. + + Some solutions try to generate `"5."` or `".5"` which are invalid formats. + wrong_approach: "Trying to insert decimal in single-digit strings" + correct_approach: "Only try decimal insertion when string length >= 2" + + - title: Incorrect Cartesian Product + description: | + The final answer requires combining valid x-coordinates with valid y-coordinates. If either side produces zero valid numbers (due to the zero rules), that entire split should contribute nothing to the answer. + + For example, splitting `"00011"` as `("000", "11")` yields no valid x-coordinates (since `"000"`, `"0.00"`, `"00.0"` are all invalid), so this split produces no results. + wrong_approach: "Including partial results when one coordinate is invalid" + correct_approach: "Only combine when both sides have valid representations" + + key_takeaways: + - "**Decomposition pattern**: Break complex enumeration into independent subproblems (split position × valid numbers)" + - "**Validation rules matter**: Carefully encode the constraints — leading zeros for integers, trailing zeros for decimals" + - "**Cartesian product**: When combining independent choices, iterate through all pairs systematically" + - "**Edge case testing**: Problems with multiple validation rules need thorough testing with boundary inputs like zeros" + + time_complexity: "O(n^4). For each of the O(n) split positions, we generate O(n) valid numbers for each side, and each number generation involves O(n) string operations. The total combinations can be O(n^2) per split." + space_complexity: "O(n^3). 
The output list can contain O(n^3) coordinate strings (O(n) splits × up to O(n^2) combinations per split); each string has length O(n), so the output holds O(n^4) characters in total." + +solutions: + - approach_name: Enumeration with Validation + is_optimal: true + code: | + def ambiguous_coordinates(s: str) -> list[str]: + # Extract digits by removing parentheses + digits = s[1:-1] + n = len(digits) + result = [] + + def valid_numbers(string: str) -> list[str]: + """Generate all valid number representations from a digit string.""" + if not string: + return [] + + valid = [] + + # Try as integer (no decimal point) + # Valid if no leading zeros, or it's just "0" + if string == "0" or not string.startswith("0"): + valid.append(string) + + # Try with decimal point at each position + for i in range(1, len(string)): + integer_part = string[:i] + decimal_part = string[i:] + + # Integer part: no leading zeros (unless it's "0") + if len(integer_part) > 1 and integer_part.startswith("0"): + continue + + # Decimal part: no trailing zeros + if decimal_part.endswith("0"): + continue + + valid.append(f"{integer_part}.{decimal_part}") + + return valid + + # Try each split position + for i in range(1, n): + left = digits[:i] # x-coordinate digits + right = digits[i:] # y-coordinate digits + + # Get all valid representations for each side + left_valid = valid_numbers(left) + right_valid = valid_numbers(right) + + # Cartesian product: combine each valid x with each valid y + for x in left_valid: + for y in right_valid: + result.append(f"({x}, {y})") + + return result + explanation: | + **Time Complexity:** O(n^4) — For each of O(n) splits, we generate up to O(n) valid numbers per side, and combining them produces O(n^2) pairs. String operations add another O(n) factor. + + **Space Complexity:** O(n^3) coordinate strings — O(n) splits × up to O(n^2) pairs per split; each string has length O(n), so the result holds O(n^4) characters in total. + + The solution systematically tries every split position and every decimal placement, filtering out invalid representations based on the zero rules. 
The nested loops for Cartesian product ensure we capture all valid combinations. + + - approach_name: Generator-Based Approach + is_optimal: false + code: | + def ambiguous_coordinates(s: str) -> list[str]: + digits = s[1:-1] + + def generate(string: str): + """Yield all valid number representations.""" + # Integer representation + if string == "0" or not string.startswith("0"): + yield string + + # Decimal representations + for i in range(1, len(string)): + left, right = string[:i], string[i:] + # Check: no leading zeros in integer part (except "0") + # Check: no trailing zeros in decimal part + if (left == "0" or not left.startswith("0")) and not right.endswith("0"): + yield f"{left}.{right}" + + result = [] + n = len(digits) + + # Try all split positions + for i in range(1, n): + # Generate Cartesian product of valid coordinates + for x in generate(digits[:i]): + for y in generate(digits[i:]): + result.append(f"({x}, {y})") + + return result + explanation: | + **Time Complexity:** O(n^4) — Same as the list-based approach. + + **Space Complexity:** O(n^3) — Output dominates; generators reduce intermediate storage. + + This variation uses Python generators (`yield`) instead of building lists, which can be slightly more memory-efficient for the intermediate valid number lists. The logic is identical, but the lazy evaluation avoids storing all valid numbers before combining them. 
diff --git a/backend/data/questions/amount-of-time-for-binary-tree-to-be-infected.yaml b/backend/data/questions/amount-of-time-for-binary-tree-to-be-infected.yaml new file mode 100644 index 0000000..fa601e1 --- /dev/null +++ b/backend/data/questions/amount-of-time-for-binary-tree-to-be-infected.yaml @@ -0,0 +1,227 @@ +title: Amount of Time for Binary Tree to Be Infected +slug: amount-of-time-for-binary-tree-to-be-infected +difficulty: medium +leetcode_id: 2385 +leetcode_url: https://leetcode.com/problems/amount-of-time-for-binary-tree-to-be-infected/ +categories: + - trees + - graphs + - hash-tables +patterns: + - bfs + - tree-traversal + +description: | + You are given the `root` of a binary tree with **unique** values, and an integer `start`. At minute `0`, an **infection** starts from the node with value `start`. + + Each minute, a node becomes infected if: + + - The node is currently uninfected. + - The node is adjacent to an infected node. + + Return *the number of minutes needed for the entire tree to be infected*. + + **Note:** In a binary tree, a node is adjacent to its parent and its children. The infection spreads in all directions — not just downward. + +constraints: | + - `1 <= number of nodes <= 10^5` + - `1 <= Node.val <= 10^5` + - Each node has a **unique** value. + - A node with value `start` exists in the tree. + +examples: + - input: "root = [1,5,3,null,4,10,6,9,2], start = 3" + output: "4" + explanation: "Starting from node 3, the infection spreads: Minute 0: Node 3. Minute 1: Nodes 1, 10, 6. Minute 2: Node 5. Minute 3: Node 4. Minute 4: Nodes 9, 2. Total time: 4 minutes." + - input: "root = [1], start = 1" + output: "0" + explanation: "The only node in the tree is the starting node, so it takes 0 minutes." + +explanation: + intuition: | + At first glance, this looks like a tree traversal problem. But there's a twist: infection spreads **in all directions** — to children *and* to the parent. 
In a standard binary tree, we only have pointers going downward. How do we "go up"? + + Think of it like this: imagine the tree as a network of rooms connected by hallways. Once a room catches fire, it spreads to all connected rooms — regardless of whether they're "above" or "below" in our original tree structure. + + The key insight is to **convert the tree into an undirected graph**. Once we have a graph where each node knows all its neighbors (parent and children alike), we can use **Breadth-First Search (BFS)** starting from the infected node. BFS naturally explores nodes level by level — and each "level" corresponds to one minute of infection spread. + + The answer is simply the **maximum distance** from the start node to any other node in this graph, which BFS finds efficiently. + + approach: | + We solve this in two phases: first convert the tree to a graph, then run BFS. + + **Step 1: Build an adjacency list (graph) from the tree** + + - Use DFS to traverse the tree + - For each node, add bidirectional edges: parent ↔ child + - Store this in a hash map where each node value maps to a list of neighbor values + +   + + **Step 2: Run BFS from the start node** + + - Initialize a queue with the `start` node and a `visited` set + - Track the current "minute" (distance from start) + - Process nodes level by level: + - For each node at the current level, add all unvisited neighbors to the next level + - Increment the minute counter after processing each level + +   + + **Step 3: Return the total time** + + - When the queue is empty, all nodes are infected + - Return the number of minutes elapsed (which equals the maximum distance from start) + +   + + This approach works because BFS explores nodes in order of their distance from the source. The last level we process contains the farthest nodes, and the time to reach them is our answer. 
+ + common_pitfalls: + - title: Treating It as a Standard Tree Problem + description: | + A common mistake is trying to solve this with standard tree traversal, only considering paths going downward. + + For example, if `start = 3` and node 3 is deep in the tree, the infection needs to spread **upward** to the root and then down other branches. Standard tree traversal doesn't give you a way to go from child to parent. + + The fix is to convert the tree into an undirected graph where parent-child relationships become bidirectional edges. + wrong_approach: "DFS/BFS only going to children" + correct_approach: "Build undirected graph, then BFS from start" + + - title: Using DFS Instead of BFS for Distance + description: | + DFS can find *a* path to every node, but it doesn't naturally give you the *shortest* path (minimum time). You'd need to track depths and handle backtracking carefully. + + BFS is the right choice here because it explores nodes level by level. Each level corresponds to one minute, so when BFS completes, you've automatically found the maximum time needed. + wrong_approach: "DFS with manual depth tracking" + correct_approach: "BFS for level-order (shortest path) traversal" + + - title: Forgetting to Track Visited Nodes + description: | + Since we're working with an undirected graph (bidirectional edges), if you don't track visited nodes, you'll revisit the same node repeatedly and loop forever. + + For example: node A connects to B, B connects back to A. Without a visited set, your BFS will bounce between A and B infinitely. 
+ wrong_approach: "BFS without visited set" + correct_approach: "Mark nodes as visited when adding to queue" + + key_takeaways: + - "**Tree to Graph conversion**: When a tree problem requires bidirectional traversal (going to parent), convert it to an undirected graph first" + - "**BFS for shortest paths**: BFS naturally finds shortest paths in unweighted graphs — each level is one step further from the source" + - "**Level-order = time steps**: When modeling time-based spread (infection, fire, etc.), BFS levels correspond directly to time units" + - "**Related problems**: This pattern appears in problems like *Rotting Oranges*, *Walls and Gates*, and *Shortest Path in Binary Matrix*" + + time_complexity: "O(n). We visit each node twice — once during graph construction (DFS) and once during BFS." + space_complexity: "O(n). We store the adjacency list (O(n) edges in a tree) plus the BFS queue and visited set (O(n) each)." + +solutions: + - approach_name: Graph Conversion + BFS + is_optimal: true + code: | + from collections import defaultdict, deque + from typing import Optional + + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def amount_of_time(root: Optional[TreeNode], start: int) -> int: + # Step 1: Build adjacency list (undirected graph) + graph = defaultdict(list) + + def build_graph(node: TreeNode, parent: Optional[TreeNode]) -> None: + if not node: + return + # Add bidirectional edge between node and parent + if parent: + graph[node.val].append(parent.val) + graph[parent.val].append(node.val) + # Recurse to children + build_graph(node.left, node) + build_graph(node.right, node) + + build_graph(root, None) + + # Step 2: BFS from start node + queue = deque([start]) + visited = {start} + minutes = -1 # Start at -1 because we count levels, not nodes + + while queue: + minutes += 1 + # Process all nodes at current level (current minute) + for _ in range(len(queue)): + node = 
queue.popleft() + # Add all unvisited neighbors to next level + for neighbor in graph[node]: + if neighbor not in visited: + visited.add(neighbor) + queue.append(neighbor) + + return minutes + explanation: | + **Time Complexity:** O(n) — We traverse all nodes twice: once for graph construction, once for BFS. + + **Space Complexity:** O(n) — The adjacency list stores O(n) edges (a tree with n nodes has n-1 edges), and BFS uses O(n) for the queue and visited set. + + The key insight is converting the tree to an undirected graph so we can traverse "upward" to parents. Then BFS gives us the maximum distance (time) naturally by exploring level by level. + + - approach_name: One-Pass DFS with Distance Tracking + is_optimal: true + code: | + from typing import Optional + + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def amount_of_time(root: Optional[TreeNode], start: int) -> int: + max_time = 0 + + def dfs(node: Optional[TreeNode]) -> tuple[int, int]: + """ + Returns (height, dist): the height of this subtree in edges (-1 if empty), + and the distance from this node down to start (-1 if start is not in this subtree).
+ """ + nonlocal max_time + + if not node: + return -1, -1 + + left_height, left_dist = dfs(node.left) + right_height, right_dist = dfs(node.right) + height = max(left_height, right_height) + 1 + + if node.val == start: + # Found start node - the infection first spreads down through its own subtree + max_time = max(max_time, height) + return height, 0 # Distance from start to itself is 0 + + if left_dist >= 0: + # Start is in left subtree + # Time to infect right subtree = distance to start + right height + 1 + dist = left_dist + 1 + max_time = max(max_time, dist + right_height + 1) + return height, dist + + if right_dist >= 0: + # Start is in right subtree + dist = right_dist + 1 + max_time = max(max_time, dist + left_height + 1) + return height, dist + + # Start not in this subtree - only the height matters to ancestors + return height, -1 + + # Handle edge case: single node tree + if not root.left and not root.right: + return 0 + + dfs(root) + return max_time + explanation: | + **Time Complexity:** O(n) — Single DFS traversal visiting each node once. + + **Space Complexity:** O(h) — Only the recursion stack, where h is the tree height. O(log n) for balanced trees, O(n) worst case for skewed trees. + + This approach avoids building an explicit graph. Instead, during DFS, we track whether the start node is in the current subtree and calculate distances on the fly. When we find a node that's an ancestor of start, we can compute the time to infect the "other" subtree. Each call returns both the subtree's height and the distance down to start, so the two quantities are never confused.
diff --git a/backend/data/questions/angle-between-hands-of-a-clock.yaml b/backend/data/questions/angle-between-hands-of-a-clock.yaml new file mode 100644 index 0000000..b04844b --- /dev/null +++ b/backend/data/questions/angle-between-hands-of-a-clock.yaml @@ -0,0 +1,155 @@ +title: Angle Between Hands of a Clock +slug: angle-between-hands-of-a-clock +difficulty: medium +leetcode_id: 1344 +leetcode_url: https://leetcode.com/problems/angle-between-hands-of-a-clock/ +categories: + - math +patterns: + - greedy + +description: | + Given two numbers, `hour` and `minutes`, return *the smaller angle (in degrees) formed between the hour and the minute hand*. + + Answers within `10^-5` of the actual value will be accepted as correct. + +constraints: | + - `1 <= hour <= 12` + - `0 <= minutes <= 59` + +examples: + - input: "hour = 12, minutes = 30" + output: "165" + explanation: "At 12:30, the minute hand points at 6 (180°) and the hour hand has moved halfway between 12 and 1 (15°). The angle between them is |180 - 15| = 165°." + - input: "hour = 3, minutes = 30" + output: "75" + explanation: "At 3:30, the minute hand points at 6 (180°) and the hour hand is halfway between 3 and 4 (105°). The angle between them is |180 - 105| = 75°." + - input: "hour = 3, minutes = 15" + output: "7.5" + explanation: "At 3:15, the minute hand points at 3 (90°) and the hour hand has moved slightly past 3 (97.5°). The angle between them is |97.5 - 90| = 7.5°." + +explanation: + intuition: | + Picture an analog clock face as a circle divided into 360 degrees. The key insight is that **both hands move independently at constant rates**, and we can calculate exactly where each hand points at any given time. + + Think of it like this: the clock face is a coordinate system where 12 o'clock is 0° (or 360°), and angles increase clockwise. 
Each hand sweeps around this circle at a different speed: + + - The **minute hand** completes a full rotation (360°) in 60 minutes, so it moves **6° per minute** + - The **hour hand** completes a full rotation in 12 hours (720 minutes), so it moves **0.5° per minute** + + The crucial detail many miss is that the **hour hand moves continuously**, not just jumping from hour to hour. At 3:30, the hour hand isn't pointing exactly at 3 — it's halfway between 3 and 4 because half an hour has passed. + + Once we know where each hand points, we calculate the absolute difference. But since we want the *smaller* angle (a clock has two angles between any two hands), we take the minimum of the angle and `360 - angle`. + + approach: | + We solve this using **direct angle calculation**: + + **Step 1: Calculate the minute hand's position** + + - The minute hand moves 360° in 60 minutes + - Rate: `360 / 60 = 6°` per minute + - Position: `minutes * 6` + +   + + **Step 2: Calculate the hour hand's position** + + - The hour hand moves 360° in 12 hours + - Rate: `360 / 12 = 30°` per hour + - But it also moves as minutes pass: `30 / 60 = 0.5°` per minute + - Position: `(hour % 12) * 30 + minutes * 0.5` + - Note: We use `hour % 12` because hour 12 should be treated as 0 + +   + + **Step 3: Calculate the absolute difference** + + - Difference: `abs(hour_angle - minute_angle)` + +   + + **Step 4: Return the smaller angle** + + - A clock face has two angles between any two points (they sum to 360°) + - Return `min(angle, 360 - angle)` to get the smaller one + + common_pitfalls: + - title: Forgetting the Hour Hand Moves Continuously + description: | + A common mistake is treating the hour hand as if it only points to exact hour positions. At 3:30, the hour hand is NOT at the 3 — it's halfway between 3 and 4. + + The hour hand moves 0.5° per minute (30° per hour ÷ 60 minutes). So at 3:30, it's at `3 * 30 + 30 * 0.5 = 90 + 15 = 105°`, not 90°. 
+ wrong_approach: "hour_angle = hour * 30" + correct_approach: "hour_angle = (hour % 12) * 30 + minutes * 0.5" + + - title: Not Handling Hour 12 + description: | + When `hour = 12`, the hour hand is at the 12 o'clock position (0°), not at 360°. If you don't use modulo, you'll calculate `12 * 30 = 360°`, which is technically correct but can cause issues when computing the smaller angle. + + Using `hour % 12` normalises hour 12 to 0, giving the correct position of 0°. + wrong_approach: "hour * 30" + correct_approach: "(hour % 12) * 30" + + - title: Returning the Larger Angle + description: | + Two clock hands create two angles that sum to 360°. The problem asks for the *smaller* angle. + + For example, if the hands are 270° apart, the smaller angle is `360 - 270 = 90°`. Always return `min(angle, 360 - angle)`. + wrong_approach: "Return abs(hour_angle - minute_angle)" + correct_approach: "Return min(angle, 360 - angle)" + + key_takeaways: + - "**Rate-based thinking**: Convert cyclic movements to angular velocities (degrees per unit time) for precise calculations" + - "**Continuous vs discrete**: Physical quantities like clock hands move continuously — don't discretise them unless necessary" + - "**Circular geometry**: When dealing with circular positions, remember there are always two ways to measure between any two points" + - "**Modular arithmetic**: Use modulo to handle wraparound cases cleanly (12 o'clock → 0)" + + time_complexity: "O(1). We perform a fixed number of arithmetic operations regardless of input values." + space_complexity: "O(1). We only use a constant number of variables to store the angles." 
+ +solutions: + - approach_name: Direct Angle Calculation + is_optimal: true + code: | + def angle_clock(hour: int, minutes: int) -> float: + # Minute hand: 360° / 60 min = 6° per minute + minute_angle = minutes * 6 + + # Hour hand: 30° per hour + 0.5° per minute + # Use hour % 12 so that 12 o'clock is treated as 0° + hour_angle = (hour % 12) * 30 + minutes * 0.5 + + # Calculate the absolute difference between hands + diff = abs(hour_angle - minute_angle) + + # Return the smaller of the two possible angles + return min(diff, 360 - diff) + explanation: | + **Time Complexity:** O(1) — Only basic arithmetic operations. + + **Space Complexity:** O(1) — Only a few variables used. + + We calculate each hand's position using their angular velocities, find the absolute difference, and return the smaller of the two possible angles. The key insight is that the hour hand moves continuously, advancing 0.5° for each minute that passes. + + - approach_name: Separate Rate Constants + is_optimal: false + code: | + def angle_clock(hour: int, minutes: int) -> float: + # Define angular velocities as constants + MINUTE_HAND_RATE = 6.0 # degrees per minute + HOUR_HAND_RATE = 0.5 # degrees per minute + HOUR_MARK_ANGLE = 30.0 # degrees between hour marks + + # Calculate positions + minute_pos = minutes * MINUTE_HAND_RATE + hour_pos = (hour % 12) * HOUR_MARK_ANGLE + minutes * HOUR_HAND_RATE + + # Get smaller angle + angle = abs(minute_pos - hour_pos) + return min(angle, 360 - angle) + explanation: | + **Time Complexity:** O(1) — Same operations as optimal solution. + + **Space Complexity:** O(1) — Uses named constants for clarity. + + This approach uses named constants to make the angular velocities explicit. While functionally identical to the optimal solution, the named constants (`MINUTE_HAND_RATE`, `HOUR_HAND_RATE`) make the code more self-documenting and easier to understand. 
diff --git a/backend/data/questions/append-characters-to-string-to-make-subsequence.yaml b/backend/data/questions/append-characters-to-string-to-make-subsequence.yaml new file mode 100644 index 0000000..07c0e4c --- /dev/null +++ b/backend/data/questions/append-characters-to-string-to-make-subsequence.yaml @@ -0,0 +1,165 @@ +title: Append Characters to String to Make Subsequence +slug: append-characters-to-string-to-make-subsequence +difficulty: easy +leetcode_id: 2486 +leetcode_url: https://leetcode.com/problems/append-characters-to-string-to-make-subsequence/ +categories: + - strings + - two-pointers +patterns: + - two-pointers + - greedy + +description: | + You are given two strings `s` and `t` consisting of only lowercase English letters. + + Return *the minimum number of characters that need to be appended to the end of* `s` *so that* `t` *becomes a **subsequence** of* `s`. + + A **subsequence** is a string that can be derived from another string by deleting some or no characters without changing the order of the remaining characters. + +constraints: | + - `1 <= s.length, t.length <= 10^5` + - `s` and `t` consist only of lowercase English letters. + +examples: + - input: 's = "coaching", t = "coding"' + output: "4" + explanation: 'Append the characters "ding" to the end of s so that s = "coachingding". Now, t is a subsequence of s ("coachingding"). It can be shown that appending any 3 characters to the end of s will never make t a subsequence.' + - input: 's = "abcde", t = "a"' + output: "0" + explanation: 't is already a subsequence of s ("abcde").' + - input: 's = "z", t = "abcde"' + output: "5" + explanation: 'Append the characters "abcde" to the end of s so that s = "zabcde". Now, t is a subsequence of s ("zabcde"). It can be shown that appending any 4 characters to the end of s will never make t a subsequence.' 
+ +explanation: + intuition: | + Imagine you're reading through string `s` character by character, trying to "match" as many characters of `t` as possible, **in order**. + + Think of it like this: you have a checklist (string `t`) and you're scanning through a document (string `s`). Every time you find the next item on your checklist in the document, you check it off and move to the next item. Characters you don't need can be skipped. + + The key insight is that we want to find the **longest prefix of `t`** that already exists as a subsequence of `s`. Whatever remains of `t` after this matching process is exactly what we need to append. + + For example, with `s = "coaching"` and `t = "coding"`: + - We find `'c'` in `s` at index 0 — match! + - We find `'o'` in `s` at index 1 — match! + - We don't find `'d'` anywhere after index 1 in `s` + - So only `"co"` (2 characters) of `t` can be matched + - We need to append the remaining 4 characters: `"ding"` + + This greedy matching works because we're always looking for the **earliest** possible match for each character, which leaves the most room for subsequent characters. + + approach: | + We solve this using the **Two Pointers** technique: + + **Step 1: Initialise two pointers** + + - `i`: Pointer for string `s`, starting at `0` + - `j`: Pointer for string `t`, starting at `0` + +   + + **Step 2: Scan through both strings** + + - While `i < len(s)` and `j < len(t)`: + - If `s[i] == t[j]`, we found a match — increment `j` to look for the next character of `t` + - Always increment `i` to continue scanning through `s` + +   + + **Step 3: Calculate the result** + + - After the loop, `j` represents how many characters of `t` we successfully matched + - The answer is `len(t) - j` — the number of unmatched characters that must be appended + +   + + This approach works because the greedy matching (taking the earliest match for each character) is optimal. 
There's no benefit to skipping a valid match, as that would only reduce our options for matching subsequent characters. + + common_pitfalls: + - title: Checking Characters Out of Order + description: | + A common mistake is trying to find each character of `t` in `s` independently, without respecting the order requirement. + + For example, with `s = "abc"` and `t = "cab"`: + - All characters of `t` exist in `s` + - But `"cab"` is NOT a subsequence of `"abc"` because `'c'` appears after `'a'` and `'b'` in `s` + + The two-pointer approach naturally handles ordering by only looking forward in `s` for each subsequent character of `t`. + wrong_approach: "Check if each character of t exists in s" + correct_approach: "Use two pointers to match characters in order" + + - title: Reversing the Pointer Logic + description: | + It's tempting to swap the roles of `s` and `t`, but the problem specifically asks for `t` to be a subsequence of `s`, not the other way around. + + We iterate through `s` with our main pointer and only advance the `t` pointer when we find a match. This ensures we're finding `t` within `s`. + wrong_approach: "Looking for s as a subsequence of t" + correct_approach: "Always scan through s while matching against t" + + - title: Off-by-One in the Result + description: | + After matching, `j` represents the **count** of matched characters (0-indexed pointer that advanced `j` times). + + If `j = 2` after matching, it means we matched `t[0]` and `t[1]`, so 2 characters are matched. The remaining characters to append is `len(t) - j`, not `len(t) - j - 1`. 
+ wrong_approach: "Return len(t) - j - 1" + correct_approach: "Return len(t) - j" + + key_takeaways: + - "**Subsequence matching pattern**: Use two pointers — one for the source string (scan all), one for the target (advance on match)" + - "**Greedy is optimal**: Taking the earliest match leaves maximum room for subsequent characters" + - "**Linear efficiency**: A single pass through both strings gives O(n + m) time with O(1) space" + - "**Foundation for harder problems**: This pattern extends to problems like *Is Subsequence*, *Longest Common Subsequence*, and edit distance variants" + + time_complexity: "O(n + m). We traverse each string at most once, where `n = len(s)` and `m = len(t)`." + space_complexity: "O(1). We only use two pointer variables regardless of input size." + +solutions: + - approach_name: Two Pointers + is_optimal: true + code: | + def append_characters(s: str, t: str) -> int: + # Pointer for string t - tracks how much we've matched + j = 0 + + # Scan through every character in s + for char in s: + # If we've matched all of t, we're done + if j == len(t): + break + # Found a match - advance the t pointer + if char == t[j]: + j += 1 + + # Characters remaining in t that weren't matched + return len(t) - j + explanation: | + **Time Complexity:** O(n + m) — Single pass through `s`, and `j` advances at most `m` times. + + **Space Complexity:** O(1) — Only one pointer variable used. + + We greedily match characters of `t` as we scan through `s`. The number of unmatched characters at the end is exactly what we need to append. 
+ + - approach_name: Two Pointers (Explicit Indices) + is_optimal: true + code: | + def append_characters(s: str, t: str) -> int: + i, j = 0, 0 # Pointers for s and t respectively + n, m = len(s), len(t) + + # Continue until we exhaust either string + while i < n and j < m: + # Match found - advance t pointer + if s[i] == t[j]: + j += 1 + # Always advance s pointer + i += 1 + + # Return count of unmatched characters in t + return m - j + explanation: | + **Time Complexity:** O(n + m) — We traverse `s` fully and advance through `t` as matches are found. + + **Space Complexity:** O(1) — Only two index variables used. + + This variant uses explicit index pointers instead of Python's `for` loop. The logic is identical: scan `s`, match against `t`, and return the unmatched suffix length. diff --git a/backend/data/questions/append-k-integers-with-minimal-sum.yaml b/backend/data/questions/append-k-integers-with-minimal-sum.yaml new file mode 100644 index 0000000..4d98aa5 --- /dev/null +++ b/backend/data/questions/append-k-integers-with-minimal-sum.yaml @@ -0,0 +1,191 @@ +title: Append K Integers With Minimal Sum +slug: append-k-integers-with-minimal-sum +difficulty: medium +leetcode_id: 2195 +leetcode_url: https://leetcode.com/problems/append-k-integers-with-minimal-sum/ +categories: + - arrays + - math + - sorting +patterns: + - greedy + +description: | + You are given an integer array `nums` and an integer `k`. Append `k` **unique positive** integers that do **not** appear in `nums` to `nums` such that the resulting total sum is **minimum**. + + Return *the sum of the* `k` *integers appended to* `nums`. + +constraints: | + - `1 <= nums.length <= 10^5` + - `1 <= nums[i] <= 10^9` + - `1 <= k <= 10^8` + +examples: + - input: "nums = [1,4,25,10,25], k = 2" + output: "5" + explanation: "The two unique positive integers that do not appear in nums which we append are 2 and 3. The sum of the two integers appended is 2 + 3 = 5." 
+ - input: "nums = [5,6], k = 6" + output: "25" + explanation: "The six unique positive integers that do not appear in nums which we append are 1, 2, 3, 4, 7, and 8. The sum is 1 + 2 + 3 + 4 + 7 + 8 = 25." + +explanation: + intuition: | + To minimise the sum, we want to pick the **smallest possible positive integers** that aren't already in `nums`. Imagine counting from 1 upward: 1, 2, 3, 4, ... and skipping any number that already exists in the array. + + The key insight is that we don't need to iterate through each number one by one. Instead, we can use the **arithmetic series formula** to calculate sums of consecutive integers in bulk: the sum of integers from `1` to `n` is `n * (n + 1) / 2`. + + Think of it like filling gaps: if `nums` contains some numbers that "block" certain positions, we need to count how many integers we can pick before hitting a blocker, calculate that sum efficiently, then jump past the blocker and continue. + + By sorting `nums` and processing gaps between consecutive elements, we can quickly determine how many "free" integers exist in each range and compute their sum using the formula. 
+ + approach: | + We solve this using a **Greedy Gap-Filling Approach**: + + **Step 1: Sort and deduplicate the array** + + - Sort `nums` to process elements in order + - Remove duplicates since they don't affect which integers are "taken" + +   + + **Step 2: Initialise tracking variables** + + - `result`: Accumulates the sum of chosen integers (starts at `0`) + - `prev`: Tracks the last integer we've considered (starts at `0`, meaning we begin from `1`) + +   + + **Step 3: Process each number in the sorted array** + + - For each number `num` in the sorted array, calculate the gap: how many integers exist between `prev + 1` and `num - 1` inclusive + - The count of available integers in this gap is `num - prev - 1` + - If the gap has more integers than we still need (`k`), we only take `k` of them + - Use the arithmetic series formula to add the sum of the integers we take: sum from `prev + 1` to `prev + take` is `take * (2 * prev + take + 1) / 2` + - Subtract the taken count from `k` and update `prev = num` + - If `k` reaches `0`, we're done + +   + + **Step 4: Handle remaining integers after the array** + + - If `k > 0` after processing all elements, we need `k` more integers starting from `prev + 1` + - Add the sum of integers from `prev + 1` to `prev + k` using the arithmetic series formula + +   + + **Step 5: Return the result** + + - Return the accumulated `result` sum + + common_pitfalls: + - title: Iterating One-by-One + description: | + With `k` up to `10^8`, iterating through each integer individually and checking membership would be far too slow. + + For example, if `nums = [10^9]` and `k = 10^8`, you'd need to check and sum 100 million integers one at a time. This results in **O(k)** operations which causes TLE. + + Instead, use the arithmetic series formula `n * (n + 1) / 2` to calculate sums of ranges in **O(1)** time. + wrong_approach: "Loop through integers 1, 2, 3, ... 
checking each" + correct_approach: "Calculate range sums using arithmetic series formula" + + - title: Not Handling Duplicates + description: | + The input array can contain duplicate values (e.g., `[1,4,25,10,25]`). If you don't deduplicate, you might incorrectly count the same "blocked" position multiple times. + + For instance, with `nums = [2, 2, 2]`, the integer `2` only blocks one position, not three. Deduplicating ensures each blocked position is counted exactly once. + wrong_approach: "Process array with duplicates" + correct_approach: "Convert to set or deduplicate after sorting" + + - title: Integer Overflow + description: | + The sum of `k` integers (where `k` can be `10^8`) starting from 1 is approximately `k * k / 2` — about `5 * 10^15` — which is far beyond the 32-bit integer range. In languages with fixed-size integers, this can cause overflow. + + Python handles arbitrary precision integers automatically, but in other languages you'd need to use 64-bit integers (`long long` in C++, `long` in Java). + wrong_approach: "Use 32-bit integers for sum calculation" + correct_approach: "Use 64-bit integers or language with arbitrary precision" + + - title: Off-by-One Errors in Gap Calculation + description: | + When calculating the number of integers between `prev` and `num`, it's easy to make off-by-one mistakes. + + The count of integers from `a` to `b` inclusive is `b - a + 1`. The gap between `prev` (exclusive) and `num` (exclusive) contains `num - prev - 1` integers. + + Example: between `prev = 2` and `num = 5`, the available integers are `3, 4` — that's `5 - 2 - 1 = 2` integers. + wrong_approach: "Miscounting gap size" + correct_approach: "Gap from prev to num (exclusive both) is num - prev - 1" + + key_takeaways: + - "**Arithmetic series formula**: Sum from 1 to n is `n * (n + 1) / 2`.
This transforms O(n) iteration into O(1) calculation" + - "**Gap-filling strategy**: When filling positions with constraints, sort the constraints and process gaps between them" + - "**Greedy correctness**: Taking the smallest available integers first always yields the minimum sum — no need to consider alternatives" + - "**Handle large ranges**: When k or values can be very large, look for mathematical formulas to avoid iteration" + + time_complexity: "O(n log n). Sorting dominates the complexity; processing the sorted array is O(n)." + space_complexity: "O(n). We store the deduplicated sorted array. Can be O(1) extra space if we sort in-place and handle duplicates during iteration." + +solutions: + - approach_name: Greedy with Arithmetic Series + is_optimal: true + code: | + def min_sum(nums: list[int], k: int) -> int: + # Sort and deduplicate to process gaps in order + nums = sorted(set(nums)) + result = 0 + prev = 0 # Last integer we've accounted for (start before 1) + + for num in nums: + # How many integers are available between prev and num? + gap = num - prev - 1 + + if gap > 0: + # Take at most k integers from this gap + take = min(gap, k) + # Sum of integers from (prev + 1) to (prev + take) + # Using formula: sum = take * (first + last) / 2 + first = prev + 1 + last = prev + take + result += take * (first + last) // 2 + k -= take + + if k == 0: + return result + + prev = num + + # Still need more integers after the last element in nums + if k > 0: + first = prev + 1 + last = prev + k + result += k * (first + last) // 2 + + return result + explanation: | + **Time Complexity:** O(n log n) — Sorting the array dominates; iteration is O(n). + + **Space Complexity:** O(n) — Creating a sorted set of unique elements. + + We process gaps between consecutive elements in the sorted array, using the arithmetic series formula to efficiently sum ranges of consecutive integers. This avoids iterating through potentially billions of integers. 
+ + - approach_name: Brute Force (TLE) + is_optimal: false + code: | + def min_sum(nums: list[int], k: int) -> int: + # Convert to set for O(1) lookup + num_set = set(nums) + result = 0 + current = 1 + + # Find k integers not in nums + while k > 0: + if current not in num_set: + result += current + k -= 1 + current += 1 + + return result + explanation: | + **Time Complexity:** O(k + n) — Building the set takes O(n), and the loop examines at most k + n candidates, because at most n distinct values can be skipped. + + **Space Complexity:** O(n) — Storing nums in a set. + + This straightforward approach iterates through positive integers one by one, adding those not in nums. While correct, it's far too slow when k is large (up to 10^8). Included to illustrate why the arithmetic series approach is necessary. diff --git a/backend/data/questions/apply-bitwise-operations-to-make-strings-equal.yaml b/backend/data/questions/apply-bitwise-operations-to-make-strings-equal.yaml new file mode 100644 index 0000000..b8c79dd --- /dev/null +++ b/backend/data/questions/apply-bitwise-operations-to-make-strings-equal.yaml @@ -0,0 +1,158 @@ +title: Apply Bitwise Operations to Make Strings Equal +slug: apply-bitwise-operations-to-make-strings-equal +difficulty: medium +leetcode_id: 2546 +leetcode_url: https://leetcode.com/problems/apply-bitwise-operations-to-make-strings-equal/ +categories: + - strings + - math +patterns: + - greedy + +description: | + You are given two **0-indexed binary** strings `s` and `target` of the same length `n`. You can do the following operation on `s` **any** number of times: + + - Choose two **different** indices `i` and `j` where `0 <= i, j < n`. + - Simultaneously, replace `s[i]` with (`s[i]` **OR** `s[j]`) and `s[j]` with (`s[i]` **XOR** `s[j]`). + + For example, if `s = "0110"`, you can choose `i = 0` and `j = 2`, then simultaneously replace `s[0]` with (`s[0]` **OR** `s[2]` = `0` **OR** `1` = `1`), and `s[2]` with (`s[0]` **XOR** `s[2]` = `0` **XOR** `1` = `1`). So we will have `s = "1110"`.
+ + Return `true` *if you can make the string* `s` *equal to* `target`*, or* `false` *otherwise*. + +constraints: | + - `n == s.length == target.length` + - `2 <= n <= 10^5` + - `s` and `target` consist of only the digits `0` and `1`. + +examples: + - input: 's = "1010", target = "0110"' + output: "true" + explanation: "We can do the following operations: Choose i = 2 and j = 0, giving s = \"0010\". Then choose i = 2 and j = 1, giving s = \"0110\". Since we can make s equal to target, we return true." + - input: 's = "11", target = "00"' + output: "false" + explanation: "It is not possible to make s equal to target with any number of operations." + +explanation: + intuition: | + This problem seems complex at first — we need to understand what the operation actually does before finding the pattern. + + Let's analyse all possible cases when we apply the operation to positions with values `(a, b)`: + + - `(0, 0)` → `(0 OR 0, 0 XOR 0)` = `(0, 0)` — no change + - `(0, 1)` → `(0 OR 1, 0 XOR 1)` = `(1, 1)` — the `1` "spreads" to position `i` + - `(1, 0)` → `(1 OR 0, 1 XOR 0)` = `(1, 1)` — the `1` "spreads" to position `j` + - `(1, 1)` → `(1 OR 1, 1 XOR 1)` = `(1, 0)` — one `1` becomes `0` + + The key insight emerges: **as long as there is at least one `1` in the string, we can move and manipulate `1`s freely**. We can duplicate a `1` to any position (spread), and we can eliminate a `1` at any position (by using two `1`s to make one of them `0`). + + However, we **cannot create a `1` from nothing**. If `s` has no `1`s, it stays all zeros. Similarly, we **cannot eliminate all `1`s** — if we have any `1`, we'll always have at least one. + + This leads to a simple rule: both strings must either both contain at least one `1`, or both be all zeros. 
+ + approach: | + The solution becomes remarkably simple once we understand the invariant: + + **Step 1: Check if `s` contains at least one `1`** + + - Count or check for presence of `'1'` in string `s` + - This tells us if we have any "fuel" to work with + +   + + **Step 2: Check if `target` contains at least one `1`** + + - Similarly, check for presence of `'1'` in `target` + - This tells us what state we need to achieve + +   + + **Step 3: Compare the two conditions** + + - If both have at least one `1` → return `true` (we can transform freely) + - If both have no `1`s (all zeros) → return `true` (already equal or trivially transformable) + - If one has a `1` and the other doesn't → return `false` (impossible) + +   + + In other words: return `true` if and only if `('1' in s) == ('1' in target)`. + + common_pitfalls: + - title: Trying to Simulate the Operations + description: | + A natural instinct is to try simulating the operations step by step to see if we can reach the target. This approach is fundamentally flawed: + + - There's no clear termination condition + - The search space is exponential + - We might loop forever without finding the answer + + The key is to find an **invariant** — a property that remains unchanged regardless of operations. Here, the invariant is "has at least one `1`". + wrong_approach: "BFS/DFS simulation of all possible operations" + correct_approach: "Check if both strings have the same 'has at least one 1' property" + + - title: Counting Exact Number of 1s + description: | + You might think the number of `1`s needs to match between `s` and `target`. This is incorrect! + + Consider `s = "10"` and `target = "11"`: + - Apply operation with `i=0, j=1`: `(1, 0)` → `(1, 1)` + - We went from one `1` to two `1`s + + Consider `s = "11"` and `target = "10"`: + - Apply operation with `i=0, j=1`: `(1, 1)` → `(1, 0)` + - We went from two `1`s to one `1` + + The count can change; only the **presence** of at least one `1` matters. 
+ wrong_approach: "Check if s.count('1') == target.count('1')" + correct_approach: "Check if ('1' in s) == ('1' in target)" + + - title: Missing the All-Zeros Edge Case + description: | + If both strings are `"000...0"` (all zeros), the answer is `true` — they're already equal, or can be trivially transformed (applying any operation on all zeros changes nothing). + + Make sure your condition handles this: both having zero `1`s should return `true`. + + key_takeaways: + - "**Find the invariant**: For transformation problems, identify what property stays constant across all operations" + - "**Bit manipulation insight**: OR spreads `1`s, XOR toggles — together they allow free movement of `1`s but can't create them from nothing" + - "**Simplification**: Complex-looking problems often reduce to simple checks once you understand the underlying mechanics" + - "**Presence vs count**: Sometimes the *existence* of something matters more than the *quantity*" + + time_complexity: "O(n). We scan each string once to check for the presence of `'1'`." + space_complexity: "O(1). We only use a constant amount of extra space for the boolean checks." + +solutions: + - approach_name: Invariant Check + is_optimal: true + code: | + def make_strings_equal(s: str, target: str) -> bool: + # Check if both strings have the same "has at least one 1" property + # If both have a 1, we can freely transform between any configurations + # If neither has a 1, they're both all zeros (equal) + # If only one has a 1, transformation is impossible + has_one_s = '1' in s + has_one_target = '1' in target + + return has_one_s == has_one_target + explanation: | + **Time Complexity:** O(n) — We check for '1' in each string, which scans at most n characters each. + + **Space Complexity:** O(1) — Only two boolean variables used. + + The solution leverages the key insight that the operation preserves the "has at least one 1" invariant. 
We can spread 1s or eliminate them (by converting 1,1 to 1,0), but we can never go from "no 1s" to "some 1s" or vice versa. + + - approach_name: Count-Based Check + is_optimal: true + code: | + def make_strings_equal(s: str, target: str) -> bool: + # Alternative: check if counts are both zero or both non-zero + count_s = s.count('1') + count_target = target.count('1') + + # Both must be zero, or both must be positive + return (count_s == 0) == (count_target == 0) + explanation: | + **Time Complexity:** O(n) — Counting '1's requires scanning the full string. + + **Space Complexity:** O(1) — Only integer counters used. + + This approach counts the 1s explicitly, then checks if both counts are zero or both are positive. It's equivalent to the first solution but uses count instead of membership check. The `in` operator short-circuits on first match, so the first solution may be slightly faster in practice. diff --git a/backend/data/questions/apply-operations-to-an-array.yaml b/backend/data/questions/apply-operations-to-an-array.yaml new file mode 100644 index 0000000..e69400d --- /dev/null +++ b/backend/data/questions/apply-operations-to-an-array.yaml @@ -0,0 +1,176 @@ +title: Apply Operations to an Array +slug: apply-operations-to-an-array +difficulty: easy +leetcode_id: 2460 +leetcode_url: https://leetcode.com/problems/apply-operations-to-an-array/ +categories: + - arrays + - two-pointers +patterns: + - two-pointers + +description: | + You are given a **0-indexed** array `nums` of size `n` consisting of **non-negative** integers. + + You need to apply `n - 1` operations to this array where, in the ith operation (**0-indexed**), you will apply the following on the ith element of `nums`: + + - If `nums[i] == nums[i + 1]`, then multiply `nums[i]` by `2` and set `nums[i + 1]` to `0`. Otherwise, you skip this operation. + + After performing **all** the operations, **shift** all the `0`'s to the **end** of the array. 
+ + - For example, the array `[1,0,2,0,0,1]` after shifting all its `0`'s to the end, is `[1,2,1,0,0,0]`. + + Return *the resulting array*. + + **Note** that the operations are applied **sequentially**, not all at once. + +constraints: | + - `2 <= nums.length <= 2000` + - `0 <= nums[i] <= 1000` + +examples: + - input: "nums = [1,2,2,1,1,0]" + output: "[1,4,2,0,0,0]" + explanation: | + We do the following operations: + - i = 0: nums[0] and nums[1] are not equal, so we skip this operation. + - i = 1: nums[1] and nums[2] are equal, we multiply nums[1] by 2 and change nums[2] to 0. The array becomes [1,4,0,1,1,0]. + - i = 2: nums[2] and nums[3] are not equal, so we skip this operation. + - i = 3: nums[3] and nums[4] are equal, we multiply nums[3] by 2 and change nums[4] to 0. The array becomes [1,4,0,2,0,0]. + - i = 4: nums[4] and nums[5] are equal, we multiply nums[4] by 2 and change nums[5] to 0. The array becomes [1,4,0,2,0,0]. + After that, we shift the 0's to the end, which gives the array [1,4,2,0,0,0]. + - input: "nums = [0,1]" + output: "[1,0]" + explanation: "No operation can be applied, we just shift the 0 to the end." + +explanation: + intuition: | + This problem breaks down into two distinct phases that can be tackled independently. + + Think of it like a two-step assembly line process: first, you walk through the array performing "merges" where adjacent equal elements combine (the left one doubles, the right one becomes zero). Then, as a separate cleanup step, you push all the zeros to the back — like sweeping debris to one side of a factory floor. + + The key insight is that the **order of operations matters**. Since we process elements sequentially from left to right, a merge at position `i` can create new zeros that might affect subsequent comparisons. For example, if we have `[2, 2, 2]`, merging at index 0 gives us `[4, 0, 2]` — the newly created zero at index 1 doesn't equal the `2` at index 2, so no further merge happens there. 
+ + The second phase — shifting zeros to the end — is a classic **two-pointer** pattern. Instead of actually "moving" zeros, we place all non-zero elements at the front in order, then fill the remaining positions with zeros. + + approach: | + We solve this in two phases: + + **Phase 1: Apply the merge operations** + + - Iterate through indices `0` to `n - 2` (we compare `nums[i]` with `nums[i + 1]`) + - For each index `i`, check if `nums[i] == nums[i + 1]` + - If equal: double `nums[i]` and set `nums[i + 1] = 0` + - If not equal: skip to the next index + +   + + **Phase 2: Shift zeros to the end (Two Pointers)** + + - Use a `write_index` pointer starting at `0` + - Iterate through the array with a `read_index` + - When we find a non-zero element, write it to `write_index` and increment `write_index` + - After processing all elements, fill positions from `write_index` to end with zeros + +   + + **Step 3: Return the result** + + - The array is now modified in-place with all operations applied and zeros shifted to the end + + common_pitfalls: + - title: Forgetting Sequential Processing + description: | + The operations must be applied **sequentially**, not all at once. If you scan the array for all equal adjacent pairs first and then apply all merges simultaneously, you'll get wrong results. + + For example, with `[1, 1, 1]`: + - **Correct (sequential):** At i=0, merge to get `[2, 0, 1]`. At i=1, `0 != 1`, skip. Result after shifting: `[2, 1, 0]` + - **Wrong (simultaneous):** Both pairs (0,1) and (1,2) are equal, so you might merge both, getting `[2, 0, 2]` which is incorrect. + + Always process one index at a time, using the updated array state for subsequent comparisons. + wrong_approach: "Pre-scan for all matches, then apply all merges" + correct_approach: "Process index by index, applying changes immediately" + + - title: Off-by-One in the Loop Boundary + description: | + When iterating to apply operations, you compare `nums[i]` with `nums[i + 1]`. 
If your loop goes to `n - 1` (inclusive), you'll access `nums[n]` which is out of bounds. + + The loop should run from `i = 0` to `i = n - 2` (or equivalently, `i < n - 1`). + wrong_approach: "for i in range(n)" + correct_approach: "for i in range(n - 1)" + + - title: Creating a New Array Instead of In-Place Modification + description: | + While creating a new result array works, it uses O(n) extra space unnecessarily. The problem can be solved in-place with O(1) extra space using the two-pointer technique for the zero-shifting phase. + + The two-pointer approach overwrites elements as it goes, which is safe because the write pointer never exceeds the read pointer. + + key_takeaways: + - "**Two-phase problems**: Breaking a problem into distinct phases (merge, then shift) simplifies the logic and makes each part easier to reason about" + - "**Two-pointer for partitioning**: Moving zeros to the end is a classic application of the two-pointer pattern — one pointer reads, another writes" + - "**Sequential vs. parallel operations**: Always clarify whether operations in a problem should be applied one at a time (sequential) or all at once (parallel)" + - "**In-place modification**: When the problem allows modifying the input, use two pointers to achieve O(1) space complexity" + + time_complexity: "O(n). We make two passes through the array: one for the merge operations and one for shifting zeros." + space_complexity: "O(1). We modify the array in-place and only use a constant number of pointer variables." 
+ +solutions: + - approach_name: Two-Phase Simulation + is_optimal: true + code: | + def apply_operations(nums: list[int]) -> list[int]: + n = len(nums) + + # Phase 1: Apply merge operations sequentially + for i in range(n - 1): + if nums[i] == nums[i + 1]: + # Double the current element + nums[i] *= 2 + # Set the next element to zero + nums[i + 1] = 0 + + # Phase 2: Shift zeros to the end using two pointers + write_index = 0 # Position to place next non-zero element + + # Move all non-zero elements to the front + for read_index in range(n): + if nums[read_index] != 0: + nums[write_index] = nums[read_index] + write_index += 1 + + # Fill remaining positions with zeros + while write_index < n: + nums[write_index] = 0 + write_index += 1 + + return nums + explanation: | + **Time Complexity:** O(n) — Two passes through the array. + + **Space Complexity:** O(1) — In-place modification with only pointer variables. + + We first iterate through the array applying the merge operations. Then we use a classic two-pointer technique to partition the array: non-zero elements move to the front while zeros fill the remaining positions. + + - approach_name: Two-Phase with List Comprehension + is_optimal: false + code: | + def apply_operations(nums: list[int]) -> list[int]: + n = len(nums) + + # Phase 1: Apply merge operations + for i in range(n - 1): + if nums[i] == nums[i + 1]: + nums[i] *= 2 + nums[i + 1] = 0 + + # Phase 2: Collect non-zeros, then append zeros + non_zeros = [x for x in nums if x != 0] + zeros = [0] * (n - len(non_zeros)) + + return non_zeros + zeros + explanation: | + **Time Complexity:** O(n) — One pass for merges, one pass for filtering. + + **Space Complexity:** O(n) — Creates new lists for non-zeros and zeros. + + This approach is more readable but uses O(n) extra space. The list comprehension collects all non-zero elements, then we create the appropriate number of zeros and concatenate. Suitable when simplicity is preferred over space efficiency. 
diff --git a/backend/data/questions/arithmetic-slices-ii-subsequence.yaml b/backend/data/questions/arithmetic-slices-ii-subsequence.yaml new file mode 100644 index 0000000..c859796 --- /dev/null +++ b/backend/data/questions/arithmetic-slices-ii-subsequence.yaml @@ -0,0 +1,209 @@ +title: Arithmetic Slices II - Subsequence +slug: arithmetic-slices-ii-subsequence +difficulty: hard +leetcode_id: 446 +leetcode_url: https://leetcode.com/problems/arithmetic-slices-ii-subsequence/ +categories: + - arrays + - dynamic-programming + - hash-tables +patterns: + - dynamic-programming + +description: | + Given an integer array `nums`, return *the number of all the **arithmetic subsequences** of* `nums`. + + A sequence of numbers is called arithmetic if it consists of **at least three elements** and if the difference between any two consecutive elements is the same. + + - For example, `[1, 3, 5, 7, 9]`, `[7, 7, 7, 7]`, and `[3, -1, -5, -9]` are arithmetic sequences. + - For example, `[1, 1, 2, 5, 7]` is not an arithmetic sequence. + + A **subsequence** of an array is a sequence that can be formed by removing some elements (possibly none) of the array. + + - For example, `[2, 5, 10]` is a subsequence of `[1, 2, 1, 2, 4, 1, 5, 10]`. + + The test cases are generated so that the answer fits in a **32-bit** integer. + +constraints: | + - `1 <= nums.length <= 1000` + - `-2^31 <= nums[i] <= 2^31 - 1` + +examples: + - input: "nums = [2,4,6,8,10]" + output: "7" + explanation: "All arithmetic subsequence slices are: [2,4,6], [4,6,8], [6,8,10], [2,4,6,8], [4,6,8,10], [2,4,6,8,10], [2,6,10]" + - input: "nums = [7,7,7,7,7]" + output: "16" + explanation: "Any subsequence of this array is arithmetic since all elements are the same (difference = 0)." + +explanation: + intuition: | + Imagine building arithmetic sequences piece by piece. The challenge is that a **subsequence** doesn't require consecutive elements — we can skip elements in the original array. 
+ + The key insight is that we need to track **partial sequences** of length 2 or more. Why? Because a valid arithmetic subsequence needs at least 3 elements, but to extend any sequence, we need to know what 2-element "building blocks" exist and what their common difference is. + + Think of it like this: for each position `i` in the array, we maintain a record of "how many ways can I reach this position with a sequence ending here that has a specific difference `d`?" This includes both: + - Pairs (length 2) that could become valid if extended + - Longer sequences (length 3+) that are already valid + + When we process a new element `nums[i]`, we look back at every previous element `nums[j]`. The difference `d = nums[i] - nums[j]` tells us which sequences can be extended. If there were `k` sequences ending at `j` with difference `d`, we can extend all of them by adding `nums[i]`. + + The clever part: sequences of length 2 become length 3 (now valid and countable), and sequences already of length 3+ grow longer (still valid and countable). We count every extension of a sequence that was already length 2 or more. + + approach: | + We use **Dynamic Programming with Hash Maps** to track subsequences by their common difference. 
+ + **Step 1: Set up the data structure** + + - Create an array `dp` of hash maps, where `dp[i]` is a dictionary + - `dp[i][d]` represents the count of subsequences (of length >= 2) ending at index `i` with common difference `d` + - Initialise `result` to `0` to accumulate valid sequences (length >= 3) + +   + + **Step 2: Iterate through all pairs** + + - For each index `i` from `0` to `n-1`: + - For each previous index `j` from `0` to `i-1`: + - Calculate the difference `d = nums[i] - nums[j]` + +   + + **Step 3: Extend existing subsequences** + + - Look up how many subsequences end at `j` with difference `d` (call this `count_at_j`) + - Add `count_at_j` to `result` — these are all the sequences that just became length 3+, or were already valid and got extended + - Update `dp[i][d]` by adding `count_at_j + 1`: + - The `+1` accounts for the new pair `(nums[j], nums[i])` with difference `d` + - The `count_at_j` carries forward all extendable sequences from position `j` + +   + + **Step 4: Return the result** + + - Return `result`, which counts every valid arithmetic subsequence (length >= 3) + +   + + The magic is that we only add to `result` when extending sequences that already have at least 2 elements at position `j`. This ensures we only count sequences of length 3 or more. + + common_pitfalls: + - title: Confusing Subsequences with Subarrays + description: | + A **subarray** requires consecutive elements, but a **subsequence** can skip elements. For example, in `[2, 4, 6, 8, 10]`, the subsequence `[2, 6, 10]` is valid (difference of 4) even though the elements aren't adjacent. + + This is why we need O(n^2) pairs — we must consider every possible pairing, not just adjacent elements. + wrong_approach: "Only checking consecutive elements" + correct_approach: "Check all pairs (i, j) where j < i" + + - title: Counting Pairs as Valid Sequences + description: | + A valid arithmetic sequence needs **at least 3 elements**. 
A common mistake is counting pairs (length 2) as valid. + + The solution handles this by only adding to `result` when we extend a sequence that already exists at `dp[j][d]`. A fresh pair `(nums[j], nums[i])` adds `1` to `dp[i][d]` but contributes `0` to `result` (since `dp[j][d]` was `0`). + wrong_approach: "Counting every pair with a common difference" + correct_approach: "Only count when extending existing sequences of length >= 2" + + - title: Integer Overflow in Difference Calculation + description: | + With constraints `-2^31 <= nums[i] <= 2^31 - 1`, the difference `nums[i] - nums[j]` can overflow a 32-bit integer. + + For example: `nums[i] = 2^31 - 1` and `nums[j] = -2^31` gives a difference of `2^32 - 1`, which exceeds 32-bit range. + + In Python this isn't an issue (arbitrary precision integers), but in languages like Java or C++, you must use `long` for the difference calculation. + wrong_approach: "Using 32-bit integers for the difference" + correct_approach: "Use 64-bit integers (long) for difference calculations" + + - title: Duplicate Elements Mishandling + description: | + When the array has duplicate values (e.g., `[7, 7, 7, 7, 7]`), many pairs share the same difference (0). Each pair can independently start or extend sequences. + + With 5 identical elements, there are C(5,3) + C(5,4) + C(5,5) = 10 + 5 + 1 = 16 valid subsequences of length 3+. The DP correctly accumulates these because it processes every pair. 
+ wrong_approach: "Skipping duplicate values" + correct_approach: "Process every pair regardless of duplicates" + + key_takeaways: + - "**DP with hash maps**: When the state space (possible differences) is large or sparse, use hash maps instead of fixed-size arrays" + - "**Counting extensions**: Only count a sequence when it reaches the minimum valid length — track 'potential' sequences separately from 'valid' ones" + - "**O(n^2) for subsequences**: Unlike subarray problems that can often be solved in O(n), subsequence problems typically require considering all pairs" + - "**Foundation for sequence counting**: This pattern of tracking 'sequences ending at position i with property X' applies to many DP problems involving subsequences" + + time_complexity: "O(n^2). We iterate through all pairs `(i, j)` where `j < i`, and hash map operations are O(1) on average." + space_complexity: "O(n^2). In the worst case, each position could have O(n) different differences stored in its hash map (e.g., when all elements are distinct)." + +solutions: + - approach_name: Dynamic Programming with Hash Maps + is_optimal: true + code: | + def number_of_arithmetic_slices(nums: list[int]) -> int: + n = len(nums) + if n < 3: + return 0 + + # dp[i] maps difference -> count of subsequences ending at i + dp = [dict() for _ in range(n)] + result = 0 + + for i in range(n): + for j in range(i): + # Calculate the common difference + diff = nums[i] - nums[j] + + # How many subsequences end at j with this difference? + count_at_j = dp[j].get(diff, 0) + + # Add to result: these are valid sequences (length >= 3) + # or extensions of already valid sequences + result += count_at_j + + # Update dp[i][diff]: + # - count_at_j sequences extended from j + # - +1 for the new pair (nums[j], nums[i]) + dp[i][diff] = dp[i].get(diff, 0) + count_at_j + 1 + + return result + explanation: | + **Time Complexity:** O(n^2) — We examine all pairs of indices. 
+ + **Space Complexity:** O(n^2) — Each of the n hash maps can have up to O(n) entries. + + The key insight is separating "potential" sequences (length 2) from "valid" sequences (length 3+). By only adding `count_at_j` to the result (not the `+1` for new pairs), we ensure we only count sequences that have reached the minimum length of 3. + + - approach_name: Brute Force (Enumeration) + is_optimal: false + code: | + def number_of_arithmetic_slices(nums: list[int]) -> int: + from itertools import combinations + + n = len(nums) + if n < 3: + return 0 + + count = 0 + + # Check all subsequences of length 3 or more + for length in range(3, n + 1): + for indices in combinations(range(n), length): + # Extract the subsequence + subseq = [nums[i] for i in indices] + # Check if it's arithmetic + if is_arithmetic(subseq): + count += 1 + + return count + + + def is_arithmetic(seq: list[int]) -> bool: + if len(seq) < 3: + return False + diff = seq[1] - seq[0] + for i in range(2, len(seq)): + if seq[i] - seq[i - 1] != diff: + return False + return True + explanation: | + **Time Complexity:** O(2^n * n) — We enumerate all subsequences (2^n) and check each one (O(length)). + + **Space Complexity:** O(n) — For storing each subsequence during checking. + + This approach generates every possible subsequence of length 3 or more and checks if it's arithmetic. While correct, it's far too slow for the given constraints (n up to 1000). With n=1000, there are approximately 2^1000 subsequences — completely infeasible. This illustrates why the DP approach is necessary. 
diff --git a/backend/data/questions/arithmetic-slices.yaml b/backend/data/questions/arithmetic-slices.yaml new file mode 100644 index 0000000..1802e75 --- /dev/null +++ b/backend/data/questions/arithmetic-slices.yaml @@ -0,0 +1,180 @@ +title: Arithmetic Slices +slug: arithmetic-slices +difficulty: medium +leetcode_id: 413 +leetcode_url: https://leetcode.com/problems/arithmetic-slices/ +categories: + - arrays + - dynamic-programming +patterns: + - dynamic-programming + +description: | + An integer array is called *arithmetic* if it consists of **at least three elements** and if the difference between any two consecutive elements is the same. + + For example, `[1,3,5,7,9]`, `[7,7,7,7]`, and `[3,-1,-5,-9]` are arithmetic sequences. + + Given an integer array `nums`, return *the number of arithmetic **subarrays** of* `nums`. + + A **subarray** is a contiguous subsequence of the array. + +constraints: | + - `1 <= nums.length <= 5000` + - `-1000 <= nums[i] <= 1000` + +examples: + - input: "nums = [1,2,3,4]" + output: "3" + explanation: "We have 3 arithmetic slices in nums: [1, 2, 3], [2, 3, 4] and [1, 2, 3, 4] itself." + - input: "nums = [1]" + output: "0" + explanation: "A single element cannot form an arithmetic slice (minimum 3 elements required)." + +explanation: + intuition: | + Imagine you're walking along a staircase where each step has a consistent height difference from the previous one. An arithmetic slice is like finding a section of this staircase where the step heights are uniform — you need at least 3 steps to confirm a pattern. + + The key insight is that **arithmetic slices can extend**. If `[a, b, c]` forms an arithmetic slice with common difference `d`, and the next element `e` also continues the pattern (i.e., `e - c = d`), then we don't just get one more slice `[b, c, e]` — we also get `[a, b, c, e]`. 
+ + Think of it like a snowball effect: each time we extend an arithmetic sequence by one element, the number of *new* slices we gain equals the number of slices that ended at the previous position, plus one more (the new minimal 3-element slice). + + For example, with `[1, 2, 3, 4]`: + - At index 2: `[1, 2, 3]` is our first slice → 1 new slice + - At index 3: The pattern continues, so we get `[2, 3, 4]` (new 3-element slice) AND `[1, 2, 3, 4]` (extended slice) → 2 new slices + + This cumulative relationship is perfect for dynamic programming. + + approach: | + We solve this using a **Dynamic Programming** approach that tracks how many arithmetic slices end at each position. + + **Step 1: Handle edge cases** + + - If the array has fewer than 3 elements, return `0` immediately (no arithmetic slice possible) + +   + + **Step 2: Initialise variables** + + - `dp`: Tracks how many arithmetic slices **end at the current index**. Starts at `0` + - `total`: Accumulates the total count of all arithmetic slices found + +   + + **Step 3: Iterate from index 2 onwards** + + - For each index `i` (starting from 2), check if `nums[i] - nums[i-1] == nums[i-1] - nums[i-2]` + - If the differences match, we can extend the arithmetic sequence: + - `dp = dp + 1` — we gain one more slice than we had ending at the previous position, plus the new 3-element slice + - Add `dp` to `total` + - If the differences don't match, reset `dp = 0` — the arithmetic sequence breaks here + +   + + **Step 4: Return the total** + + - After processing all elements, `total` contains the count of all arithmetic slices + +   + + The key insight is that `dp` at each position tells us how many slices end exactly there. When we extend a sequence, each previous slice that ended at `i-1` now has a corresponding longer version ending at `i`, plus we get one new minimal slice. 
+ + common_pitfalls: + - title: Counting Only Minimal Slices + description: | + A common mistake is to only count 3-element slices and miss longer ones. + + For `[1, 2, 3, 4]`, some might count only `[1, 2, 3]` and `[2, 3, 4]` (2 slices) and forget that `[1, 2, 3, 4]` is also a valid arithmetic slice. + + The DP approach handles this naturally: when we extend from position 2 to position 3, we count both the new 3-element slice AND the extended 4-element slice. + wrong_approach: "Counting only consecutive triplets" + correct_approach: "Track cumulative count using DP to capture all lengths" + + - title: Restarting Count Incorrectly + description: | + When the arithmetic pattern breaks, you must reset your running count to `0`. + + For `[1, 2, 3, 5, 7]`, the sequence `[1, 2, 3]` has difference `1`, but `5 - 3 = 2` breaks the pattern. You need to reset and start fresh from `[3, 5, 7]` which has a new common difference of `2`. + + Forgetting to reset leads to overcounting by incorrectly extending slices across different common differences. + wrong_approach: "Continuing count across different common differences" + correct_approach: "Reset dp to 0 when consecutive differences don't match" + + - title: Off-by-One in Loop Start + description: | + The loop must start at index 2 (the third element) because we need at least 3 elements to form an arithmetic slice. + + Starting at index 0 or 1 makes `nums[i-2]` a negative index: most languages raise an index-out-of-bounds error, while Python silently wraps around to the end of the list and produces wrong counts.
+ wrong_approach: "Starting loop at index 0 or 1" + correct_approach: "Start at index 2 to have three elements available" + + key_takeaways: + - "**Cumulative DP pattern**: When extending a valid sequence adds multiple new valid subsequences, track how many end at each position" + - "**Snowball counting**: Each extension adds `(previous count + 1)` new items — this pattern appears in many counting problems" + - "**O(1) space DP**: When you only need the previous state, you can optimise from an array to a single variable" + - "**Foundation for harder problems**: This extends to *Arithmetic Slices II* where subsequences (non-contiguous) are counted, requiring a hash map approach" + + time_complexity: "O(n). We iterate through the array once, performing constant-time operations at each step." + space_complexity: "O(1). We only use two variables (`dp` and `total`) regardless of input size." + +solutions: + - approach_name: Dynamic Programming + is_optimal: true + code: | + def number_of_arithmetic_slices(nums: list[int]) -> int: + n = len(nums) + # Need at least 3 elements for an arithmetic slice + if n < 3: + return 0 + + # dp tracks slices ending at current position + dp = 0 + # total accumulates all arithmetic slices + total = 0 + + for i in range(2, n): + # Check if current element continues the arithmetic pattern + if nums[i] - nums[i - 1] == nums[i - 1] - nums[i - 2]: + # Extend: we get all previous slices + 1 new minimal slice + dp += 1 + total += dp + else: + # Pattern breaks, reset count + dp = 0 + + return total + explanation: | + **Time Complexity:** O(n) — Single pass through the array. + + **Space Complexity:** O(1) — Only two integer variables used. + + The key insight is that `dp` represents how many arithmetic slices end at the current index. When we can extend the sequence, each slice ending at `i-1` spawns a longer version ending at `i`, plus we get one new 3-element slice. When the pattern breaks, we reset to 0. 
+ + - approach_name: Brute Force + is_optimal: false + code: | + def number_of_arithmetic_slices(nums: list[int]) -> int: + n = len(nums) + count = 0 + + # Try every possible starting point + for i in range(n - 2): + # Calculate the common difference for this starting point + diff = nums[i + 1] - nums[i] + + # Extend as far as possible with this difference + for j in range(i + 2, n): + # Check if the pattern continues + if nums[j] - nums[j - 1] == diff: + # Valid arithmetic slice from i to j + count += 1 + else: + # Pattern breaks, no point continuing + break + + return count + explanation: | + **Time Complexity:** O(n^2) — For each starting position, we may scan to the end. + + **Space Complexity:** O(1) — Only a counter variable used. + + This approach tries every possible starting position and extends the slice as long as the common difference is maintained. While correct, it's less efficient than the DP solution. However, with the constraint `n <= 5000`, this O(n^2) solution would still pass (around 25 million operations at worst). diff --git a/backend/data/questions/arithmetic-subarrays.yaml b/backend/data/questions/arithmetic-subarrays.yaml new file mode 100644 index 0000000..870b73a --- /dev/null +++ b/backend/data/questions/arithmetic-subarrays.yaml @@ -0,0 +1,214 @@ +title: Arithmetic Subarrays +slug: arithmetic-subarrays +difficulty: medium +leetcode_id: 1630 +leetcode_url: https://leetcode.com/problems/arithmetic-subarrays/ +categories: + - arrays + - hash-tables + - sorting +patterns: + - two-pointers + +description: | + A sequence of numbers is called **arithmetic** if it consists of at least two elements, and the difference between every two consecutive elements is the same. More formally, a sequence `s` is arithmetic if and only if `s[i+1] - s[i] == s[1] - s[0]` for all valid `i`. 
+ + For example, these are **arithmetic** sequences: + - `1, 3, 5, 7, 9` + - `7, 7, 7, 7` + - `3, -1, -5, -9` + + The following sequence is **not** arithmetic: `1, 1, 2, 5, 7` + + You are given an array of `n` integers, `nums`, and two arrays of `m` integers each, `l` and `r`, representing the `m` range queries, where the ith query is the range `[l[i], r[i]]`. All the arrays are **0-indexed**. + + Return *a list of boolean elements* `answer`, *where* `answer[i]` *is* `true` *if the subarray* `nums[l[i]], nums[l[i]+1], ..., nums[r[i]]` *can be **rearranged** to form an **arithmetic** sequence, and* `false` *otherwise*. + +constraints: | + - `n == nums.length` + - `m == l.length` + - `m == r.length` + - `2 <= n <= 500` + - `1 <= m <= 500` + - `0 <= l[i] < r[i] < n` + - `-10^5 <= nums[i] <= 10^5` + +examples: + - input: "nums = [4,6,5,9,3,7], l = [0,0,2], r = [2,3,5]" + output: "[true, false, true]" + explanation: "In the 0th query, the subarray [4,6,5] can be rearranged as [4,5,6], an arithmetic sequence with difference 1. In the 1st query, the subarray [4,6,5,9] cannot form an arithmetic sequence. In the 2nd query, the subarray [5,9,3,7] can be rearranged as [3,5,7,9], an arithmetic sequence with difference 2." + - input: "nums = [-12,-9,-3,-12,-6,15,20,-25,-20,-15,-10], l = [0,1,6,4,8,7], r = [4,4,9,7,9,10]" + output: "[false, true, false, false, true, true]" + explanation: "Each query is evaluated independently to check if the subarray can be rearranged into an arithmetic sequence." + +explanation: + intuition: | + Think of an arithmetic sequence like evenly spaced fence posts along a straight line. If you have posts at positions 3, 5, 7, 9, the spacing (common difference) is exactly 2 between each consecutive pair. + + Now imagine someone scatters those fence posts randomly on the ground. 
Your task is to determine: *can these posts be arranged back into a straight line with equal spacing?* + + The key insight is that in a valid arithmetic sequence: + - The **common difference** `d` is determined by `(max - min) / (n - 1)` where `n` is the number of elements + - Every element must be exactly `min + k*d` for some integer `k` from `0` to `n-1` + - Each position `k` must be filled by exactly one element + + This means we can use a **set-based approach**: calculate what the common difference should be, then verify that every required value exists in the subarray. If any value is missing or duplicated unexpectedly, it's not arithmetic. + + approach: | + For each query, we extract the subarray and check if it can form an arithmetic sequence. + + **Step 1: Extract the subarray** + + - Get the subarray `nums[l[i]:r[i]+1]` for the current query + - Find the `min_val` and `max_val` of this subarray + +   + + **Step 2: Calculate the expected common difference** + + - The common difference `d = (max_val - min_val) / (length - 1)` + - If `length` is 1 or 2, it's always arithmetic (any two numbers form an arithmetic sequence) + - If `(max_val - min_val)` is not evenly divisible by `(length - 1)`, return `false` + +   + + **Step 3: Verify all expected values exist** + + - Create a set from the subarray elements for O(1) lookups + - For each expected value `min_val + i*d` where `i` ranges from `0` to `length-1`: + - Check if it exists in the set + - If any expected value is missing, the sequence is not arithmetic + +   + + **Step 4: Return the result** + + - If all expected values exist, return `true` + - The set automatically handles the uniqueness requirement (since we check exact count via the calculation) + +   + + An alternative approach is to simply **sort the subarray** and check if consecutive differences are equal. This is simpler but slightly slower at O(k log k) per query vs O(k) for the set approach. 
+ + common_pitfalls: + - title: Forgetting Integer Division Check + description: | + When calculating the common difference `d = (max - min) / (n - 1)`, you must verify that this division is exact (no remainder). + + For example, with `[1, 2, 4]`: `max = 4`, `min = 1`, `n = 3`. The expected `d = (4-1)/(3-1) = 1.5`. Since `d` is not an integer, this cannot be an arithmetic sequence of integers. + + Always check `(max - min) % (n - 1) == 0` before proceeding. + wrong_approach: "Using floating-point division without checking remainder" + correct_approach: "Check divisibility first, then use integer division" + + - title: Not Handling the All-Equal Case + description: | + When all elements are equal (e.g., `[7, 7, 7, 7]`), the common difference is `0`. This is a valid arithmetic sequence! + + The formula `d = (max - min) / (n - 1) = 0 / (n - 1) = 0` correctly handles this, but some implementations have edge-case bugs when `d = 0` (for example, rejecting a zero difference, or never terminating when stepping from `min` to `max` in increments of `d`). + wrong_approach: "Rejecting or mishandling subarrays whose common difference is 0" + correct_approach: "Treat d = 0 as valid: the formula handles it, and an early return when min equals max is a safe shortcut" + + - title: Duplicate Elements Breaking the Set Approach + description: | + If the subarray contains duplicates like `[1, 3, 3, 5]`, the set will have fewer elements than the original subarray length. + + However, this is implicitly handled: if there are unexpected duplicates, either the divisibility check fails or some expected value won't exist in the set. When `d != 0`, the `n` expected values are all distinct, so finding every one of them among only `n` elements leaves no room for a duplicate.
+ wrong_approach: "Forgetting to account for duplicates in set size" + correct_approach: "The algorithm naturally rejects invalid duplicates through the verification step" + + key_takeaways: + - "**Set for O(1) lookups**: Converting a subarray to a set enables constant-time membership checks, turning O(k^2) verification into O(k)" + - "**Mathematical invariant**: Arithmetic sequences have a deterministic structure — once you know min, max, and length, you know exactly what values must exist" + - "**Sorting alternative**: For simpler code, sorting the subarray and checking consecutive differences works well for small inputs" + - "**Range query pattern**: This problem demonstrates a common pattern where you process multiple independent subarray queries" + + time_complexity: "O(m * k) where `m` is the number of queries and `k` is the average subarray length. Each query requires O(k) to extract the subarray, find min/max, and verify values." + space_complexity: "O(k) where `k` is the maximum subarray length. We store each subarray in a set for O(1) lookups." 
+ +solutions: + - approach_name: Set-Based Verification + is_optimal: true + code: | + def check_arithmetic_subarrays(nums: list[int], l: list[int], r: list[int]) -> list[bool]: + def is_arithmetic(arr: list[int]) -> bool: + n = len(arr) + # Any sequence of length 1 or 2 is arithmetic + if n <= 2: + return True + + min_val, max_val = min(arr), max(arr) + + # All elements are equal — valid arithmetic sequence with d=0 + if min_val == max_val: + return True + + # Check if common difference would be an integer + if (max_val - min_val) % (n - 1) != 0: + return False + + # Calculate the common difference + d = (max_val - min_val) // (n - 1) + + # Use a set for O(1) lookups + num_set = set(arr) + + # Check that all expected values exist + for i in range(n): + expected = min_val + i * d + if expected not in num_set: + return False + + return True + + # Process each query + result = [] + for i in range(len(l)): + # Extract subarray for this query + subarray = nums[l[i]:r[i] + 1] + result.append(is_arithmetic(subarray)) + + return result + explanation: | + **Time Complexity:** O(m * k) — For each of `m` queries, we do O(k) work to extract the subarray, find min/max, build the set, and verify all expected values. + + **Space Complexity:** O(k) — We create a set of size `k` for each subarray. + + This approach leverages the mathematical property that an arithmetic sequence is fully determined by its min, max, and length. We verify by checking that every expected value exists in O(1) time using a set. 
+ + - approach_name: Sorting + is_optimal: false + code: | + def check_arithmetic_subarrays(nums: list[int], l: list[int], r: list[int]) -> list[bool]: + def is_arithmetic(arr: list[int]) -> bool: + # Sort the array to get elements in order + sorted_arr = sorted(arr) + n = len(sorted_arr) + + # Any sequence of 1 or 2 elements is arithmetic + if n <= 2: + return True + + # Calculate the common difference from first two elements + d = sorted_arr[1] - sorted_arr[0] + + # Check all consecutive pairs have the same difference + for i in range(2, n): + if sorted_arr[i] - sorted_arr[i - 1] != d: + return False + + return True + + # Process each query + result = [] + for i in range(len(l)): + subarray = nums[l[i]:r[i] + 1] + result.append(is_arithmetic(subarray)) + + return result + explanation: | + **Time Complexity:** O(m * k log k) — Sorting dominates at O(k log k) per query. + + **Space Complexity:** O(k) — Sorting requires O(k) space. + + This is the most intuitive approach: sort the subarray and verify that all consecutive differences are equal. While slightly slower than the set-based approach, it's simpler to understand and implement. For the given constraints (`n, m <= 500`), both approaches are efficient enough. diff --git a/backend/data/questions/arranging-coins.yaml b/backend/data/questions/arranging-coins.yaml new file mode 100644 index 0000000..08db0be --- /dev/null +++ b/backend/data/questions/arranging-coins.yaml @@ -0,0 +1,170 @@ +title: Arranging Coins +slug: arranging-coins +difficulty: easy +leetcode_id: 441 +leetcode_url: https://leetcode.com/problems/arranging-coins/ +categories: + - math + - binary-search +patterns: + - binary-search + +description: | + You have `n` coins and you want to build a staircase with these coins. The staircase consists of `k` rows where the ith row has exactly `i` coins. The last row of the staircase **may be** incomplete. + + Given the integer `n`, return *the number of **complete rows** of the staircase you will build*. 
+ +constraints: | + - `1 <= n <= 2^31 - 1` + +examples: + - input: "n = 5" + output: "2" + explanation: "Because the 3rd row is incomplete, we return 2." + - input: "n = 8" + output: "3" + explanation: "Because the 4th row is incomplete, we return 3." + +explanation: + intuition: | + Imagine stacking coins row by row: the 1st row needs 1 coin, the 2nd row needs 2 coins, the 3rd row needs 3 coins, and so on. + + The total number of coins needed for `k` complete rows is `1 + 2 + 3 + ... + k`, which equals `k(k+1)/2` (the triangular number formula). + + Think of it like this: we're searching for the **largest value of k** such that the sum of the first k natural numbers does not exceed n. This is essentially asking: "What's the biggest staircase I can fully build with n coins?" + + Since the sum `k(k+1)/2` increases monotonically with k, we can use **binary search** to efficiently find the answer. Alternatively, we can solve the quadratic equation directly using the quadratic formula for an O(1) mathematical solution. 
+ + approach: | + We can solve this using **Binary Search** on the number of rows: + + **Step 1: Define the search space** + + - `left`: Set to `1` (minimum possible complete rows) + - `right`: Set to `n` (maximum possible rows, though we'll never need this many) + +   + + **Step 2: Binary search for the largest valid k** + + - Calculate `mid` as the midpoint of the current range + - Compute the coins needed for `mid` complete rows: `mid * (mid + 1) / 2` + - If coins needed equals `n`, we found an exact match - return `mid` + - If coins needed is less than `n`, we might be able to fit more rows - search right + - If coins needed is greater than `n`, we need fewer rows - search left + +   + + **Step 3: Return the result** + + - When the loop ends, `right` contains the largest k where `k(k+1)/2 <= n` + - Return `right` as the number of complete rows + +   + + The binary search efficiently narrows down the answer in O(log n) time instead of simulating the stacking process. + + common_pitfalls: + - title: The Simulation Trap + description: | + A naive approach is to simulate building the staircase row by row: + - Start with row 1, subtract 1 coin + - Move to row 2, subtract 2 coins + - Continue until you can't complete a row + + While correct, this takes **O(sqrt(n)) time** because you'll build approximately sqrt(2n) rows. For `n = 2^31 - 1`, that's about 65,000 iterations. Binary search does it in about 31 iterations. + wrong_approach: "Simulate stacking row by row" + correct_approach: "Binary search on the number of rows" + + - title: Integer Overflow + description: | + When computing `mid * (mid + 1) / 2`, the multiplication can overflow if `mid` is large. With `n` up to `2^31 - 1`, `mid` could be around `65,000`, and `mid * (mid + 1)` could exceed 32-bit integer limits. 
+ + In Python this isn't an issue due to arbitrary precision integers, but in languages like Java or C++ you need to compute the product in a 64-bit type (`long` / `long long`), e.g. `(long) mid * (mid + 1) / 2`. Dividing first (`mid / 2 * (mid + 1)`) is not a safe substitute: integer division truncates when `mid` is odd, and the product can still exceed 32 bits because the search probes `mid` values as large as about `n / 2`. + wrong_approach: "Use 32-bit integers without overflow protection" + correct_approach: "Use 64-bit integers or Python's arbitrary precision" + + - title: Off-by-One Errors + description: | + Binary search boundary conditions can be tricky. A common mistake is returning `left` instead of `right`, or using the wrong comparison operator. + + The key insight: we want the **largest** k where `k(k+1)/2 <= n`. We only move `right = mid - 1` when `mid` rows would need more than `n` coins, and only move `left = mid + 1` when `mid` rows fit. So when the loop exits without an exact match, `left == right + 1` and `right` is the last row count that fit. + wrong_approach: "Incorrect boundary handling in binary search" + correct_approach: "Carefully track which boundary holds the answer" + + key_takeaways: + - "**Triangular number formula**: The sum `1 + 2 + ... + k = k(k+1)/2` appears frequently in problems involving sequential additions" + - "**Binary search on answer**: When searching for the largest/smallest value satisfying a condition, binary search is often applicable" + - "**Mathematical solutions**: Some binary search problems have closed-form solutions using algebra (quadratic formula here)" + - "**Monotonic property**: Binary search works because `k(k+1)/2` strictly increases with k - if k rows need more than n coins, so will k+1 rows" + + time_complexity: "O(log n). Binary search halves the search space each iteration, and the search space is at most n." + space_complexity: "O(1). We only use a constant number of variables (`left`, `right`, `mid`, `coins_needed`)."
+ +solutions: + - approach_name: Binary Search + is_optimal: true + code: | + def arrange_coins(n: int) -> int: + left, right = 1, n + + while left <= right: + mid = left + (right - left) // 2 + # Coins needed for mid complete rows + coins_needed = mid * (mid + 1) // 2 + + if coins_needed == n: + # Exact fit - mid rows use exactly n coins + return mid + elif coins_needed < n: + # We have coins left over, try more rows + left = mid + 1 + else: + # Not enough coins, try fewer rows + right = mid - 1 + + # right is the largest k where k(k+1)/2 <= n + return right + explanation: | + **Time Complexity:** O(log n) - Binary search on the range [1, n]. + + **Space Complexity:** O(1) - Only constant extra space used. + + We binary search for the largest k such that k(k+1)/2 <= n. Each iteration halves the search space, and we use the triangular number formula to compute coins needed in O(1). + + - approach_name: Mathematical (Quadratic Formula) + is_optimal: true + code: | + import math + + def arrange_coins(n: int) -> int: + # We need the largest k where k(k+1)/2 <= n + # Solving k^2 + k - 2n = 0 using quadratic formula: + # k = (-1 + sqrt(1 + 8n)) / 2 + return int((-1 + math.sqrt(1 + 8 * n)) / 2) + explanation: | + **Time Complexity:** O(1) - Direct calculation using the quadratic formula. + + **Space Complexity:** O(1) - Only constant extra space used. + + We solve k(k+1)/2 <= n algebraically. Rearranging to k^2 + k - 2n <= 0 and applying the quadratic formula gives k = (-1 + sqrt(1 + 8n)) / 2. Taking the floor gives the largest valid k. + + - approach_name: Linear Simulation + is_optimal: false + code: | + def arrange_coins(n: int) -> int: + rows = 0 + coins_remaining = n + + # Build rows one by one until we can't complete one + while coins_remaining >= rows + 1: + rows += 1 + coins_remaining -= rows + + return rows + explanation: | + **Time Complexity:** O(sqrt(n)) - We build approximately sqrt(2n) rows. + + **Space Complexity:** O(1) - Only constant extra space used. 
+ + This simulates the stacking process directly. While intuitive and correct, it's slower than the binary search or mathematical approaches. Included to show the progression from brute force to optimal solution. diff --git a/backend/data/questions/array-nesting.yaml b/backend/data/questions/array-nesting.yaml new file mode 100644 index 0000000..4792c98 --- /dev/null +++ b/backend/data/questions/array-nesting.yaml @@ -0,0 +1,218 @@ +title: Array Nesting +slug: array-nesting +difficulty: medium +leetcode_id: 565 +leetcode_url: https://leetcode.com/problems/array-nesting/ +categories: + - arrays +patterns: + - dfs + +description: | + You are given an integer array `nums` of length `n` where `nums` is a permutation of the numbers in the range `[0, n - 1]`. + + You should build a set `s[k] = {nums[k], nums[nums[k]], nums[nums[nums[k]]], ... }` subjected to the following rule: + + - The first element in `s[k]` starts with the selection of the element `nums[k]` of `index = k`. + - The next element in `s[k]` should be `nums[nums[k]]`, and then `nums[nums[nums[k]]]`, and so on. + - We stop adding right before a duplicate element occurs in `s[k]`. + + Return *the longest length of a set* `s[k]`. + +constraints: | + - `1 <= nums.length <= 10^5` + - `0 <= nums[i] < nums.length` + - All the values of `nums` are **unique**. + +examples: + - input: "nums = [5,4,0,3,1,6,2]" + output: "4" + explanation: "nums[0] = 5, nums[1] = 4, nums[2] = 0, nums[3] = 3, nums[4] = 1, nums[5] = 6, nums[6] = 2. One of the longest sets s[k]: s[0] = {nums[0], nums[5], nums[6], nums[2]} = {5, 6, 2, 0}." + - input: "nums = [0,1,2]" + output: "1" + explanation: "Each element points to itself (nums[0]=0, nums[1]=1, nums[2]=2), so each set contains only one element." + +explanation: + intuition: | + Imagine the array as a directed graph where each index `i` has an edge pointing to index `nums[i]`. Since `nums` is a permutation of `[0, n-1]`, every index appears exactly once as a value. 
This means: + + - Every node has exactly one outgoing edge (to `nums[i]`) + - Every node has exactly one incoming edge (from the index where it appears as a value) + + This structure guarantees that the graph consists of **disjoint cycles**. Think of it like a treasure hunt where each clue points to the next location, and eventually you end up back where you started. + + The problem asks us to find the longest cycle. The key insight is that if two indices belong to the same cycle, starting from either one will trace the same cycle. So once we've explored a cycle starting from any index, we never need to revisit any index in that cycle again. + + This is why marking visited indices works: we're essentially finding all disjoint cycles and tracking the longest one. + + approach: | + We solve this using **Cycle Detection with Visited Tracking**: + + **Step 1: Initialise tracking variables** + + - `max_length`: Set to `0` to track the longest cycle found + - `visited`: A set (or array) to mark indices we've already explored + +   + + **Step 2: Iterate through each starting index** + + - For each index `i` from `0` to `n-1`: + - If `i` is already visited, skip it (it's part of a cycle we've already counted) + - If not visited, start exploring the cycle from `i` + +   + + **Step 3: Explore each cycle** + + - Starting from index `i`, follow the chain: `i -> nums[i] -> nums[nums[i]] -> ...` + - Count each step and mark each visited index + - Stop when we return to an already-visited index (the cycle is complete) + - Update `max_length` if this cycle is longer + +   + + **Step 4: Return the result** + + - After checking all indices, return `max_length` + +   + + The key optimisation is that we only visit each index once across all iterations. Once an index is marked visited, we skip it in future iterations because we already know its cycle length. 
+ + common_pitfalls: + - title: Recomputing Cycles for Each Index + description: | + A naive approach might trace the full cycle for every starting index, even if that index was already visited from a previous starting point. + + For example, if indices 0, 5, 6, 2 form a cycle, starting from index 5 would trace the same cycle as starting from 0. Without visited tracking, you'd count this cycle 4 times. + + This leads to **O(n^2) time complexity** in the worst case. With `n = 10^5`, this could mean 10 billion operations. + wrong_approach: "Trace full cycle for every starting index" + correct_approach: "Mark visited indices and skip them in future iterations" + + - title: Using Extra Space for Cycle Storage + description: | + You don't need to actually store the set `s[k]` for each starting index. We only care about the *length* of each cycle, not its contents. + + Storing all elements would use O(n) space per cycle exploration. Instead, just count steps while following the chain. + wrong_approach: "Store each cycle's elements in a list or set" + correct_approach: "Only count the cycle length, don't store elements" + + - title: Not Recognising the Permutation Property + description: | + The constraint that `nums` is a permutation is crucial. It guarantees: + + - No index points to an out-of-bounds location + - Every index is pointed to by exactly one other index + - The graph structure is guaranteed to be disjoint cycles + + If you miss this, you might add unnecessary bounds checking or fail to recognise why the visited optimisation works correctly. 
+ + key_takeaways: + - "**Cycle detection pattern**: When array values are indices into the same array, think of it as a graph where `nums[i]` defines edges" + - "**Permutation implies disjoint cycles**: In a permutation, following indices always forms closed cycles with no branches" + - "**Visit once optimisation**: When finding the longest cycle among disjoint cycles, each element only needs to be visited once total" + - "**Similar problems**: This pattern appears in Find the Duplicate Number, Linked List Cycle, and problems involving functional graphs" + + time_complexity: "O(n). Each index is visited at most once across all cycle explorations, giving linear time." + space_complexity: "O(n). We use a visited set/array of size `n` to track explored indices. Can be reduced to O(1) by modifying the input array in-place." + +solutions: + - approach_name: Cycle Detection with Visited Set + is_optimal: true + code: | + def array_nesting(nums: list[int]) -> int: + n = len(nums) + visited = set() + max_length = 0 + + for i in range(n): + # Skip if this index is already part of a known cycle + if i in visited: + continue + + # Explore the cycle starting from index i + current = i + cycle_length = 0 + + while current not in visited: + # Mark this index as visited + visited.add(current) + # Move to the next index in the chain + current = nums[current] + cycle_length += 1 + + # Update the maximum cycle length found + max_length = max(max_length, cycle_length) + + return max_length + explanation: | + **Time Complexity:** O(n) — Each index is visited exactly once across all iterations. + + **Space Complexity:** O(n) — The visited set stores up to n indices. + + We iterate through each index and explore its cycle if not yet visited. The visited set ensures we never process the same index twice, making the total work linear regardless of cycle structure. 
+ + - approach_name: In-Place Marking + is_optimal: true + code: | + def array_nesting(nums: list[int]) -> int: + max_length = 0 + n = len(nums) + + for i in range(n): + # Skip if already visited (marked with -1) + if nums[i] == -1: + continue + + # Explore the cycle starting from index i + current = i + cycle_length = 0 + + while nums[current] != -1: + # Save the next index before marking + next_idx = nums[current] + # Mark as visited by setting to -1 + nums[current] = -1 + # Move to next index + current = next_idx + cycle_length += 1 + + max_length = max(max_length, cycle_length) + + return max_length + explanation: | + **Time Complexity:** O(n) — Each index is visited exactly once. + + **Space Complexity:** O(1) — We modify the input array in-place instead of using a separate visited structure. + + This optimisation marks visited indices by setting them to `-1` (an invalid value). This eliminates the need for a separate visited set but modifies the input array. Use this when space is critical and input modification is acceptable. + + - approach_name: Brute Force + is_optimal: false + code: | + def array_nesting(nums: list[int]) -> int: + max_length = 0 + n = len(nums) + + for i in range(n): + # Start a new cycle exploration from index i + seen = set() + current = i + + # Follow the chain until we see a repeat + while current not in seen: + seen.add(current) + current = nums[current] + + # Update max with this cycle's length + max_length = max(max_length, len(seen)) + + return max_length + explanation: | + **Time Complexity:** O(n^2) — In the worst case, we might trace O(n) elements for each of n starting indices. + + **Space Complexity:** O(n) — The seen set for each iteration can grow to n elements. + + This approach creates a fresh `seen` set for each starting index, potentially retracing the same cycles multiple times. While correct, it's inefficient because it doesn't share visited information across iterations. 
For example, if all elements form one big cycle, we'd trace all n elements n times. diff --git a/backend/data/questions/array-of-doubled-pairs.yaml b/backend/data/questions/array-of-doubled-pairs.yaml new file mode 100644 index 0000000..691df7a --- /dev/null +++ b/backend/data/questions/array-of-doubled-pairs.yaml @@ -0,0 +1,172 @@ +title: Array of Doubled Pairs +slug: array-of-doubled-pairs +difficulty: medium +leetcode_id: 954 +leetcode_url: https://leetcode.com/problems/array-of-doubled-pairs/ +categories: + - arrays + - hash-tables + - sorting +patterns: + - greedy + +description: | + Given an integer array of even length `arr`, return `true` *if it is possible to reorder* `arr` *such that* `arr[2 * i + 1] = 2 * arr[2 * i]` *for every* `0 <= i < len(arr) / 2`, *or* `false` *otherwise*. + + In other words, you need to check if the array can be rearranged into pairs where one element is exactly double the other. + +constraints: | + - `2 <= arr.length <= 3 * 10^4` + - `arr.length` is even + - `-10^5 <= arr[i] <= 10^5` + +examples: + - input: "arr = [3,1,3,6]" + output: "false" + explanation: "No valid pairing exists. The pairs would need to be (3,6) and (1,?), but there's no 2 in the array." + - input: "arr = [2,1,2,6]" + output: "false" + explanation: "We could pair (1,2) but then we'd have [2,6] left, and 6 != 2*2." + - input: "arr = [4,-2,2,-4]" + output: "true" + explanation: "We can form pairs (-2,-4) and (2,4) to create [-2,-4,2,4] or [2,4,-2,-4]." + +explanation: + intuition: | + Think of this problem like matching socks — except each "sock" must find its exact double (or half). + + The key insight is that **order matters when pairing**. If you try to pair numbers randomly, you might "use up" a number that was needed elsewhere. For example, with `[1, 2, 4, 8]`, if you greedily pair `2` with `4`, you've broken the valid pairing of `(1,2)` and `(4,8)`. + + The solution is to **process numbers by absolute value from smallest to largest**. Why? 
Because a smaller number can only be the "base" of a pair (the number that gets doubled), never the "double" of something even smaller that we haven't seen yet. By handling small numbers first, we ensure each number finds its correct partner. + + For negative numbers, the logic reverses: `-4` doubled is `-8`, so we still process by absolute value but the "double" is actually more negative. + + approach: | + We solve this using a **Greedy + Hash Map Approach**: + + **Step 1: Count occurrences of each number** + + - Use a hash map (Counter) to count how many times each number appears + - This allows O(1) lookup when searching for doubles + +   + + **Step 2: Sort numbers by absolute value** + + - Process numbers from smallest absolute value to largest + - This ensures we always try to pair a number with its double (not half) + - For negatives like `-2`, its double `-4` has larger absolute value, so `-2` is processed first + +   + + **Step 3: Greedily pair each number with its double** + + - For each number `x` in sorted order: + - If `count[x] == 0`, skip (already used as someone's double) + - If `count[2*x] == 0`, return `false` (no double available) + - Otherwise, decrement both `count[x]` and `count[2*x]` + +   + + **Step 4: Return the result** + + - If we successfully pair all numbers, return `true` + + common_pitfalls: + - title: Processing in Wrong Order + description: | + If you process numbers in regular sorted order (not by absolute value), negative numbers break the logic. + + For example, with `[-4, -2, 2, 4]`: + - Regular sort: `[-4, -2, 2, 4]` + - Processing `-4` first, its double is `-8` which doesn't exist! + + But `-4` should be the *double* of `-2`, not the base. Sorting by absolute value gives `[-2, 2, -4, 4]`, so `-2` correctly pairs with `-4`. + wrong_approach: "Sort by value" + correct_approach: "Sort by absolute value" + + - title: Not Handling Zero Correctly + description: | + Zero is its own double (`0 * 2 = 0`). 
This means zeros must appear in pairs. + + If you have an odd count of zeros, it's impossible to form valid pairs. The counting approach handles this naturally — pairing `0` with `2*0 = 0` decrements the count by 2 each time, and because the array length is even, an odd number of zeros leaves an odd number of non-zero values, one of which must fail to find its double. + wrong_approach: "Special-case zero handling" + correct_approach: "Let the general algorithm handle 0 paired with 0" + + - title: Modifying Array While Iterating + description: | + Some approaches try to remove elements from the array as they're paired. This leads to index shifting bugs and O(n) removal costs. + + Using a count map and decrementing counts is cleaner and more efficient. + wrong_approach: "Remove elements from array" + correct_approach: "Use count map and decrement" + + key_takeaways: + - "**Process by absolute value** when dealing with doubling/halving that involves negatives" + - "**Greedy works when ordering eliminates conflicts** — smallest-first ensures a number that is still unpaired when we reach it can only be a base, never a double" + - "**Count maps** are powerful for pairing problems — O(1) lookup beats O(n) search" + - "This pattern extends to problems like *Find Original Array From Doubled Array* or finding pairs with any fixed ratio" + + time_complexity: "O(n log n). Sorting dominates; the pairing loop is O(n) with O(1) hash map operations." + space_complexity: "O(n). The count map stores at most `n` unique elements."
+ +solutions: + - approach_name: Greedy with Sorting by Absolute Value + is_optimal: true + code: | + from collections import Counter + + def can_reorder_doubled(arr: list[int]) -> bool: + # Count occurrences of each number + count = Counter(arr) + + # Sort by absolute value - process smaller magnitudes first + for x in sorted(arr, key=abs): + # Skip if this number was already used as someone's double + if count[x] == 0: + continue + + # Check if the double exists + if count[2 * x] == 0: + return False # No valid pair for x + + # Pair x with 2*x: decrement both counts + count[x] -= 1 + count[2 * x] -= 1 + + return True + explanation: | + **Time Complexity:** O(n log n) — Dominated by sorting. The iteration and hash map operations are O(n). + + **Space Complexity:** O(n) — The Counter stores up to n elements. + + By sorting by absolute value, we ensure that when we process a number `x`, its double `2*x` hasn't been incorrectly paired with something else. The greedy choice is safe because the smallest-magnitude unpaired number has no possible partner other than its double. + + - approach_name: Brute Force with Backtracking + is_optimal: false + code: | + def can_reorder_doubled(arr: list[int]) -> bool: + def backtrack(remaining: list[int]) -> bool: + if not remaining: + return True # All numbers paired successfully + + # Try to pair the first element with its double or its half + x = remaining[0] + + for i in range(1, len(remaining)): + y = remaining[i] + if y == 2 * x or x == 2 * y: + # Found a valid pair, recurse with remaining elements + new_remaining = remaining[1:i] + remaining[i+1:] + if backtrack(new_remaining): + return True + + return False # No valid pairing found + + return backtrack(arr) + explanation: | + **Time Complexity:** O(n! / 2^(n/2)) — Exponential due to trying all possible pairings. + + **Space Complexity:** O(n) — Recursion depth and list copies. + + This approach tries every possible way to pair elements. 
While correct, it's far too slow for the constraint `n <= 3 * 10^4`.
Included to illustrate why the greedy approach is necessary. diff --git a/backend/data/questions/array-partition.yaml b/backend/data/questions/array-partition.yaml new file mode 100644 index 0000000..2c6cd09 --- /dev/null +++ b/backend/data/questions/array-partition.yaml @@ -0,0 +1,169 @@ +title: Array Partition +slug: array-partition +difficulty: easy +leetcode_id: 561 +leetcode_url: https://leetcode.com/problems/array-partition/ +categories: + - arrays + - sorting +patterns: + - greedy + +description: | + Given an integer array `nums` of `2n` integers, group these integers into `n` pairs `(a1, b1), (a2, b2), ..., (an, bn)` such that the sum of `min(ai, bi)` for all `i` is **maximised**. + + Return *the maximised sum*. + +constraints: | + - `1 <= n <= 10^4` + - `nums.length == 2 * n` + - `-10^4 <= nums[i] <= 10^4` + +examples: + - input: "nums = [1,4,3,2]" + output: "4" + explanation: "All possible pairings are: (1,4),(2,3) → 1+2=3; (1,3),(2,4) → 1+2=3; (1,2),(3,4) → 1+3=4. The maximum possible sum is 4." + - input: "nums = [6,2,6,5,1,2]" + output: "9" + explanation: "The optimal pairing is (1,2), (2,5), (6,6). min(1,2) + min(2,5) + min(6,6) = 1 + 2 + 6 = 9." + +explanation: + intuition: | + Imagine you have pairs of contestants in a competition where only the weaker member of each pair scores points. Your goal is to maximise the total points scored. + + The key insight is that **pairing wastes the larger element** — whenever you pair two numbers, the larger one contributes nothing to the sum. So the question becomes: how do we minimise this "waste"? + + Think of it like this: if you pair a very small number with a very large number, you're wasting the large number entirely. But if you pair numbers that are close in value, you waste less. + + This leads to the greedy strategy: **sort the array and pair adjacent elements**. When you pair the 1st and 2nd smallest, the 3rd and 4th smallest, and so on, you minimise the gap between paired elements. 
The smaller element in each pair (which we keep) is as large as possible given what's available. + + After sorting `[1, 2, 3, 4]`, pairing `(1,2)` and `(3,4)` gives us `1 + 3 = 4`. The "wasted" values are `2` and `4`, which are the minimum we could waste. + + approach: | + We solve this using a **Sort and Sum Alternates** approach: + + **Step 1: Sort the array** + + - Sort `nums` in ascending order + - After sorting, adjacent elements are closest in value + +   + + **Step 2: Sum every other element** + + - Take elements at indices `0, 2, 4, ...` (the smaller element of each pair) + - These are the elements that will contribute to our sum + - In each pair `(nums[0], nums[1])`, `(nums[2], nums[3])`, etc., the first element is the minimum + +   + + **Step 3: Return the sum** + + - The sum of elements at even indices is the maximum possible sum of minimums + +   + + This greedy approach works because sorting ensures we pair elements with minimal "waste" — the larger element in each pair is only slightly larger than the smaller one. + + common_pitfalls: + - title: Trying All Pairings + description: | + A brute force approach would try all possible ways to partition `2n` elements into `n` pairs. The number of such partitions is `(2n)! / (2^n * n!)`, which grows extremely fast. + + For `n = 10^4`, this is astronomically large and completely infeasible. Even for small inputs like `n = 10`, there are over 650 million possible pairings. + + The key insight is recognising that we don't need to try all pairings — the greedy approach of pairing sorted adjacent elements is provably optimal. + wrong_approach: "Enumerate all possible pairings" + correct_approach: "Sort and pair adjacent elements" + + - title: Pairing Extremes Together + description: | + An intuitive but wrong approach might be to pair the smallest with the largest, second smallest with second largest, etc. This feels like it might "balance" things out. 
+ + For `[1, 2, 3, 4]`, pairing `(1,4)` and `(2,3)` gives `min(1,4) + min(2,3) = 1 + 2 = 3`. But pairing adjacents `(1,2)` and `(3,4)` gives `1 + 3 = 4`. + + The problem is that pairing extremes maximises the waste — the large elements contribute nothing, and you're "wasting" more value. + wrong_approach: "Pair smallest with largest" + correct_approach: "Pair adjacent elements after sorting" + + - title: Off-by-One with Indices + description: | + When summing elements at even indices, ensure you're using `range(0, len(nums), 2)` or equivalent. Starting at index 1 would sum the wrong elements — the maximums of each pair instead of the minimums. + + Alternatively, you can sum with `sum(sorted(nums)[::2])` using Python's slice notation. + + key_takeaways: + - "**Greedy insight**: When forced to discard one element per pair, minimise loss by keeping discarded values as small as possible relative to what's kept" + - "**Sorting unlocks structure**: Many pairing/partitioning problems become tractable after sorting reveals the optimal ordering" + - "**Adjacent pairing pattern**: When elements must be grouped in pairs and order matters, sorting followed by adjacent grouping is a common optimal strategy" + - "**Avoid enumeration**: Recognise when a greedy or mathematical insight eliminates the need for exhaustive search" + + time_complexity: "O(n log n). The dominant operation is sorting the array of `2n` elements." + space_complexity: "O(1) to O(n). Depends on the sorting algorithm — in-place sorts like heapsort use O(1), while Python's Timsort uses O(n) auxiliary space." 
+ +solutions: + - approach_name: Sort and Sum Alternates + is_optimal: true + code: | + def array_pair_sum(nums: list[int]) -> int: + # Sort to pair adjacent elements (minimises waste) + nums.sort() + + # Sum elements at even indices (smaller of each pair) + total = 0 + for i in range(0, len(nums), 2): + total += nums[i] + + return total + explanation: | + **Time Complexity:** O(n log n) — Dominated by the sorting step. + + **Space Complexity:** O(1) auxiliary — We sort in-place and use a single variable for the sum. + + After sorting, elements at even indices are the minimum of each adjacent pair. Summing these gives the maximum possible sum. + + - approach_name: Pythonic One-Liner + is_optimal: true + code: | + def array_pair_sum(nums: list[int]) -> int: + # Sort, then sum every other element starting from index 0 + return sum(sorted(nums)[::2]) + explanation: | + **Time Complexity:** O(n log n) — Sorting dominates. + + **Space Complexity:** O(n) — `sorted()` creates a new list. + + This concise version uses Python's slice notation `[::2]` to select every other element starting from index 0. Functionally identical to the explicit loop approach. + + - approach_name: Counting Sort (Bounded Range) + is_optimal: false + code: | + def array_pair_sum(nums: list[int]) -> int: + # Use counting sort for bounded integer range [-10^4, 10^4] + offset = 10000 # Shift to handle negative indices + count = [0] * 20001 + + # Count occurrences of each value + for num in nums: + count[num + offset] += 1 + + total = 0 + need_pair = True # Alternates: True = add to sum, False = skip + + # Iterate through possible values in sorted order + for val in range(-10000, 10001): + while count[val + offset] > 0: + if need_pair: + total += val # This element is the min of its pair + need_pair = not need_pair + count[val + offset] -= 1 + + return total + explanation: | + **Time Complexity:** O(n + k) where k = 20001 (the value range). Effectively O(n) for this problem. 
+ + **Space Complexity:** O(k) = O(20001) = O(1) since k is a constant. + + For the specific constraints of this problem (`-10^4 <= nums[i] <= 10^4`), counting sort achieves O(n) time. We iterate through the count array in order, alternating between adding to the sum (for even-positioned elements) and skipping (for odd-positioned elements). + + While asymptotically faster, this approach is more complex and the constant factors make it slower than comparison sort for typical input sizes. diff --git a/backend/data/questions/array-with-elements-not-equal-to-average-of-neighbors.yaml b/backend/data/questions/array-with-elements-not-equal-to-average-of-neighbors.yaml new file mode 100644 index 0000000..ab23645 --- /dev/null +++ b/backend/data/questions/array-with-elements-not-equal-to-average-of-neighbors.yaml @@ -0,0 +1,200 @@ +title: Array With Elements Not Equal to Average of Neighbors +slug: array-with-elements-not-equal-to-average-of-neighbors +difficulty: medium +leetcode_id: 1968 +leetcode_url: https://leetcode.com/problems/array-with-elements-not-equal-to-average-of-neighbors/ +categories: + - arrays + - sorting +patterns: + - greedy + +description: | + You are given a **0-indexed** array `nums` of **distinct** integers. You want to rearrange the elements in the array such that every element in the rearranged array is **not** equal to the **average** of its neighbors. + + More formally, the rearranged array should have the property such that for every `i` in the range `1 <= i < nums.length - 1`, `(nums[i-1] + nums[i+1]) / 2` is **not** equal to `nums[i]`. + + Return *any rearrangement of* `nums` *that meets the requirements*. + +constraints: | + - `3 <= nums.length <= 10^5` + - `0 <= nums[i] <= 10^5` + - All elements in `nums` are **distinct** + +examples: + - input: "nums = [1,2,3,4,5]" + output: "[1,2,4,5,3]" + explanation: "When i=1, nums[i] = 2, and the average of its neighbors is (1+4) / 2 = 2.5. 
When i=2, nums[i] = 4, and the average of its neighbors is (2+5) / 2 = 3.5. When i=3, nums[i] = 5, and the average of its neighbors is (4+3) / 2 = 3.5." + - input: "nums = [6,2,0,9,7]" + output: "[9,7,6,2,0]" + explanation: "When i=1, nums[i] = 7, and the average of its neighbors is (9+6) / 2 = 7.5. When i=2, nums[i] = 6, and the average of its neighbors is (7+2) / 2 = 4.5. When i=3, nums[i] = 2, and the average of its neighbors is (6+0) / 2 = 3." + +explanation: + intuition: | + The key insight is recognising when an element equals the average of its neighbours. If `nums[i] = (nums[i-1] + nums[i+1]) / 2`, then rearranging gives us `2 * nums[i] = nums[i-1] + nums[i+1]`. This only happens when the middle element is **exactly between** its neighbours numerically. + + Think of it like arranging people by height in a line: you want to avoid having anyone stand between two others where they're exactly the average height of those two. The simplest way to guarantee this? Create a **zigzag pattern** — make sure each person is either taller than both neighbours or shorter than both neighbours. + + This is called a **wiggle arrangement**: `nums[0] < nums[1] > nums[2] < nums[3] > nums[4] ...` (or the opposite pattern). When every element is a local maximum or local minimum, it can never be the average of its neighbours because it's never *between* them — it's always above or below both. + + The elegant solution: sort the array, then **interleave** elements from the smaller half and larger half. Placing small and large elements alternately guarantees the zigzag pattern. 
+ + approach: | + We solve this using a **Sort and Interleave** approach: + + **Step 1: Sort the array** + + - Sort `nums` in ascending order + - This separates the values into a "small half" and a "large half" + +   + + **Step 2: Split into two halves** + + - Small half: elements from index `0` to `(n-1)/2` (ceiling division) + - Large half: elements from index `(n+1)/2` to end + - For odd-length arrays, the small half gets the extra element + +   + + **Step 3: Interleave the halves** + + - Create the result by alternating: `small[0], large[0], small[1], large[1], ...` + - Place elements from the small half at even indices (0, 2, 4, ...) + - Place elements from the large half at odd indices (1, 3, 5, ...) + +   + + **Step 4: Return the result** + + - The interleaved array guarantees no element equals the average of its neighbours + +   + + **Why does interleaving work?** + + After sorting and splitting, every element in the large half is strictly greater than every element in the small half (since all elements are distinct). When we interleave: + - Each "small" element has "large" neighbours (both bigger) + - Each "large" element has "small" neighbours (both smaller) + + An element can only be the average of its neighbours if it's between them. But in our arrangement, every element is either greater than both neighbours or less than both — never between. + + common_pitfalls: + - title: Random Shuffling + description: | + A tempting approach is to randomly shuffle until you find a valid arrangement. While this might work for small inputs, it has no guarantee of termination and could take extremely long for larger arrays. + + With `n = 10^5` elements and many possible arrangements, random shuffling is not a reliable algorithm. 
+ wrong_approach: "Randomly shuffle and check validity" + correct_approach: "Use deterministic sort + interleave" + + - title: Missing the Wiggle Pattern Insight + description: | + Without recognising that a wiggle pattern (local maxima/minima alternating) solves the problem, you might try complex approaches like checking each position individually or using backtracking. + + The key mathematical insight: `nums[i] = avg(nums[i-1], nums[i+1])` means `nums[i]` is exactly between its neighbours. A wiggle pattern ensures every element is above or below both neighbours — never between. + wrong_approach: "Try to place elements one by one with backtracking" + correct_approach: "Ensure wiggle pattern via sort + interleave" + + - title: Incorrect Half Split + description: | + When splitting the sorted array, be careful with odd-length arrays. If you split incorrectly, the interleaving won't work properly. + + For `n = 5`: small half should be indices 0, 1, 2 (3 elements) and large half should be indices 3, 4 (2 elements). This ensures we have enough small elements for even positions. + wrong_approach: "Always split at n/2 without considering odd lengths" + correct_approach: "Use ceiling division for small half size" + + key_takeaways: + - "**Wiggle sort pattern**: Arranging elements so each is a local max or min (alternating) prevents any element from being the average of neighbours" + - "**Sort + interleave technique**: Sorting and interleaving halves is a powerful way to create alternating high-low patterns" + - "**Mathematical insight**: `nums[i] = (a + b) / 2` only when `nums[i]` is exactly between `a` and `b` — wiggle patterns avoid this" + - "**Related problems**: Wiggle Sort, Wiggle Sort II, and Rearrange Array Elements by Sign use similar interleaving ideas" + + time_complexity: "O(n log n). Dominated by the sorting step. The interleaving is O(n)." + space_complexity: "O(n). We create a new result array to store the interleaved elements." 
+ +solutions: + - approach_name: Sort and Interleave + is_optimal: true + code: | + def rearrange_array(nums: list[int]) -> list[int]: + # Sort to separate small and large values + nums.sort() + n = len(nums) + result = [0] * n + + # Split into two halves + # Small half goes to even indices, large half to odd indices + small_idx = 0 + large_idx = (n + 1) // 2 # Start of large half + + for i in range(n): + if i % 2 == 0: + # Even index: place from small half + result[i] = nums[small_idx] + small_idx += 1 + else: + # Odd index: place from large half + result[i] = nums[large_idx] + large_idx += 1 + + return result + explanation: | + **Time Complexity:** O(n log n) — Sorting dominates; interleaving is O(n). + + **Space Complexity:** O(n) — We allocate a result array of size n. + + By sorting and interleaving, we guarantee that elements at even indices (from the smaller half) are always less than their odd-index neighbours (from the larger half). This creates a wiggle pattern where no element can be the average of its neighbours. + + - approach_name: In-Place Swap (Greedy) + is_optimal: false + code: | + def rearrange_array(nums: list[int]) -> list[int]: + n = len(nums) + + # We want: nums[0] < nums[1] > nums[2] < nums[3] > ... + for i in range(1, n): + # At odd indices, we want a local max + if i % 2 == 1: + if nums[i] < nums[i - 1]: + nums[i], nums[i - 1] = nums[i - 1], nums[i] + # At even indices (except 0), we want a local min + else: + if nums[i] > nums[i - 1]: + nums[i], nums[i - 1] = nums[i - 1], nums[i] + + return nums + explanation: | + **Time Complexity:** O(n) — Single pass through the array. + + **Space Complexity:** O(1) — Swaps are done in-place. + + This greedy approach iterates once, swapping adjacent elements whenever they violate the wiggle pattern. At odd indices we want a local maximum (greater than previous), at even indices we want a local minimum (less than previous). While this is O(n) time, it modifies the input array. 
The sort + interleave approach also sorts `nums` in place, but its wiggle guarantee is easier to see. + + - approach_name: Sort Descending and Interleave + is_optimal: false + code: | + def rearrange_array(nums: list[int]) -> list[int]: + # Sort in descending order + nums.sort(reverse=True) + n = len(nums) + result = [] + + left = 0 + right = n - 1 + + # Alternate between largest remaining and smallest remaining + while left <= right: + result.append(nums[left]) # Add large element + left += 1 + if left <= right: + result.append(nums[right]) # Add small element + right -= 1 + + return result + explanation: | + **Time Complexity:** O(n log n) — Sorting dominates. + + **Space Complexity:** O(n) — Result array storage. + + This variation sorts descending and alternates picking from the front (large) and back (small). It achieves the same wiggle effect with a different interleaving pattern: large-small-large-small instead of small-large-small-large. Both are valid solutions. diff --git a/backend/data/questions/as-far-from-land-as-possible.yaml b/backend/data/questions/as-far-from-land-as-possible.yaml new file mode 100644 index 0000000..fb8bc5c --- /dev/null +++ b/backend/data/questions/as-far-from-land-as-possible.yaml @@ -0,0 +1,201 @@ +title: As Far from Land as Possible +slug: as-far-from-land-as-possible +difficulty: medium +leetcode_id: 1162 +leetcode_url: https://leetcode.com/problems/as-far-from-land-as-possible/ +categories: + - arrays + - graphs +patterns: + - bfs + - matrix-traversal + +description: | + Given an `n x n` `grid` containing only values `0` and `1`, where `0` represents water and `1` represents land, find a water cell such that its distance to the nearest land cell is maximized, and return the distance. + + If no land or water exists in the grid, return `-1`. + + The distance used in this problem is the *Manhattan distance*: the distance between two cells `(x0, y0)` and `(x1, y1)` is `|x0 - x1| + |y0 - y1|`.
+ +constraints: | + - `n == grid.length` + - `n == grid[i].length` + - `1 <= n <= 100` + - `grid[i][j]` is `0` or `1` + +examples: + - input: "grid = [[1,0,1],[0,0,0],[1,0,1]]" + output: "2" + explanation: "The cell (1, 1) is as far as possible from all the land with distance 2." + - input: "grid = [[1,0,0],[0,0,0],[0,0,0]]" + output: "4" + explanation: "The cell (2, 2) is as far as possible from all the land with distance 4." + +explanation: + intuition: | + Imagine you're standing on each piece of land simultaneously and start walking outward in all four directions at the same pace. As you expand, you mark each water cell with the number of steps it took to reach it from the nearest land. + + The key insight is that instead of calculating the distance from each water cell to its nearest land (which would require checking every water cell against every land cell), we **flip the perspective**: start from all land cells at once and expand outward. + + Think of it like dropping ink into water at every land position simultaneously. The ink spreads outward one step at a time. The last water cell to be reached by any ink droplet is the one farthest from all land — and that's our answer. + + This is a classic **multi-source BFS** problem. By starting BFS from all land cells together (distance 0), we guarantee that each water cell gets marked with its distance to the *nearest* land cell. The maximum distance we record is our answer. 
+ + approach: | + We solve this using **Multi-Source BFS**: + + **Step 1: Initialise the queue with all land cells** + + - Scan the entire grid to find all cells with value `1` (land) + - Add each land cell to our BFS queue with distance `0` + - If there are no land cells or no water cells, return `-1` immediately + +   + + **Step 2: Perform BFS expansion** + + - Process cells level by level (this ensures we explore all cells at distance `d` before moving to distance `d + 1`) + - For each cell, explore its four neighbours (up, down, left, right) + - If a neighbour is water (`0`) and hasn't been visited, mark it with the current distance + 1 and add it to the queue + - We can mark visited cells by changing their value in the grid (e.g., setting to the distance or a special value) + +   + + **Step 3: Track the maximum distance** + + - Each time we reach a new water cell, we record its distance + - The last level of BFS gives us the farthest water cells + - Return the maximum distance found + +   + + This approach works because BFS explores in order of increasing distance. Starting from all land simultaneously means we find the shortest path from each water cell to its nearest land, and we want the maximum of these shortest paths. + + common_pitfalls: + - title: Single-Source BFS from Each Land Cell + description: | + A naive approach might run BFS from each land cell separately and track distances to all water cells. This results in **O(n^4)** complexity for an `n x n` grid — with `n = 100`, that's 100 million operations. + + Multi-source BFS runs once from all land cells together, giving us **O(n^2)** time, which is optimal since we must visit each cell at least once. 
+ wrong_approach: "Run separate BFS from each land cell" + correct_approach: "Start BFS from all land cells simultaneously" + + - title: Forgetting Edge Cases + description: | + The problem requires returning `-1` in two scenarios: + - The grid contains only land (no water cells to measure distance to) + - The grid contains only water (no land cells to measure distance from) + + Check these cases before starting BFS by counting land cells. If the count is `0` or equals `n * n`, return `-1`. + wrong_approach: "Assume grid always has both land and water" + correct_approach: "Check for all-land or all-water grids first" + + - title: Not Using Manhattan Distance Correctly + description: | + BFS on a grid naturally computes Manhattan distance when you only move in four cardinal directions (up, down, left, right). Each step adds 1 to the distance. + + If you accidentally allow diagonal moves, you'd be computing Chebyshev distance instead. Stick to the four-directional movement for this problem. + wrong_approach: "Include diagonal neighbours in BFS" + correct_approach: "Only explore up, down, left, right neighbours" + + key_takeaways: + - "**Multi-source BFS**: When you need the shortest distance from any of multiple sources, start BFS from all sources at once" + - "**Flip the perspective**: Instead of 'from each water, find nearest land', think 'from all land, expand to all water' — this changes O(n^4) to O(n^2)" + - "**Grid BFS pattern**: Use a queue, track visited cells (by modifying the grid or using a set), and explore four-directionally" + - "**Related problems**: This pattern appears in problems like 'Walls and Gates', 'Rotting Oranges', and '01 Matrix'" + + time_complexity: "O(n^2). Each cell in the `n x n` grid is visited at most once during BFS." + space_complexity: "O(n^2). In the worst case, the queue holds almost every cell at once (when nearly the whole grid is land, all land cells are enqueued up front)."
+ +solutions: + - approach_name: Multi-Source BFS + is_optimal: true + code: | + from collections import deque + + def max_distance(grid: list[list[int]]) -> int: + n = len(grid) + queue = deque() + + # Add all land cells to the queue as starting points + for i in range(n): + for j in range(n): + if grid[i][j] == 1: + queue.append((i, j, 0)) # (row, col, distance) + + # Edge case: no land or no water + if len(queue) == 0 or len(queue) == n * n: + return -1 + + # Four directions: up, down, left, right + directions = [(-1, 0), (1, 0), (0, -1), (0, 1)] + max_dist = -1 + + while queue: + row, col, dist = queue.popleft() + + for dr, dc in directions: + new_row, new_col = row + dr, col + dc + + # Check bounds and if it's unvisited water + if 0 <= new_row < n and 0 <= new_col < n and grid[new_row][new_col] == 0: + # Mark as visited by setting to non-zero + grid[new_row][new_col] = dist + 1 + max_dist = max(max_dist, dist + 1) + queue.append((new_row, new_col, dist + 1)) + + return max_dist + explanation: | + **Time Complexity:** O(n^2) — We visit each cell at most once. + + **Space Complexity:** O(n^2) — The queue can hold up to n^2 cells in the worst case. + + We start BFS from all land cells simultaneously. Each water cell gets marked with its distance to the nearest land. The maximum distance encountered is our answer. We modify the grid in-place to track visited cells. 
+ + - approach_name: Dynamic Programming + is_optimal: false + code: | + def max_distance(grid: list[list[int]]) -> int: + n = len(grid) + INF = n * 2 # Sentinel larger than any Manhattan distance in the grid (max is 2 * (n - 1)) + + # dist[i][j] = distance to nearest land + dist = [[0 if grid[i][j] == 1 else INF for j in range(n)] for i in range(n)] + + # First pass: top-left to bottom-right + # Consider paths coming from top or left + for i in range(n): + for j in range(n): + if grid[i][j] == 1: + continue # Land cells have distance 0 + if i > 0: + dist[i][j] = min(dist[i][j], dist[i-1][j] + 1) + if j > 0: + dist[i][j] = min(dist[i][j], dist[i][j-1] + 1) + + # Second pass: bottom-right to top-left + # Consider paths coming from bottom or right + for i in range(n - 1, -1, -1): + for j in range(n - 1, -1, -1): + if grid[i][j] == 1: + continue + if i < n - 1: + dist[i][j] = min(dist[i][j], dist[i+1][j] + 1) + if j < n - 1: + dist[i][j] = min(dist[i][j], dist[i][j+1] + 1) + + # Find maximum distance among water cells + max_dist = -1 + for i in range(n): + for j in range(n): + if grid[i][j] == 0: + max_dist = max(max_dist, dist[i][j]) + + # If no water or distance is still INF (no land), return -1 + return max_dist if max_dist != INF and max_dist != -1 else -1 + explanation: | + **Time Complexity:** O(n^2) — Two passes through the grid. + + **Space Complexity:** O(n^2) — We store a distance matrix. + + This DP approach computes the distance to the nearest land using two passes. The first pass propagates distances from top-left, the second from bottom-right. Together, they cover all four directions. While equally efficient as BFS, this approach is slightly less intuitive but avoids queue overhead.
diff --git a/backend/data/questions/assign-cookies.yaml b/backend/data/questions/assign-cookies.yaml new file mode 100644 index 0000000..9f054ed --- /dev/null +++ b/backend/data/questions/assign-cookies.yaml @@ -0,0 +1,174 @@ +title: Assign Cookies +slug: assign-cookies +difficulty: easy +leetcode_id: 455 +leetcode_url: https://leetcode.com/problems/assign-cookies/ +categories: + - arrays + - sorting + - two-pointers +patterns: + - greedy + - two-pointers + +description: | + Assume you are an awesome parent and want to give your children some cookies. But, you should give each child at most one cookie. + + Each child `i` has a greed factor `g[i]`, which is the minimum size of a cookie that the child will be content with; and each cookie `j` has a size `s[j]`. If `s[j] >= g[i]`, we can assign the cookie `j` to the child `i`, and the child `i` will be content. + + Your goal is to maximize the number of your content children and output the maximum number. + +constraints: | + - `1 <= g.length <= 3 * 10^4` + - `0 <= s.length <= 3 * 10^4` + - `1 <= g[i], s[j] <= 2^31 - 1` + +examples: + - input: "g = [1,2,3], s = [1,1]" + output: "1" + explanation: "You have 3 children and 2 cookies. The greed factors of 3 children are 1, 2, 3. And even though you have 2 cookies, since their size is both 1, you could only make the child whose greed factor is 1 content." + - input: "g = [1,2], s = [1,2,3]" + output: "2" + explanation: "You have 2 children and 3 cookies. The greed factors of 2 children are 1, 2. You have 3 cookies and their sizes are big enough to gratify all of the children." + +explanation: + intuition: | + Imagine you're distributing cookies to children at a party. Each child has a minimum cookie size they'll accept, and you want to make as many children happy as possible. + + The key insight is that **smaller cookies should go to less greedy children**. 
If you give a large cookie to a child who would have been happy with a small one, you might waste that large cookie when it could have satisfied a greedier child. + + Think of it like this: sort both the children by their greed and the cookies by their size. Then, starting with the least greedy child and smallest cookie, try to make matches. If the smallest available cookie can satisfy the least greedy unsatisfied child, it's always optimal to make that assignment. + + Why does this greedy approach work? Because giving the smallest sufficient cookie to the least greedy child never makes things worse. That cookie couldn't have satisfied a greedier child anyway (or only barely), and using it frees up larger cookies for children who truly need them. + + approach: | + We solve this using a **Sort + Two Pointers (Greedy) Approach**: + + **Step 1: Sort both arrays** + + - Sort `g` (greed factors) in ascending order + - Sort `s` (cookie sizes) in ascending order + - This allows us to match the smallest cookies to the least greedy children first + +   + + **Step 2: Initialise two pointers and a counter** + + - `child`: Pointer to the current child (starts at `0`) + - `cookie`: Pointer to the current cookie (starts at `0`) + - `content_children`: Counter for successfully assigned cookies (starts at `0`) + +   + + **Step 3: Iterate with two pointers** + + - While we have both children and cookies left to consider: + - If the current cookie `s[cookie]` can satisfy the current child `g[child]`: + - Assign it: increment `content_children` and move to the next child + - Move to the next cookie regardless (each cookie can only be used once) + +   + + **Step 4: Return the result** + + - Return `content_children` after exhausting cookies or children + +   + + This greedy approach works because sorting ensures we always try the smallest available option first, and matching the least demanding child with the smallest sufficient cookie maximises our remaining options. 
+ + common_pitfalls: + - title: Not Sorting First + description: | + Without sorting, you might give a large cookie to a child who only needed a small one, wasting resources. + + For example, with `g = [1, 3]` and `s = [3, 1]`, processing left-to-right without sorting would give cookie size `3` to the child with greed `1`. Now the remaining cookie (size `1`) can't satisfy the child with greed `3`, resulting in only 1 content child instead of the optimal 2. + + Sorting both arrays ensures the smallest sufficient cookie always goes to the least greedy unsatisfied child. + wrong_approach: "Iterate without sorting" + correct_approach: "Sort both arrays first, then use two pointers" + + - title: Trying All Combinations + description: | + A brute force approach of trying all possible assignments is exponential in complexity. With `n` children and `m` cookies, there are potentially `m!` ways to distribute cookies. + + The greedy approach with sorting gives us O(n log n + m log m) time, which is optimal for this problem. + wrong_approach: "Try all permutations of cookie assignments" + correct_approach: "Greedy matching after sorting" + + - title: Forgetting Empty Cookie Array + description: | + The cookie array `s` can be empty (`0 <= s.length`). If there are no cookies, no children can be satisfied, and the answer is `0`. + + The two-pointer approach handles this naturally since the loop won't execute if `s` is empty. 
+ + key_takeaways: + - "**Greedy + Sorting pattern**: When matching resources to demands, sort both and greedily assign the smallest sufficient resource to the least demanding request" + - "**Two pointers on sorted arrays**: A common technique for efficient matching and pairing problems" + - "**Local optimality leads to global optimality**: Giving the smallest valid cookie to the least greedy child never reduces our total count" + - "**Related problems**: This pattern applies to task scheduling, resource allocation, and matching problems like Maximum Matching of Players With Trainers (LC 2410)" + + time_complexity: "O(n log n + m log m). We sort both arrays (where `n = len(g)` and `m = len(s)`), then do a single linear pass through both." + space_complexity: "O(1) if sorting in-place, or O(n + m) for the space used by the sorting algorithm depending on implementation." + +solutions: + - approach_name: Sort + Two Pointers (Greedy) + is_optimal: true + code: | + def find_content_children(g: list[int], s: list[int]) -> int: + # Sort both arrays to enable greedy matching + g.sort() # Children by greed (least greedy first) + s.sort() # Cookies by size (smallest first) + + child = 0 # Pointer to current child + cookie = 0 # Pointer to current cookie + content_children = 0 + + # Try to match cookies to children + while child < len(g) and cookie < len(s): + # Can this cookie satisfy this child? + if s[cookie] >= g[child]: + # Yes! Assign it and move to next child + content_children += 1 + child += 1 + # Move to next cookie regardless + # (if it couldn't satisfy current child, it won't satisfy greedier ones) + cookie += 1 + + return content_children + explanation: | + **Time Complexity:** O(n log n + m log m) — Sorting dominates, where n and m are the lengths of g and s. + + **Space Complexity:** O(1) auxiliary space (in-place sorting), or O(n + m) for Timsort's temporary storage. 
+ + By sorting both arrays and using two pointers, we greedily match the smallest sufficient cookie to the least greedy child. Each cookie and child is considered at most once, making the matching phase O(n + m). + + - approach_name: Greedy with Binary Search + is_optimal: false + code: | + import bisect + + def find_content_children(g: list[int], s: list[int]) -> int: + # Sort both arrays + g.sort() + s.sort() + + content_children = 0 + available = list(s) # Cookies still available + + # Process children from least to most greedy + for greed in g: + # Find smallest cookie that satisfies this child + idx = bisect.bisect_left(available, greed) + if idx < len(available): + # Found a suitable cookie, use it + content_children += 1 + available.pop(idx) # Remove used cookie + + return content_children + explanation: | + **Time Complexity:** O(n log n + m log m + n * m) — Sorting plus n binary searches with list removals. + + **Space Complexity:** O(m) — Copy of the cookie array. + + This approach uses binary search to find the smallest valid cookie for each child. While the binary search is O(log m), the `pop(idx)` operation is O(m), making this less efficient than the two-pointer approach. Included to show an alternative thought process. diff --git a/backend/data/questions/asteroid-collision.yaml b/backend/data/questions/asteroid-collision.yaml new file mode 100644 index 0000000..e5dab40 --- /dev/null +++ b/backend/data/questions/asteroid-collision.yaml @@ -0,0 +1,206 @@ +title: Asteroid Collision +slug: asteroid-collision +difficulty: medium +leetcode_id: 735 +leetcode_url: https://leetcode.com/problems/asteroid-collision/ +categories: + - arrays + - stack +patterns: + - monotonic-stack + +description: | + We are given an array `asteroids` of integers representing asteroids in a row. The indices of the asteroids in the array represent their relative position in space. 
+ + For each asteroid, the absolute value represents its size, and the sign represents its direction (positive meaning right, negative meaning left). Each asteroid moves at the same speed. + + Find out the state of the asteroids after all collisions. If two asteroids meet, the smaller one will explode. If both are the same size, both will explode. Two asteroids moving in the same direction will never meet. + +constraints: | + - `2 <= asteroids.length <= 10^4` + - `-1000 <= asteroids[i] <= 1000` + - `asteroids[i] != 0` + +examples: + - input: "asteroids = [5, 10, -5]" + output: "[5, 10]" + explanation: "The 10 and -5 collide resulting in 10. The 5 and 10 never collide." + - input: "asteroids = [8, -8]" + output: "[]" + explanation: "The 8 and -8 collide exploding each other." + - input: "asteroids = [10, 2, -5]" + output: "[10]" + explanation: "The 2 and -5 collide resulting in -5. The 10 and -5 collide resulting in 10." + +explanation: + intuition: | + Imagine watching asteroids drift through space from above. Positive asteroids drift rightward (→), negative asteroids drift leftward (←). A collision only happens when a right-moving asteroid is followed by a left-moving one — they're heading toward each other. + + Think of it like cars on a one-lane road: two cars going the same direction never crash, and cars going opposite directions only crash if they're heading *toward* each other (the right-moving car is to the left of the left-moving car). + + The key insight is that we need to process asteroids in order and track the "survivors" — asteroids that haven't been destroyed yet. When we encounter a left-moving asteroid, we check if it will collide with any right-moving survivors to its left (the ones processed before it). + + This "check against previous survivors" pattern is a classic signal for using a **stack**: we push survivors onto the stack and pop them off when they get destroyed by incoming asteroids.
+ + approach: | + We solve this using a **Stack Simulation**: + + **Step 1: Initialise an empty stack** + + - `stack`: Holds the asteroids that have survived so far + - We'll process asteroids left-to-right and use the stack to track survivors + +   + + **Step 2: Process each asteroid** + + For each asteroid, determine if it will collide with anything on the stack: + + - **No collision possible** if: + - Stack is empty (nothing to collide with) + - Top of stack is negative (both moving left, same direction) + - Current asteroid is positive (moving right, away from stack) + + - **Collision happens** when: + - Top of stack is positive (moving right) + - Current asteroid is negative (moving left) + - They're heading toward each other! + +   + + **Step 3: Handle collisions** + + When a collision is detected: + - Compare sizes: `abs(stack[-1])` vs `abs(asteroid)` + - If stack asteroid is smaller → pop it, continue checking (the incoming asteroid might destroy more) + - If stack asteroid is larger → incoming asteroid is destroyed, stop + - If equal size → both explode (pop stack, don't push incoming) + +   + + **Step 4: Push survivors** + + - If the incoming asteroid survives all collisions (or no collision occurred), push it onto the stack + +   + + **Step 5: Return the stack** + + - The stack contains all surviving asteroids in their final positions + + common_pitfalls: + - title: Forgetting That Collisions Can Chain + description: | + A single left-moving asteroid can destroy multiple right-moving asteroids in sequence. + + For example, with `[5, 10, -15]`: + - `-15` first collides with `10` → `10` explodes + - `-15` then collides with `5` → `5` explodes + - Result: `[-15]` + + You must keep checking after each collision until the incoming asteroid is destroyed or no more collisions are possible. 
+ wrong_approach: "Only checking one collision per asteroid" + correct_approach: "Loop until no collision or asteroid destroyed" + + - title: Misidentifying When Collisions Occur + description: | + Not all adjacent asteroids with opposite signs collide. The collision only happens when a **positive** (right-moving) asteroid is followed by a **negative** (left-moving) one. + + `[-5, 10]` → No collision! The `-5` is moving left, away from `10` which is moving right. + + `[10, -5]` → Collision! The `10` and `-5` are moving toward each other. + wrong_approach: "Checking collision whenever signs differ" + correct_approach: "Only collide when stack top is positive AND current is negative" + + - title: Equal Size Edge Case + description: | + When two asteroids of equal size collide, **both** are destroyed. This is easy to forget. + + For `[8, -8]`: + - `8` goes on stack + - `-8` collides with `8` (equal size) + - Both explode → stack becomes empty + - Result: `[]` + wrong_approach: "Only destroying one asteroid on equal collision" + correct_approach: "Pop stack AND don't push current when sizes are equal" + + key_takeaways: + - "**Stack for collision simulation**: When elements can 'destroy' previous elements, a stack naturally tracks survivors" + - "**Direction matters**: Collision only occurs when elements are moving *toward* each other, not just when they have opposite signs" + - "**Chain reactions**: One incoming element can trigger multiple removals — always loop until stable" + - "**Pattern recognition**: This is a variant of the 'monotonic stack' pattern where we maintain certain invariants about stack contents" + + time_complexity: "O(n). Each asteroid is pushed and popped from the stack at most once, giving us linear time." + space_complexity: "O(n). In the worst case (all asteroids survive), the stack holds all `n` asteroids." 
+ +solutions: + - approach_name: Stack Simulation + is_optimal: true + code: | + def asteroid_collision(asteroids: list[int]) -> list[int]: + stack = [] + + for asteroid in asteroids: + # Flag to track if current asteroid survives + alive = True + + # Check for collisions: stack top moving right, current moving left + while alive and stack and stack[-1] > 0 and asteroid < 0: + # Compare sizes (absolute values) + if stack[-1] < abs(asteroid): + # Stack asteroid is smaller, it explodes + stack.pop() + # Current asteroid continues (might hit more) + elif stack[-1] == abs(asteroid): + # Equal size: both explode + stack.pop() + alive = False + else: + # Stack asteroid is larger, current explodes + alive = False + + # If current asteroid survived, add it to stack + if alive: + stack.append(asteroid) + + return stack + explanation: | + **Time Complexity:** O(n) — Each asteroid enters and leaves the stack at most once. + + **Space Complexity:** O(n) — Stack can hold all asteroids in worst case. + + We iterate through each asteroid and simulate collisions using a stack. The key insight is that a collision only occurs when the stack's top is positive (moving right) and the current asteroid is negative (moving left). We handle the three collision outcomes: smaller destroyed, larger survives, or equal means both destroyed. 
+ + - approach_name: Simulation Without Stack + is_optimal: false + code: | + def asteroid_collision(asteroids: list[int]) -> list[int]: + result = [] + + for asteroid in asteroids: + # Process until no more collisions or asteroid destroyed + while True: + # No collision if result empty, both same direction, or moving apart + if not result or result[-1] < 0 or asteroid > 0: + result.append(asteroid) + break + + # Collision case: result[-1] > 0 and asteroid < 0 + if result[-1] < abs(asteroid): + # Result asteroid smaller, remove it and continue + result.pop() + elif result[-1] == abs(asteroid): + # Equal: both destroyed + result.pop() + break + else: + # Result asteroid larger, current destroyed + break + + return result + explanation: | + **Time Complexity:** O(n) — Same as stack approach. + + **Space Complexity:** O(n) — Result list serves as our stack. + + This is functionally identical to the stack approach but uses the result list directly instead of a separate stack variable. The logic is the same: check for collisions when directions oppose, handle the three size comparison cases, and continue until stable. diff --git a/backend/data/questions/available-captures-for-rook.yaml b/backend/data/questions/available-captures-for-rook.yaml new file mode 100644 index 0000000..ebc5192 --- /dev/null +++ b/backend/data/questions/available-captures-for-rook.yaml @@ -0,0 +1,152 @@ +title: Available Captures for Rook +slug: available-captures-for-rook +difficulty: easy +leetcode_id: 999 +leetcode_url: https://leetcode.com/problems/available-captures-for-rook/ +categories: + - arrays +patterns: + - matrix-traversal + +description: | + You are given an `8 x 8` **matrix** representing a chessboard. There is **exactly one** white rook represented by `'R'`, some number of white bishops `'B'`, and some number of black pawns `'p'`. Empty squares are represented by `'.'`. 
+ + A rook can move any number of squares horizontally or vertically (up, down, left, right) until it reaches another piece *or* the edge of the board. A rook is **attacking** a pawn if it can move to the pawn's square in one move. + + Note: A rook cannot move through other pieces, such as bishops or pawns. This means a rook cannot attack a pawn if there is another piece blocking the path. + + Return *the number of pawns the white rook is attacking*. + +constraints: | + - `board.length == 8` + - `board[i].length == 8` + - `board[i][j]` is either `'R'`, `'.'`, `'B'`, or `'p'` + - There is exactly one cell with `board[i][j] == 'R'` + +examples: + - input: 'board = [[".",".",".",".",".",".",".","."],[".",".",".","p",".",".",".","."],[".",".",".","R",".",".",".","p"],[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".","p",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."]]' + output: "3" + explanation: "The rook is attacking all three pawns since there are no pieces blocking the path in any direction." + - input: 'board = [[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".",".",".",".",".","."],[".",".",".",".","R",".",".","."]]' + output: "0" + explanation: "There are no pawns on the board, so the rook cannot capture any." + - input: 'board = [[".",".",".",".",".",".",".","."],[".",".",".","p",".",".",".","."],[".",".",".","p",".",".",".","."],["p","p",".","R",".","p","B","."],[".",".",".",".",".",".",".","."],[".",".",".","B",".",".",".","."],[".",".",".","p",".",".",".","."],[".",".",".",".",".",".",".","."]]' + output: "3" + explanation: "The rook attacks the nearest pawn to its left and the nearest pawn above it (in both of those directions a second pawn sits behind the first and is blocked), plus the pawn to its right, which stands in front of the bishop. The pawn below is blocked by a bishop."
+ +explanation: + intuition: | + Think of this problem like shining a flashlight from the rook's position in four directions: up, down, left, and right. + + The light travels until it hits something. If it hits a pawn (`'p'`), that's a capture! If it hits a bishop (`'B'`) or the edge of the board, the light stops and we move on to the next direction. + + The key insight is that **order matters** — the rook can only capture the *first* piece it encounters in each direction. Any pieces beyond that are blocked from view, just like how a flashlight beam stops at the first obstacle. + + Since a rook moves in exactly four directions and we need to check each one until we hit something, this is a straightforward simulation problem. + + approach: | + We solve this using a **Direction Traversal** approach: + + **Step 1: Find the rook's position** + + - Iterate through the 8x8 board to locate the cell containing `'R'` + - Store its row and column coordinates + +   + + **Step 2: Define the four directions** + + - Use direction vectors: `(-1, 0)` for up, `(1, 0)` for down, `(0, -1)` for left, `(0, 1)` for right + - Each vector represents how row and column change as we move + +   + + **Step 3: Traverse each direction** + + - For each direction, start from the rook's position + - Move one square at a time in that direction + - Stop when we hit: a pawn (count it), a bishop (don't count), or the board edge + +   + + **Step 4: Return the total count** + + - Sum up all pawns encountered across the four directions + + common_pitfalls: + - title: Counting All Pawns in a Direction + description: | + A common mistake is to count *all* pawns in a direction rather than just the first piece encountered. + + For example, if there are two pawns stacked vertically above the rook, only the closest one can be captured — the second is blocked by the first. + + Always `break` out of the direction loop as soon as you hit any piece (pawn or bishop). 
+ wrong_approach: "Continue traversing after finding a pawn" + correct_approach: "Stop traversal immediately upon hitting any piece" + + - title: Forgetting to Check Board Boundaries + description: | + When traversing in a direction, you must check that the new position is within the 8x8 board before accessing the cell. + + Accessing `board[-1][3]` or `board[8][3]` will either cause an error or give incorrect results depending on the language. + wrong_approach: "Traverse without boundary checks" + correct_approach: "Check 0 <= row < 8 and 0 <= col < 8 before accessing" + + - title: Treating Bishops as Transparent + description: | + Bishops block the rook's path just like pawns do — the only difference is bishops don't count as captures. + + If you only check for pawns and ignore bishops, you'll incorrectly count pawns that are actually blocked. + wrong_approach: "Only stop traversal for pawns" + correct_approach: "Stop traversal for any piece (pawn or bishop)" + + key_takeaways: + - "**Direction vectors** simplify grid traversal — define `(dr, dc)` pairs and loop through them" + - "**First-hit termination**: In line-of-sight problems, always stop at the first obstacle" + - "**Fixed-size boards** (like 8x8) mean O(1) complexity, but the technique scales to larger grids" + - "This pattern applies to many chess and grid simulation problems" + + time_complexity: "O(1). The board is always 8x8: finding the rook scans at most 64 cells, and each of the 4 directional scans visits at most 7 cells." + space_complexity: "O(1). We only use a few variables for position tracking and counting."
+ +solutions: + - approach_name: Direction Traversal + is_optimal: true + code: | + def num_rook_captures(board: list[list[str]]) -> int: + # Find the rook's position + rook_row, rook_col = 0, 0 + for r in range(8): + for c in range(8): + if board[r][c] == 'R': + rook_row, rook_col = r, c + break + + # Four directions: up, down, left, right + directions = [(-1, 0), (1, 0), (0, -1), (0, 1)] + captures = 0 + + for dr, dc in directions: + # Start from rook's position and move in this direction + r, c = rook_row + dr, rook_col + dc + + # Keep moving until we hit the edge or a piece + while 0 <= r < 8 and 0 <= c < 8: + if board[r][c] == 'p': + # Found a pawn — count it and stop + captures += 1 + break + elif board[r][c] == 'B': + # Bishop blocks the path — stop without counting + break + # Empty square — keep moving + r += dr + c += dc + + return captures + explanation: | + **Time Complexity:** O(1) — Fixed 8x8 board with at most 32 cells traversed. + + **Space Complexity:** O(1) — Only a few integer variables used. + + We locate the rook, then traverse in each of the four cardinal directions. For each direction, we move step-by-step until we either find a pawn (increment count), hit a bishop (stop), or reach the board edge (stop). diff --git a/backend/data/questions/average-of-levels-in-binary-tree.yaml b/backend/data/questions/average-of-levels-in-binary-tree.yaml new file mode 100644 index 0000000..b51e562 --- /dev/null +++ b/backend/data/questions/average-of-levels-in-binary-tree.yaml @@ -0,0 +1,200 @@ +title: Average of Levels in Binary Tree +slug: average-of-levels-in-binary-tree +difficulty: easy +leetcode_id: 637 +leetcode_url: https://leetcode.com/problems/average-of-levels-in-binary-tree/ +categories: + - trees + - queue +patterns: + - bfs + - tree-traversal + +description: | + Given the `root` of a binary tree, return *the average value of the nodes on each level in the form of an array*. + + Answers within `10^-5` of the actual answer will be accepted. 
+ +constraints: | + - The number of nodes in the tree is in the range `[1, 10^4]` + - `-2^31 <= Node.val <= 2^31 - 1` + +examples: + - input: "root = [3,9,20,null,null,15,7]" + output: "[3.00000,14.50000,11.00000]" + explanation: "The average value of nodes on level 0 is 3, on level 1 is 14.5, and on level 2 is 11. Hence return [3, 14.5, 11]." + - input: "root = [3,9,20,15,7]" + output: "[3.00000,14.50000,11.00000]" + explanation: "The tree has three levels with averages 3, 14.5, and 11 respectively." + +explanation: + intuition: | + Imagine the binary tree as floors of a building, where each level represents a floor. Your task is to walk through each floor, count all the people (node values), and calculate the average number per floor. + + The key insight is that we need to **process the tree level by level**, keeping nodes from the same depth together. This naturally maps to **Breadth-First Search (BFS)**, which explores all nodes at the current depth before moving to the next level. + + Think of it like this: we use a queue as a "waiting room." We start with the root on floor 0. Then, for each floor, we let everyone in the waiting room step forward (process them), calculate their average, and have them send their children to the waiting room for the next floor. We repeat until no one is left waiting. + + The queue maintains the level-by-level ordering that makes this problem straightforward. 
+ + approach: | + We solve this using **Breadth-First Search (BFS)** with a queue: + + **Step 1: Handle the edge case** + + - If `root` is `None`, return an empty list + +   + + **Step 2: Initialise data structures** + + - `result`: An empty list to store the average of each level + - `queue`: A deque initialised with the root node + +   + + **Step 3: Process the tree level by level** + + - While the queue is not empty: + - Record the current `level_size` (number of nodes at this level) + - Initialise `level_sum` to `0` for accumulating node values + - Process exactly `level_size` nodes: + - Dequeue a node from the front + - Add its value to `level_sum` + - Enqueue its left child if it exists + - Enqueue its right child if it exists + - Calculate the average: `level_sum / level_size` + - Append the average to `result` + +   + + **Step 4: Return the result** + + - Return `result` containing the average for each level + +   + + This approach ensures we process all nodes at depth `d` before any node at depth `d+1`, giving us clean level-by-level averages. + + common_pitfalls: + - title: Integer Overflow + description: | + Node values can range from `-2^31` to `2^31 - 1`. When summing values at a level with many nodes, the sum could overflow 32-bit integers. + + In Python, this isn't an issue due to arbitrary-precision integers. In languages like Java or C++, you should use `long` for the sum or cast to `double` early. + wrong_approach: "Using int for level_sum in languages with fixed-width integers" + correct_approach: "Use long or double for accumulating sums" + + - title: Using DFS Without Level Tracking + description: | + While DFS can solve this problem, it requires extra bookkeeping to track which level each node belongs to. A common mistake is to use DFS and lose track of the level structure. + + BFS naturally groups nodes by level, making it the more intuitive approach for this problem. 
+ wrong_approach: "DFS without proper level indexing" + correct_approach: "BFS with queue processing level_size nodes at a time" + + - title: Forgetting to Snapshot Level Size + description: | + When processing a level, you must capture the queue size **before** you start dequeuing. If you check `len(queue)` inside the loop, you'll include children you just added. + + For example, with `[3,9,20]`, if you don't snapshot the size, processing the root adds `9` and `20` to the queue, and you'd process them in the same iteration. + wrong_approach: "Looping while queue has elements without fixed level size" + correct_approach: "Capture level_size = len(queue) before the inner loop" + + key_takeaways: + - "**BFS for level-order problems**: When you need to process a tree level by level, BFS with a queue is the natural choice" + - "**Snapshot the level size**: Always capture `len(queue)` before processing to avoid mixing levels" + - "**Foundation for similar problems**: This pattern applies to many tree problems — level order traversal, right side view, zigzag traversal, and more" + - "**DFS alternative**: DFS works too if you pass the level index and use a list indexed by level, but BFS is more intuitive here" + + time_complexity: "O(n). We visit each of the `n` nodes in the tree exactly once." + space_complexity: "O(w) where `w` is the maximum width of the tree. In the worst case (a complete binary tree), the last level has roughly `n/2` nodes, so this is O(n)." 
+ +solutions: + - approach_name: BFS with Queue + is_optimal: true + code: | + from collections import deque + from typing import Optional + + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def average_of_levels(root: Optional[TreeNode]) -> list[float]: + if not root: + return [] + + result = [] + queue = deque([root]) + + while queue: + # Snapshot the number of nodes at this level + level_size = len(queue) + level_sum = 0 + + # Process all nodes at the current level + for _ in range(level_size): + node = queue.popleft() + level_sum += node.val + + # Add children to queue for next level + if node.left: + queue.append(node.left) + if node.right: + queue.append(node.right) + + # Calculate and store the average for this level + result.append(level_sum / level_size) + + return result + explanation: | + **Time Complexity:** O(n) — Each node is visited exactly once. + + **Space Complexity:** O(w) — The queue holds at most one level of nodes at a time. The maximum width `w` can be up to `n/2` for a complete binary tree. + + BFS naturally processes nodes level by level. By snapshotting the queue size before processing, we ensure each level is handled independently, making the average calculation straightforward. 
+ + - approach_name: DFS with Level Tracking + is_optimal: false + code: | + from typing import Optional + + class TreeNode: + def __init__(self, val=0, left=None, right=None): + self.val = val + self.left = left + self.right = right + + def average_of_levels(root: Optional[TreeNode]) -> list[float]: + # Each entry: [sum_of_values, count_of_nodes] + level_data = [] + + def dfs(node: Optional[TreeNode], level: int) -> None: + if not node: + return + + # Expand the list if we've reached a new level + if level >= len(level_data): + level_data.append([0, 0]) + + # Accumulate sum and count for this level + level_data[level][0] += node.val + level_data[level][1] += 1 + + # Recurse to children at the next level + dfs(node.left, level + 1) + dfs(node.right, level + 1) + + dfs(root, 0) + + # Convert accumulated data to averages + return [total / count for total, count in level_data] + explanation: | + **Time Complexity:** O(n) — Each node is visited exactly once. + + **Space Complexity:** O(h) for recursion stack where `h` is the tree height, plus O(h) for `level_data`. In the worst case (skewed tree), `h = n`. + + DFS can also solve this problem by tracking the level during traversal. We accumulate sums and counts in a list indexed by level, then compute averages at the end. While this works, it's less intuitive than BFS for level-based problems. 
diff --git a/backend/data/questions/average-salary-excluding-minimum-and-maximum.yaml b/backend/data/questions/average-salary-excluding-minimum-and-maximum.yaml new file mode 100644 index 0000000..76e0e92 --- /dev/null +++ b/backend/data/questions/average-salary-excluding-minimum-and-maximum.yaml @@ -0,0 +1,173 @@ +title: Average Salary Excluding the Minimum and Maximum Salary +slug: average-salary-excluding-minimum-and-maximum +difficulty: easy +leetcode_id: 1491 +leetcode_url: https://leetcode.com/problems/average-salary-excluding-the-minimum-and-maximum-salary/ +categories: + - arrays + - sorting +patterns: + - greedy + +description: | + You are given an array of **unique** integers `salary` where `salary[i]` is the salary of the `i`-th employee. + + Return *the average salary of employees excluding the minimum and maximum salary*. Answers within `10^-5` of the actual answer will be accepted. + +constraints: | + - `3 <= salary.length <= 100` + - `1000 <= salary[i] <= 10^6` + - All the integers of `salary` are **unique** + +examples: + - input: "salary = [4000, 3000, 1000, 2000]" + output: "2500.00000" + explanation: "Minimum salary and maximum salary are 1000 and 4000 respectively. Average salary excluding minimum and maximum salary is (2000 + 3000) / 2 = 2500." + - input: "salary = [1000, 2000, 3000]" + output: "2000.00000" + explanation: "Minimum salary and maximum salary are 1000 and 3000 respectively. Average salary excluding minimum and maximum salary is (2000) / 1 = 2000." + +explanation: + intuition: | + Imagine you're calculating the average performance score of a team, but you want to exclude outliers — the best and worst performers — to get a more representative picture of the typical team member. + + The core insight is straightforward: we need to find the **total sum** of all salaries, then **subtract** the minimum and maximum values, and finally **divide** by the count of remaining employees (which is always `n - 2`).
+ + Think of it like this: instead of sorting the entire array to identify the min and max (which would work but is overkill), we can find these two values in a single pass through the data. As we scan through, we simply track the smallest and largest values we've seen, while also accumulating the total sum. + + This approach works because we only need to *identify* the min and max — we don't need them in any particular position. Finding extremes is a classic single-pass operation. + + approach: | + We solve this using a **Single Pass Approach**: + + **Step 1: Initialise tracking variables** + + - `min_salary`: Set to infinity so any salary becomes the new minimum + - `max_salary`: Set to negative infinity so any salary becomes the new maximum + - `total_sum`: Set to `0` to accumulate the sum of all salaries + +   + + **Step 2: Iterate through the salary array** + + - For each salary, add it to `total_sum` + - If the salary is less than `min_salary`, update `min_salary` + - If the salary is greater than `max_salary`, update `max_salary` + +   + + **Step 3: Calculate and return the average** + + - Subtract `min_salary` and `max_salary` from `total_sum` + - Divide by `n - 2` (the count of remaining employees) + - Return the result as a floating-point number + +   + + This single-pass approach is optimal because we gather all necessary information (sum, min, max) in one traversal of the array. + + common_pitfalls: + - title: Sorting When Not Necessary + description: | + A common first instinct is to sort the array and then sum elements from index `1` to `n-2`: + + ```python + salary.sort() + return sum(salary[1:-1]) / (len(salary) - 2) + ``` + + While this works and is readable, it has **O(n log n) time complexity** due to the sort. For this problem's constraints (`n <= 100`), it's acceptable, but the single-pass O(n) solution is more elegant and demonstrates better algorithmic thinking. 
+ wrong_approach: "Sorting the entire array to find min/max" + correct_approach: "Track min/max while iterating once" + + - title: Integer Division + description: | + Be careful with division in languages that distinguish between integer and floating-point division. + + In Python 3, `/` always returns a float, but in Python 2 or other languages, you might accidentally get integer division: + + ```python + # Wrong in some contexts + (total - min_val - max_val) // (n - 2) # Integer division! + + # Correct + (total - min_val - max_val) / (n - 2) # Float division + ``` + wrong_approach: "Using integer division //" + correct_approach: "Using float division /" + + - title: Off-By-One in Count + description: | + Remember that after excluding the minimum and maximum, you have `n - 2` employees remaining, not `n - 1`. + + With 4 employees and 2 excluded, you average over 2 people. A common mistake is dividing by `len(salary) - 1` instead of `len(salary) - 2`. + + key_takeaways: + - "**Single-pass pattern**: When you only need to find extremes (min/max) and aggregates (sum/count), you can often do it in one traversal" + - "**Avoid unnecessary sorting**: Sorting is O(n log n) — if you only need min/max, tracking them in O(n) is more efficient" + - "**Foundation for statistical problems**: This technique of excluding outliers extends to more complex statistical calculations like trimmed means" + - "**Constraint awareness**: With `n >= 3` guaranteed, we know there's always at least one salary remaining after exclusion" + + time_complexity: "O(n). We traverse the salary array exactly once, performing constant-time operations at each step." + space_complexity: "O(1). We only use three variables (`min_salary`, `max_salary`, `total_sum`) regardless of input size." 
+ +solutions: + - approach_name: Single Pass + is_optimal: true + code: | + def average(salary: list[int]) -> float: + # Initialise tracking variables + min_salary = float('inf') + max_salary = float('-inf') + total_sum = 0 + + # Single pass to find min, max, and sum + for s in salary: + total_sum += s + if s < min_salary: + min_salary = s + if s > max_salary: + max_salary = s + + # Calculate average excluding min and max + # n - 2 employees remain after exclusion + return (total_sum - min_salary - max_salary) / (len(salary) - 2) + explanation: | + **Time Complexity:** O(n) — Single pass through the array. + + **Space Complexity:** O(1) — Only three tracking variables used. + + We traverse once, tracking the minimum, maximum, and total sum simultaneously. This is optimal because we need to examine each element at least once to determine these values. + + - approach_name: Built-in Functions + is_optimal: true + code: | + def average(salary: list[int]) -> float: + # Use built-in functions for clarity + # Each of min(), max(), sum() is O(n), but constant factor is low + return (sum(salary) - min(salary) - max(salary)) / (len(salary) - 2) + explanation: | + **Time Complexity:** O(n) — Three passes through the array (sum, min, max). + + **Space Complexity:** O(1) — No additional data structures. + + This one-liner is equally optimal in big-O terms and arguably more readable. While it makes three passes instead of one, the constant factor difference is negligible for this problem's constraints. In interviews, this concise approach demonstrates knowledge of Python's built-in functions. 
+ + - approach_name: Sorting + is_optimal: false + code: | + def average(salary: list[int]) -> float: + # Sort to put min at start, max at end + salary.sort() + + # Sum everything except first and last elements + middle_sum = sum(salary[1:-1]) + + # Average over n - 2 employees + return middle_sum / (len(salary) - 2) + explanation: | + **Time Complexity:** O(n log n) — Dominated by the sorting step. + + **Space Complexity:** O(1) or O(n) — Depends on the sorting algorithm used. + + While correct and readable, sorting is overkill for this problem. We only need min and max, not a fully sorted array. However, this approach is easy to understand and may be acceptable in interviews when time is limited. It also naturally handles the case where we need to sum the middle elements. diff --git a/backend/data/questions/average-value-of-even-numbers-divisible-by-three.yaml b/backend/data/questions/average-value-of-even-numbers-divisible-by-three.yaml new file mode 100644 index 0000000..c192757 --- /dev/null +++ b/backend/data/questions/average-value-of-even-numbers-divisible-by-three.yaml @@ -0,0 +1,141 @@ +title: Average Value of Even Numbers That Are Divisible by Three +slug: average-value-of-even-numbers-divisible-by-three +difficulty: easy +leetcode_id: 2455 +leetcode_url: https://leetcode.com/problems/average-value-of-even-numbers-that-are-divisible-by-three/ +categories: + - arrays + - math +patterns: + - prefix-sum + +description: | + Given an integer array `nums` of **positive** integers, return *the average value of all even integers that are divisible by* `3`. + + Note that the **average** of `n` elements is the **sum** of the `n` elements divided by `n` and **rounded down** to the nearest integer. + +constraints: | + - `1 <= nums.length <= 1000` + - `1 <= nums[i] <= 1000` + +examples: + - input: "nums = [1,3,6,10,12,15]" + output: "9" + explanation: "6 and 12 are even numbers that are divisible by 3. (6 + 12) / 2 = 9."
+ - input: "nums = [1,2,4,7,10]" + output: "0" + explanation: "There is no single number that satisfies the requirement, so return 0." + +explanation: + intuition: | + This problem asks us to filter numbers by two conditions and then compute their average. + + Think of it like sorting through a basket of numbered balls: we only want to pick the balls that satisfy **two criteria simultaneously**. A number must be: + 1. **Even** — divisible by 2 + 2. **Divisible by 3** + + Here's the key insight: a number that is both even (divisible by 2) AND divisible by 3 must be divisible by their least common multiple, which is **6**. So instead of checking two conditions, we can simplify to checking just one: is the number divisible by 6? + + Once we've identified all qualifying numbers, we sum them up and divide by the count. If no numbers qualify, we return 0. + + approach: | + We solve this using a **Single Pass with Running Sum** approach: + + **Step 1: Initialise tracking variables** + + - `total`: Set to `0` to accumulate the sum of qualifying numbers + - `count`: Set to `0` to track how many numbers qualify + +   + + **Step 2: Iterate through the array** + + - For each number, check if it's divisible by 6 (`num % 6 == 0`) + - If yes, add it to `total` and increment `count` + +   + + **Step 3: Calculate and return the average** + + - If `count` is `0`, return `0` (no qualifying numbers) + - Otherwise, return `total // count` (integer division for rounded-down average) + +   + + This approach processes each element exactly once, making it efficient and straightforward. + + common_pitfalls: + - title: Checking Two Conditions Separately + description: | + A common approach is to check `num % 2 == 0 and num % 3 == 0` separately. While correct, this is slightly more verbose than necessary. + + Since even numbers divisible by 3 are exactly the numbers divisible by 6 (LCM of 2 and 3), you can simplify to `num % 6 == 0`. 
+ wrong_approach: "Check num % 2 == 0 and num % 3 == 0" + correct_approach: "Check num % 6 == 0" + + - title: Forgetting the Empty Case + description: | + If no numbers in the array satisfy the conditions, dividing by zero will cause an error. + + For example, with `nums = [1, 2, 4, 7, 10]`, no number is divisible by 6. You must check if `count == 0` before dividing and return `0` in that case. + wrong_approach: "Return total / count without checking count" + correct_approach: "Return 0 if count is 0, else total // count" + + - title: Using Float Division Instead of Integer Division + description: | + The problem specifies the average should be **rounded down** to the nearest integer. Using `/` in Python returns a float, while `//` performs integer division (floor division). + + For `(6 + 12) / 2`, Python's `/` gives `9.0`, but `//` gives `9`. The problem expects an integer result. + wrong_approach: "total / count (float division)" + correct_approach: "total // count (integer division)" + + key_takeaways: + - "**Simplify conditions with LCM**: When checking divisibility by multiple numbers, consider using their LCM. Even AND divisible by 3 = divisible by 6." + - "**Handle empty results**: Always check for division by zero when computing averages or ratios." + - "**Integer vs float division**: Use `//` for floor division when the problem asks for rounded-down results." + - "**Single pass efficiency**: Accumulating sum and count in one pass is O(n) and avoids storing filtered elements." + + time_complexity: "O(n). We iterate through the array once, checking each element and updating our running totals." + space_complexity: "O(1). We only use two variables (`total` and `count`), regardless of input size." 
+ +solutions: + - approach_name: Single Pass with Divisibility Check + is_optimal: true + code: | + def average_value(nums: list[int]) -> int: + # Track sum and count of qualifying numbers + total = 0 + count = 0 + + for num in nums: + # Even AND divisible by 3 = divisible by 6 + if num % 6 == 0: + total += num + count += 1 + + # Avoid division by zero; return 0 if no qualifying numbers + return total // count if count > 0 else 0 + explanation: | + **Time Complexity:** O(n) — Single pass through the array. + + **Space Complexity:** O(1) — Only two integer variables used. + + We iterate once, checking each number for divisibility by 6 (the LCM of 2 and 3). This combines the "even" and "divisible by 3" checks into one operation. We accumulate the sum and count, then compute the floor-divided average. + + - approach_name: Filter and Sum + is_optimal: false + code: | + def average_value(nums: list[int]) -> int: + # Filter to get all numbers divisible by 6 + qualifying = [num for num in nums if num % 6 == 0] + + # Return 0 if no qualifying numbers, else compute average + if not qualifying: + return 0 + return sum(qualifying) // len(qualifying) + explanation: | + **Time Complexity:** O(n) — Two passes: one for filtering, one for summing. + + **Space Complexity:** O(k) — Where k is the number of qualifying elements. + + This approach is more readable by separating the filtering and averaging steps. However, it uses extra space to store the filtered list. For this problem's constraints (n <= 1000), the difference is negligible, but the single-pass approach is more memory-efficient. 
diff --git a/backend/data/questions/average-waiting-time.yaml b/backend/data/questions/average-waiting-time.yaml new file mode 100644 index 0000000..fb04062 --- /dev/null +++ b/backend/data/questions/average-waiting-time.yaml @@ -0,0 +1,175 @@ +title: Average Waiting Time +slug: average-waiting-time +difficulty: medium +leetcode_id: 1701 +leetcode_url: https://leetcode.com/problems/average-waiting-time/ +categories: + - arrays +patterns: + - greedy + +description: | + There is a restaurant with a single chef. You are given an array `customers`, where `customers[i] = [arrival_i, time_i]`: + + - `arrival_i` is the arrival time of the ith customer. The arrival times are sorted in **non-decreasing** order. + - `time_i` is the time needed to prepare the order of the ith customer. + + When a customer arrives, they give the chef their order, and the chef starts preparing it once idle. The customer waits until the chef finishes preparing their order. The chef does not prepare food for more than one customer at a time. The chef prepares food for customers **in the order they were given in the input**. + + Return *the **average** waiting time of all customers*. Solutions within `10^-5` from the actual answer are considered accepted. + +constraints: | + - `1 <= customers.length <= 10^5` + - `1 <= arrival_i, time_i <= 10^4` + - `arrival_i <= arrival_(i+1)` + +examples: + - input: "customers = [[1,2],[2,5],[4,3]]" + output: "5.00000" + explanation: | + 1) The first customer arrives at time 1, the chef starts immediately and finishes at time 3. Waiting time = 3 - 1 = 2. + 2) The second customer arrives at time 2, the chef starts at time 3 (after finishing customer 1) and finishes at time 8. Waiting time = 8 - 2 = 6. + 3) The third customer arrives at time 4, the chef starts at time 8 and finishes at time 11. Waiting time = 11 - 4 = 7. + Average waiting time = (2 + 6 + 7) / 3 = 5. 
+ - input: "customers = [[5,2],[5,4],[10,3],[20,1]]" + output: "3.25000" + explanation: | + 1) Customer 1 arrives at time 5, chef finishes at time 7. Waiting time = 2. + 2) Customer 2 arrives at time 5, chef starts at 7 and finishes at 11. Waiting time = 6. + 3) Customer 3 arrives at time 10, chef starts at 11 and finishes at 14. Waiting time = 4. + 4) Customer 4 arrives at time 20, chef starts immediately and finishes at 21. Waiting time = 1. + Average waiting time = (2 + 6 + 4 + 1) / 4 = 3.25. + +explanation: + intuition: | + Imagine you're standing in line at a busy restaurant with a single chef. Each customer places an order that takes a certain amount of time to prepare. The chef works through orders one at a time, in the order they arrive. + + The key insight is understanding what "waiting time" means for each customer: it's the time from when they **arrive** until their food is **ready** — not just the cooking time. If the chef is still busy when you arrive, you wait for them to finish before your order even starts. + + Think of it like a timeline: the chef maintains a "current time" pointer that moves forward as they complete orders. When a new customer arrives: + - If the chef is free (current time <= arrival time), they start cooking immediately + - If the chef is busy (current time > arrival time), the customer must wait until the chef finishes their current order + + By simulating this process and tracking when each order completes, we can calculate each customer's total waiting time and then average them all. 
+ + approach: | + We solve this with a **Single Pass Simulation**: + + **Step 1: Initialise tracking variables** + + - `current_time`: Set to `0`, representing when the chef becomes available + - `total_wait`: Set to `0`, accumulating the sum of all waiting times + +   + + **Step 2: Process each customer in order** + + For each customer with `[arrival, cook_time]`: + + - Determine when the chef can start: `max(current_time, arrival)` + - Calculate when the order will be finished: `start_time + cook_time` + - Calculate this customer's waiting time: `finish_time - arrival` + - Add this waiting time to `total_wait` + - Update `current_time` to `finish_time` for the next customer + +   + + **Step 3: Calculate and return the average** + + - Return `total_wait / n` where `n` is the number of customers + +   + + This approach works because customers are already sorted by arrival time. We process them in order, maintaining the chef's timeline as we go. Each customer's waiting time is simply how long from their arrival until their food is ready. + + common_pitfalls: + - title: Confusing Waiting Time with Cooking Time + description: | + A common mistake is returning just the average of all cooking times (`time_i` values). But waiting time includes: + - Any time spent waiting for the chef to become free + - Plus the actual cooking time + + For example, if customer 2 arrives at time 2 but the chef is busy until time 3, and cooking takes 5 minutes, the waiting time is `8 - 2 = 6`, not just `5`. + wrong_approach: "Sum cooking times and divide by n" + correct_approach: "Track chef availability and calculate finish_time - arrival for each customer" + + - title: Not Handling Chef Idle Time + description: | + When a customer arrives after the chef has been idle for a while (arrival > current_time), the chef starts immediately at the arrival time, not at current_time. 
+ + For example, if the chef finishes at time 10 and the next customer arrives at time 20, cooking starts at time 20, not time 10. Use `max(current_time, arrival)` to handle both cases. + wrong_approach: "Always start cooking at current_time" + correct_approach: "Start at max(current_time, arrival)" + + - title: Integer Overflow or Precision Issues + description: | + Waiting times accumulate along the queue, so a single customer can wait far longer than `10^4 + 10^4` time units. If all `10^5` customers arrive at time `1` with a cooking time of `10^4`, the ith customer waits `i * 10^4`, and the total waiting time reaches about `5 * 10^13`. This overflows a 32-bit integer, so use a 64-bit accumulator in languages with fixed-width integers (Python integers do not overflow). + + Separately, the problem requires floating-point division for the average. Ensure you're using float division, not integer division. + wrong_approach: "Integer division total_wait // n" + correct_approach: "Float division total_wait / n" + + key_takeaways: + - "**Simulation pattern**: When order of processing matters, simulate the timeline step by step" + - "**Greedy works here**: Processing in arrival order is optimal because we can't reorder customers" + - "**Track state with a single variable**: The chef's availability (`current_time`) is all we need to simulate the entire queue" + - "**Waiting time = finish time - arrival time**: This formula accounts for both queue wait and cooking time" + + time_complexity: "O(n). We process each customer exactly once in a single pass through the array." + space_complexity: "O(1). We only use two variables (`current_time` and `total_wait`) regardless of input size."
+ +solutions: + - approach_name: Single Pass Simulation + is_optimal: true + code: | + def average_waiting_time(customers: list[list[int]]) -> float: + current_time = 0 # When the chef becomes available + total_wait = 0 # Sum of all waiting times + + for arrival, cook_time in customers: + # Chef starts when free or when customer arrives (whichever is later) + start_time = max(current_time, arrival) + # Order is ready after cooking completes + finish_time = start_time + cook_time + # Waiting time is from arrival until food is ready + total_wait += finish_time - arrival + # Update chef availability for next customer + current_time = finish_time + + # Return the average waiting time + return total_wait / len(customers) + explanation: | + **Time Complexity:** O(n) — Single pass through the customers array. + + **Space Complexity:** O(1) — Only two tracking variables used. + + We simulate the chef's timeline by processing customers in order. For each customer, we determine when their order finishes and calculate how long they waited from arrival. The key insight is using `max(current_time, arrival)` to handle both cases: chef busy or chef idle. + + - approach_name: Explicit Timeline Simulation + is_optimal: false + code: | + def average_waiting_time(customers: list[list[int]]) -> float: + waiting_times = [] + chef_free_at = 0 + + for arrival, cook_time in customers: + # If chef is busy, customer waits; otherwise chef waits for customer + if chef_free_at > arrival: + # Chef is busy - customer waits for chef + wait_for_chef = chef_free_at - arrival + total_wait = wait_for_chef + cook_time + chef_free_at = chef_free_at + cook_time + else: + # Chef is free - starts immediately when customer arrives + total_wait = cook_time + chef_free_at = arrival + cook_time + + waiting_times.append(total_wait) + + return sum(waiting_times) / len(waiting_times) + explanation: | + **Time Complexity:** O(n) — Single pass plus summing the list. 
+ + **Space Complexity:** O(n) — Stores individual waiting times in a list. + + This version explicitly handles the two cases (chef busy vs idle) and stores each customer's waiting time. While more verbose, it makes the logic clearer for understanding. The optimal solution combines these cases with `max()` and avoids the extra list. diff --git a/backend/data/questions/avoid-flood-in-the-city.yaml b/backend/data/questions/avoid-flood-in-the-city.yaml new file mode 100644 index 0000000..dd2db6a --- /dev/null +++ b/backend/data/questions/avoid-flood-in-the-city.yaml @@ -0,0 +1,231 @@ +title: Avoid Flood in The City +slug: avoid-flood-in-the-city +difficulty: medium +leetcode_id: 1488 +leetcode_url: https://leetcode.com/problems/avoid-flood-in-the-city/ +categories: + - arrays + - hash-tables +patterns: + - greedy + - binary-search + +description: | + Your country has an infinite number of lakes. Initially, all the lakes are empty, but when it rains over the nth lake, that lake becomes full of water. If it rains over a lake that is **full of water**, there will be a **flood**. + + Your goal is to avoid floods in any lake. + + Given an integer array `rains` where: + + - `rains[i] > 0` means it will rain over lake `rains[i]`. + - `rains[i] == 0` means there is no rain this day, and you **must** choose **one lake** to **dry**. + + Return *an array* `ans` where: + + - `ans.length == rains.length` + - `ans[i] == -1` if `rains[i] > 0`. + - `ans[i]` is the lake you choose to dry on the ith day if `rains[i] == 0`. + + If there are multiple valid answers, return **any** of them. If it is impossible to avoid a flood, return **an empty array**. + + **Note:** If you choose to dry a full lake, it becomes empty. If you dry an empty lake, nothing changes. + +constraints: | + - `1 <= rains.length <= 10^5` + - `0 <= rains[i] <= 10^9` + +examples: + - input: "rains = [1,2,3,4]" + output: "[-1,-1,-1,-1]" + explanation: "No dry days needed. 
Lakes 1, 2, 3, 4 each fill once without any lake being rained on twice." + - input: "rains = [1,2,0,0,2,1]" + output: "[-1,-1,2,1,-1,-1]" + explanation: "On day 3, we dry lake 2. On day 4, we dry lake 1. This prevents floods when lakes 2 and 1 are rained on again on days 5 and 6." + - input: "rains = [1,2,0,1,2]" + output: "[]" + explanation: "After day 2, lakes 1 and 2 are full. We only have one dry day (day 3). On days 4 and 5, both lakes 1 and 2 are rained on again. We can only dry one, so a flood is unavoidable." + +explanation: + intuition: | + Imagine you're a city planner with weather forecasts for the coming days. You know exactly when each lake will be rained on, and on dry days, you get to send a crew to empty one lake. + + The key insight is that **not all dry days are equal**. When a lake is about to flood (because it's full and will be rained on again), you need a dry day *between* the two rain events for that lake. This is a scheduling problem: you must match dry days to the right lakes. + + Think of it like this: when you see rain coming for lake X, and lake X is already full, you need to look *backwards* and find a dry day you haven't used yet that falls *after* the last time lake X was filled. You're essentially doing just-in-time scheduling — you don't decide what to dry until you *need* to dry it. + + This is where **greedy + binary search** shines. We save up our dry days, and when a flood is imminent, we find the earliest available dry day that can prevent it. Using the earliest valid day is optimal because it preserves later dry days for future emergencies. 
+ + approach: | + We use a **Greedy with Binary Search** approach to optimally schedule which lakes to dry: + + **Step 1: Initialise data structures** + + - `full_lakes`: A hash map storing `{lake_number: day_it_was_last_filled}` — tracks which lakes are currently full + - `dry_days`: A sorted list of day indices where `rains[i] == 0` — our available dry days to use + - `result`: Output array initialised with `-1` (we'll update dry day values later) + +   + + **Step 2: Iterate through each day** + + - If `rains[i] == 0` (dry day): + - Add day `i` to our `dry_days` list (we'll decide later what to dry) + - For now, set `result[i] = 1` (placeholder — any lake number works if we end up not needing it) + + - If `rains[i] > 0` (rain day for lake `rains[i]`): + - Check if this lake is already in `full_lakes` + - If **not full**: add it to `full_lakes` with the current day index + - If **already full**: we need to dry it before today + - Binary search in `dry_days` for the smallest day index > when the lake was last filled + - If no such day exists, return `[]` (flood is unavoidable) + - Otherwise, use that dry day: set `result[dry_day] = lake_number`, remove the day from `dry_days` + - Update `full_lakes[lake]` to the current day + +   + + **Step 3: Return the result** + + - If we processed all days without returning early, return `result` + +   + + The greedy choice is to use the **earliest valid dry day** when a flood is imminent. This is optimal because it maximises flexibility for future scheduling. + + common_pitfalls: + - title: Pre-assigning Dry Days + description: | + A common mistake is trying to decide what lake to dry *on* a dry day. But you don't have enough information yet — you don't know which lakes will need drying in the future. + + For example, with `rains = [1, 0, 1]`, if you arbitrarily dry lake 1 on day 2, great! But with `rains = [1, 0, 2, 1]`, you don't know on day 2 whether you'll need that dry day for lake 1 or some other lake. 
+ + The correct approach is to *defer* the decision until you actually need to prevent a flood. + wrong_approach: "Decide what to dry immediately on dry days" + correct_approach: "Save dry days and assign them when needed" + + - title: Using Any Available Dry Day + description: | + When a lake is about to flood, you might think any unused dry day would work. But the dry day must occur *after* the lake was last filled. + + For example, with `rains = [0, 1, 1]`: + - Day 0 is dry + - Day 1 fills lake 1 + - Day 2 rains on lake 1 again + + You cannot use day 0 to dry lake 1 — the lake wasn't even full yet! The dry day must be between the two rain events. This is why binary search is needed to find the first dry day **after** the lake was filled. + wrong_approach: "Use any available dry day" + correct_approach: "Binary search for a dry day after the lake was filled" + + - title: Linear Search for Dry Days + description: | + With up to `10^5` days and potentially many dry days to search through, a linear search for each flood prevention would result in O(n²) time complexity. + + Using a sorted list with binary search (or a balanced BST / SortedList in Python) reduces each lookup to O(log n), making the overall algorithm O(n log n). + wrong_approach: "Linear scan through dry days each time" + correct_approach: "Binary search in a sorted structure" + + key_takeaways: + - "**Deferred decision-making**: Don't assign resources until you know they're needed. Saving dry days and using them just-in-time gives maximum flexibility." + - "**Greedy + Binary Search**: When scheduling limited resources, use the earliest valid option to preserve later options for future needs." + - "**Hash map for state tracking**: `full_lakes` provides O(1) lookup to check if a lake is full and when it was last filled." + - "**Similar problems**: This pattern of matching resources to constraints appears in interval scheduling, task assignment, and meeting room problems." 
+ + time_complexity: "O(n log n). Each of the n days is processed once, and dry day lookups use binary search (O(log n)). Insertions and deletions in a sorted structure are O(log n)." + space_complexity: "O(n). The hash map `full_lakes` and sorted list `dry_days` each store at most n entries." + +solutions: + - approach_name: Greedy with Binary Search + is_optimal: true + code: | + from sortedcontainers import SortedList + + def avoid_flood(rains: list[int]) -> list[int]: + n = len(rains) + result = [-1] * n + + # Track which lakes are full: {lake_id: day_it_was_filled} + full_lakes = {} + + # Sorted list of available dry day indices + dry_days = SortedList() + + for day in range(n): + lake = rains[day] + + if lake == 0: + # Dry day - save it for later, use placeholder value + dry_days.add(day) + result[day] = 1 # Placeholder (dry any lake) + else: + # Rain day for this lake + if lake in full_lakes: + # Lake is already full - we need to dry it! + last_filled = full_lakes[lake] + + # Find the earliest dry day AFTER the lake was filled + idx = dry_days.bisect_right(last_filled) + + if idx == len(dry_days): + # No valid dry day exists - flood is unavoidable + return [] + + # Use this dry day to dry the lake + dry_day = dry_days[idx] + result[dry_day] = lake + dry_days.remove(dry_day) + + # Mark the lake as full (or update when it was filled) + full_lakes[lake] = day + + return result + explanation: | + **Time Complexity:** O(n log n) — Each day is processed once. Binary search and sorted list operations are O(log n). + + **Space Complexity:** O(n) — Hash map and sorted list store at most n elements. + + We use `SortedList` from the `sortedcontainers` library for efficient binary search with insertion/deletion. When a lake is about to flood, we find the earliest dry day after it was filled. If no such day exists, a flood is unavoidable. 
+ + - approach_name: Greedy with Bisect on a Plain List (Alternative) + is_optimal: false + code: | + # Standard library only: bisect provides the binary search + from bisect import bisect_right + + def avoid_flood(rains: list[int]) -> list[int]: + n = len(rains) + result = [-1] * n + + # Track which lakes are full: {lake_id: day_it_was_filled} + full_lakes = {} + + # List of dry days (will use bisect for searching) + dry_days = [] + + for day in range(n): + lake = rains[day] + + if lake == 0: + dry_days.append(day) + result[day] = 1 # Placeholder + else: + if lake in full_lakes: + last_filled = full_lakes[lake] + + # Binary search for first dry day > last_filled + idx = bisect_right(dry_days, last_filled) + + if idx == len(dry_days): + return [] + + # Use and remove this dry day + dry_day = dry_days.pop(idx) + result[dry_day] = lake + + full_lakes[lake] = day + + return result + explanation: | + **Time Complexity:** O(n²) in worst case — While binary search is O(log n), `list.pop(idx)` is O(n) for middle elements. + + **Space Complexity:** O(n) — Same storage requirements. + + This uses Python's built-in `bisect` module instead of `SortedList`. It's simpler but less efficient because removing from the middle of a list is O(n). For interview purposes, this solution is often acceptable if you explain the trade-off and mention that a balanced BST or `SortedList` would improve it.