diff --git a/questions/190_gradient-accumulation/description.md b/questions/190_gradient-accumulation/description.md
new file mode 100644
index 00000000..0dd68528
--- /dev/null
+++ b/questions/190_gradient-accumulation/description.md
@@ -0,0 +1,3 @@
+## Problem
+
+Write a Python function `accumulate_gradients` that takes a list of NumPy arrays (each representing the gradients from one mini-batch) and returns a single NumPy array containing the element-wise sum of all gradients. All input arrays will have the same shape. Use only standard Python and NumPy. The returned array should have a float dtype and the same shape as the input arrays.
diff --git a/questions/190_gradient-accumulation/example.json b/questions/190_gradient-accumulation/example.json
new file mode 100644
index 00000000..e53b0514
--- /dev/null
+++ b/questions/190_gradient-accumulation/example.json
@@ -0,0 +1,5 @@
+{
+    "input": "import numpy as np\ngrad_list = [np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])]\naccum = accumulate_gradients(grad_list)\nprint(accum)",
+    "output": "[ 9. 12.]",
+    "reasoning": "The sum of [1.0, 2.0], [3.0, 4.0], and [5.0, 6.0] is [9.0, 12.0]. Each element is summed across all arrays."
+}
diff --git a/questions/190_gradient-accumulation/learn.md b/questions/190_gradient-accumulation/learn.md
new file mode 100644
index 00000000..e137bf1c
--- /dev/null
+++ b/questions/190_gradient-accumulation/learn.md
@@ -0,0 +1,55 @@
+# **Gradient Accumulation**
+
+## **1. Definition**
+Gradient accumulation is a technique used in machine learning to simulate larger batch sizes by accumulating gradients over multiple mini-batches before performing an optimizer step. Instead of updating the model parameters after every mini-batch, gradients are summed (accumulated) over several mini-batches, and the update is performed only after a specified number of accumulations.
+
+## **2. Why Use Gradient Accumulation?**
+* **Simulate Large Batch Training:** Allows training with an effective batch size larger than what fits in memory by splitting it into smaller mini-batches.
+* **Stabilize Training:** Larger effective batch sizes can lead to more stable gradient estimates and smoother convergence.
+* **Hardware Constraints:** Useful when GPU/TPU memory is limited and cannot accommodate large batches directly.
+
+## **3. Gradient Accumulation Mechanism**
+Given a list of gradient arrays $g_1, g_2, \ldots, g_N$ (from $N$ mini-batches), the accumulated gradient $G$ is computed as:
+
+$$
+G = \sum_{i=1}^N g_i
+$$
+
+Where:
+* $g_i$: The gradient array from the $i$-th mini-batch (numpy array)
+* $N$: The number of mini-batches to accumulate
+* $G$: The accumulated gradient (numpy array of the same shape)
+
+**Example:**
+If $g_1 = [1, 2]$, $g_2 = [3, 4]$, $g_3 = [5, 6]$:
+* $G = [1+3+5, 2+4+6] = [9, 12]$
+
+## **4. Applications of Gradient Accumulation**
+Gradient accumulation is widely used in training:
+* **Large Models:** When training large models that require large batch sizes for stability or convergence.
+* **Distributed Training:** To reduce communication overhead by synchronizing gradients once per accumulation window instead of after every mini-batch.
+* **Memory-Constrained Environments:** When hardware cannot fit the desired batch size in memory.
+* **Any optimization problem** where the effective batch size needs to be increased without increasing memory usage.
+
+Gradient accumulation is a simple yet powerful tool to enable flexible and efficient training in modern machine learning workflows.
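+
+## **5. Minimal Training-Loop Sketch**
+As an illustration (not part of the graded problem), the sketch below shows where accumulation sits in a training loop; the random gradient stream, learning rate, and parameter vector are made-up stand-ins for a real model. Note that in practice the accumulated sum is often divided by $N$ (or each mini-batch loss is scaled by $1/N$) so the update matches true large-batch training; this problem only asks for the raw sum.
+
+```python
+import numpy as np
+
+rng = np.random.default_rng(0)
+
+params = np.zeros(3)        # toy parameter vector
+lr = 0.01                   # toy learning rate
+accumulation_steps = 4      # number of mini-batches per optimizer step
+
+accum = np.zeros_like(params)
+for step in range(1, 13):
+    g = rng.normal(size=params.shape)  # stand-in for a real mini-batch gradient
+    accum += g                         # accumulate instead of updating immediately
+    if step % accumulation_steps == 0:
+        params -= lr * accum           # one parameter update per accumulation window
+        accum[:] = 0.0                 # reset the accumulator for the next window
+```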
diff --git a/questions/190_gradient-accumulation/meta.json b/questions/190_gradient-accumulation/meta.json
new file mode 100644
index 00000000..7d921bfe
--- /dev/null
+++ b/questions/190_gradient-accumulation/meta.json
@@ -0,0 +1,15 @@
+{
+    "id": "190",
+    "title": "Gradient Accumulation",
+    "difficulty": "easy",
+    "category": "Machine Learning",
+    "video": "",
+    "likes": "0",
+    "dislikes": "0",
+    "contributor": [
+        {
+            "profile_link": "https://github.com/komaksym",
+            "name": "komaksym"
+        }
+    ]
+}
diff --git a/questions/190_gradient-accumulation/pytorch/solution.py b/questions/190_gradient-accumulation/pytorch/solution.py
new file mode 100644
index 00000000..9b74bcbd
--- /dev/null
+++ b/questions/190_gradient-accumulation/pytorch/solution.py
@@ -0,0 +1,2 @@
+def your_function(...):
+    ...
diff --git a/questions/190_gradient-accumulation/pytorch/starter_code.py b/questions/190_gradient-accumulation/pytorch/starter_code.py
new file mode 100644
index 00000000..d3e5beb5
--- /dev/null
+++ b/questions/190_gradient-accumulation/pytorch/starter_code.py
@@ -0,0 +1,2 @@
+def your_function(...):
+    pass
diff --git a/questions/190_gradient-accumulation/pytorch/tests.json b/questions/190_gradient-accumulation/pytorch/tests.json
new file mode 100644
index 00000000..e4e4b180
--- /dev/null
+++ b/questions/190_gradient-accumulation/pytorch/tests.json
@@ -0,0 +1,6 @@
+[
+    {
+        "test": "print(your_function(...))",
+        "expected_output": "..."
+    }
+]
diff --git a/questions/190_gradient-accumulation/solution.py b/questions/190_gradient-accumulation/solution.py
new file mode 100644
index 00000000..5f4bd817
--- /dev/null
+++ b/questions/190_gradient-accumulation/solution.py
@@ -0,0 +1,13 @@
+import numpy as np
+
+def accumulate_gradients(grad_list):
+    """
+    Accumulates (sums) a list of gradient arrays into a single array.
+
+    Args:
+        grad_list (list of np.ndarray): List of gradient arrays, all of the same shape.
+
+    Returns:
+        np.ndarray: The accumulated (summed) gradients, same shape as input arrays.
+    """
+    return np.sum(grad_list, axis=0).astype(float)
diff --git a/questions/190_gradient-accumulation/starter_code.py b/questions/190_gradient-accumulation/starter_code.py
new file mode 100644
index 00000000..af0a948c
--- /dev/null
+++ b/questions/190_gradient-accumulation/starter_code.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+# Implement your function below.
+def accumulate_gradients(grad_list):
+    """
+    Accumulates (sums) a list of gradient arrays into a single array.
+
+    Args:
+        grad_list (list of np.ndarray): List of gradient arrays, all of the same shape.
+
+    Returns:
+        np.ndarray: The accumulated (summed) gradients, same shape as input arrays.
+    """
+    pass
diff --git a/questions/190_gradient-accumulation/tests.json b/questions/190_gradient-accumulation/tests.json
new file mode 100644
index 00000000..55d2bf67
--- /dev/null
+++ b/questions/190_gradient-accumulation/tests.json
@@ -0,0 +1,22 @@
+[
+    {
+        "test": "import numpy as np\ngrad_list = [np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])]\nprint(accumulate_gradients(grad_list))",
+        "expected_output": "[ 9. 12.]"
+    },
+    {
+        "test": "import numpy as np\ngrad_list = [np.array([0.0, 0.0]), np.array([0.0, 0.0])]\nprint(accumulate_gradients(grad_list))",
+        "expected_output": "[0. 0.]"
+    },
+    {
+        "test": "import numpy as np\ngrad_list = [np.array([1.5, -2.5]), np.array([-1.5, 2.5])]\nprint(accumulate_gradients(grad_list))",
+        "expected_output": "[0. 0.]"
0.]" + }, + { + "test": "import numpy as np\ngrad_list = [np.array([10.0]), np.array([20.0]), np.array([30.0])]\nprint(accumulate_gradients(grad_list))", + "expected_output": "[60.]" + }, + { + "test": "import numpy as np\ngrad_list = [np.array([1.0, 2.0, 3.0]), np.array([4.0, 5.0, 6.0])]\nprint(accumulate_gradients(grad_list))", + "expected_output": "[5. 7. 9.]" + } +] diff --git a/questions/190_gradient-accumulation/tinygrad/solution.py b/questions/190_gradient-accumulation/tinygrad/solution.py new file mode 100644 index 00000000..9b74bcbd --- /dev/null +++ b/questions/190_gradient-accumulation/tinygrad/solution.py @@ -0,0 +1,2 @@ +def your_function(...): + ... diff --git a/questions/190_gradient-accumulation/tinygrad/starter_code.py b/questions/190_gradient-accumulation/tinygrad/starter_code.py new file mode 100644 index 00000000..d3e5beb5 --- /dev/null +++ b/questions/190_gradient-accumulation/tinygrad/starter_code.py @@ -0,0 +1,2 @@ +def your_function(...): + pass diff --git a/questions/190_gradient-accumulation/tinygrad/tests.json b/questions/190_gradient-accumulation/tinygrad/tests.json new file mode 100644 index 00000000..e4e4b180 --- /dev/null +++ b/questions/190_gradient-accumulation/tinygrad/tests.json @@ -0,0 +1,6 @@ +[ + { + "test": "print(your_function(...))", + "expected_output": "..." + } +]