From b0d4f16505afd5d743443d54065b80d6893d4b60 Mon Sep 17 00:00:00 2001
From: hoang
Date: Tue, 12 Aug 2025 12:02:22 -0700
Subject: [PATCH 1/2] refactor solution

---
 .../solution.py | 59 ++++++++++---------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py b/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py
index fa4579b3..5d5f1634 100644
--- a/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py
+++ b/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py
@@ -1,32 +1,33 @@
 import numpy as np

-def gradient_descent(X, y, weights, learning_rate, n_iterations, batch_size=1, method='batch'):
-    m = len(y)
-
+def gradient_descent(
+    X: np.ndarray,
+    y: np.ndarray,
+    weights: np.ndarray,
+    learning_rate: float,
+    n_iterations: int,
+    batch_size: int = 1,
+    method: str = "batch",
+):
+    m: int = X.shape[0]
+    n: int = X.shape[1]
+    w = np.zeros((n, 1))
+
+    match method:
+        case "batch":
+            batch_size: int = m
+        case "stochastic":
+            batch_size: int = 1
+        case "mini_batch":
+            batch_size: int = batch_size
+        case _:
+            return w
+
     for _ in range(n_iterations):
-        if method == 'batch':
-            # Calculate the gradient using all data points
-            predictions = X.dot(weights)
-            errors = predictions - y
-            gradient = 2 * X.T.dot(errors) / m
-            weights = weights - learning_rate * gradient
-
-        elif method == 'stochastic':
-            # Update weights for each data point individually
-            for i in range(m):
-                prediction = X[i].dot(weights)
-                error = prediction - y[i]
-                gradient = 2 * X[i].T.dot(error)
-                weights = weights - learning_rate * gradient
-
-        elif method == 'mini_batch':
-            # Update weights using sequential batches of data points without shuffling
-            for i in range(0, m, batch_size):
-                X_batch = X[i:i+batch_size]
-                y_batch = y[i:i+batch_size]
-                predictions = X_batch.dot(weights)
-                errors = predictions - y_batch
-                gradient = 2 * X_batch.T.dot(errors) / batch_size
-                weights = weights - learning_rate * gradient
-
-    return weights
+        for i in range(0, m, batch_size):
+            x_batch = X[i : min(i + batch_size, m), :]
+            y_batch = y[i : min(i + batch_size, m)]
+            y_hat = x_batch @ w
+            derivative = x_batch.T @ (y_hat.reshape((-1, 1)) - y_batch.reshape((-1, 1)))
+            w = w - 2 * learning_rate / batch_size * derivative
+    return w.flatten()

From 8197b6226791b77f21b88edc354da5c47530e7dd Mon Sep 17 00:00:00 2001
From: hoang
Date: Tue, 12 Aug 2025 12:04:28 -0700
Subject: [PATCH 2/2] add return type

---
 .../solution.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py b/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py
index 5d5f1634..5e296182 100644
--- a/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py
+++ b/questions/47_implement-gradient-descent-variants-with-mse-loss/solution.py
@@ -8,7 +8,7 @@ def gradient_descent(
     n_iterations: int,
     batch_size: int = 1,
     method: str = "batch",
-):
+) -> np.ndarray:
     m: int = X.shape[0]
     n: int = X.shape[1]
     w = np.zeros((n, 1))
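
Review note (not part of either patch): a minimal sketch of how the refactored gradient_descent could be sanity-checked. The module name "solution", the toy data, and the hyperparameters below are illustrative assumptions, not taken from the repository. Note also that the refactored function re-initializes the weight vector to zeros internally, so the weights argument passed by the caller is effectively unused.

import numpy as np
from solution import gradient_descent  # assumes the patched solution.py is importable as "solution"

# Toy linear problem with exact targets y = X @ [2, 3]; data chosen for illustration only.
X = np.array([[1.0, 0.0],
              [0.0, 1.0],
              [1.0, 1.0],
              [2.0, -1.0]])
y = X @ np.array([2.0, 3.0])

# Ignored by the refactor, which starts from zeros internally.
init = np.zeros(X.shape[1])

for method in ("batch", "stochastic", "mini_batch"):
    w = gradient_descent(X, y, init, learning_rate=0.05,
                         n_iterations=500, batch_size=2, method=method)
    print(method, np.round(w, 3))  # each variant should end up near [2., 3.]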