From 482672711538628e7291130fad1970ad6ce9204e Mon Sep 17 00:00:00 2001 From: LeTienQuyet <21520428@gm.uit.edu.vn> Date: Sun, 19 Oct 2025 10:32:39 +0700 Subject: [PATCH] Add question 189: Implement Pixel Normalization --- .../189_pixelnormalization/description.md | 2 ++ questions/189_pixelnormalization/example.json | 5 ++++ questions/189_pixelnormalization/learn.md | 25 +++++++++++++++++++ questions/189_pixelnormalization/meta.json | 15 +++++++++++ questions/189_pixelnormalization/solution.py | 4 +++ .../189_pixelnormalization/starter_code.py | 9 +++++++ questions/189_pixelnormalization/tests.json | 10 ++++++++ 7 files changed, 70 insertions(+) create mode 100644 questions/189_pixelnormalization/description.md create mode 100644 questions/189_pixelnormalization/example.json create mode 100644 questions/189_pixelnormalization/learn.md create mode 100644 questions/189_pixelnormalization/meta.json create mode 100644 questions/189_pixelnormalization/solution.py create mode 100644 questions/189_pixelnormalization/starter_code.py create mode 100644 questions/189_pixelnormalization/tests.json diff --git a/questions/189_pixelnormalization/description.md b/questions/189_pixelnormalization/description.md new file mode 100644 index 00000000..e867d364 --- /dev/null +++ b/questions/189_pixelnormalization/description.md @@ -0,0 +1,2 @@ +## Problem +Write a Python function to perform Pixel Normalization on a 4D input tensor with shape (B, C, H, W). The function should normalize each pixel across all channels by dividing its values by the square root of the mean squared activation across channels. 
\ No newline at end of file diff --git a/questions/189_pixelnormalization/example.json b/questions/189_pixelnormalization/example.json new file mode 100644 index 00000000..d782263e --- /dev/null +++ b/questions/189_pixelnormalization/example.json @@ -0,0 +1,5 @@ +{ + "input": "X.shape = (2, 2, 2, 2)", + "output": "Normalized tensor of shape (2, 2, 2, 2) where each spatial location is normalized across channels", + "reasoning": "For each spatial location, compute the mean of squared activations across all channels, take the square root to obtain the RMS value, and divide each channel’s activation by this value." +} diff --git a/questions/189_pixelnormalization/learn.md b/questions/189_pixelnormalization/learn.md new file mode 100644 index 00000000..94ca8904 --- /dev/null +++ b/questions/189_pixelnormalization/learn.md @@ -0,0 +1,25 @@ +## Understanding Pixel Normalization +Pixel Normalization (PN) is a normalization technique that normalizes feature vectors at each spatial location across channels. Pixel Normalization is particularly useful in generative models such as Progressive GANs, where it helps control feature magnitudes and promotes consistent feature scaling during training. +### Mathematical Definition +For an input tensor with the shape **(B, C, H, W)**, where: +* B: batch size +* C: number of channels +* H: height +* W: width +The normalization for each pixel at spatial position *(h, w)* is computed as follows: +$$ +x'_{b, c, h, w} = \frac{x_{b,c,h,w}}{\sqrt{\frac{1}{C}\sum_{i=1}^C x^2_{b,i,h,w}+\epsilon}} +$$ +where: +* $x_{b,c,h,w}$ is the pixel value of channel *c* at position *(h, w)* for sample *b*. +* $\epsilon$ is a small constant added for numerical stability (e.g., $10^{-8}$). 
+ +This operation ensures that for every spatial *(h, w)*, the vector $[x'_{b, 1, h, w}, x'_{b, 2, h, w}, \ldots, x'_{b, C, h, w}]$ has unit root-mean-square (RMS) magnitude, i.e.: +$$ +\frac{1}{C}\sum_{i=1}^C (x'_{b, i, h, w})^2 = 1 +$$ +### Why Pixel Normalization +* **Batch size independence**: Pixel Normalization does not rely on batch-level statistics such as mean or variance, making it suitable for training with very small batch sizes, even batch size = 1. +* **Training stability**: Removing batch dependencies leads to smoother convergence and more deterministic training behavior, especially in GANs. +* **Stable feature scaling**: By normalizing each pixel across channels, it prevents the uncontrolled growth of activations, ensuring consistent feature magnitudes. +* **No parameters**: No learnable parameters ($\gamma$, $\beta$), reducing computational overhead while maintaining effectiveness in deep generative networks. \ No newline at end of file diff --git a/questions/189_pixelnormalization/meta.json b/questions/189_pixelnormalization/meta.json new file mode 100644 index 00000000..8b6aa218 --- /dev/null +++ b/questions/189_pixelnormalization/meta.json @@ -0,0 +1,15 @@ +{ + "id": "189", + "title": "Implement Pixel Normalization", + "difficulty": "easy", + "category": "Deep Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/LeTienQuyet", + "name": "LeTienQuyet" + } + ] +} \ No newline at end of file diff --git a/questions/189_pixelnormalization/solution.py b/questions/189_pixelnormalization/solution.py new file mode 100644 index 00000000..ae38b4e2 --- /dev/null +++ b/questions/189_pixelnormalization/solution.py @@ -0,0 +1,4 @@ +import numpy as np + +def pixel_normalization(X: np.ndarray, eps: float = 1e-8) -> np.ndarray: + return X / np.sqrt(np.mean(X**2, axis=1, keepdims=True) + eps) \ No newline at end of file diff --git a/questions/189_pixelnormalization/starter_code.py 
b/questions/189_pixelnormalization/starter_code.py new file mode 100644 index 00000000..b4b2a67c --- /dev/null +++ b/questions/189_pixelnormalization/starter_code.py @@ -0,0 +1,9 @@ +import numpy as np + +def pixel_normalization(X: np.ndarray, eps: float = 1e-8) -> np.ndarray: + """ + Perform pixel normalization on the input array X. + Each pixel value is divided by the square root of the mean of the squared pixel values + across the channel dimension (axis 1), plus a small epsilon for numerical stability.""" + # Your code here + pass \ No newline at end of file diff --git a/questions/189_pixelnormalization/tests.json b/questions/189_pixelnormalization/tests.json new file mode 100644 index 00000000..e6ee6252 --- /dev/null +++ b/questions/189_pixelnormalization/tests.json @@ -0,0 +1,10 @@ +[ + { + "test": "np.random.seed(42)\nB, C, H, W = 2, 2, 2, 2\nX = np.random.randn(B, C, H, W)\noutput = pixel_normalization(X)\nprint(np.round(output, 4))", + "expected_output": "[[[[1.2792, -0.7191], [0.5366, 1.2629]], [[-0.603, -1.2177], [1.3084, 0.6364]]], [[[-1.2571, 0.3858], [-0.3669, -0.9021]], [[0.6479, -1.3606], [-1.3658, -1.0891]]]]" + }, + { + "test": "np.random.seed(42)\nB, C, H, W = 2, 2, 2, 1\nX = np.random.randn(B, C, H, W)\noutput = pixel_normalization(X)\nprint(np.round(output, 4))", + "expected_output": "[[[[0.8606], [-0.1279]], [[1.1222], [1.4084]]], [[[-0.2074],[-0.4127]], [[1.3989], [1.3527]]]]" + } +] \ No newline at end of file