From 5e2a9a96a8971b824e6f97333a45f2daff11da06 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 15 Sep 2025 19:23:17 +0530 Subject: [PATCH 01/11] Added Two New Questions(Probability) --- build/1.json | 12 +++-- build/174.json | 51 +++++++++++++++++++ build/2.json | 8 +-- build/3.json | 8 +-- .../description.md | 3 ++ .../example.json | 5 ++ .../learn.md | 24 +++++++++ .../meta.json | 15 ++++++ .../solution.py | 14 +++++ .../starter_code.py | 7 +++ .../tests.json | 18 +++++++ .../description.md | 16 ++++++ .../example.json | 5 ++ .../183_pmf_normalization_constant 2/learn.md | 16 ++++++ .../meta.json | 15 ++++++ .../solution.py | 14 +++++ .../starter_code.py | 6 +++ .../tests.json | 10 ++++ 18 files changed, 235 insertions(+), 12 deletions(-) create mode 100644 build/174.json create mode 100644 questions/182_empirical_probability_mass_function_(pmf)/description.md create mode 100644 questions/182_empirical_probability_mass_function_(pmf)/example.json create mode 100644 questions/182_empirical_probability_mass_function_(pmf)/learn.md create mode 100644 questions/182_empirical_probability_mass_function_(pmf)/meta.json create mode 100644 questions/182_empirical_probability_mass_function_(pmf)/solution.py create mode 100644 questions/182_empirical_probability_mass_function_(pmf)/starter_code.py create mode 100644 questions/182_empirical_probability_mass_function_(pmf)/tests.json create mode 100644 questions/183_pmf_normalization_constant 2/description.md create mode 100644 questions/183_pmf_normalization_constant 2/example.json create mode 100644 questions/183_pmf_normalization_constant 2/learn.md create mode 100644 questions/183_pmf_normalization_constant 2/meta.json create mode 100644 questions/183_pmf_normalization_constant 2/solution.py create mode 100644 questions/183_pmf_normalization_constant 2/starter_code.py create mode 100644 questions/183_pmf_normalization_constant 2/tests.json diff --git a/build/1.json b/build/1.json index ad432627..09ddad8e 100644 --- a/build/1.json +++ b/build/1.json @@ -37,16 +37,20 @@ "expected_output": "[5.5, 10.0]" } ], - "tinygrad_starter_code": "from tinygrad.tensor import Tensor\n\ndef matrix_dot_vector_tg(a, b) -> Tensor:\n \"\"\"\n Compute the product of matrix `a` and vector `b` using tinygrad.\n Inputs can be Python lists, NumPy arrays, or tinygrad Tensors.\n Returns a 1-D Tensor of length m, or Tensor(-1) if dimensions mismatch.\n \"\"\"\n # Dimension mismatch check\n if len(a[0]) != len(b):\n return Tensor(-1)\n # Convert to Tensor\n a_t = Tensor(a)\n b_t = Tensor(b)\n # Your implementation here\n pass", - "tinygrad_solution": "from tinygrad.tensor import Tensor\n\ndef matrix_dot_vector_tg(a, b) -> Tensor:\n \"\"\"\n Compute the product of matrix `a` and vector `b` using tinygrad.\n Inputs can be Python lists, NumPy arrays, or tinygrad Tensors.\n Returns a 1-D Tensor of length m, or Tensor(-1) if dimensions mismatch.\n \"\"\"\n if len(a[0]) != len(b):\n return Tensor(-1)\n a_t = Tensor(a)\n b_t = Tensor(b)\n return a_t.matmul(b_t)", + "tinygrad_starter_code": "from tinygrad.tensor import Tensor\n\ndef matrix_dot_vector_tg(a:Tensor, b:Tensor) -> Tensor:\n \"\"\"\n Compute the product of matrix `a` and vector `b` using tinygrad.\n Will be tinygrad Tensors.\n Returns a 1-D Tensor of length m, or Tensor(-1) if dimensions mismatch.\n \"\"\"\n pass", + "tinygrad_solution": "from tinygrad.tensor import Tensor\n\ndef matrix_dot_vector_tg(a: Tensor, b: Tensor) -> Tensor:\n \"\"\"\n Compute the product of matrix `a` and vector `b` using tinygrad.\n 
Inputs will be tinygrad Tensors.\n Returns a 1-D Tensor of length m, or Tensor(-1) if dimensions mismatch.\n \"\"\"\n if len(a[0]) != len(b):\n return Tensor(-1)\n return a @ b", "tinygrad_test_cases": [ { - "test": "from tinygrad.tensor import Tensor\nres = matrix_dot_vector_tg(\n [[1,2,3],[2,4,5],[6,8,9]],\n [1,2,3]\n)\nprint(res.numpy().tolist())", + "test": "from tinygrad.tensor import Tensor\nres = matrix_dot_vector_tg(\n Tensor([[1,2,3],[2,4,5],[6,8,9]]),\n Tensor([1,2,3])\n)\nprint(res.numpy().tolist())", "expected_output": "[14.0, 25.0, 49.0]" }, { - "test": "from tinygrad.tensor import Tensor\nres = matrix_dot_vector_tg(\n [[1,2,3],[2,4,5]],\n [1,2]\n)\nprint(res.numpy().tolist())", + "test": "from tinygrad.tensor import Tensor\nres = matrix_dot_vector_tg(\n Tensor([[1,2,3],[2,4,5]]),\n Tensor([1,2])\n)\nprint(res.numpy().tolist())", "expected_output": "-1" + }, + { + "test": "from tinygrad.tensor import Tensor\nres = matrix_dot_vector_tg(\n Tensor([[1, 2], [2, 4]]),\n Tensor([1, 2])\n)\nprint(res.numpy().tolist())", + "expected_output": "[5, 10]" } ], "pytorch_starter_code": "import torch\n\ndef matrix_dot_vector(a, b) -> torch.Tensor:\n \"\"\"\n Compute the product of matrix `a` and vector `b` using PyTorch.\n Inputs can be Python lists, NumPy arrays, or torch Tensors.\n Returns a 1-D tensor of length m, or tensor(-1) if dimensions mismatch.\n \"\"\"\n a_t = torch.as_tensor(a, dtype=torch.float)\n b_t = torch.as_tensor(b, dtype=torch.float)\n # Dimension mismatch check\n if a_t.size(1) != b_t.size(0):\n return torch.tensor(-1)\n # Your implementation here\n pass", diff --git a/build/174.json b/build/174.json new file mode 100644 index 00000000..7e51f036 --- /dev/null +++ b/build/174.json @@ -0,0 +1,51 @@ +{ + "id": "174", + "title": "Train a Simple GAN on 1D Gaussian Data", + "difficulty": "hard", + "category": "Deep Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/moe18", + "name": "moe" + } + ], + "pytorch_difficulty": "medium", + "description": "In this task, you will train a Generative Adversarial Network (GAN) to learn a one-dimensional Gaussian distribution. The GAN consists of a generator that produces samples from latent noise and a discriminator that estimates the probability that a given sample is real. Both networks should have one hidden layer with ReLU activation in the hidden layer. The generator’s output layer is linear, while the discriminator's output layer uses a sigmoid activation.\n\nYou must train the GAN using the standard non-saturating GAN loss for the generator and binary cross-entropy loss for the discriminator. In the NumPy version, parameters should be updated using vanilla gradient descent. In the PyTorch version, parameters should be updated using stochastic gradient descent (SGD) with the specified learning rate. The training loop should alternate between updating the discriminator and the generator each iteration.\n\nYour function must return the trained generator forward function `gen_forward(z)`, which produces generated samples given latent noise.", + "learn_section": "## Understanding GANs for 1D Gaussian Data\nA Generative Adversarial Network (GAN) consists of two neural networks - a **Generator** $G_\\theta$ and a **Discriminator** $D_\\phi$ - trained in a minimax game.\n\n### 1. 
The Roles\n- **Generator** $G_\\theta(z)$: Takes a latent noise vector $z \\sim \\mathcal{N}(0, I)$ and outputs a sample intended to resemble the real data.\n- **Discriminator** $D_\\phi(x)$: Outputs a probability $p \\in (0, 1)$ that the input $x$ came from the real data distribution rather than the generator.\n\n### 2. The Objective\nThe classical GAN objective is:\n$$\n\\min_{\\theta} \\; \\max_{\\phi} \\; \\mathbb{E}_{x \\sim p_{\\text{data}}} [\\log D_\\phi(x)] + \\mathbb{E}_{z \\sim p(z)} [\\log (1 - D_\\phi(G_\\theta(z)))]\n$$\nHere:\n- $p_{\\text{data}}$ is the real data distribution.\n- $p(z)$ is the prior distribution for the latent noise (often standard normal).\n\n### 3. Practical Losses\nIn implementation, we minimize:\n- **Discriminator loss**:\n$$\n\\mathcal{L}_D = - \\left( \\frac{1}{m} \\sum_{i=1}^m \\log D(x^{(i)}_{\\text{real}}) + \\log(1 - D(x^{(i)}_{\\text{fake}})) \\right)\n$$\n- **Generator loss** (non-saturating form):\n$$\n\\mathcal{L}_G = - \\frac{1}{m} \\sum_{i=1}^m \\log D(G(z^{(i)}))\n$$\n\n### 4. Forward/Backward Flow\n1. **Discriminator step**: Real samples $x_{\\text{real}}$ and fake samples $x_{\\text{fake}} = G(z)$ are passed through $D$, and $\\mathcal{L}_D$ is minimized w.r.t. $\\phi$.\n2. **Generator step**: Fresh $z$ is sampled, $x_{\\text{fake}} = G(z)$ is passed through $D$, and $\\mathcal{L}_G$ is minimized w.r.t. $\\theta$ while keeping $\\phi$ fixed.\n\n### 5. Architecture for This Task\n- **Generator**: Fully connected layer ($\\mathbb{R}^{\\text{latent\\_dim}} \\to \\mathbb{R}^{\\text{hidden\\_dim}}$) -> ReLU -> Fully connected layer ($\\mathbb{R}^{\\text{hidden\\_dim}} \\to \\mathbb{R}^1$).\n- **Discriminator**: Fully connected layer ($\\mathbb{R}^1 \\to \\mathbb{R}^{\\text{hidden\\_dim}}$) → ReLU → Fully connected layer ($\\mathbb{R}^{\\text{hidden\\_dim}} \\to \\mathbb{R}^1$) → Sigmoid.\n\n### 6. Numerical Tips\n- Initialize weights with a small Gaussian ($\\mathcal{N}(0, 0.01)$).\n- Add $10^{-8}$ to logs for numerical stability.\n- Use a consistent batch size $m$ for both real and fake samples.\n- Always sample fresh noise for the generator on each update.\n\n**Your Task**: Implement the training loop to learn the parameters $\\theta$ and $\\phi$, and return the trained `gen_forward(z)` function. 
The evaluation (mean/std of generated samples) will be handled in the test cases.", + "starter_code": "import numpy as np\n\ndef train_gan(mean_real: float, std_real: float, latent_dim: int = 1, hidden_dim: int = 16, learning_rate: float = 0.001, epochs: int = 5000, batch_size: int = 128, seed: int = 42):\n \"\"\"\n Train a simple GAN to learn a 1D Gaussian distribution.\n\n Args:\n mean_real: Mean of the target Gaussian\n std_real: Std of the target Gaussian\n latent_dim: Dimension of the noise input to the generator\n hidden_dim: Hidden layer size for both networks\n learning_rate: Learning rate for gradient descent\n epochs: Number of training epochs\n batch_size: Training batch size\n seed: Random seed for reproducibility\n\n Returns:\n gen_forward: A function that takes z and returns generated samples\n \"\"\"\n # Your code here\n pass", + "solution": "import numpy as np\n\ndef relu(x):\n return np.maximum(0, x)\n\ndef sigmoid(x):\n return 1 / (1 + np.exp(-x))\n\ndef train_gan(mean_real: float, std_real: float, latent_dim: int = 1, hidden_dim: int = 16, learning_rate: float = 0.001, epochs: int = 5000, batch_size: int = 128, seed: int = 42):\n np.random.seed(seed)\n data_dim = 1\n\n # Initialize generator weights\n w1_g = np.random.normal(0, 0.01, (latent_dim, hidden_dim))\n b1_g = np.zeros(hidden_dim)\n w2_g = np.random.normal(0, 0.01, (hidden_dim, data_dim))\n b2_g = np.zeros(data_dim)\n\n # Initialize discriminator weights\n w1_d = np.random.normal(0, 0.01, (data_dim, hidden_dim))\n b1_d = np.zeros(hidden_dim)\n w2_d = np.random.normal(0, 0.01, (hidden_dim, 1))\n b2_d = np.zeros(1)\n\n def disc_forward(x):\n h1 = np.dot(x, w1_d) + b1_d\n a1 = relu(h1)\n logit = np.dot(a1, w2_d) + b2_d\n p = sigmoid(logit)\n return p, logit, a1, h1\n\n def gen_forward(z):\n h1 = np.dot(z, w1_g) + b1_g\n a1 = relu(h1)\n x_gen = np.dot(a1, w2_g) + b2_g\n return x_gen, a1, h1\n\n for epoch in range(epochs):\n # Sample real data\n x_real = np.random.normal(mean_real, std_real, batch_size)[:, None]\n z = np.random.normal(0, 1, (batch_size, latent_dim))\n x_fake, _, _ = gen_forward(z)\n\n # Discriminator forward\n p_real, _, a1_real, h1_real = disc_forward(x_real)\n p_fake, _, a1_fake, h1_fake = disc_forward(x_fake)\n\n # Discriminator gradients\n grad_logit_real = - (1 - p_real) / batch_size\n grad_a1_real = grad_logit_real @ w2_d.T\n grad_h1_real = grad_a1_real * (h1_real > 0)\n grad_w1_d_real = x_real.T @ grad_h1_real\n grad_b1_d_real = np.sum(grad_h1_real, axis=0)\n grad_w2_d_real = a1_real.T @ grad_logit_real\n grad_b2_d_real = np.sum(grad_logit_real, axis=0)\n\n grad_logit_fake = p_fake / batch_size\n grad_a1_fake = grad_logit_fake @ w2_d.T\n grad_h1_fake = grad_a1_fake * (h1_fake > 0)\n grad_w1_d_fake = x_fake.T @ grad_h1_fake\n grad_b1_d_fake = np.sum(grad_h1_fake, axis=0)\n grad_w2_d_fake = a1_fake.T @ grad_logit_fake\n grad_b2_d_fake = np.sum(grad_logit_fake, axis=0)\n\n grad_w1_d = grad_w1_d_real + grad_w1_d_fake\n grad_b1_d = grad_b1_d_real + grad_b1_d_fake\n grad_w2_d = grad_w2_d_real + grad_w2_d_fake\n grad_b2_d = grad_b2_d_real + grad_b2_d_fake\n\n w1_d -= learning_rate * grad_w1_d\n b1_d -= learning_rate * grad_b1_d\n w2_d -= learning_rate * grad_w2_d\n b2_d -= learning_rate * grad_b2_d\n\n # Generator update\n z = np.random.normal(0, 1, (batch_size, latent_dim))\n x_fake, a1_g, h1_g = gen_forward(z)\n p_fake, _, a1_d, h1_d = disc_forward(x_fake)\n\n grad_logit_fake = - (1 - p_fake) / batch_size\n grad_a1_d = grad_logit_fake @ w2_d.T\n grad_h1_d = grad_a1_d * (h1_d > 0)\n grad_x_fake = 
grad_h1_d @ w1_d.T\n\n grad_a1_g = grad_x_fake @ w2_g.T\n grad_h1_g = grad_a1_g * (h1_g > 0)\n grad_w1_g = z.T @ grad_h1_g\n grad_b1_g = np.sum(grad_h1_g, axis=0)\n grad_w2_g = a1_g.T @ grad_x_fake\n grad_b2_g = np.sum(grad_x_fake, axis=0)\n\n w1_g -= learning_rate * grad_w1_g\n b1_g -= learning_rate * grad_b1_g\n w2_g -= learning_rate * grad_w2_g\n b2_g -= learning_rate * grad_b2_g\n\n return gen_forward", + "example": { + "input": "gen_forward = train_gan(4.0, 1.25, epochs=1000, seed=42)\nz = np.random.normal(0, 1, (500, 1))\nx_gen, _, _ = gen_forward(z)\n(round(np.mean(x_gen), 4), round(np.std(x_gen), 4))", + "output": "(0.0004, 0.0002)", + "reasoning": "The test cases call `gen_forward` after training, sample 500 points, and then compute the mean and std." + }, + "test_cases": [ + { + "test": "gen_forward = train_gan(4.0, 1.25, epochs=1000, seed=42)\nz = np.random.normal(0, 1, (500, 1))\nx_gen, _, _ = gen_forward(z)\nprint((round(np.mean(x_gen), 4), round(np.std(x_gen), 4)))", + "expected_output": "(0.0004, 0.0002)" + }, + { + "test": "gen_forward = train_gan(0.0, 1.0, epochs=500, seed=0)\nz = np.random.normal(0, 1, (300, 1))\nx_gen, _, _ = gen_forward(z)\nprint((round(np.mean(x_gen), 4), round(np.std(x_gen), 4)))", + "expected_output": "(-0.0002, 0.0002)" + }, + { + "test": "gen_forward = train_gan(-2.0, 0.5, epochs=1500, seed=123)\nz = np.random.normal(0, 1, (400, 1))\nx_gen, _, _ = gen_forward(z)\nprint((round(np.mean(x_gen), 4), round(np.std(x_gen), 4)))", + "expected_output": "(-0.0044, 0.0002)" + } + ], + "pytorch_starter_code": "import torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef train_gan(mean_real: float, std_real: float, latent_dim: int = 1, hidden_dim: int = 16, learning_rate: float = 0.001, epochs: int = 5000, batch_size: int = 128, seed: int = 42):\n torch.manual_seed(seed)\n # Your PyTorch implementation here\n pass", + "pytorch_solution": "import torch\nimport torch.nn as nn\nimport torch.optim as optim\n\ndef train_gan(mean_real: float, std_real: float, latent_dim: int = 1, hidden_dim: int = 16, learning_rate: float = 0.001, epochs: int = 5000, batch_size: int = 128, seed: int = 42):\n torch.manual_seed(seed)\n\n class Generator(nn.Module):\n def __init__(self):\n super().__init__()\n self.net = nn.Sequential(\n nn.Linear(latent_dim, hidden_dim),\n nn.ReLU(),\n nn.Linear(hidden_dim, 1)\n )\n def forward(self, z):\n return self.net(z)\n\n class Discriminator(nn.Module):\n def __init__(self):\n super().__init__()\n self.net = nn.Sequential(\n nn.Linear(1, hidden_dim),\n nn.ReLU(),\n nn.Linear(hidden_dim, 1),\n nn.Sigmoid()\n )\n def forward(self, x):\n return self.net(x)\n\n G = Generator()\n D = Discriminator()\n\n # Use SGD as requested\n opt_G = optim.SGD(G.parameters(), lr=learning_rate)\n opt_D = optim.SGD(D.parameters(), lr=learning_rate)\n criterion = nn.BCELoss()\n\n for _ in range(epochs):\n # Real and fake batches\n real_data = torch.normal(mean_real, std_real, size=(batch_size, 1))\n noise = torch.randn(batch_size, latent_dim)\n fake_data = G(noise)\n\n # ----- Discriminator step -----\n opt_D.zero_grad()\n pred_real = D(real_data)\n pred_fake = D(fake_data.detach())\n loss_real = criterion(pred_real, torch.ones_like(pred_real))\n loss_fake = criterion(pred_fake, torch.zeros_like(pred_fake))\n loss_D = loss_real + loss_fake\n loss_D.backward()\n opt_D.step()\n\n # ----- Generator step -----\n opt_G.zero_grad()\n pred_fake = D(fake_data)\n # non-saturating generator loss: maximize log D(G(z)) -> minimize -log D(G(z))\n loss_G = 
criterion(pred_fake, torch.ones_like(pred_fake))\n loss_G.backward()\n opt_G.step()\n\n return G.forward", + "pytorch_test_cases": [ + { + "test": "gen_forward = train_gan(4.0, 1.25, epochs=100, seed=42)\nz = torch.randn(500, 1)\nx_gen = gen_forward(z)\nprint((round(x_gen.mean().item(), 4), round(x_gen.std().item(), 4)))", + "expected_output": "(0.4725, 0.3563)" + }, + { + "test": "gen_forward = train_gan(0.0, 1.0, epochs=50, seed=0)\nz = torch.randn(300, 1)\nx_gen = gen_forward(z)\nprint((round(x_gen.mean().item(), 4), round(x_gen.std().item(), 4)))", + "expected_output": "(0.0644, 0.244)" + } + ] +} \ No newline at end of file diff --git a/build/2.json b/build/2.json index 8faf4c09..e42cf6db 100644 --- a/build/2.json +++ b/build/2.json @@ -33,15 +33,15 @@ "expected_output": "[[1, 4], [2, 5], [3, 6]]" } ], - "tinygrad_starter_code": "from tinygrad.tensor import Tensor\n\ndef transpose_matrix_tg(a) -> Tensor:\n \"\"\"\n Transpose a 2D matrix `a` using tinygrad.\n Inputs can be Python lists, NumPy arrays, or tinygrad Tensors.\n Returns a transposed Tensor.\n \"\"\"\n # Convert to Tensor\n a_t = Tensor(a)\n # Your implementation here\n pass", - "tinygrad_solution": "from tinygrad.tensor import Tensor\n\ndef transpose_matrix_tg(a) -> Tensor:\n \"\"\"\n Transpose a 2D matrix `a` using tinygrad.\n Inputs can be Python lists, NumPy arrays, or tinygrad Tensors.\n Returns a transposed Tensor.\n \"\"\"\n a_t = Tensor(a)\n return a_t.transpose(0,1)", + "tinygrad_starter_code": "from tinygrad.tensor import Tensor\n\ndef transpose_matrix_tg(a:Tensor) -> Tensor:\n \"\"\"\n Transpose a 2D matrix `a` using tinygrad.\n Inputs are tinygrad Tensors.\n Returns a transposed Tensor.\n \"\"\"\n pass", + "tinygrad_solution": "from tinygrad.tensor import Tensor\n\ndef transpose_matrix_tg(a) -> Tensor:\n \"\"\"\n Transpose a 2D matrix `a` using tinygrad.\n Inputs are tinygrad Tensors.\n Returns a transposed Tensor.\n \"\"\"\n return a.T", "tinygrad_test_cases": [ { - "test": "from tinygrad.tensor import Tensor\nres = transpose_matrix_tg([[1,2,3],[4,5,6]])\nprint(res.numpy().tolist())", + "test": "from tinygrad.tensor import Tensor\nres = transpose_matrix_tg(Tensor([[1,2,3],[4,5,6]]))\nprint(res.numpy().tolist())", "expected_output": "[[1, 4], [2, 5], [3, 6]]" }, { - "test": "from tinygrad.tensor import Tensor\nres = transpose_matrix_tg([[1,2],[3,4]])\nprint(res.numpy().tolist())", + "test": "from tinygrad.tensor import Tensor\nres = transpose_matrix_tg(Tensor([[1,2],[3,4]]))\nprint(res.numpy().tolist())", "expected_output": "[[1, 3], [2, 4]]" } ], diff --git a/build/3.json b/build/3.json index 5aecd531..366f7b2a 100644 --- a/build/3.json +++ b/build/3.json @@ -46,15 +46,15 @@ "expected_output": "[[1, 2, 3, 4], [5, 6, 7, 8]]" } ], - "tinygrad_starter_code": "from tinygrad.tensor import Tensor\n\ndef reshape_matrix_tg(a, new_shape) -> Tensor:\n \"\"\"\n Reshape a 2D matrix `a` to shape `new_shape` using tinygrad.\n Inputs can be Python lists, NumPy arrays, or tinygrad Tensors.\n Returns a Tensor of shape `new_shape`, or an empty Tensor on mismatch.\n \"\"\"\n # Dimension check\n if len(a) * len(a[0]) != new_shape[0] * new_shape[1]:\n return Tensor([])\n # Convert to Tensor and reshape\n a_t = Tensor(a)\n # Your implementation here\n pass", - "tinygrad_solution": "from tinygrad.tensor import Tensor\n\ndef reshape_matrix_tg(a, new_shape) -> Tensor:\n \"\"\"\n Reshape a 2D matrix `a` to shape `new_shape` using tinygrad.\n Inputs can be Python lists, NumPy arrays, or tinygrad Tensors.\n Returns a Tensor of shape 
`new_shape`, or an empty Tensor on mismatch.\n \"\"\"\n # Dimension check\n if len(a) * len(a[0]) != new_shape[0] * new_shape[1]:\n return Tensor([])\n a_t = Tensor(a)\n return a_t.reshape(new_shape)", + "tinygrad_starter_code": "from tinygrad.tensor import Tensor\n\ndef reshape_matrix_tg(a:Tensor, new_shape:tuple) -> Tensor:\n \"\"\"\n Reshape a 2D matrix `a` to shape `new_shape` using tinygrad.\n Inputs are tinygrad Tensors.\n Returns a Tensor of shape `new_shape`, or an empty Tensor on mismatch.\n \"\"\"\n pass", + "tinygrad_solution": "from tinygrad.tensor import Tensor\n\ndef reshape_matrix_tg(a, new_shape) -> Tensor:\n \"\"\"\n Reshape a 2D matrix `a` to shape `new_shape` using tinygrad.\n Inputs are tinygrad Tensors.\n Returns a Tensor of shape `new_shape`, or an empty Tensor on mismatch.\n \"\"\"\n # Dimension check\n if len(a) * len(a[0]) != new_shape[0] * new_shape[1]:\n return Tensor([])\n return a.reshape(new_shape)", "tinygrad_test_cases": [ { - "test": "from tinygrad.tensor import Tensor\nres = reshape_matrix_tg(\n [[1,2,3],[4,5,6]],\n (3, 2)\n)\nprint(res.numpy().tolist())", + "test": "from tinygrad.tensor import Tensor\nres = reshape_matrix_tg(\n Tensor([[1,2,3],[4,5,6]]),\n (3, 2)\n)\nprint(res.numpy().tolist())", "expected_output": "[[1, 2], [3, 4], [5, 6]]" }, { - "test": "from tinygrad.tensor import Tensor\nres = reshape_matrix_tg(\n [[1,2],[3,4]],\n (3, 2)\n)\nprint(res.numpy().tolist())", + "test": "from tinygrad.tensor import Tensor\nres = reshape_matrix_tg(\n Tensor([[1,2],[3,4]]),\n (3, 2)\n)\nprint(res.numpy().tolist())", "expected_output": "[]" } ], diff --git a/questions/182_empirical_probability_mass_function_(pmf)/description.md b/questions/182_empirical_probability_mass_function_(pmf)/description.md new file mode 100644 index 00000000..9a49f526 --- /dev/null +++ b/questions/182_empirical_probability_mass_function_(pmf)/description.md @@ -0,0 +1,3 @@ +## Problem + +Given a list of integer samples drawn from a discrete distribution, implement a function to compute the empirical Probability Mass Function (PMF). The function should return a list of `(value, probability)` pairs sorted by the value in ascending order. If the input is empty, return an empty list. diff --git a/questions/182_empirical_probability_mass_function_(pmf)/example.json b/questions/182_empirical_probability_mass_function_(pmf)/example.json new file mode 100644 index 00000000..a00d5e97 --- /dev/null +++ b/questions/182_empirical_probability_mass_function_(pmf)/example.json @@ -0,0 +1,5 @@ +{ + "input": "samples = [1, 2, 2, 3, 3, 3]", + "output": "[(1, 0.16666666666666666), (2, 0.3333333333333333), (3, 0.5)]", + "reasoning": "Counts are {1:1, 2:2, 3:3} over 6 samples, so probabilities are 1/6, 2/6, and 3/6 respectively, returned sorted by value." +} diff --git a/questions/182_empirical_probability_mass_function_(pmf)/learn.md b/questions/182_empirical_probability_mass_function_(pmf)/learn.md new file mode 100644 index 00000000..923306e3 --- /dev/null +++ b/questions/182_empirical_probability_mass_function_(pmf)/learn.md @@ -0,0 +1,24 @@ + +# Learn Section + +# Probability Mass Function (PMF) — Simple Explanation + +A **probability mass function (PMF)** describes how probabilities are assigned to the possible outcomes of a **discrete random variable**. + +- It tells you the chance of each specific outcome. +- Each probability is non-negative. +- The total of all probabilities adds up to 1. 
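+
+As a quick illustration (a minimal sketch, not part of the required solution), a small hand-built PMF can be checked against these two properties:
+
+```python
+# Hypothetical example: the PMF of a fair six-sided die.
+pmf = {face: 1 / 6 for face in range(1, 7)}
+
+assert all(p >= 0 for p in pmf.values())       # every probability is non-negative
+assert abs(sum(pmf.values()) - 1.0) < 1e-9     # probabilities sum to 1
+```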
+ +## Estimating from data +If the true probabilities are unknown, you can estimate them with an **empirical PMF**: +- Count how often each outcome appears. +- Divide by the total number of observations. + +## Example +Observed sequence: `1, 2, 2, 3, 3, 3` (6 outcomes total) +- “1” appears once → estimated probability = 1/6 +- “2” appears twice → estimated probability = 2/6 = 1/3 +- “3” appears three times → estimated probability = 3/6 = 1/2 + + + \ No newline at end of file diff --git a/questions/182_empirical_probability_mass_function_(pmf)/meta.json b/questions/182_empirical_probability_mass_function_(pmf)/meta.json new file mode 100644 index 00000000..a5fc5556 --- /dev/null +++ b/questions/182_empirical_probability_mass_function_(pmf)/meta.json @@ -0,0 +1,15 @@ +{ + "id": "182", + "title": "Empirical Probability Mass Function (PMF)", + "difficulty": "easy", + "category": "Probability & Statistics", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/jeetmukherjee", + "name": "jeetmukherjee" + } + ] +} diff --git a/questions/182_empirical_probability_mass_function_(pmf)/solution.py b/questions/182_empirical_probability_mass_function_(pmf)/solution.py new file mode 100644 index 00000000..b54775fe --- /dev/null +++ b/questions/182_empirical_probability_mass_function_(pmf)/solution.py @@ -0,0 +1,14 @@ +from collections import Counter + +def empirical_pmf(samples): + """ + Given an iterable of integer samples, return a list of (value, probability) + pairs sorted by value ascending. + """ + samples = list(samples) + if not samples: + return [] + total = len(samples) + cnt = Counter(samples) + result = [(k, cnt[k] / total) for k in sorted(cnt.keys())] + return result \ No newline at end of file diff --git a/questions/182_empirical_probability_mass_function_(pmf)/starter_code.py b/questions/182_empirical_probability_mass_function_(pmf)/starter_code.py new file mode 100644 index 00000000..32b35c14 --- /dev/null +++ b/questions/182_empirical_probability_mass_function_(pmf)/starter_code.py @@ -0,0 +1,7 @@ +def empirical_pmf(samples): + """ + Given an iterable of integer samples, return a list of (value, probability) + pairs sorted by value ascending. 
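+
+    Illustrative call (values taken from the worked example in the statement):
+        empirical_pmf([1, 2, 2, 3, 3, 3])
+        -> [(1, 0.16666666666666666), (2, 0.3333333333333333), (3, 0.5)]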
+ """ + # TODO: Implement the function + pass diff --git a/questions/182_empirical_probability_mass_function_(pmf)/tests.json b/questions/182_empirical_probability_mass_function_(pmf)/tests.json new file mode 100644 index 00000000..d9cbb76b --- /dev/null +++ b/questions/182_empirical_probability_mass_function_(pmf)/tests.json @@ -0,0 +1,18 @@ +[ + { + "test": "print(empirical_pmf([1, 2, 2, 3, 3, 3]))", + "expected_output": "[(1, 0.16666666666666666), (2, 0.3333333333333333), (3, 0.5)]" + }, + { + "test": "print(empirical_pmf([5, 5, 5, 5]))", + "expected_output": "[(5, 1.0)]" + }, + { + "test": "print(empirical_pmf([]))", + "expected_output": "[]" + }, + { + "test": "print(empirical_pmf([0, 0, 1, 1, 1, 2]))", + "expected_output": "[(0, 0.3333333333333333), (1, 0.5), (2, 0.16666666666666666)]" + } +] diff --git a/questions/183_pmf_normalization_constant 2/description.md b/questions/183_pmf_normalization_constant 2/description.md new file mode 100644 index 00000000..7b00dfcc --- /dev/null +++ b/questions/183_pmf_normalization_constant 2/description.md @@ -0,0 +1,16 @@ +## Problem + +A discrete random variable `X` takes values 0 through 7 with probabilities: + +- P(X=0) = 0 +- P(X=1) = K +- P(X=2) = 2K +- P(X=3) = 2K +- P(X=4) = 3K +- P(X=5) = K^2 +- P(X=6) = 2K^2 +- P(X=7) = 7K^2 + K + +Find the value of the normalization constant `K` such that the above defines a valid PMF (i.e., probabilities are non‑negative and sum to 1). + +Implement a function `find_k()` that returns `K` as a Python float. diff --git a/questions/183_pmf_normalization_constant 2/example.json b/questions/183_pmf_normalization_constant 2/example.json new file mode 100644 index 00000000..6d92c364 --- /dev/null +++ b/questions/183_pmf_normalization_constant 2/example.json @@ -0,0 +1,5 @@ +{ + "input": "No input; call find_k()", + "output": "0.1", + "reasoning": "From the normalization condition, 10K^2 + 9K = 1 gives K = 0.1 (non-negative root)." +} diff --git a/questions/183_pmf_normalization_constant 2/learn.md b/questions/183_pmf_normalization_constant 2/learn.md new file mode 100644 index 00000000..a8835809 --- /dev/null +++ b/questions/183_pmf_normalization_constant 2/learn.md @@ -0,0 +1,16 @@ +## Solution Explanation + +For a valid PMF, probabilities must sum to 1. + +Sum all terms: + +- Linear in K: K + 2K + 2K + 3K + K = 9K +- Quadratic in K: K^2 + 2K^2 + 7K^2 = 10K^2 + +Therefore: 10K^2 + 9K = 1 => 10K^2 + 9K - 1 = 0 + +Solve the quadratic: K = [-9 ± sqrt(81 + 40)] / 20 = [-9 ± 11] / 20 + +Feasible solution (K ≥ 0): K = 2/20 = 0.1 + +So the normalization constant is K = 0.1. diff --git a/questions/183_pmf_normalization_constant 2/meta.json b/questions/183_pmf_normalization_constant 2/meta.json new file mode 100644 index 00000000..ddb93b38 --- /dev/null +++ b/questions/183_pmf_normalization_constant 2/meta.json @@ -0,0 +1,15 @@ +{ + "id": "183", + "title": "Find PMF Normalization Constant", + "difficulty": "easy", + "category": "Probability & Statistics", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/jeetmukherjee", + "name": "jeetmukherjee" + } + ] +} diff --git a/questions/183_pmf_normalization_constant 2/solution.py b/questions/183_pmf_normalization_constant 2/solution.py new file mode 100644 index 00000000..5836d3de --- /dev/null +++ b/questions/183_pmf_normalization_constant 2/solution.py @@ -0,0 +1,14 @@ +import math + +def find_k(): + """ + Solve 10*K^2 + 9*K - 1 = 0 and return the non-negative root. 
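+
+    Derivation sketch: with a = 10, b = 9, c = -1 the quadratic formula gives
+    K = (-9 ± sqrt(81 + 40)) / 20 = (-9 ± 11) / 20, so the non-negative root
+    is K = 2 / 20 = 0.1.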
+ """ + a = 10.0 + b = 9.0 + c = -1.0 + discriminant = b * b - 4 * a * c + sqrt_disc = math.sqrt(discriminant) + k1 = (-b + sqrt_disc) / (2 * a) + k2 = (-b - sqrt_disc) / (2 * a) + return k1 if k1 >= 0 else k2 diff --git a/questions/183_pmf_normalization_constant 2/starter_code.py b/questions/183_pmf_normalization_constant 2/starter_code.py new file mode 100644 index 00000000..0df463f9 --- /dev/null +++ b/questions/183_pmf_normalization_constant 2/starter_code.py @@ -0,0 +1,6 @@ +def find_k(): + """ + Return the normalization constant K for the given PMF as a float. + """ + # TODO: Solve for K from 10*K**2 + 9*K - 1 = 0 and return the non-negative root + pass diff --git a/questions/183_pmf_normalization_constant 2/tests.json b/questions/183_pmf_normalization_constant 2/tests.json new file mode 100644 index 00000000..d6cb3f10 --- /dev/null +++ b/questions/183_pmf_normalization_constant 2/tests.json @@ -0,0 +1,10 @@ +[ + { + "test": "print(round(find_k(), 10))", + "expected_output": "0.1" + }, + { + "test": "k = find_k(); s = 0 + k + 2*k + 2*k + 3*k + k**2 + 2*k**2 + (7*k**2 + k); print(round(s, 10))", + "expected_output": "1.0" + } +] From 558c2fc26a6abfb02fe016c4b5f3d5b97f38499a Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 15 Sep 2025 20:00:09 +0530 Subject: [PATCH 02/11] Added Two New Questions(Probability)-184,185 --- .../description.md | 0 .../example.json | 0 .../learn.md | 0 .../meta.json | 0 .../solution.py | 0 .../starter_code.py | 0 .../tests.json | 0 .../description.md | 0 .../example.json | 0 .../learn.md | 0 .../meta.json | 0 .../solution.py | 0 .../starter_code.py | 0 .../tests.json | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename questions/{182_empirical_probability_mass_function_(pmf) => 184_empirical_probability_mass_function_(pmf)}/description.md (100%) rename questions/{182_empirical_probability_mass_function_(pmf) => 184_empirical_probability_mass_function_(pmf)}/example.json (100%) rename questions/{182_empirical_probability_mass_function_(pmf) => 184_empirical_probability_mass_function_(pmf)}/learn.md (100%) rename questions/{182_empirical_probability_mass_function_(pmf) => 184_empirical_probability_mass_function_(pmf)}/meta.json (100%) rename questions/{182_empirical_probability_mass_function_(pmf) => 184_empirical_probability_mass_function_(pmf)}/solution.py (100%) rename questions/{182_empirical_probability_mass_function_(pmf) => 184_empirical_probability_mass_function_(pmf)}/starter_code.py (100%) rename questions/{182_empirical_probability_mass_function_(pmf) => 184_empirical_probability_mass_function_(pmf)}/tests.json (100%) rename questions/{183_pmf_normalization_constant 2 => 185_pmf_normalization_constant 2}/description.md (100%) rename questions/{183_pmf_normalization_constant 2 => 185_pmf_normalization_constant 2}/example.json (100%) rename questions/{183_pmf_normalization_constant 2 => 185_pmf_normalization_constant 2}/learn.md (100%) rename questions/{183_pmf_normalization_constant 2 => 185_pmf_normalization_constant 2}/meta.json (100%) rename questions/{183_pmf_normalization_constant 2 => 185_pmf_normalization_constant 2}/solution.py (100%) rename questions/{183_pmf_normalization_constant 2 => 185_pmf_normalization_constant 2}/starter_code.py (100%) rename questions/{183_pmf_normalization_constant 2 => 185_pmf_normalization_constant 2}/tests.json (100%) diff --git a/questions/182_empirical_probability_mass_function_(pmf)/description.md b/questions/184_empirical_probability_mass_function_(pmf)/description.md similarity index 100% 
rename from questions/182_empirical_probability_mass_function_(pmf)/description.md rename to questions/184_empirical_probability_mass_function_(pmf)/description.md diff --git a/questions/182_empirical_probability_mass_function_(pmf)/example.json b/questions/184_empirical_probability_mass_function_(pmf)/example.json similarity index 100% rename from questions/182_empirical_probability_mass_function_(pmf)/example.json rename to questions/184_empirical_probability_mass_function_(pmf)/example.json diff --git a/questions/182_empirical_probability_mass_function_(pmf)/learn.md b/questions/184_empirical_probability_mass_function_(pmf)/learn.md similarity index 100% rename from questions/182_empirical_probability_mass_function_(pmf)/learn.md rename to questions/184_empirical_probability_mass_function_(pmf)/learn.md diff --git a/questions/182_empirical_probability_mass_function_(pmf)/meta.json b/questions/184_empirical_probability_mass_function_(pmf)/meta.json similarity index 100% rename from questions/182_empirical_probability_mass_function_(pmf)/meta.json rename to questions/184_empirical_probability_mass_function_(pmf)/meta.json diff --git a/questions/182_empirical_probability_mass_function_(pmf)/solution.py b/questions/184_empirical_probability_mass_function_(pmf)/solution.py similarity index 100% rename from questions/182_empirical_probability_mass_function_(pmf)/solution.py rename to questions/184_empirical_probability_mass_function_(pmf)/solution.py diff --git a/questions/182_empirical_probability_mass_function_(pmf)/starter_code.py b/questions/184_empirical_probability_mass_function_(pmf)/starter_code.py similarity index 100% rename from questions/182_empirical_probability_mass_function_(pmf)/starter_code.py rename to questions/184_empirical_probability_mass_function_(pmf)/starter_code.py diff --git a/questions/182_empirical_probability_mass_function_(pmf)/tests.json b/questions/184_empirical_probability_mass_function_(pmf)/tests.json similarity index 100% rename from questions/182_empirical_probability_mass_function_(pmf)/tests.json rename to questions/184_empirical_probability_mass_function_(pmf)/tests.json diff --git a/questions/183_pmf_normalization_constant 2/description.md b/questions/185_pmf_normalization_constant 2/description.md similarity index 100% rename from questions/183_pmf_normalization_constant 2/description.md rename to questions/185_pmf_normalization_constant 2/description.md diff --git a/questions/183_pmf_normalization_constant 2/example.json b/questions/185_pmf_normalization_constant 2/example.json similarity index 100% rename from questions/183_pmf_normalization_constant 2/example.json rename to questions/185_pmf_normalization_constant 2/example.json diff --git a/questions/183_pmf_normalization_constant 2/learn.md b/questions/185_pmf_normalization_constant 2/learn.md similarity index 100% rename from questions/183_pmf_normalization_constant 2/learn.md rename to questions/185_pmf_normalization_constant 2/learn.md diff --git a/questions/183_pmf_normalization_constant 2/meta.json b/questions/185_pmf_normalization_constant 2/meta.json similarity index 100% rename from questions/183_pmf_normalization_constant 2/meta.json rename to questions/185_pmf_normalization_constant 2/meta.json diff --git a/questions/183_pmf_normalization_constant 2/solution.py b/questions/185_pmf_normalization_constant 2/solution.py similarity index 100% rename from questions/183_pmf_normalization_constant 2/solution.py rename to questions/185_pmf_normalization_constant 2/solution.py diff --git 
a/questions/183_pmf_normalization_constant 2/starter_code.py b/questions/185_pmf_normalization_constant 2/starter_code.py similarity index 100% rename from questions/183_pmf_normalization_constant 2/starter_code.py rename to questions/185_pmf_normalization_constant 2/starter_code.py diff --git a/questions/183_pmf_normalization_constant 2/tests.json b/questions/185_pmf_normalization_constant 2/tests.json similarity index 100% rename from questions/183_pmf_normalization_constant 2/tests.json rename to questions/185_pmf_normalization_constant 2/tests.json From b1d2a464212381175bdacad2fc8c1f3c42de2f75 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 15 Sep 2025 21:33:47 +0530 Subject: [PATCH 03/11] Updated the questions with fixes --- .../meta.json | 6 +-- .../185_pmf_normalization_constant 2/learn.md | 41 +++++++++++++++---- .../meta.json | 8 ++-- 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/questions/184_empirical_probability_mass_function_(pmf)/meta.json b/questions/184_empirical_probability_mass_function_(pmf)/meta.json index a5fc5556..21bc68aa 100644 --- a/questions/184_empirical_probability_mass_function_(pmf)/meta.json +++ b/questions/184_empirical_probability_mass_function_(pmf)/meta.json @@ -1,5 +1,5 @@ { - "id": "182", + "id": "184", "title": "Empirical Probability Mass Function (PMF)", "difficulty": "easy", "category": "Probability & Statistics", @@ -8,8 +8,8 @@ "dislikes": "0", "contributor": [ { - "profile_link": "https://github.com/jeetmukherjee", - "name": "jeetmukherjee" + "profile_link": "https://github.com/Jeet009", + "name": "Jeet Mukherjee" } ] } diff --git a/questions/185_pmf_normalization_constant 2/learn.md b/questions/185_pmf_normalization_constant 2/learn.md index a8835809..296e3851 100644 --- a/questions/185_pmf_normalization_constant 2/learn.md +++ b/questions/185_pmf_normalization_constant 2/learn.md @@ -1,16 +1,41 @@ -## Solution Explanation +## Learning: PMF normalization constant -For a valid PMF, probabilities must sum to 1. +### Idea and formula +- **PMF requirement**: A probability mass function must satisfy ∑ p(xᵢ) = 1 and p(xᵢ) ≥ 0. +- **Normalization by a constant**: If probabilities are given up to a constant, you determine that constant by enforcing the sum-to-1 constraint. + - If the form is p(xᵢ) = K · wᵢ with known nonnegative weights wᵢ, then + - ∑ p(xᵢ) = K · ∑ wᵢ = 1 ⇒ **K = 1 / ∑ wᵢ**. + - If the given expressions involve K in a more general way (e.g., both K and K² terms), still enforce ∑ p(xᵢ) = 1 and solve the resulting equation for K. Choose the solution that makes all probabilities nonnegative. -Sum all terms: +### Worked example (this question) +Suppose the PMF entries are expressed in terms of K such that, when summed, the K-terms group as follows: - Linear in K: K + 2K + 2K + 3K + K = 9K -- Quadratic in K: K^2 + 2K^2 + 7K^2 = 10K^2 +- Quadratic in K: K² + 2K² + 7K² = 10K² -Therefore: 10K^2 + 9K = 1 => 10K^2 + 9K - 1 = 0 +One concrete way to realize this is via the following table of outcomes and probabilities: -Solve the quadratic: K = [-9 ± sqrt(81 + 40)] / 20 = [-9 ± 11] / 20 +| X | p(X) | +|----|--------------| +| x₁ | K + K² | +| x₂ | 2K + 2K² | +| x₃ | 2K | +| x₄ | 3K + 7K² | +| x₅ | K | -Feasible solution (K ≥ 0): K = 2/20 = 0.1 +These add up to 9K + 10K² as required. -So the normalization constant is K = 0.1. +Enforce the PMF constraint: + +- 9K + 10K² = 1 ⇒ 10K² + 9K − 1 = 0 + +Quadratic formula reminder: + +- For aK² + bK + c = 0, the solutions are K = [−b ± √(b² − 4ac)] / (2a). 
+ +Solve the quadratic: + +- K = [−9 ± √(9² + 4·10·1)] / (2·10) = [−9 ± √121] / 20 = [−9 ± 11] / 20 +- Feasible (K ≥ 0) root: K = (−9 + 11) / 20 = 2/20 = 0.1 + +Therefore, the normalization constant is **K = 0.1**. diff --git a/questions/185_pmf_normalization_constant 2/meta.json b/questions/185_pmf_normalization_constant 2/meta.json index ddb93b38..b57e0a2b 100644 --- a/questions/185_pmf_normalization_constant 2/meta.json +++ b/questions/185_pmf_normalization_constant 2/meta.json @@ -1,15 +1,15 @@ { - "id": "183", + "id": "185", "title": "Find PMF Normalization Constant", - "difficulty": "easy", + "difficulty": "medium", "category": "Probability & Statistics", "video": "", "likes": "0", "dislikes": "0", "contributor": [ { - "profile_link": "https://github.com/jeetmukherjee", - "name": "jeetmukherjee" + "profile_link": "https://github.com/Jeet009", + "name": "Jeet Mukherjee" } ] } From 84b83247073940031f6455d9627225d02964818d Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 15 Sep 2025 21:59:41 +0530 Subject: [PATCH 04/11] Updated the questions with fixes --- .../184_empirical_probability_mass_function_(pmf)/learn.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/questions/184_empirical_probability_mass_function_(pmf)/learn.md b/questions/184_empirical_probability_mass_function_(pmf)/learn.md index 923306e3..f2017eac 100644 --- a/questions/184_empirical_probability_mass_function_(pmf)/learn.md +++ b/questions/184_empirical_probability_mass_function_(pmf)/learn.md @@ -16,9 +16,9 @@ If the true probabilities are unknown, you can estimate them with an **empirical ## Example Observed sequence: `1, 2, 2, 3, 3, 3` (6 outcomes total) -- “1” appears once → estimated probability = 1/6 -- “2” appears twice → estimated probability = 2/6 = 1/3 -- “3” appears three times → estimated probability = 3/6 = 1/2 +- "1" appears once → estimated probability = 1/6 +- "2" appears twice → estimated probability = 2/6 = 1/3 +- "3" appears three times → estimated probability = 3/6 = 1/2 \ No newline at end of file From b903254ac5765bb07ee926b542ef85a968289e64 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 15 Sep 2025 23:15:15 +0530 Subject: [PATCH 05/11] Updated the questions with fixes(added new supported notations) --- .../185_pmf_normalization_constant 2/learn.md | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/questions/185_pmf_normalization_constant 2/learn.md b/questions/185_pmf_normalization_constant 2/learn.md index 296e3851..a54fb92c 100644 --- a/questions/185_pmf_normalization_constant 2/learn.md +++ b/questions/185_pmf_normalization_constant 2/learn.md @@ -1,11 +1,11 @@ ## Learning: PMF normalization constant ### Idea and formula -- **PMF requirement**: A probability mass function must satisfy ∑ p(xᵢ) = 1 and p(xᵢ) ≥ 0. -- **Normalization by a constant**: If probabilities are given up to a constant, you determine that constant by enforcing the sum-to-1 constraint. - - If the form is p(xᵢ) = K · wᵢ with known nonnegative weights wᵢ, then - - ∑ p(xᵢ) = K · ∑ wᵢ = 1 ⇒ **K = 1 / ∑ wᵢ**. - - If the given expressions involve K in a more general way (e.g., both K and K² terms), still enforce ∑ p(xᵢ) = 1 and solve the resulting equation for K. Choose the solution that makes all probabilities nonnegative. +- **PMF requirement**: A probability mass function must satisfy $\sum_i p(x_i) = 1$ and $p(x_i) \ge 0$. 
+- **Normalization by a constant**: If probabilities are given up to a constant, determine that constant by enforcing the sum-to-1 constraint. + - If the form is $p(x_i) = K\,w_i$ with known nonnegative weights $w_i$, then + - $\sum_i p(x_i) = K \sum_i w_i = 1 \Rightarrow$ $\displaystyle K = \frac{1}{\sum_i w_i}$. + - If the given expressions involve $K$ in a more general way (e.g., both $K$ and $K^2$ terms), still enforce $\sum_i p(x_i) = 1$ and solve the resulting equation for $K$. Choose the solution that makes all probabilities nonnegative. ### Worked example (this question) Suppose the PMF entries are expressed in terms of K such that, when summed, the K-terms group as follows: @@ -23,19 +23,28 @@ One concrete way to realize this is via the following table of outcomes and prob | x₄ | 3K + 7K² | | x₅ | K | -These add up to 9K + 10K² as required. +These add up to $9K + 10K^2$ as required. Enforce the PMF constraint: -- 9K + 10K² = 1 ⇒ 10K² + 9K − 1 = 0 +$$ +9K + 10K^2 = 1 \;\Rightarrow\; 10K^2 + 9K - 1 = 0 +$$ Quadratic formula reminder: -- For aK² + bK + c = 0, the solutions are K = [−b ± √(b² − 4ac)] / (2a). +$$ +\text{For } aK^2 + bK + c = 0,\quad K = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}. +$$ Solve the quadratic: -- K = [−9 ± √(9² + 4·10·1)] / (2·10) = [−9 ± √121] / 20 = [−9 ± 11] / 20 -- Feasible (K ≥ 0) root: K = (−9 + 11) / 20 = 2/20 = 0.1 +$$ +K = \frac{-9 \pm \sqrt{9^2 - 4\cdot 10 \cdot (-1)}}{2\cdot 10} += \frac{-9 \pm \sqrt{121}}{20} += \frac{-9 \pm 11}{20}. +$$ -Therefore, the normalization constant is **K = 0.1**. +Feasible ($K \ge 0$) root: $\displaystyle K = \frac{2}{20} = 0.1$. + +Therefore, the normalization constant is **$K = 0.1$**. From 57e1293a1c23495202e228eb0b0faf865b799896 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Tue, 16 Sep 2025 14:36:04 +0530 Subject: [PATCH 06/11] Added Q: Linear Regression using OLS --- .../description.md | 15 +++++++ .../example.json | 7 ++++ .../learn.md | 39 +++++++++++++++++++ .../meta.json | 16 ++++++++ .../solution.py | 23 +++++++++++ .../starter_code.py | 14 +++++++ .../tests.json | 28 +++++++++++++ 7 files changed, 142 insertions(+) create mode 100644 questions/186_linear_regression_ordinary_least_squares/description.md create mode 100644 questions/186_linear_regression_ordinary_least_squares/example.json create mode 100644 questions/186_linear_regression_ordinary_least_squares/learn.md create mode 100644 questions/186_linear_regression_ordinary_least_squares/meta.json create mode 100644 questions/186_linear_regression_ordinary_least_squares/solution.py create mode 100644 questions/186_linear_regression_ordinary_least_squares/starter_code.py create mode 100644 questions/186_linear_regression_ordinary_least_squares/tests.json diff --git a/questions/186_linear_regression_ordinary_least_squares/description.md b/questions/186_linear_regression_ordinary_least_squares/description.md new file mode 100644 index 00000000..0dded433 --- /dev/null +++ b/questions/186_linear_regression_ordinary_least_squares/description.md @@ -0,0 +1,15 @@ +### Problem + +Implement simple linear regression using Ordinary Least Squares (OLS). Given 1D inputs `X` and targets `y`, compute the slope `m`, intercept `b`, and use them to predict on a provided test input. + +You should implement the closed-form OLS solution: + +$$ +m = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2},\quad +b = \bar{y} - m\,\bar{x}. +$$ + +Then, given `X_test`, output predictions `y_pred = m * X_test + b`. + +Return values: `m`, `b`, and `y_pred`. 
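+
+For orientation only, here is a minimal NumPy sketch of the closed form above; it is an illustration of the formula, not the graded interface (the required `fit_and_predict` signature lives in the starter code, and the helper name `ols_fit` is made up for this sketch):
+
+```python
+import numpy as np
+
+def ols_fit(x, y):
+    # Closed-form OLS for a single feature: m = Cov(x, y) / Var(x), b = y_bar - m * x_bar.
+    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
+    dx = x - x.mean()
+    m = float(dx @ (y - y.mean()) / (dx @ dx))
+    b = float(y.mean() - m * x.mean())
+    return m, b
+
+m, b = ols_fit([1, 2, 3], [2, 2.5, 3.5])
+print(m, b)        # 0.75 and roughly 1.1667
+print(m * 4 + b)   # prediction for x = 4, roughly 4.1667
+```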
+ diff --git a/questions/186_linear_regression_ordinary_least_squares/example.json b/questions/186_linear_regression_ordinary_least_squares/example.json new file mode 100644 index 00000000..a8d2e9f0 --- /dev/null +++ b/questions/186_linear_regression_ordinary_least_squares/example.json @@ -0,0 +1,7 @@ +{ + "input": "X_train = [1, 2, 3]; y_train = [2, 2.5, 3.5]; X_test = [4]", + "output": "m = 0.75, b = 1.166667, y_pred = [4.166667]", + "reasoning": "Using OLS: m = Cov(X,Y)/Var(X) = 1.5/2 = 0.75 and b = y_bar - m*x_bar = (8/3) - 0.75*2 = 1.166667. Prediction for X_test=[4] is 0.75*4 + 1.166667 = 4.166667." +} + + diff --git a/questions/186_linear_regression_ordinary_least_squares/learn.md b/questions/186_linear_regression_ordinary_least_squares/learn.md new file mode 100644 index 00000000..34e64f28 --- /dev/null +++ b/questions/186_linear_regression_ordinary_least_squares/learn.md @@ -0,0 +1,39 @@ +## Learning: Ordinary Least Squares for Simple Linear Regression + +### Idea and formula +- **Goal**: Fit a line $y = m x + b$ that minimizes the sum of squared errors. +- **Closed-form OLS solution** for 1D features: + +$$ +m = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2},\quad +b = \bar{y} - m\,\bar{x} +$$ + +### Intuition +- The numerator is the sample covariance between $x$ and $y$; the denominator is the sample variance of $x$. +- So $m = \operatorname{Cov}(x,y) / \operatorname{Var}(x)$ measures how much $y$ changes per unit change in $x$. +- The intercept $b$ anchors the best-fit line so it passes through the mean point $(\bar{x},\bar{y})$. + +### Algorithm steps +1. Compute $\bar{x}$ and $\bar{y}$. +2. Accumulate numerator $\sum_i (x_i-\bar{x})(y_i-\bar{y})$ and denominator $\sum_i (x_i-\bar{x})^2$. +3. Compute $m = \text{numerator}/\text{denominator}$ (guard against zero denominator). +4. Compute $b = \bar{y} - m\,\bar{x}$. +5. Predict: $\hat{y} = m\,x + b$ for any new $x$. + +### Edge cases and tips +- If all $x_i$ are identical, $\operatorname{Var}(x)=0$ and the slope is undefined. In practice, return $m=0$ and $b=\bar{y}$ or raise an error. +- Centering data helps numerical stability but is not required for the closed form. +- Outliers can strongly influence OLS; consider robust alternatives if needed. + +### Worked example +Given $X = [1,2,3]$ and $y = [2,2.5,3.5]$: + +- $\bar{x} = 2$, $\bar{y} = 8/3$. 
+- $\sum (x_i-\bar{x})(y_i-\bar{y}) = (1-2)(2-8/3) + (2-2)(2.5-8/3) + (3-2)(3.5-8/3) = 1.5$ +- $\sum (x_i-\bar{x})^2 = (1-2)^2 + (2-2)^2 + (3-2)^2 = 2$ +- $m = 1.5/2 = 0.75$ +- $b = \bar{y} - m\,\bar{x} = 8/3 - 0.75\cdot 2 = 1.166666\ldots$ + +Prediction for $X_{test} = [4]$: $y_{pred} = 0.75\cdot 4 + 1.1666\ldots = 4.1666\ldots$ + diff --git a/questions/186_linear_regression_ordinary_least_squares/meta.json b/questions/186_linear_regression_ordinary_least_squares/meta.json new file mode 100644 index 00000000..1e57a41d --- /dev/null +++ b/questions/186_linear_regression_ordinary_least_squares/meta.json @@ -0,0 +1,16 @@ +{ + "id": "186", + "title": "Linear Regression via Ordinary Least Squares (OLS)", + "difficulty": "hard", + "category": "Machine Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/Jeet009", + "name": "Jeet Mukherjee" + } + ] +} + diff --git a/questions/186_linear_regression_ordinary_least_squares/solution.py b/questions/186_linear_regression_ordinary_least_squares/solution.py new file mode 100644 index 00000000..751a05f6 --- /dev/null +++ b/questions/186_linear_regression_ordinary_least_squares/solution.py @@ -0,0 +1,23 @@ +from typing import List, Tuple + + +def fit_and_predict(X_train: List[float], y_train: List[float], X_test: List[float]) -> Tuple[float, float, List[float]]: + n = len(X_train) + x_mean = sum(X_train) / n + y_mean = sum(y_train) / n + + num = 0.0 + den = 0.0 + for i in range(n): + dx = X_train[i] - x_mean + dy = y_train[i] - y_mean + num += dx * dy + den += dx * dx + + m = num / den if den != 0 else 0.0 + b = y_mean - m * x_mean + + y_pred = [m * x + b for x in X_test] + return m, b, y_pred + + diff --git a/questions/186_linear_regression_ordinary_least_squares/starter_code.py b/questions/186_linear_regression_ordinary_least_squares/starter_code.py new file mode 100644 index 00000000..50a26990 --- /dev/null +++ b/questions/186_linear_regression_ordinary_least_squares/starter_code.py @@ -0,0 +1,14 @@ +from typing import List, Tuple + + +def fit_and_predict(X_train: List[float], y_train: List[float], X_test: List[float]) -> Tuple[float, float, List[float]]: + """ + Implement simple linear regression (OLS) to compute slope m, intercept b, + and predictions on X_test. + + Returns (m, b, y_pred). 
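+
+    Illustrative call (numbers from the worked example in the statement):
+        fit_and_predict([1, 2, 3], [2, 2.5, 3.5], [4])
+        -> m ≈ 0.75, b ≈ 1.1667, y_pred ≈ [4.1667]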
+ """ + # Your code here + pass + + diff --git a/questions/186_linear_regression_ordinary_least_squares/tests.json b/questions/186_linear_regression_ordinary_least_squares/tests.json new file mode 100644 index 00000000..6ebcfee4 --- /dev/null +++ b/questions/186_linear_regression_ordinary_least_squares/tests.json @@ -0,0 +1,28 @@ +[ + { + "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([1,2,3],[2,2.5,3.5],[4]); print(round(m,6), round(b,6), [round(v,6) for v in y])", + "expected_output": "0.75 1.166667 [4.166667]" + }, + { + "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([0,1,2,3],[1,3,5,7],[4,5]); print(round(m,6), round(b,6), [round(v,6) for v in y])", + "expected_output": "2 1 [9, 11]" + }, + { + "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([0,1,2],[5,2,-1],[3]); print(round(m,6), round(b,6), [round(v,6) for v in y])", + "expected_output": "-3 5 [-4]" + }, + { + "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([2,2,2],[1,4,7],[10]); print(round(m,6), round(b,6), [round(v,6) for v in y])", + "expected_output": "0.0 4.0 [4.0]" + }, + { + "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([1,2,3,4],[1.1,1.9,3.05,3.9],[5]); print(round(m,6), round(b,6), [round(v,6) for v in y])", + "expected_output": "0.955 0.1 [4.875]" + }, + { + "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([3],[7],[10]); print(round(m,6), round(b,6), [round(v,6) for v in y])", + "expected_output": "0.0 7.0 [7.0]" + } +] + + From f62a64c5b337d1af060fd42a64f3d3ff99bcdc99 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 22 Sep 2025 23:17:18 +0530 Subject: [PATCH 07/11] Added new prob --- .../description.md | 17 +++++ .../example.json | 5 ++ .../learn.md | 57 +++++++++++++++++ .../meta.json | 15 +++++ .../solution.py | 63 +++++++++++++++++++ .../starter_code.py | 21 +++++++ .../tests.json | 26 ++++++++ 7 files changed, 204 insertions(+) create mode 100644 questions/187_perceptron_trick_logistic_regression/description.md create mode 100644 questions/187_perceptron_trick_logistic_regression/example.json create mode 100644 questions/187_perceptron_trick_logistic_regression/learn.md create mode 100644 questions/187_perceptron_trick_logistic_regression/meta.json create mode 100644 questions/187_perceptron_trick_logistic_regression/solution.py create mode 100644 questions/187_perceptron_trick_logistic_regression/starter_code.py create mode 100644 questions/187_perceptron_trick_logistic_regression/tests.json diff --git a/questions/187_perceptron_trick_logistic_regression/description.md b/questions/187_perceptron_trick_logistic_regression/description.md new file mode 100644 index 00000000..cb9b63fb --- /dev/null +++ b/questions/187_perceptron_trick_logistic_regression/description.md @@ -0,0 +1,17 @@ +### Problem + +Implement the perceptron trick for logistic regression. Given training data with binary labels, update the weights using the perceptron learning rule and return the final weights and predictions. 
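+
+As a hedged illustration of what a single update looks like (the exact rule is spelled out just below, and names such as `w`, `x_i`, `y_i`, and `eta` are placeholders rather than a required interface):
+
+```python
+import numpy as np
+
+def perceptron_step(w, x_i, y_i, eta):
+    # Assumes NumPy arrays, labels in {-1, +1}, and that x_i already includes the bias term.
+    if y_i * np.dot(w, x_i) <= 0:       # misclassified (or exactly on the boundary)
+        w = w + eta * y_i * x_i         # nudge the boundary toward the point
+    return w
+```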
+ +The perceptron trick updates weights as follows: +- If prediction is correct: no update +- If prediction is wrong: $\mathbf{w} \leftarrow \mathbf{w} + \eta \cdot y_i \cdot \mathbf{x}_i$ + +Where: +- $\mathbf{w}$ is the weight vector (including bias) +- $\eta$ is the learning rate +- $y_i \in \{-1, +1\}$ is the true label +- $\mathbf{x}_i$ is the feature vector (with bias term) + +The prediction function is: $\hat{y} = \text{sign}(\mathbf{w}^T \mathbf{x})$ + +Return the final weights and predictions on the training set. diff --git a/questions/187_perceptron_trick_logistic_regression/example.json b/questions/187_perceptron_trick_logistic_regression/example.json new file mode 100644 index 00000000..c8d04ffd --- /dev/null +++ b/questions/187_perceptron_trick_logistic_regression/example.json @@ -0,0 +1,5 @@ +{ + "input": "X = [[1,1], [2,2], [3,3]]; y = [1, 1, -1]; learning_rate = 0.1; max_epochs = 10", + "output": "weights = [-0.2, -0.2, 0.0]; predictions = [1, 1, -1]", + "reasoning": "Perceptron algorithm updates weights only when predictions are wrong. For linearly separable data, it converges to a decision boundary that correctly classifies all training points. The final weights include the bias term as the last element." +} diff --git a/questions/187_perceptron_trick_logistic_regression/learn.md b/questions/187_perceptron_trick_logistic_regression/learn.md new file mode 100644 index 00000000..b0a6735f --- /dev/null +++ b/questions/187_perceptron_trick_logistic_regression/learn.md @@ -0,0 +1,57 @@ +## Learning: Perceptron Trick in Logistic Regression + +### Idea and formula +- **Goal**: Find a linear decision boundary that separates two classes using an iterative update rule. +- **Perceptron Learning Rule**: Update weights only when predictions are wrong. + +The perceptron algorithm iteratively updates weights: + +$$ +\mathbf{w} \leftarrow \mathbf{w} + \eta \cdot y_i \cdot \mathbf{x}_i \quad \text{if } y_i \cdot (\mathbf{w}^T \mathbf{x}_i) \leq 0 +$$ + +Where: +- $\mathbf{w}$ is the weight vector (including bias) +- $\eta$ is the learning rate +- $y_i \in \{-1, +1\}$ is the true label +- $\mathbf{x}_i$ is the feature vector (with bias term added) + +### Intuition +- When prediction is correct ($y_i \cdot (\mathbf{w}^T \mathbf{x}_i) > 0$): no update needed +- When prediction is wrong ($y_i \cdot (\mathbf{w}^T \mathbf{x}_i) \leq 0$): adjust weights to make the correct prediction more likely +- The update rule "pulls" the decision boundary toward misclassified points + +### Algorithm steps +1. Initialize weights $\mathbf{w} = \mathbf{0}$ (or small random values) +2. Add bias term to each feature vector: $\mathbf{x}_i \leftarrow [\mathbf{x}_i, 1]$ +3. For each epoch: + - For each training example $(\mathbf{x}_i, y_i)$: + - Compute prediction: $\hat{y} = \text{sign}(\mathbf{w}^T \mathbf{x}_i)$ + - If $y_i \cdot (\mathbf{w}^T \mathbf{x}_i) \leq 0$: update $\mathbf{w} \leftarrow \mathbf{w} + \eta \cdot y_i \cdot \mathbf{x}_i$ +4. 
Repeat until convergence or max epochs + +### Convergence guarantee +- If data is linearly separable, perceptron algorithm will converge in finite steps +- Convergence time depends on the "margin" of separation + +### Worked example +Given 2D data: $X = [[1,1], [2,2], [3,3]]$, $y = [1, 1, -1]$, $\eta = 0.1$: + +- Start: $\mathbf{w} = [0, 0, 0]$ (including bias) +- Add bias: $X = [[1,1,1], [2,2,1], [3,3,1]]$ + +Epoch 1: +- $(1,1,1)$: $\mathbf{w}^T \mathbf{x} = 0$, $y \cdot (\mathbf{w}^T \mathbf{x}) = 0 \leq 0$ → update + - $\mathbf{w} = [0,0,0] + 0.1 \cdot 1 \cdot [1,1,1] = [0.1, 0.1, 0.1]$ +- $(2,2,1)$: $\mathbf{w}^T \mathbf{x} = 0.4$, $y \cdot (\mathbf{w}^T \mathbf{x}) = 0.4 > 0$ → no update +- $(3,3,1)$: $\mathbf{w}^T \mathbf{x} = 0.7$, $y \cdot (\mathbf{w}^T \mathbf{x}) = -0.7 \leq 0$ → update + - $\mathbf{w} = [0.1,0.1,0.1] + 0.1 \cdot (-1) \cdot [3,3,1] = [-0.2, -0.2, 0.0]$ + +Continue until convergence... + +### Edge cases and tips +- **Linearly separable data**: Algorithm will converge +- **Non-separable data**: May not converge; use max epochs limit +- **Learning rate**: Too large may cause oscillation; too small may converge slowly +- **Initialization**: Starting from zero is common; random initialization can help +- **Bias handling**: Always add bias term as additional feature diff --git a/questions/187_perceptron_trick_logistic_regression/meta.json b/questions/187_perceptron_trick_logistic_regression/meta.json new file mode 100644 index 00000000..f4007aea --- /dev/null +++ b/questions/187_perceptron_trick_logistic_regression/meta.json @@ -0,0 +1,15 @@ +{ + "id": "187", + "title": "Perceptron Trick in Logistic Regression", + "difficulty": "medium", + "category": "Machine Learning", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/Jeet009", + "name": "Jeet Mukherjee" + } + ] +} diff --git a/questions/187_perceptron_trick_logistic_regression/solution.py b/questions/187_perceptron_trick_logistic_regression/solution.py new file mode 100644 index 00000000..ea8023a7 --- /dev/null +++ b/questions/187_perceptron_trick_logistic_regression/solution.py @@ -0,0 +1,63 @@ +from typing import List, Tuple +import numpy as np + + +def perceptron_trick(X: List[List[float]], y: List[int], learning_rate: float = 0.1, max_epochs: int = 100) -> Tuple[List[float], List[int]]: + """ + Implement the perceptron trick for binary classification. 
+ + Args: + X: List of feature vectors (without bias term) + y: List of binary labels (-1 or +1) + learning_rate: Learning rate for weight updates + max_epochs: Maximum number of training epochs + + Returns: + Tuple of (final_weights, predictions) + - final_weights: Weight vector including bias term + - predictions: Predictions on training data + """ + if not X or not y: + return [], [] + + n_features = len(X[0]) + n_samples = len(X) + + # Initialize weights (including bias term) + weights = [0.0] * (n_features + 1) + + # Add bias term to each feature vector + X_with_bias = [] + for x in X: + X_with_bias.append(x + [1.0]) # Add bias term + + # Convert to numpy for easier computation + X_array = np.array(X_with_bias) + y_array = np.array(y) + weights_array = np.array(weights) + + # Training loop + for epoch in range(max_epochs): + converged = True + + for i in range(n_samples): + # Compute prediction: w^T * x + prediction = np.dot(weights_array, X_array[i]) + + # Check if prediction is wrong: y * (w^T * x) <= 0 + if y_array[i] * prediction <= 0: + # Update weights: w = w + learning_rate * y * x + weights_array += learning_rate * y_array[i] * X_array[i] + converged = False + + # If no updates were made, we've converged + if converged: + break + + # Generate predictions on training data + predictions = [] + for i in range(n_samples): + prediction = np.dot(weights_array, X_array[i]) + predictions.append(1 if prediction > 0 else -1) + + return weights_array.tolist(), predictions diff --git a/questions/187_perceptron_trick_logistic_regression/starter_code.py b/questions/187_perceptron_trick_logistic_regression/starter_code.py new file mode 100644 index 00000000..608aaa8e --- /dev/null +++ b/questions/187_perceptron_trick_logistic_regression/starter_code.py @@ -0,0 +1,21 @@ +from typing import List, Tuple +import numpy as np + + +def perceptron_trick(X: List[List[float]], y: List[int], learning_rate: float = 0.1, max_epochs: int = 100) -> Tuple[List[float], List[int]]: + """ + Implement the perceptron trick for binary classification. 
+ + Args: + X: List of feature vectors (without bias term) + y: List of binary labels (-1 or +1) + learning_rate: Learning rate for weight updates + max_epochs: Maximum number of training epochs + + Returns: + Tuple of (final_weights, predictions) + - final_weights: Weight vector including bias term + - predictions: Predictions on training data + """ + # TODO: implement + raise NotImplementedError diff --git a/questions/187_perceptron_trick_logistic_regression/tests.json b/questions/187_perceptron_trick_logistic_regression/tests.json new file mode 100644 index 00000000..f4b2b0e7 --- /dev/null +++ b/questions/187_perceptron_trick_logistic_regression/tests.json @@ -0,0 +1,26 @@ +[ + { + "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[1,1], [2,2], [3,3]], [1, 1, -1], 0.1, 10); print([round(x, 3) for x in w], pred)", + "expected_output": "[-0.2, -0.2, 0.0] [1, 1, -1]" + }, + { + "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[0,0], [1,1], [2,2]], [-1, -1, 1], 0.5, 5); print([round(x, 3) for x in w], pred)", + "expected_output": "[1.0, 1.0, 0.0] [-1, -1, 1]" + }, + { + "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[1], [2], [3]], [1, 1, -1], 0.1, 20); print([round(x, 3) for x in w], pred)", + "expected_output": "[-0.2, 0.0] [1, 1, -1]" + }, + { + "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[0,0], [1,0], [0,1], [1,1]], [1, 1, 1, -1], 0.1, 50); print([round(x, 3) for x in w], pred)", + "expected_output": "[-0.1, -0.1, 0.0] [1, 1, 1, -1]" + }, + { + "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[1,2], [3,4], [5,6]], [1, 1, -1], 0.01, 100); print([round(x, 3) for x in w], pred)", + "expected_output": "[-0.04, -0.04, 0.0] [1, 1, -1]" + }, + { + "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([], [], 0.1, 10); print(w, pred)", + "expected_output": "[] []" + } +] From 4af6935038f1abfbdb05c5cbe50ee7130e43a39f Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 22 Sep 2025 23:19:33 +0530 Subject: [PATCH 08/11] Added Question 186 --- .../description.md | 15 ----- .../example.json | 7 --- .../learn.md | 39 ------------ .../meta.json | 16 ----- .../solution.py | 23 ------- .../starter_code.py | 14 ----- .../tests.json | 28 --------- .../description.md | 17 ----- .../example.json | 5 -- .../learn.md | 57 ----------------- .../meta.json | 15 ----- .../solution.py | 63 ------------------- .../starter_code.py | 21 ------- .../tests.json | 26 -------- 14 files changed, 346 deletions(-) delete mode 100644 questions/186_linear_regression_ordinary_least_squares/description.md delete mode 100644 questions/186_linear_regression_ordinary_least_squares/example.json delete mode 100644 questions/186_linear_regression_ordinary_least_squares/learn.md delete mode 100644 questions/186_linear_regression_ordinary_least_squares/meta.json delete mode 100644 questions/186_linear_regression_ordinary_least_squares/solution.py delete mode 100644 questions/186_linear_regression_ordinary_least_squares/starter_code.py delete mode 100644 questions/186_linear_regression_ordinary_least_squares/tests.json delete mode 100644 
questions/187_perceptron_trick_logistic_regression/description.md delete mode 100644 questions/187_perceptron_trick_logistic_regression/example.json delete mode 100644 questions/187_perceptron_trick_logistic_regression/learn.md delete mode 100644 questions/187_perceptron_trick_logistic_regression/meta.json delete mode 100644 questions/187_perceptron_trick_logistic_regression/solution.py delete mode 100644 questions/187_perceptron_trick_logistic_regression/starter_code.py delete mode 100644 questions/187_perceptron_trick_logistic_regression/tests.json diff --git a/questions/186_linear_regression_ordinary_least_squares/description.md b/questions/186_linear_regression_ordinary_least_squares/description.md deleted file mode 100644 index 0dded433..00000000 --- a/questions/186_linear_regression_ordinary_least_squares/description.md +++ /dev/null @@ -1,15 +0,0 @@ -### Problem - -Implement simple linear regression using Ordinary Least Squares (OLS). Given 1D inputs `X` and targets `y`, compute the slope `m`, intercept `b`, and use them to predict on a provided test input. - -You should implement the closed-form OLS solution: - -$$ -m = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2},\quad -b = \bar{y} - m\,\bar{x}. -$$ - -Then, given `X_test`, output predictions `y_pred = m * X_test + b`. - -Return values: `m`, `b`, and `y_pred`. - diff --git a/questions/186_linear_regression_ordinary_least_squares/example.json b/questions/186_linear_regression_ordinary_least_squares/example.json deleted file mode 100644 index a8d2e9f0..00000000 --- a/questions/186_linear_regression_ordinary_least_squares/example.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "input": "X_train = [1, 2, 3]; y_train = [2, 2.5, 3.5]; X_test = [4]", - "output": "m = 0.75, b = 1.166667, y_pred = [4.166667]", - "reasoning": "Using OLS: m = Cov(X,Y)/Var(X) = 1.5/2 = 0.75 and b = y_bar - m*x_bar = (8/3) - 0.75*2 = 1.166667. Prediction for X_test=[4] is 0.75*4 + 1.166667 = 4.166667." -} - - diff --git a/questions/186_linear_regression_ordinary_least_squares/learn.md b/questions/186_linear_regression_ordinary_least_squares/learn.md deleted file mode 100644 index 34e64f28..00000000 --- a/questions/186_linear_regression_ordinary_least_squares/learn.md +++ /dev/null @@ -1,39 +0,0 @@ -## Learning: Ordinary Least Squares for Simple Linear Regression - -### Idea and formula -- **Goal**: Fit a line $y = m x + b$ that minimizes the sum of squared errors. -- **Closed-form OLS solution** for 1D features: - -$$ -m = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2},\quad -b = \bar{y} - m\,\bar{x} -$$ - -### Intuition -- The numerator is the sample covariance between $x$ and $y$; the denominator is the sample variance of $x$. -- So $m = \operatorname{Cov}(x,y) / \operatorname{Var}(x)$ measures how much $y$ changes per unit change in $x$. -- The intercept $b$ anchors the best-fit line so it passes through the mean point $(\bar{x},\bar{y})$. - -### Algorithm steps -1. Compute $\bar{x}$ and $\bar{y}$. -2. Accumulate numerator $\sum_i (x_i-\bar{x})(y_i-\bar{y})$ and denominator $\sum_i (x_i-\bar{x})^2$. -3. Compute $m = \text{numerator}/\text{denominator}$ (guard against zero denominator). -4. Compute $b = \bar{y} - m\,\bar{x}$. -5. Predict: $\hat{y} = m\,x + b$ for any new $x$. - -### Edge cases and tips -- If all $x_i$ are identical, $\operatorname{Var}(x)=0$ and the slope is undefined. In practice, return $m=0$ and $b=\bar{y}$ or raise an error. 
-- Centering data helps numerical stability but is not required for the closed form. -- Outliers can strongly influence OLS; consider robust alternatives if needed. - -### Worked example -Given $X = [1,2,3]$ and $y = [2,2.5,3.5]$: - -- $\bar{x} = 2$, $\bar{y} = 8/3$. -- $\sum (x_i-\bar{x})(y_i-\bar{y}) = (1-2)(2-8/3) + (2-2)(2.5-8/3) + (3-2)(3.5-8/3) = 1.5$ -- $\sum (x_i-\bar{x})^2 = (1-2)^2 + (2-2)^2 + (3-2)^2 = 2$ -- $m = 1.5/2 = 0.75$ -- $b = \bar{y} - m\,\bar{x} = 8/3 - 0.75\cdot 2 = 1.166666\ldots$ - -Prediction for $X_{test} = [4]$: $y_{pred} = 0.75\cdot 4 + 1.1666\ldots = 4.1666\ldots$ - diff --git a/questions/186_linear_regression_ordinary_least_squares/meta.json b/questions/186_linear_regression_ordinary_least_squares/meta.json deleted file mode 100644 index 1e57a41d..00000000 --- a/questions/186_linear_regression_ordinary_least_squares/meta.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "id": "186", - "title": "Linear Regression via Ordinary Least Squares (OLS)", - "difficulty": "hard", - "category": "Machine Learning", - "video": "", - "likes": "0", - "dislikes": "0", - "contributor": [ - { - "profile_link": "https://github.com/Jeet009", - "name": "Jeet Mukherjee" - } - ] -} - diff --git a/questions/186_linear_regression_ordinary_least_squares/solution.py b/questions/186_linear_regression_ordinary_least_squares/solution.py deleted file mode 100644 index 751a05f6..00000000 --- a/questions/186_linear_regression_ordinary_least_squares/solution.py +++ /dev/null @@ -1,23 +0,0 @@ -from typing import List, Tuple - - -def fit_and_predict(X_train: List[float], y_train: List[float], X_test: List[float]) -> Tuple[float, float, List[float]]: - n = len(X_train) - x_mean = sum(X_train) / n - y_mean = sum(y_train) / n - - num = 0.0 - den = 0.0 - for i in range(n): - dx = X_train[i] - x_mean - dy = y_train[i] - y_mean - num += dx * dy - den += dx * dx - - m = num / den if den != 0 else 0.0 - b = y_mean - m * x_mean - - y_pred = [m * x + b for x in X_test] - return m, b, y_pred - - diff --git a/questions/186_linear_regression_ordinary_least_squares/starter_code.py b/questions/186_linear_regression_ordinary_least_squares/starter_code.py deleted file mode 100644 index 50a26990..00000000 --- a/questions/186_linear_regression_ordinary_least_squares/starter_code.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import List, Tuple - - -def fit_and_predict(X_train: List[float], y_train: List[float], X_test: List[float]) -> Tuple[float, float, List[float]]: - """ - Implement simple linear regression (OLS) to compute slope m, intercept b, - and predictions on X_test. - - Returns (m, b, y_pred). 
- """ - # Your code here - pass - - diff --git a/questions/186_linear_regression_ordinary_least_squares/tests.json b/questions/186_linear_regression_ordinary_least_squares/tests.json deleted file mode 100644 index 6ebcfee4..00000000 --- a/questions/186_linear_regression_ordinary_least_squares/tests.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([1,2,3],[2,2.5,3.5],[4]); print(round(m,6), round(b,6), [round(v,6) for v in y])", - "expected_output": "0.75 1.166667 [4.166667]" - }, - { - "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([0,1,2,3],[1,3,5,7],[4,5]); print(round(m,6), round(b,6), [round(v,6) for v in y])", - "expected_output": "2 1 [9, 11]" - }, - { - "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([0,1,2],[5,2,-1],[3]); print(round(m,6), round(b,6), [round(v,6) for v in y])", - "expected_output": "-3 5 [-4]" - }, - { - "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([2,2,2],[1,4,7],[10]); print(round(m,6), round(b,6), [round(v,6) for v in y])", - "expected_output": "0.0 4.0 [4.0]" - }, - { - "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([1,2,3,4],[1.1,1.9,3.05,3.9],[5]); print(round(m,6), round(b,6), [round(v,6) for v in y])", - "expected_output": "0.955 0.1 [4.875]" - }, - { - "test": "from questions.186_linear_regression_ordinary_least_squares.solution import fit_and_predict; m,b,y=fit_and_predict([3],[7],[10]); print(round(m,6), round(b,6), [round(v,6) for v in y])", - "expected_output": "0.0 7.0 [7.0]" - } -] - - diff --git a/questions/187_perceptron_trick_logistic_regression/description.md b/questions/187_perceptron_trick_logistic_regression/description.md deleted file mode 100644 index cb9b63fb..00000000 --- a/questions/187_perceptron_trick_logistic_regression/description.md +++ /dev/null @@ -1,17 +0,0 @@ -### Problem - -Implement the perceptron trick for logistic regression. Given training data with binary labels, update the weights using the perceptron learning rule and return the final weights and predictions. - -The perceptron trick updates weights as follows: -- If prediction is correct: no update -- If prediction is wrong: $\mathbf{w} \leftarrow \mathbf{w} + \eta \cdot y_i \cdot \mathbf{x}_i$ - -Where: -- $\mathbf{w}$ is the weight vector (including bias) -- $\eta$ is the learning rate -- $y_i \in \{-1, +1\}$ is the true label -- $\mathbf{x}_i$ is the feature vector (with bias term) - -The prediction function is: $\hat{y} = \text{sign}(\mathbf{w}^T \mathbf{x})$ - -Return the final weights and predictions on the training set. diff --git a/questions/187_perceptron_trick_logistic_regression/example.json b/questions/187_perceptron_trick_logistic_regression/example.json deleted file mode 100644 index c8d04ffd..00000000 --- a/questions/187_perceptron_trick_logistic_regression/example.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "input": "X = [[1,1], [2,2], [3,3]]; y = [1, 1, -1]; learning_rate = 0.1; max_epochs = 10", - "output": "weights = [-0.2, -0.2, 0.0]; predictions = [1, 1, -1]", - "reasoning": "Perceptron algorithm updates weights only when predictions are wrong. 
For linearly separable data, it converges to a decision boundary that correctly classifies all training points. The final weights include the bias term as the last element." -} diff --git a/questions/187_perceptron_trick_logistic_regression/learn.md b/questions/187_perceptron_trick_logistic_regression/learn.md deleted file mode 100644 index b0a6735f..00000000 --- a/questions/187_perceptron_trick_logistic_regression/learn.md +++ /dev/null @@ -1,57 +0,0 @@ -## Learning: Perceptron Trick in Logistic Regression - -### Idea and formula -- **Goal**: Find a linear decision boundary that separates two classes using an iterative update rule. -- **Perceptron Learning Rule**: Update weights only when predictions are wrong. - -The perceptron algorithm iteratively updates weights: - -$$ -\mathbf{w} \leftarrow \mathbf{w} + \eta \cdot y_i \cdot \mathbf{x}_i \quad \text{if } y_i \cdot (\mathbf{w}^T \mathbf{x}_i) \leq 0 -$$ - -Where: -- $\mathbf{w}$ is the weight vector (including bias) -- $\eta$ is the learning rate -- $y_i \in \{-1, +1\}$ is the true label -- $\mathbf{x}_i$ is the feature vector (with bias term added) - -### Intuition -- When prediction is correct ($y_i \cdot (\mathbf{w}^T \mathbf{x}_i) > 0$): no update needed -- When prediction is wrong ($y_i \cdot (\mathbf{w}^T \mathbf{x}_i) \leq 0$): adjust weights to make the correct prediction more likely -- The update rule "pulls" the decision boundary toward misclassified points - -### Algorithm steps -1. Initialize weights $\mathbf{w} = \mathbf{0}$ (or small random values) -2. Add bias term to each feature vector: $\mathbf{x}_i \leftarrow [\mathbf{x}_i, 1]$ -3. For each epoch: - - For each training example $(\mathbf{x}_i, y_i)$: - - Compute prediction: $\hat{y} = \text{sign}(\mathbf{w}^T \mathbf{x}_i)$ - - If $y_i \cdot (\mathbf{w}^T \mathbf{x}_i) \leq 0$: update $\mathbf{w} \leftarrow \mathbf{w} + \eta \cdot y_i \cdot \mathbf{x}_i$ -4. Repeat until convergence or max epochs - -### Convergence guarantee -- If data is linearly separable, perceptron algorithm will converge in finite steps -- Convergence time depends on the "margin" of separation - -### Worked example -Given 2D data: $X = [[1,1], [2,2], [3,3]]$, $y = [1, 1, -1]$, $\eta = 0.1$: - -- Start: $\mathbf{w} = [0, 0, 0]$ (including bias) -- Add bias: $X = [[1,1,1], [2,2,1], [3,3,1]]$ - -Epoch 1: -- $(1,1,1)$: $\mathbf{w}^T \mathbf{x} = 0$, $y \cdot (\mathbf{w}^T \mathbf{x}) = 0 \leq 0$ → update - - $\mathbf{w} = [0,0,0] + 0.1 \cdot 1 \cdot [1,1,1] = [0.1, 0.1, 0.1]$ -- $(2,2,1)$: $\mathbf{w}^T \mathbf{x} = 0.4$, $y \cdot (\mathbf{w}^T \mathbf{x}) = 0.4 > 0$ → no update -- $(3,3,1)$: $\mathbf{w}^T \mathbf{x} = 0.7$, $y \cdot (\mathbf{w}^T \mathbf{x}) = -0.7 \leq 0$ → update - - $\mathbf{w} = [0.1,0.1,0.1] + 0.1 \cdot (-1) \cdot [3,3,1] = [-0.2, -0.2, 0.0]$ - -Continue until convergence... 
- -### Edge cases and tips -- **Linearly separable data**: Algorithm will converge -- **Non-separable data**: May not converge; use max epochs limit -- **Learning rate**: Too large may cause oscillation; too small may converge slowly -- **Initialization**: Starting from zero is common; random initialization can help -- **Bias handling**: Always add bias term as additional feature diff --git a/questions/187_perceptron_trick_logistic_regression/meta.json b/questions/187_perceptron_trick_logistic_regression/meta.json deleted file mode 100644 index f4007aea..00000000 --- a/questions/187_perceptron_trick_logistic_regression/meta.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "id": "187", - "title": "Perceptron Trick in Logistic Regression", - "difficulty": "medium", - "category": "Machine Learning", - "video": "", - "likes": "0", - "dislikes": "0", - "contributor": [ - { - "profile_link": "https://github.com/Jeet009", - "name": "Jeet Mukherjee" - } - ] -} diff --git a/questions/187_perceptron_trick_logistic_regression/solution.py b/questions/187_perceptron_trick_logistic_regression/solution.py deleted file mode 100644 index ea8023a7..00000000 --- a/questions/187_perceptron_trick_logistic_regression/solution.py +++ /dev/null @@ -1,63 +0,0 @@ -from typing import List, Tuple -import numpy as np - - -def perceptron_trick(X: List[List[float]], y: List[int], learning_rate: float = 0.1, max_epochs: int = 100) -> Tuple[List[float], List[int]]: - """ - Implement the perceptron trick for binary classification. - - Args: - X: List of feature vectors (without bias term) - y: List of binary labels (-1 or +1) - learning_rate: Learning rate for weight updates - max_epochs: Maximum number of training epochs - - Returns: - Tuple of (final_weights, predictions) - - final_weights: Weight vector including bias term - - predictions: Predictions on training data - """ - if not X or not y: - return [], [] - - n_features = len(X[0]) - n_samples = len(X) - - # Initialize weights (including bias term) - weights = [0.0] * (n_features + 1) - - # Add bias term to each feature vector - X_with_bias = [] - for x in X: - X_with_bias.append(x + [1.0]) # Add bias term - - # Convert to numpy for easier computation - X_array = np.array(X_with_bias) - y_array = np.array(y) - weights_array = np.array(weights) - - # Training loop - for epoch in range(max_epochs): - converged = True - - for i in range(n_samples): - # Compute prediction: w^T * x - prediction = np.dot(weights_array, X_array[i]) - - # Check if prediction is wrong: y * (w^T * x) <= 0 - if y_array[i] * prediction <= 0: - # Update weights: w = w + learning_rate * y * x - weights_array += learning_rate * y_array[i] * X_array[i] - converged = False - - # If no updates were made, we've converged - if converged: - break - - # Generate predictions on training data - predictions = [] - for i in range(n_samples): - prediction = np.dot(weights_array, X_array[i]) - predictions.append(1 if prediction > 0 else -1) - - return weights_array.tolist(), predictions diff --git a/questions/187_perceptron_trick_logistic_regression/starter_code.py b/questions/187_perceptron_trick_logistic_regression/starter_code.py deleted file mode 100644 index 608aaa8e..00000000 --- a/questions/187_perceptron_trick_logistic_regression/starter_code.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import List, Tuple -import numpy as np - - -def perceptron_trick(X: List[List[float]], y: List[int], learning_rate: float = 0.1, max_epochs: int = 100) -> Tuple[List[float], List[int]]: - """ - Implement the perceptron trick 
for binary classification. - - Args: - X: List of feature vectors (without bias term) - y: List of binary labels (-1 or +1) - learning_rate: Learning rate for weight updates - max_epochs: Maximum number of training epochs - - Returns: - Tuple of (final_weights, predictions) - - final_weights: Weight vector including bias term - - predictions: Predictions on training data - """ - # TODO: implement - raise NotImplementedError diff --git a/questions/187_perceptron_trick_logistic_regression/tests.json b/questions/187_perceptron_trick_logistic_regression/tests.json deleted file mode 100644 index f4b2b0e7..00000000 --- a/questions/187_perceptron_trick_logistic_regression/tests.json +++ /dev/null @@ -1,26 +0,0 @@ -[ - { - "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[1,1], [2,2], [3,3]], [1, 1, -1], 0.1, 10); print([round(x, 3) for x in w], pred)", - "expected_output": "[-0.2, -0.2, 0.0] [1, 1, -1]" - }, - { - "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[0,0], [1,1], [2,2]], [-1, -1, 1], 0.5, 5); print([round(x, 3) for x in w], pred)", - "expected_output": "[1.0, 1.0, 0.0] [-1, -1, 1]" - }, - { - "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[1], [2], [3]], [1, 1, -1], 0.1, 20); print([round(x, 3) for x in w], pred)", - "expected_output": "[-0.2, 0.0] [1, 1, -1]" - }, - { - "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[0,0], [1,0], [0,1], [1,1]], [1, 1, 1, -1], 0.1, 50); print([round(x, 3) for x in w], pred)", - "expected_output": "[-0.1, -0.1, 0.0] [1, 1, 1, -1]" - }, - { - "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([[1,2], [3,4], [5,6]], [1, 1, -1], 0.01, 100); print([round(x, 3) for x in w], pred)", - "expected_output": "[-0.04, -0.04, 0.0] [1, 1, -1]" - }, - { - "test": "from questions.187_perceptron_trick_logistic_regression.solution import perceptron_trick; w, pred = perceptron_trick([], [], 0.1, 10); print(w, pred)", - "expected_output": "[] []" - } -] From 1decb2f34732f0371d51f522c861484a31bb6a19 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Mon, 22 Sep 2025 23:24:18 +0530 Subject: [PATCH 09/11] Added Question 186 --- .../description.md | 0 .../example.json | 0 .../learn.md | 0 .../meta.json | 0 .../solution.py | 0 .../starter_code.py | 0 .../tests.json | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename questions/{185_pmf_normalization_constant 2 => 186_pmf_normalization_constant 2}/description.md (100%) rename questions/{185_pmf_normalization_constant 2 => 186_pmf_normalization_constant 2}/example.json (100%) rename questions/{185_pmf_normalization_constant 2 => 186_pmf_normalization_constant 2}/learn.md (100%) rename questions/{185_pmf_normalization_constant 2 => 186_pmf_normalization_constant 2}/meta.json (100%) rename questions/{185_pmf_normalization_constant 2 => 186_pmf_normalization_constant 2}/solution.py (100%) rename questions/{185_pmf_normalization_constant 2 => 186_pmf_normalization_constant 2}/starter_code.py (100%) rename questions/{185_pmf_normalization_constant 2 => 186_pmf_normalization_constant 2}/tests.json (100%) diff --git a/questions/185_pmf_normalization_constant 2/description.md b/questions/186_pmf_normalization_constant 2/description.md 
similarity index 100% rename from questions/185_pmf_normalization_constant 2/description.md rename to questions/186_pmf_normalization_constant 2/description.md diff --git a/questions/185_pmf_normalization_constant 2/example.json b/questions/186_pmf_normalization_constant 2/example.json similarity index 100% rename from questions/185_pmf_normalization_constant 2/example.json rename to questions/186_pmf_normalization_constant 2/example.json diff --git a/questions/185_pmf_normalization_constant 2/learn.md b/questions/186_pmf_normalization_constant 2/learn.md similarity index 100% rename from questions/185_pmf_normalization_constant 2/learn.md rename to questions/186_pmf_normalization_constant 2/learn.md diff --git a/questions/185_pmf_normalization_constant 2/meta.json b/questions/186_pmf_normalization_constant 2/meta.json similarity index 100% rename from questions/185_pmf_normalization_constant 2/meta.json rename to questions/186_pmf_normalization_constant 2/meta.json diff --git a/questions/185_pmf_normalization_constant 2/solution.py b/questions/186_pmf_normalization_constant 2/solution.py similarity index 100% rename from questions/185_pmf_normalization_constant 2/solution.py rename to questions/186_pmf_normalization_constant 2/solution.py diff --git a/questions/185_pmf_normalization_constant 2/starter_code.py b/questions/186_pmf_normalization_constant 2/starter_code.py similarity index 100% rename from questions/185_pmf_normalization_constant 2/starter_code.py rename to questions/186_pmf_normalization_constant 2/starter_code.py diff --git a/questions/185_pmf_normalization_constant 2/tests.json b/questions/186_pmf_normalization_constant 2/tests.json similarity index 100% rename from questions/185_pmf_normalization_constant 2/tests.json rename to questions/186_pmf_normalization_constant 2/tests.json From cd33bcf4a4e337b03dd11f66778c312379bd059b Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Tue, 23 Sep 2025 16:13:42 +0530 Subject: [PATCH 10/11] Added new question --- build/184.json | 42 +++++++++++ build/187.json | 72 +++++++++++++++++++ .../description.md | 16 ----- .../example.json | 5 -- .../186_pmf_normalization_constant 2/learn.md | 50 ------------- .../solution.py | 14 ---- .../starter_code.py | 6 -- .../tests.json | 10 --- .../description.md | 15 ++++ .../187_probability-addition-law/example.json | 5 ++ .../187_probability-addition-law/learn.md | 33 +++++++++ .../meta.json | 8 +-- .../187_probability-addition-law/solution.py | 10 +++ .../starter_code.py | 17 +++++ .../187_probability-addition-law/tests.json | 5 ++ 15 files changed, 203 insertions(+), 105 deletions(-) create mode 100644 build/184.json create mode 100644 build/187.json delete mode 100644 questions/186_pmf_normalization_constant 2/description.md delete mode 100644 questions/186_pmf_normalization_constant 2/example.json delete mode 100644 questions/186_pmf_normalization_constant 2/learn.md delete mode 100644 questions/186_pmf_normalization_constant 2/solution.py delete mode 100644 questions/186_pmf_normalization_constant 2/starter_code.py delete mode 100644 questions/186_pmf_normalization_constant 2/tests.json create mode 100644 questions/187_probability-addition-law/description.md create mode 100644 questions/187_probability-addition-law/example.json create mode 100644 questions/187_probability-addition-law/learn.md rename questions/{186_pmf_normalization_constant 2 => 187_probability-addition-law}/meta.json (57%) create mode 100644 questions/187_probability-addition-law/solution.py create mode 100644 
questions/187_probability-addition-law/starter_code.py create mode 100644 questions/187_probability-addition-law/tests.json diff --git a/build/184.json b/build/184.json new file mode 100644 index 00000000..6f75a9aa --- /dev/null +++ b/build/184.json @@ -0,0 +1,42 @@ +{ + "id": "184", + "title": "Empirical Probability Mass Function (PMF)", + "difficulty": "easy", + "category": "Probability & Statistics", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/Jeet009", + "name": "Jeet Mukherjee" + } + ], + "description": "## Problem\n\nGiven a list of integer samples drawn from a discrete distribution, implement a function to compute the empirical Probability Mass Function (PMF). The function should return a list of `(value, probability)` pairs sorted by the value in ascending order. If the input is empty, return an empty list.", + "learn_section": "\n# Learn Section\n\n# Probability Mass Function (PMF) — Simple Explanation\n\nA **probability mass function (PMF)** describes how probabilities are assigned to the possible outcomes of a **discrete random variable**.\n\n- It tells you the chance of each specific outcome. \n- Each probability is non-negative. \n- The total of all probabilities adds up to 1.\n\n## Estimating from data\nIf the true probabilities are unknown, you can estimate them with an **empirical PMF**:\n- Count how often each outcome appears. \n- Divide by the total number of observations. \n\n## Example\nObserved sequence: `1, 2, 2, 3, 3, 3` (6 outcomes total)\n- \"1\" appears once → estimated probability = 1/6 \n- \"2\" appears twice → estimated probability = 2/6 = 1/3 \n- \"3\" appears three times → estimated probability = 3/6 = 1/2 \n\n\n ", + "starter_code": "def empirical_pmf(samples):\n \"\"\"\n Given an iterable of integer samples, return a list of (value, probability)\n pairs sorted by value ascending.\n \"\"\"\n # TODO: Implement the function\n pass", + "solution": "from collections import Counter\n\ndef empirical_pmf(samples):\n \"\"\"\n Given an iterable of integer samples, return a list of (value, probability)\n pairs sorted by value ascending.\n \"\"\"\n samples = list(samples)\n if not samples:\n return []\n total = len(samples)\n cnt = Counter(samples)\n result = [(k, cnt[k] / total) for k in sorted(cnt.keys())]\n return result", + "example": { + "input": "samples = [1, 2, 2, 3, 3, 3]", + "output": "[(1, 0.16666666666666666), (2, 0.3333333333333333), (3, 0.5)]", + "reasoning": "Counts are {1:1, 2:2, 3:3} over 6 samples, so probabilities are 1/6, 2/6, and 3/6 respectively, returned sorted by value." 
+ }, + "test_cases": [ + { + "test": "print(empirical_pmf([1, 2, 2, 3, 3, 3]))", + "expected_output": "[(1, 0.16666666666666666), (2, 0.3333333333333333), (3, 0.5)]" + }, + { + "test": "print(empirical_pmf([5, 5, 5, 5]))", + "expected_output": "[(5, 1.0)]" + }, + { + "test": "print(empirical_pmf([]))", + "expected_output": "[]" + }, + { + "test": "print(empirical_pmf([0, 0, 1, 1, 1, 2]))", + "expected_output": "[(0, 0.3333333333333333), (1, 0.5), (2, 0.16666666666666666)]" + } + ] +} \ No newline at end of file diff --git a/build/187.json b/build/187.json new file mode 100644 index 00000000..5606ff6b --- /dev/null +++ b/build/187.json @@ -0,0 +1,72 @@ +{ + "id": "187", + "title": "Probability Addition Law: Compute P(A ∪ B)", + "difficulty": "easy", + "category": "Probability & Statistics", + "video": "", + "likes": "0", + "dislikes": "0", + "contributor": [ + { + "profile_link": "https://github.com/Jeet009", + "name": "Jeet Mukherjee" + } + ], + "tinygrad_difficulty": "easy", + "pytorch_difficulty": "easy", + "description": "## Problem\n\nTwo events `A` and `B` in a probability space have the following probabilities:\n\n- P(A) = 0.6\n- P(B) = 0.5\n- P(A ∩ B) = 0.3\n\nUsing the probability addition law, compute `P(A ∪ B)`.\n\nImplement a function `prob_union(p_a, p_b, p_intersection)` that returns `P(A ∪ B)` as a float.\n\nRecall: P(A ∪ B) = P(A) + P(B) − P(A ∩ B).\n\nNote: If `A` and `B` are mutually exclusive (disjoint), then `P(A ∩ B) = 0` and the rule simplifies to `P(A ∪ B) = P(A) + P(B)`.", + "learn_section": "## Solution Explanation\n\nThe probability addition law for any two events A and B states:\n\n$$\nP(A \\cup B) = P(A) + P(B) - P(A \\cap B)\n$$\n\n- The union counts outcomes in A or B (or both).\n- We subtract the intersection once to correct double-counting.\n\n### Mutually exclusive (disjoint) events\nIf A and B cannot occur together, then \\(P(A \\cap B) = 0\\) and the addition rule simplifies to:\n\\[\nP(A \\cup B) = P(A) + P(B)\n\\]\n\n### Plug in the given values\n\nGiven: \\(P(A)=0.6\\), \\(P(B)=0.5\\), \\(P(A \\cap B)=0.3\\)\n\n\\[\nP(A \\cup B) = 0.6 + 0.5 - 0.3 = 0.8\n\\]\n\n### Validity checks\n- Probabilities must lie in [0, 1]. The result 0.8 is valid.\n- Given inputs must satisfy: \\(0 \\le P(A \\cap B) \\le \\min\\{P(A), P(B)\\}\\) and \\(P(A \\cap B) \\ge P(A) + P(B) - 1\\). 
Here, 0.3 is within [0.1, 0.5], so inputs are consistent.\n\n### Implementation outline\n- Accept three floats: `p_a`, `p_b`, `p_intersection`.\n- Optionally assert basic bounds to help users catch mistakes.\n- Return `p_a + p_b - p_intersection`.", + "starter_code": "# Implement your function below.\n\ndef prob_union(p_a: float, p_b: float, p_intersection: float) -> float:\n\t\"\"\"Return P(A ∪ B) using the addition law.\n\n\tAuto-detects mutually exclusive events by treating very small P(A ∩ B) as 0.\n\n\tArguments:\n\t- p_a: P(A)\n\t- p_b: P(B)\n\t- p_intersection: P(A ∩ B)\n\n\tReturns:\n\t- float: P(A ∪ B)\n\t\"\"\"\n\t# TODO: if p_intersection is ~0, return p_a + p_b; else return p_a + p_b - p_intersection\n\traise NotImplementedError", + "solution": "def prob_union(p_a: float, p_b: float, p_intersection: float) -> float:\n\t\"\"\"Reference implementation for P(A ∪ B) with auto-detection of mutual exclusivity.\n\n\tIf p_intersection is numerically very small (≤ 1e-12), treat as 0 and\n\tuse the simplified rule P(A ∪ B) = P(A) + P(B).\n\t\"\"\"\n\tepsilon = 1e-12\n\tif p_intersection <= epsilon:\n\t\treturn p_a + p_b\n\treturn p_a + p_b - p_intersection", + "example": { + "input": "prob_union(0.6, 0.5, 0.3)", + "output": "0.8", + "reasoning": "By addition law: 0.6 + 0.5 − 0.3 = 0.8." + }, + "test_cases": [ + { + "test": "from solution import prob_union; print(prob_union(0.6, 0.5, 0.3))", + "expected_output": "0.8" + }, + { + "test": "from solution import prob_union; print(prob_union(0.2, 0.4, 0.1))", + "expected_output": "0.5" + }, + { + "test": "from solution import prob_union; print(prob_union(0.3, 0.2, 0.0))", + "expected_output": "0.5" + } + ], + "tinygrad_starter_code": "def prob_union(p_a: float, p_b: float, p_intersection: float) -> float:\n\t\"\"\"Return P(A ∪ B). Treat very small P(A ∩ B) as 0 (mutually exclusive).\"\"\"\n\traise NotImplementedError", + "tinygrad_solution": "def prob_union(p_a: float, p_b: float, p_intersection: float) -> float:\n\t\"\"\"Reference implementation for P(A ∪ B) with auto-detection (Tinygrad track).\"\"\"\n\tepsilon = 1e-12\n\tif p_intersection <= epsilon:\n\t\treturn p_a + p_b\n\treturn p_a + p_b - p_intersection", + "tinygrad_test_cases": [ + { + "test": "from solution import prob_union; print(prob_union(0.6, 0.5, 0.3))", + "expected_output": "0.8" + }, + { + "test": "from solution import prob_union; print(prob_union(0.2, 0.4, 0.1))", + "expected_output": "0.5" + }, + { + "test": "from solution import prob_union; print(prob_union(0.3, 0.2, 0.0))", + "expected_output": "0.5" + } + ], + "pytorch_starter_code": "def prob_union(p_a: float, p_b: float, p_intersection: float) -> float:\n\t\"\"\"Return P(A ∪ B). 
Treat very small P(A ∩ B) as 0 (mutually exclusive).\"\"\"\n\traise NotImplementedError", + "pytorch_solution": "def prob_union(p_a: float, p_b: float, p_intersection: float) -> float:\n\t\"\"\"Reference implementation for P(A ∪ B) with auto-detection (PyTorch track).\"\"\"\n\tepsilon = 1e-12\n\tif p_intersection <= epsilon:\n\t\treturn p_a + p_b\n\treturn p_a + p_b - p_intersection", + "pytorch_test_cases": [ + { + "test": "from solution import prob_union; print(prob_union(0.6, 0.5, 0.3))", + "expected_output": "0.8" + }, + { + "test": "from solution import prob_union; print(prob_union(0.2, 0.4, 0.1))", + "expected_output": "0.5" + }, + { + "test": "from solution import prob_union; print(prob_union(0.3, 0.2, 0.0))", + "expected_output": "0.5" + } + ] +} \ No newline at end of file diff --git a/questions/186_pmf_normalization_constant 2/description.md b/questions/186_pmf_normalization_constant 2/description.md deleted file mode 100644 index 7b00dfcc..00000000 --- a/questions/186_pmf_normalization_constant 2/description.md +++ /dev/null @@ -1,16 +0,0 @@ -## Problem - -A discrete random variable `X` takes values 0 through 7 with probabilities: - -- P(X=0) = 0 -- P(X=1) = K -- P(X=2) = 2K -- P(X=3) = 2K -- P(X=4) = 3K -- P(X=5) = K^2 -- P(X=6) = 2K^2 -- P(X=7) = 7K^2 + K - -Find the value of the normalization constant `K` such that the above defines a valid PMF (i.e., probabilities are non‑negative and sum to 1). - -Implement a function `find_k()` that returns `K` as a Python float. diff --git a/questions/186_pmf_normalization_constant 2/example.json b/questions/186_pmf_normalization_constant 2/example.json deleted file mode 100644 index 6d92c364..00000000 --- a/questions/186_pmf_normalization_constant 2/example.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "input": "No input; call find_k()", - "output": "0.1", - "reasoning": "From the normalization condition, 10K^2 + 9K = 1 gives K = 0.1 (non-negative root)." -} diff --git a/questions/186_pmf_normalization_constant 2/learn.md b/questions/186_pmf_normalization_constant 2/learn.md deleted file mode 100644 index a54fb92c..00000000 --- a/questions/186_pmf_normalization_constant 2/learn.md +++ /dev/null @@ -1,50 +0,0 @@ -## Learning: PMF normalization constant - -### Idea and formula -- **PMF requirement**: A probability mass function must satisfy $\sum_i p(x_i) = 1$ and $p(x_i) \ge 0$. -- **Normalization by a constant**: If probabilities are given up to a constant, determine that constant by enforcing the sum-to-1 constraint. - - If the form is $p(x_i) = K\,w_i$ with known nonnegative weights $w_i$, then - - $\sum_i p(x_i) = K \sum_i w_i = 1 \Rightarrow$ $\displaystyle K = \frac{1}{\sum_i w_i}$. - - If the given expressions involve $K$ in a more general way (e.g., both $K$ and $K^2$ terms), still enforce $\sum_i p(x_i) = 1$ and solve the resulting equation for $K$. Choose the solution that makes all probabilities nonnegative. - -### Worked example (this question) -Suppose the PMF entries are expressed in terms of K such that, when summed, the K-terms group as follows: - -- Linear in K: K + 2K + 2K + 3K + K = 9K -- Quadratic in K: K² + 2K² + 7K² = 10K² - -One concrete way to realize this is via the following table of outcomes and probabilities: - -| X | p(X) | -|----|--------------| -| x₁ | K + K² | -| x₂ | 2K + 2K² | -| x₃ | 2K | -| x₄ | 3K + 7K² | -| x₅ | K | - -These add up to $9K + 10K^2$ as required. 
- -Enforce the PMF constraint: - -$$ -9K + 10K^2 = 1 \;\Rightarrow\; 10K^2 + 9K - 1 = 0 -$$ - -Quadratic formula reminder: - -$$ -\text{For } aK^2 + bK + c = 0,\quad K = \frac{-b \pm \sqrt{b^2 - 4ac}}{2a}. -$$ - -Solve the quadratic: - -$$ -K = \frac{-9 \pm \sqrt{9^2 - 4\cdot 10 \cdot (-1)}}{2\cdot 10} -= \frac{-9 \pm \sqrt{121}}{20} -= \frac{-9 \pm 11}{20}. -$$ - -Feasible ($K \ge 0$) root: $\displaystyle K = \frac{2}{20} = 0.1$. - -Therefore, the normalization constant is **$K = 0.1$**. diff --git a/questions/186_pmf_normalization_constant 2/solution.py b/questions/186_pmf_normalization_constant 2/solution.py deleted file mode 100644 index 5836d3de..00000000 --- a/questions/186_pmf_normalization_constant 2/solution.py +++ /dev/null @@ -1,14 +0,0 @@ -import math - -def find_k(): - """ - Solve 10*K^2 + 9*K - 1 = 0 and return the non-negative root. - """ - a = 10.0 - b = 9.0 - c = -1.0 - discriminant = b * b - 4 * a * c - sqrt_disc = math.sqrt(discriminant) - k1 = (-b + sqrt_disc) / (2 * a) - k2 = (-b - sqrt_disc) / (2 * a) - return k1 if k1 >= 0 else k2 diff --git a/questions/186_pmf_normalization_constant 2/starter_code.py b/questions/186_pmf_normalization_constant 2/starter_code.py deleted file mode 100644 index 0df463f9..00000000 --- a/questions/186_pmf_normalization_constant 2/starter_code.py +++ /dev/null @@ -1,6 +0,0 @@ -def find_k(): - """ - Return the normalization constant K for the given PMF as a float. - """ - # TODO: Solve for K from 10*K**2 + 9*K - 1 = 0 and return the non-negative root - pass diff --git a/questions/186_pmf_normalization_constant 2/tests.json b/questions/186_pmf_normalization_constant 2/tests.json deleted file mode 100644 index d6cb3f10..00000000 --- a/questions/186_pmf_normalization_constant 2/tests.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "test": "print(round(find_k(), 10))", - "expected_output": "0.1" - }, - { - "test": "k = find_k(); s = 0 + k + 2*k + 2*k + 3*k + k**2 + 2*k**2 + (7*k**2 + k); print(round(s, 10))", - "expected_output": "1.0" - } -] diff --git a/questions/187_probability-addition-law/description.md b/questions/187_probability-addition-law/description.md new file mode 100644 index 00000000..73d60706 --- /dev/null +++ b/questions/187_probability-addition-law/description.md @@ -0,0 +1,15 @@ +## Problem + +Two events `A` and `B` in a probability space have the following probabilities: + +- P(A) = 0.6 +- P(B) = 0.5 +- P(A ∩ B) = 0.3 + +Using the probability addition law, compute `P(A ∪ B)`. + +Implement a function `prob_union(p_a, p_b, p_intersection)` that returns `P(A ∪ B)` as a float. + +Recall: P(A ∪ B) = P(A) + P(B) − P(A ∩ B). + +Note: If `A` and `B` are mutually exclusive (disjoint), then `P(A ∩ B) = 0` and the rule simplifies to `P(A ∪ B) = P(A) + P(B)`. diff --git a/questions/187_probability-addition-law/example.json b/questions/187_probability-addition-law/example.json new file mode 100644 index 00000000..7ddce763 --- /dev/null +++ b/questions/187_probability-addition-law/example.json @@ -0,0 +1,5 @@ +{ + "input": "prob_union(0.6, 0.5, 0.3)", + "output": "0.8", + "reasoning": "By addition law: 0.6 + 0.5 − 0.3 = 0.8." 
+} diff --git a/questions/187_probability-addition-law/learn.md b/questions/187_probability-addition-law/learn.md new file mode 100644 index 00000000..d1038b38 --- /dev/null +++ b/questions/187_probability-addition-law/learn.md @@ -0,0 +1,33 @@ +## Solution Explanation + +The probability addition law for any two events A and B states: + +$$ +P(A \cup B) = P(A) + P(B) - P(A \cap B) +$$ + +- The union counts outcomes in A or B (or both). +- We subtract the intersection once to correct double-counting. + +### Mutually exclusive (disjoint) events +If A and B cannot occur together, then \(P(A \cap B) = 0\) and the addition rule simplifies to: +\[ +P(A \cup B) = P(A) + P(B) +\] + +### Plug in the given values + +Given: \(P(A)=0.6\), \(P(B)=0.5\), \(P(A \cap B)=0.3\) + +\[ +P(A \cup B) = 0.6 + 0.5 - 0.3 = 0.8 +\] + +### Validity checks +- Probabilities must lie in [0, 1]. The result 0.8 is valid. +- Given inputs must satisfy: \(0 \le P(A \cap B) \le \min\{P(A), P(B)\}\) and \(P(A \cap B) \ge P(A) + P(B) - 1\). Here, 0.3 is within [0.1, 0.5], so inputs are consistent. + +### Implementation outline +- Accept three floats: `p_a`, `p_b`, `p_intersection`. +- Optionally assert basic bounds to help users catch mistakes. +- Return `p_a + p_b - p_intersection`. diff --git a/questions/186_pmf_normalization_constant 2/meta.json b/questions/187_probability-addition-law/meta.json similarity index 57% rename from questions/186_pmf_normalization_constant 2/meta.json rename to questions/187_probability-addition-law/meta.json index b57e0a2b..58f1fd53 100644 --- a/questions/186_pmf_normalization_constant 2/meta.json +++ b/questions/187_probability-addition-law/meta.json @@ -1,8 +1,8 @@ { - "id": "185", - "title": "Find PMF Normalization Constant", - "difficulty": "medium", - "category": "Probability & Statistics", + "id": "187", + "title": "Probability Addition Law: Compute P(A ∪ B)", + "difficulty": "easy", + "category": "Probability", "video": "", "likes": "0", "dislikes": "0", diff --git a/questions/187_probability-addition-law/solution.py b/questions/187_probability-addition-law/solution.py new file mode 100644 index 00000000..c49aeaf7 --- /dev/null +++ b/questions/187_probability-addition-law/solution.py @@ -0,0 +1,10 @@ +def prob_union(p_a: float, p_b: float, p_intersection: float) -> float: + """Reference implementation for P(A ∪ B) with auto-detection of mutual exclusivity. + + If p_intersection is numerically very small (≤ 1e-12), treat as 0 and + use the simplified rule P(A ∪ B) = P(A) + P(B). + """ + epsilon = 1e-12 + if p_intersection <= epsilon: + return p_a + p_b + return p_a + p_b - p_intersection diff --git a/questions/187_probability-addition-law/starter_code.py b/questions/187_probability-addition-law/starter_code.py new file mode 100644 index 00000000..e4f8a1de --- /dev/null +++ b/questions/187_probability-addition-law/starter_code.py @@ -0,0 +1,17 @@ +# Implement your function below. + +def prob_union(p_a: float, p_b: float, p_intersection: float) -> float: + """Return P(A ∪ B) using the addition law. + + Auto-detects mutually exclusive events by treating very small P(A ∩ B) as 0. 
+ + Arguments: + - p_a: P(A) + - p_b: P(B) + - p_intersection: P(A ∩ B) + + Returns: + - float: P(A ∪ B) + """ + # TODO: if p_intersection is ~0, return p_a + p_b; else return p_a + p_b - p_intersection + raise NotImplementedError diff --git a/questions/187_probability-addition-law/tests.json b/questions/187_probability-addition-law/tests.json new file mode 100644 index 00000000..61e25f39 --- /dev/null +++ b/questions/187_probability-addition-law/tests.json @@ -0,0 +1,5 @@ +[ + { "test": "from solution import prob_union; print(prob_union(0.6, 0.5, 0.3))", "expected_output": "0.8" }, + { "test": "from solution import prob_union; print(prob_union(0.2, 0.4, 0.1))", "expected_output": "0.5" }, + { "test": "from solution import prob_union; print(prob_union(0.3, 0.2, 0.0))", "expected_output": "0.5" } +] From f92b0dc10a615c4d580c222e808ae2f4ab9d6082 Mon Sep 17 00:00:00 2001 From: Jeet Mukherjee Date: Sun, 12 Oct 2025 22:26:25 +0530 Subject: [PATCH 11/11] Final Commit --- .../description.md | 3 --- .../example.json | 5 ---- .../learn.md | 24 ------------------- .../meta.json | 15 ------------ .../solution.py | 14 ----------- .../starter_code.py | 7 ------ .../tests.json | 18 -------------- 7 files changed, 86 deletions(-) delete mode 100644 questions/184_empirical_probability_mass_function_(pmf)/description.md delete mode 100644 questions/184_empirical_probability_mass_function_(pmf)/example.json delete mode 100644 questions/184_empirical_probability_mass_function_(pmf)/learn.md delete mode 100644 questions/184_empirical_probability_mass_function_(pmf)/meta.json delete mode 100644 questions/184_empirical_probability_mass_function_(pmf)/solution.py delete mode 100644 questions/184_empirical_probability_mass_function_(pmf)/starter_code.py delete mode 100644 questions/184_empirical_probability_mass_function_(pmf)/tests.json diff --git a/questions/184_empirical_probability_mass_function_(pmf)/description.md b/questions/184_empirical_probability_mass_function_(pmf)/description.md deleted file mode 100644 index 9a49f526..00000000 --- a/questions/184_empirical_probability_mass_function_(pmf)/description.md +++ /dev/null @@ -1,3 +0,0 @@ -## Problem - -Given a list of integer samples drawn from a discrete distribution, implement a function to compute the empirical Probability Mass Function (PMF). The function should return a list of `(value, probability)` pairs sorted by the value in ascending order. If the input is empty, return an empty list. diff --git a/questions/184_empirical_probability_mass_function_(pmf)/example.json b/questions/184_empirical_probability_mass_function_(pmf)/example.json deleted file mode 100644 index a00d5e97..00000000 --- a/questions/184_empirical_probability_mass_function_(pmf)/example.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "input": "samples = [1, 2, 2, 3, 3, 3]", - "output": "[(1, 0.16666666666666666), (2, 0.3333333333333333), (3, 0.5)]", - "reasoning": "Counts are {1:1, 2:2, 3:3} over 6 samples, so probabilities are 1/6, 2/6, and 3/6 respectively, returned sorted by value." 
-} diff --git a/questions/184_empirical_probability_mass_function_(pmf)/learn.md b/questions/184_empirical_probability_mass_function_(pmf)/learn.md deleted file mode 100644 index f2017eac..00000000 --- a/questions/184_empirical_probability_mass_function_(pmf)/learn.md +++ /dev/null @@ -1,24 +0,0 @@ - -# Learn Section - -# Probability Mass Function (PMF) — Simple Explanation - -A **probability mass function (PMF)** describes how probabilities are assigned to the possible outcomes of a **discrete random variable**. - -- It tells you the chance of each specific outcome. -- Each probability is non-negative. -- The total of all probabilities adds up to 1. - -## Estimating from data -If the true probabilities are unknown, you can estimate them with an **empirical PMF**: -- Count how often each outcome appears. -- Divide by the total number of observations. - -## Example -Observed sequence: `1, 2, 2, 3, 3, 3` (6 outcomes total) -- "1" appears once → estimated probability = 1/6 -- "2" appears twice → estimated probability = 2/6 = 1/3 -- "3" appears three times → estimated probability = 3/6 = 1/2 - - - \ No newline at end of file diff --git a/questions/184_empirical_probability_mass_function_(pmf)/meta.json b/questions/184_empirical_probability_mass_function_(pmf)/meta.json deleted file mode 100644 index 21bc68aa..00000000 --- a/questions/184_empirical_probability_mass_function_(pmf)/meta.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "id": "184", - "title": "Empirical Probability Mass Function (PMF)", - "difficulty": "easy", - "category": "Probability & Statistics", - "video": "", - "likes": "0", - "dislikes": "0", - "contributor": [ - { - "profile_link": "https://github.com/Jeet009", - "name": "Jeet Mukherjee" - } - ] -} diff --git a/questions/184_empirical_probability_mass_function_(pmf)/solution.py b/questions/184_empirical_probability_mass_function_(pmf)/solution.py deleted file mode 100644 index b54775fe..00000000 --- a/questions/184_empirical_probability_mass_function_(pmf)/solution.py +++ /dev/null @@ -1,14 +0,0 @@ -from collections import Counter - -def empirical_pmf(samples): - """ - Given an iterable of integer samples, return a list of (value, probability) - pairs sorted by value ascending. - """ - samples = list(samples) - if not samples: - return [] - total = len(samples) - cnt = Counter(samples) - result = [(k, cnt[k] / total) for k in sorted(cnt.keys())] - return result \ No newline at end of file diff --git a/questions/184_empirical_probability_mass_function_(pmf)/starter_code.py b/questions/184_empirical_probability_mass_function_(pmf)/starter_code.py deleted file mode 100644 index 32b35c14..00000000 --- a/questions/184_empirical_probability_mass_function_(pmf)/starter_code.py +++ /dev/null @@ -1,7 +0,0 @@ -def empirical_pmf(samples): - """ - Given an iterable of integer samples, return a list of (value, probability) - pairs sorted by value ascending. 
- """ - # TODO: Implement the function - pass diff --git a/questions/184_empirical_probability_mass_function_(pmf)/tests.json b/questions/184_empirical_probability_mass_function_(pmf)/tests.json deleted file mode 100644 index d9cbb76b..00000000 --- a/questions/184_empirical_probability_mass_function_(pmf)/tests.json +++ /dev/null @@ -1,18 +0,0 @@ -[ - { - "test": "print(empirical_pmf([1, 2, 2, 3, 3, 3]))", - "expected_output": "[(1, 0.16666666666666666), (2, 0.3333333333333333), (3, 0.5)]" - }, - { - "test": "print(empirical_pmf([5, 5, 5, 5]))", - "expected_output": "[(5, 1.0)]" - }, - { - "test": "print(empirical_pmf([]))", - "expected_output": "[]" - }, - { - "test": "print(empirical_pmf([0, 0, 1, 1, 1, 2]))", - "expected_output": "[(0, 0.3333333333333333), (1, 0.5), (2, 0.16666666666666666)]" - } -]