{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7ed1961d-290f-48a8-8f80-56f49f10578c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from scipy import stats\n",
    "import pandas as pd\n",
    "\n",
    "# Set random seed for reproducibility\n",
    "np.random.seed(42)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1a4e246-4045-473b-bcfa-f467084ef6db",
   "metadata": {},
   "source": [
    "# PART 1: P-VALUES THROUGH SIMULATION"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41644323-6bd7-4416-adf0-4a8b66916b9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================\n",
    "# HELPER FUNCTIONS (PROVIDED)\n",
    "# ============================================\n",
    "\n",
    "def plot_pvalue_histogram(p_values, title=\"Distribution of P-values\"):\n",
    "    \"\"\"Visualize p-value distribution with uniform reference\"\"\"\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    plt.hist(p_values, bins=20, density=True, alpha=0.7, edgecolor='black')\n",
    "    plt.axhline(y=1, color='red', linestyle='--', label='Uniform distribution')\n",
    "    plt.xlabel('P-value')\n",
    "    plt.ylabel('Density')\n",
    "    plt.title(title)\n",
    "    plt.legend()\n",
    "    plt.grid(alpha=0.3)\n",
    "    plt.show()\n",
    "\n",
    "def plot_test_statistics(test_stats, title=\"Distribution of Test Statistics\"):\n",
    "    \"\"\"Visualize test statistic distribution\"\"\"\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    plt.hist(test_stats, bins=30, density=True, alpha=0.7, edgecolor='black')\n",
    "    plt.xlabel('Test Statistic')\n",
    "    plt.ylabel('Density')\n",
    "    plt.title(title)\n",
    "    plt.grid(alpha=0.3)\n",
    "    plt.show()\n",
    "\n",
    "def generate_samples(dist_type='normal', n_samples=30, param1=0, param2=1):\n",
    "    \"\"\"\n",
    "    Generate samples from various distributions\n",
    "    \n",
    "    Parameters:\n",
    "    - dist_type: 'normal', 'lognormal', 't', 'exponential', 'poisson'\n",
    "    - n_samples: number of samples to generate\n",
    "    - param1, param2: distribution parameters (meaning varies by distribution)\n",
    "    \"\"\"\n",
    "    if dist_type == 'normal':\n",
    "        return np.random.normal(param1, param2, n_samples)\n",
    "    elif dist_type == 'lognormal':\n",
    "        return np.random.lognormal(param1, param2, n_samples)\n",
    "    elif dist_type == 't':\n",
    "        return param1 + param2 * np.random.standard_t(df=3, size=n_samples)\n",
    "    elif dist_type == 'exponential':\n",
    "        return np.random.exponential(1/param1, n_samples) if param1 > 0 else np.random.exponential(1, n_samples)\n",
    "    elif dist_type == 'poisson':\n",
    "        return np.random.poisson(param1 if param1 > 0 else 5, n_samples)\n",
    "    else:\n",
    "        raise ValueError(f\"Unknown distribution type: {dist_type}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e289eeb4-feee-4f5c-952c-4c4e762192cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's explore what happens to p-values under the null hypothesis\n",
    "# We'll use a one-sample t-test comparing to a known mean\n",
    "\n",
    "# First, let's see a single example\n",
    "sample_data = generate_samples('normal', n_samples=30, param1=0, param2=1)\n",
    "print(f\"Sample mean: {np.mean(sample_data):.3f}\")\n",
    "print(f\"Sample std: {np.std(sample_data, ddof=1):.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3fcc730a-b455-4ed3-b905-5e187146cf9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 1: P-values under the null hypothesis\n",
    "print(\"\\n--- Activity 1: P-value distribution when null is TRUE ---\")\n",
    "print(\"We'll test if mean = 0 (which is true for our generator)\")\n",
    "\n",
    "# TODO: Generate many experiments and compute p-values\n",
    "n_experiments = 1000\n",
    "p_values_null_true = []\n",
    "\n",
    "for i in range(n_experiments):\n",
    "    # TODO: Generate a sample from normal distribution with mean=0\n",
    "    sample = None  # FILL THIS IN\n",
    "    \n",
    "    # TODO: Perform one-sample t-test against null hypothesis mean=0\n",
    "    # Hint: use stats.ttest_1samp(sample, popmean=0)\n",
    "    # Extract the p-value from the result\n",
    "    p_value = None  # FILL THIS IN\n",
    "    \n",
    "    p_values_null_true.append(p_value)\n",
    "\n",
    "# Visualize the results\n",
    "plot_pvalue_histogram(p_values_null_true, \"P-value Distribution (Null is TRUE)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73357c26-6240-4213-970c-75b90daebd22",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 2: P-values when null is FALSE\n",
    "print(\"\\n--- Activity 2: P-value distribution when null is FALSE ---\")\n",
    "print(\"Now let's test if mean = 0 when true mean is 0.5\")\n",
    "\n",
    "# TODO: Repeat the experiment but with samples from distribution with mean=0.5\n",
    "effect_size = 0.5\n",
    "p_values_null_false = []\n",
    "\n",
    "for i in range(n_experiments):\n",
    "    # TODO: Generate sample with mean=effect_size instead of 0\n",
    "    sample = None  # FILL THIS IN\n",
    "    \n",
    "    # TODO: Still test against null hypothesis mean=0\n",
    "    p_value = None  # FILL THIS IN\n",
    "    \n",
    "    p_values_null_false.append(p_value)\n",
    "\n",
    "plot_pvalue_histogram(p_values_null_false, f\"P-value Distribution (True mean = {effect_size})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40e98218-ab96-4138-8e88-dc598f712144",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 3: Effect size impact\n",
    "print(\"\\n--- Activity 3: How effect size impacts p-value distribution ---\")\n",
    "\n",
    "# TODO: Try different effect sizes and see how p-value distribution changes\n",
    "effect_sizes = [0, 0.2, 0.5, 0.8]\n",
    "p_value_results = {}\n",
    "\n",
    "for effect in effect_sizes:\n",
    "    p_values = []\n",
    "    for i in range(n_experiments):\n",
    "        # TODO: Generate samples and compute p-values for each effect size\n",
    "        sample = None  # FILL THIS IN\n",
    "        p_value = None  # FILL THIS IN\n",
    "        p_values.append(p_value)\n",
    "    \n",
    "    p_value_results[effect] = p_values\n",
    "\n",
    "# Visualize all effect sizes together\n",
    "fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n",
    "axes = axes.ravel()\n",
    "\n",
    "for idx, effect in enumerate(effect_sizes):\n",
    "    axes[idx].hist(p_value_results[effect], bins=20, density=True, alpha=0.7, edgecolor='black')\n",
    "    axes[idx].axhline(y=1, color='red', linestyle='--', alpha=0.5)\n",
    "    axes[idx].set_title(f'Effect Size = {effect}')\n",
    "    axes[idx].set_xlabel('P-value')\n",
    "    axes[idx].set_ylabel('Density')\n",
    "    axes[idx].grid(alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "116cc1b1-32d4-4bd9-b498-0a54ab6e47dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 4: Non-normal data\n",
    "print(\"\\n--- Activity 4: What happens with non-normal data? ---\")\n",
    "print(\"Let's apply t-test to lognormal data\")\n",
    "\n",
    "# TODO: Generate p-values using lognormal data but still using t-test\n",
    "p_values_lognormal = []\n",
    "\n",
    "for i in range(n_experiments):\n",
    "    # TODO: Generate lognormal sample with meanlog=0, sdlog=0.5\n",
    "    # Test if the mean equals exp(0 + 0.5²/2) ≈ 1.133\n",
    "    sample = None  # FILL THIS IN\n",
    "    \n",
    "    # The true mean of lognormal(0, 0.5) is about 1.133\n",
    "    # TODO: Test against this true mean\n",
    "    true_mean = np.exp(0 + 0.5**2/2)\n",
    "    p_value = None  # FILL THIS IN\n",
    "    \n",
    "    p_values_lognormal.append(p_value)\n",
    "\n",
    "plot_pvalue_histogram(p_values_lognormal, \"P-values: T-test on Lognormal Data (Null is TRUE)\")\n",
    "\n",
    "# Key insight question\n",
    "print(\"\\n Think about it: Why might the p-value distribution not be uniform\")\n",
    "print(\"   even when we're testing the TRUE mean of the lognormal distribution?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "37a20ad4-5ee7-40a0-b1ad-440cfd4d0d49",
   "metadata": {},
   "source": [
    "# PART 2: CONFIDENCE INTERVALS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54178cb7-bb39-47ca-836e-ce4670e45e58",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================\n",
    "# HELPER FUNCTIONS (PROVIDED)\n",
    "# ============================================\n",
    "\n",
    "def plot_confidence_intervals(intervals, true_value, n_show=100, title=\"Confidence Intervals\"):\n",
    "    \"\"\"Visualize multiple confidence intervals and their coverage\"\"\"\n",
    "    plt.figure(figsize=(12, 8))\n",
    "    \n",
    "    # Show only first n_show intervals for clarity\n",
    "    intervals_to_show = intervals[:n_show]\n",
    "    \n",
    "    for i, (lower, upper) in enumerate(intervals_to_show):\n",
    "        color = 'green' if lower <= true_value <= upper else 'red'\n",
    "        plt.plot([i, i], [lower, upper], color=color, alpha=0.7, linewidth=1)\n",
    "    \n",
    "    plt.axhline(y=true_value, color='blue', linestyle='--', linewidth=2, label=f'True value = {true_value}')\n",
    "    plt.xlabel('Experiment number')\n",
    "    plt.ylabel('Parameter value')\n",
    "    plt.title(title)\n",
    "    plt.legend()\n",
    "    plt.grid(alpha=0.3, axis='y')\n",
    "    plt.show()\n",
    "    \n",
    "    # Calculate and print coverage\n",
    "    coverage = sum(1 for lower, upper in intervals if lower <= true_value <= upper) / len(intervals)\n",
    "    print(f\"Empirical coverage: {coverage:.3f} ({coverage*100:.1f}%)\")\n",
    "\n",
    "def plot_ci_width_distribution(intervals, title=\"Distribution of CI Widths\"):\n",
    "    \"\"\"Visualize the distribution of confidence interval widths\"\"\"\n",
    "    widths = [upper - lower for lower, upper in intervals]\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    plt.hist(widths, bins=30, density=True, alpha=0.7, edgecolor='black')\n",
    "    plt.xlabel('CI Width')\n",
    "    plt.ylabel('Density')\n",
    "    plt.title(title)\n",
    "    plt.axvline(np.mean(widths), color='red', linestyle='--', label=f'Mean width = {np.mean(widths):.3f}')\n",
    "    plt.legend()\n",
    "    plt.grid(alpha=0.3)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5a0e3ac-b5a0-48fe-a486-416839e8863b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 1: Calculate a single confidence interval\n",
    "print(\"\\n--- Activity 1: Traditional Confidence Interval ---\")\n",
    "print(\"Calculate 95% CI for the mean of a normal distribution\")\n",
    "\n",
    "# Generate one sample\n",
    "sample = generate_samples('normal', n_samples=30, param1=10, param2=2)\n",
    "print(f\"Sample mean: {np.mean(sample):.3f}\")\n",
    "print(f\"Sample std: {np.std(sample, ddof=1):.3f}\")\n",
    "\n",
    "# TODO: Calculate 95% confidence interval for the mean\n",
    "# Hint: For normal data, use t-distribution\n",
    "sample_mean = None  # FILL THIS IN\n",
    "sample_std = None  # FILL THIS IN (remember ddof=1)\n",
    "n = len(sample)\n",
    "standard_error = None  # FILL THIS IN: sample_std / sqrt(n)\n",
    "\n",
    "# TODO: Find critical t-value for 95% CI\n",
    "# Hint: use stats.t.ppf() with appropriate df\n",
    "confidence_level = 0.95\n",
    "alpha = None  # FILL THIS IN: 1 - confidence_level\n",
    "t_critical = None  # FILL THIS IN: stats.t.ppf(?, df=?)\n",
    "\n",
    "# TODO: Calculate confidence interval\n",
    "margin_of_error = None  # FILL THIS IN: t_critical * standard_error\n",
    "ci_lower = None  # FILL THIS IN\n",
    "ci_upper = None  # FILL THIS IN\n",
    "\n",
    "print(f\"\\n95% Confidence Interval: [{ci_lower:.3f}, {ci_upper:.3f}]\")\n",
    "print(f\"Width: {ci_upper - ci_lower:.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee1484dd-5fc2-47b0-a19c-f8f69b22a72f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 2: Coverage simulation\n",
    "print(\"\\n--- Activity 2: What does '95% confidence' really mean? ---\")\n",
    "print(\"Generate many CIs and see how many contain the true parameter\")\n",
    "\n",
    "# TODO: Simulate confidence interval coverage\n",
    "n_simulations = 1000\n",
    "true_mean = 10\n",
    "true_std = 2\n",
    "confidence_intervals = []\n",
    "\n",
    "for i in range(n_simulations):\n",
    "    # TODO: Generate a sample\n",
    "    sample = None  # FILL THIS IN\n",
    "    \n",
    "    # TODO: Calculate CI for this sample (repeat the process from Activity 1)\n",
    "    sample_mean = None  # FILL THIS IN\n",
    "    sample_std = None  # FILL THIS IN\n",
    "    n = len(sample)\n",
    "    standard_error = None  # FILL THIS IN\n",
    "    \n",
    "    # Using same confidence level as before\n",
    "    t_critical = stats.t.ppf(1 - alpha/2, df=n-1)\n",
    "    margin_of_error = t_critical * standard_error\n",
    "    \n",
    "    ci_lower = None  # FILL THIS IN\n",
    "    ci_upper = None  # FILL THIS IN\n",
    "    \n",
    "    confidence_intervals.append((ci_lower, ci_upper))\n",
    "\n",
    "# Visualize the confidence intervals\n",
    "plot_confidence_intervals(confidence_intervals, true_mean, \n",
    "                         title=\"95% Confidence Intervals (100 shown)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed9018e4-c058-464b-b02a-81eb9664c25e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 3: Sample size effect\n",
    "print(\"\\n--- Activity 3: How sample size affects confidence intervals ---\")\n",
    "\n",
    "# TODO: Compare CI widths for different sample sizes\n",
    "sample_sizes = [10, 30, 100, 300]\n",
    "ci_results = {}\n",
    "\n",
    "for n_samples in sample_sizes:\n",
    "    intervals = []\n",
    "    \n",
    "    for i in range(500):  # Fewer simulations for speed\n",
    "        # TODO: Generate sample of size n_samples\n",
    "        sample = None  # FILL THIS IN\n",
    "        \n",
    "        # TODO: Calculate CI\n",
    "        sample_mean = None  # FILL THIS IN\n",
    "        sample_std = None  # FILL THIS IN\n",
    "        standard_error = None  # FILL THIS IN\n",
    "        \n",
    "        t_critical = stats.t.ppf(0.975, df=n_samples-1)  # 95% CI\n",
    "        margin_of_error = t_critical * standard_error\n",
    "        \n",
    "        ci_lower = sample_mean - margin_of_error\n",
    "        ci_upper = sample_mean + margin_of_error\n",
    "        \n",
    "        intervals.append((ci_lower, ci_upper))\n",
    "    \n",
    "    ci_results[n_samples] = intervals\n",
    "\n",
    "# Visualize CI widths for different sample sizes\n",
    "fig, axes = plt.subplots(2, 2, figsize=(12, 10))\n",
    "axes = axes.ravel()\n",
    "\n",
    "for idx, n_samples in enumerate(sample_sizes):\n",
    "    widths = [upper - lower for lower, upper in ci_results[n_samples]]\n",
    "    axes[idx].hist(widths, bins=30, density=True, alpha=0.7, edgecolor='black')\n",
    "    axes[idx].set_title(f'n = {n_samples} (mean width = {np.mean(widths):.3f})')\n",
    "    axes[idx].set_xlabel('CI Width')\n",
    "    axes[idx].set_ylabel('Density')\n",
    "    axes[idx].grid(alpha=0.3)\n",
    "\n",
    "plt.suptitle('CI Width Distribution by Sample Size')\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d13809c2-481d-4bd6-b552-f6febc3c97d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 4: Non-normal distributions\n",
    "print(\"\\n--- Activity 4: CI coverage for non-normal distributions ---\")\n",
    "print(\"Do traditional CIs maintain 95% coverage for non-normal data?\")\n",
    "\n",
    "# TODO: Test CI coverage for different distributions\n",
    "distributions = ['normal', 'lognormal', 'exponential', 't']\n",
    "coverage_results = {}\n",
    "\n",
    "for dist_type in distributions:\n",
    "    confidence_intervals = []\n",
    "    \n",
    "    # Set true mean based on distribution\n",
    "    if dist_type == 'normal':\n",
    "        true_mean = 5\n",
    "    elif dist_type == 'lognormal':\n",
    "        true_mean = np.exp(0 + 0.5**2/2)  # mean of lognormal(0, 0.5)\n",
    "    elif dist_type == 'exponential':\n",
    "        true_mean = 1  # mean of exponential(1)\n",
    "    elif dist_type == 't':\n",
    "        true_mean = 0  # location parameter\n",
    "    \n",
    "    for i in range(1000):\n",
    "        # TODO: Generate sample from current distribution\n",
    "        if dist_type == 'normal':\n",
    "            sample = generate_samples('normal', n_samples=30, param1=5, param2=2)\n",
    "        elif dist_type == 'lognormal':\n",
    "            sample = None  # FILL THIS IN: use generate_samples()\n",
    "        elif dist_type == 'exponential':\n",
    "            sample = None  # FILL THIS IN\n",
    "        elif dist_type == 't':\n",
    "            sample = None  # FILL THIS IN\n",
    "        \n",
    "        # TODO: Calculate traditional CI (same formula as before)\n",
    "        sample_mean = None  # FILL THIS IN\n",
    "        sample_std = None  # FILL THIS IN\n",
    "        n = len(sample)\n",
    "        standard_error = None  # FILL THIS IN\n",
    "        \n",
    "        t_critical = stats.t.ppf(0.975, df=n-1)\n",
    "        margin_of_error = t_critical * standard_error\n",
    "        \n",
    "        ci_lower = sample_mean - margin_of_error\n",
    "        ci_upper = sample_mean + margin_of_error\n",
    "        \n",
    "        confidence_intervals.append((ci_lower, ci_upper))\n",
    "    \n",
    "    # Calculate coverage\n",
    "    coverage = sum(1 for lower, upper in confidence_intervals \n",
    "                  if lower <= true_mean <= upper) / len(confidence_intervals)\n",
    "    coverage_results[dist_type] = coverage\n",
    "\n",
    "# Display coverage results\n",
    "print(\"\\nCoverage Results (should be ~0.95 for all):\")\n",
    "print(\"-\" * 40)\n",
    "for dist, coverage in coverage_results.items():\n",
    "    print(f\"{dist:12s}: {coverage:.3f} ({coverage*100:.1f}%)\")\n",
    "\n",
    "print(\"\\n Think about it: Why might coverage be poor for some distributions?\")\n",
    "print(\"   How does this relate to the assumptions of the t-based CI?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "de82e98c-af44-4d82-8318-b9cf72ca64dc",
   "metadata": {},
   "source": [
    "# PART 3: BOOTSTRAP METHODS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "521130a8-c6de-434e-9f67-c5dd818cf161",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================\n",
    "# HELPER FUNCTIONS (PROVIDED)\n",
    "# ============================================\n",
    "\n",
    "def generate_samples(dist_type='normal', n_samples=30, param1=0, param2=1):\n",
    "    \"\"\"Generate samples from various distributions\"\"\"\n",
    "    if dist_type == 'normal':\n",
    "        return np.random.normal(param1, param2, n_samples)\n",
    "    elif dist_type == 'lognormal':\n",
    "        return np.random.lognormal(param1, param2, n_samples)\n",
    "    elif dist_type == 't':\n",
    "        return param1 + param2 * np.random.standard_t(df=3, size=n_samples)\n",
    "    elif dist_type == 'exponential':\n",
    "        return np.random.exponential(1/param1, n_samples) if param1 > 0 else np.random.exponential(1, n_samples)\n",
    "    elif dist_type == 'poisson':\n",
    "        return np.random.poisson(param1 if param1 > 0 else 5, n_samples)\n",
    "\n",
    "def plot_bootstrap_distribution(bootstrap_stats, observed_stat, ci_lower, ci_upper, \n",
    "                              title=\"Bootstrap Distribution\"):\n",
    "    \"\"\"Visualize bootstrap distribution with CI\"\"\"\n",
    "    plt.figure(figsize=(10, 6))\n",
    "    plt.hist(bootstrap_stats, bins=50, density=True, alpha=0.7, edgecolor='black')\n",
    "    plt.axvline(observed_stat, color='red', linestyle='-', linewidth=2, \n",
    "                label=f'Observed = {observed_stat:.3f}')\n",
    "    plt.axvline(ci_lower, color='green', linestyle='--', linewidth=2, \n",
    "                label=f'CI lower = {ci_lower:.3f}')\n",
    "    plt.axvline(ci_upper, color='green', linestyle='--', linewidth=2, \n",
    "                label=f'CI upper = {ci_upper:.3f}')\n",
    "    plt.xlabel('Statistic Value')\n",
    "    plt.ylabel('Density')\n",
    "    plt.title(title)\n",
    "    plt.legend()\n",
    "    plt.grid(alpha=0.3)\n",
    "    plt.show()\n",
    "\n",
    "def compare_ci_methods(traditional_cis, bootstrap_cis, true_value, title=\"CI Comparison\"):\n",
    "    \"\"\"Compare traditional and bootstrap CI coverage\"\"\"\n",
    "    plt.figure(figsize=(12, 8))\n",
    "    \n",
    "    n_show = min(50, len(traditional_cis))\n",
    "    \n",
    "    # Plot traditional CIs\n",
    "    for i, (lower, upper) in enumerate(traditional_cis[:n_show]):\n",
    "        color = 'green' if lower <= true_value <= upper else 'red'\n",
    "        plt.plot([i-0.2, i-0.2], [lower, upper], color=color, alpha=0.5, linewidth=2, \n",
    "                label='Traditional' if i == 0 else '')\n",
    "    \n",
    "    # Plot bootstrap CIs\n",
    "    for i, (lower, upper) in enumerate(bootstrap_cis[:n_show]):\n",
    "        color = 'blue' if lower <= true_value <= upper else 'orange'\n",
    "        plt.plot([i+0.2, i+0.2], [lower, upper], color=color, alpha=0.5, linewidth=2,\n",
    "                label='Bootstrap' if i == 0 else '')\n",
    "    \n",
    "    plt.axhline(y=true_value, color='black', linestyle='--', linewidth=2, \n",
    "                label=f'True value = {true_value:.3f}')\n",
    "    plt.xlabel('Experiment number')\n",
    "    plt.ylabel('Parameter value')\n",
    "    plt.title(title)\n",
    "    plt.legend()\n",
    "    plt.grid(alpha=0.3, axis='y')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "875a236c-e90f-4b71-9682-e8a8524b949b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 1: Your first bootstrap\n",
    "print(\"\\n--- Activity 1: Implementing Bootstrap for the Mean ---\")\n",
    "print(\"Bootstrap doesn't assume any particular distribution!\")\n",
    "\n",
    "# Generate a sample from a skewed distribution\n",
    "original_sample = generate_samples('lognormal', n_samples=50, param1=0, param2=0.8)\n",
    "print(f\"Original sample size: {len(original_sample)}\")\n",
    "print(f\"Sample mean: {np.mean(original_sample):.3f}\")\n",
    "print(f\"Sample median: {np.median(original_sample):.3f}\")\n",
    "\n",
    "# Visualize the original sample\n",
    "plt.figure(figsize=(10, 6))\n",
    "plt.hist(original_sample, bins=20, density=True, alpha=0.7, edgecolor='black')\n",
    "plt.title(\"Original Sample Distribution (Lognormal)\")\n",
    "plt.xlabel(\"Value\")\n",
    "plt.ylabel(\"Density\")\n",
    "plt.grid(alpha=0.3)\n",
    "plt.show()\n",
    "\n",
    "# TODO: Implement bootstrap for the mean\n",
    "n_bootstrap = 5000\n",
    "bootstrap_means = []\n",
    "\n",
    "for i in range(n_bootstrap):\n",
    "    # TODO: Resample with replacement from original_sample\n",
    "    # Hint: np.random.choice with replace=True\n",
    "    bootstrap_sample = None  # FILL THIS IN\n",
    "    \n",
    "    # TODO: Calculate statistic (mean) on bootstrap sample\n",
    "    bootstrap_mean = None  # FILL THIS IN\n",
    "    \n",
    "    bootstrap_means.append(bootstrap_mean)\n",
    "\n",
    "# TODO: Calculate percentile bootstrap confidence interval\n",
    "# For 95% CI, use 2.5th and 97.5th percentiles\n",
    "confidence_level = 0.95\n",
    "alpha = 1 - confidence_level\n",
    "\n",
    "# TODO: Find the percentiles\n",
    "# Hint: use np.percentile()\n",
    "ci_lower = None  # FILL THIS IN: lower percentile of bootstrap_means\n",
    "ci_upper = None  # FILL THIS IN: upper percentile of bootstrap_means\n",
    "\n",
    "print(f\"\\nBootstrap 95% CI for mean: [{ci_lower:.3f}, {ci_upper:.3f}]\")\n",
    "print(f\"CI width: {ci_upper - ci_lower:.3f}\")\n",
    "\n",
    "# Visualize bootstrap distribution\n",
    "plot_bootstrap_distribution(bootstrap_means, np.mean(original_sample), \n",
    "                          ci_lower, ci_upper, \"Bootstrap Distribution of Mean\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "acacc57f-70b3-43c0-8b4f-76a621cb7cbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 2: Bootstrap vs Traditional CI\n",
    "print(\"\\n--- Activity 2: Comparing Bootstrap to Traditional CI ---\")\n",
    "print(\"Let's see how they differ for the same data\")\n",
    "\n",
    "# TODO: Calculate traditional CI for comparison\n",
    "sample_mean = np.mean(original_sample)\n",
    "sample_std = None  # FILL THIS IN\n",
    "n = len(original_sample)\n",
    "standard_error = None  # FILL THIS IN\n",
    "\n",
    "t_critical = stats.t.ppf(0.975, df=n-1)\n",
    "margin_of_error = t_critical * standard_error\n",
    "\n",
    "trad_ci_lower = None  # FILL THIS IN\n",
    "trad_ci_upper = None  # FILL THIS IN\n",
    "\n",
    "print(f\"\\nTraditional 95% CI: [{trad_ci_lower:.3f}, {trad_ci_upper:.3f}]\")\n",
    "print(f\"Bootstrap 95% CI:   [{ci_lower:.3f}, {ci_upper:.3f}]\")\n",
    "print(f\"\\nDifference in lower bound: {abs(trad_ci_lower - ci_lower):.3f}\")\n",
    "print(f\"Difference in upper bound: {abs(trad_ci_upper - ci_upper):.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe15bab1-4b46-429b-920f-e857df47d073",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 3: Bootstrap for complex statistics\n",
    "print(\"\\n--- Activity 3: Bootstrap for statistics without formulas ---\")\n",
    "print(\"What if we want a CI for the median? Or the IQR?\")\n",
    "\n",
    "# TODO: Bootstrap for different statistics\n",
    "statistics_to_estimate = {\n",
    "    'mean': np.mean,\n",
    "    'median': np.median,\n",
    "    'iqr': lambda x: np.percentile(x, 75) - np.percentile(x, 25),\n",
    "    'trimmed_mean_10%': lambda x: stats.trim_mean(x, 0.1)\n",
    "}\n",
    "\n",
    "bootstrap_results = {}\n",
    "\n",
    "for stat_name, stat_func in statistics_to_estimate.items():\n",
    "    bootstrap_values = []\n",
    "    \n",
    "    for i in range(n_bootstrap):\n",
    "        # TODO: Generate bootstrap sample\n",
    "        bootstrap_sample = None  # FILL THIS IN\n",
    "        \n",
    "        # TODO: Calculate statistic\n",
    "        bootstrap_stat = None  # FILL THIS IN: use stat_func\n",
    "        \n",
    "        bootstrap_values.append(bootstrap_stat)\n",
    "    \n",
    "    # TODO: Calculate CI\n",
    "    ci_lower = None  # FILL THIS IN\n",
    "    ci_upper = None  # FILL THIS IN\n",
    "    \n",
    "    observed = stat_func(original_sample)\n",
    "    bootstrap_results[stat_name] = {\n",
    "        'observed': observed,\n",
    "        'ci_lower': ci_lower,\n",
    "        'ci_upper': ci_upper,\n",
    "        'ci_width': ci_upper - ci_lower\n",
    "    }\n",
    "\n",
    "# Display results\n",
    "print(\"\\nBootstrap CIs for Various Statistics:\")\n",
    "print(\"-\" * 60)\n",
    "print(f\"{'Statistic':15s} {'Observed':>10s} {'95% CI':>20s} {'Width':>10s}\")\n",
    "print(\"-\" * 60)\n",
    "for stat_name, results in bootstrap_results.items():\n",
    "    ci_str = f\"[{results['ci_lower']:.3f}, {results['ci_upper']:.3f}]\"\n",
    "    print(f\"{stat_name:15s} {results['observed']:10.3f} {ci_str:>20s} {results['ci_width']:10.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f151ad7a-6a64-4c12-b198-5654a72cbcce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Activity 4: Coverage comparison across distributions\n",
    "print(\"\\n--- Activity 4: Bootstrap coverage for different distributions ---\")\n",
    "print(\"Does bootstrap maintain 95% coverage across distributions?\")\n",
    "\n",
    "# TODO: Test bootstrap coverage for different distributions\n",
    "distributions = ['normal', 'lognormal', 'exponential', 't']\n",
    "coverage_results = {'traditional': {}, 'bootstrap': {}}\n",
    "\n",
    "for dist_type in distributions:\n",
    "    trad_cis = []\n",
    "    boot_cis = []\n",
    "    \n",
    "    # Set true mean based on distribution\n",
    "    if dist_type == 'normal':\n",
    "        true_mean = 5\n",
    "    elif dist_type == 'lognormal':\n",
    "        true_mean = np.exp(0 + 0.5**2/2)\n",
    "    elif dist_type == 'exponential':\n",
    "        true_mean = 1\n",
    "    elif dist_type == 't':\n",
    "        true_mean = 0\n",
    "    \n",
    "    # Run fewer simulations for speed\n",
    "    for i in range(200):\n",
    "        # Generate sample\n",
    "        if dist_type == 'normal':\n",
    "            sample = generate_samples('normal', n_samples=30, param1=5, param2=2)\n",
    "        elif dist_type == 'lognormal':\n",
    "            sample = generate_samples('lognormal', n_samples=30, param1=0, param2=0.5)\n",
    "        elif dist_type == 'exponential':\n",
    "            sample = generate_samples('exponential', n_samples=30, param1=1)\n",
    "        elif dist_type == 't':\n",
    "            sample = generate_samples('t', n_samples=30, param1=0, param2=1)\n",
    "        \n",
    "        # Traditional CI\n",
    "        sample_mean = np.mean(sample)\n",
    "        sample_std = np.std(sample, ddof=1)\n",
    "        se = sample_std / np.sqrt(len(sample))\n",
    "        t_crit = stats.t.ppf(0.975, df=len(sample)-1)\n",
    "        trad_ci = (sample_mean - t_crit * se, sample_mean + t_crit * se)\n",
    "        trad_cis.append(trad_ci)\n",
    "        \n",
    "        # TODO: Bootstrap CI\n",
    "        boot_means = []\n",
    "        for j in range(1000):  # Fewer bootstrap samples for speed\n",
    "            boot_sample = None  # FILL THIS IN\n",
    "            boot_means.append(np.mean(boot_sample))\n",
    "        \n",
    "        boot_ci = (None, None)  # FILL THIS IN: calculate percentile CI\n",
    "        boot_cis.append(boot_ci)\n",
    "    \n",
    "    # Calculate coverage\n",
    "    trad_coverage = sum(1 for lower, upper in trad_cis \n",
    "                       if lower <= true_mean <= upper) / len(trad_cis)\n",
    "    boot_coverage = sum(1 for lower, upper in boot_cis \n",
    "                       if lower <= true_mean <= upper) / len(boot_cis)\n",
    "    \n",
    "    coverage_results['traditional'][dist_type] = trad_coverage\n",
    "    coverage_results['bootstrap'][dist_type] = boot_coverage\n",
    "\n",
    "# Display coverage comparison\n",
    "print(\"\\nCoverage Comparison (should be ~0.95 for all):\")\n",
    "print(\"-\" * 50)\n",
    "print(f\"{'Distribution':12s} {'Traditional':>15s} {'Bootstrap':>15s}\")\n",
    "print(\"-\" * 50)\n",
    "for dist in distributions:\n",
    "    trad = coverage_results['traditional'][dist]\n",
    "    boot = coverage_results['bootstrap'][dist]\n",
    "    print(f\"{dist:12s} {trad:>14.3f} {boot:>14.3f}\")\n",
    "\n",
    "print(\"\\n🤔 Think about it: Bootstrap adapts to the actual data distribution\")\n",
    "print(\"   rather than assuming normality. When is this most valuable?\")\n",
    "\n",
    "# Final visualization: Bootstrap on highly skewed data\n",
    "print(\"\\n--- Bonus: Bootstrap shines with skewed data ---\")\n",
    "skewed_sample = generate_samples('exponential', n_samples=40, param1=2)\n",
    "\n",
    "# Show how bootstrap CI adapts to skewness\n",
    "bootstrap_means = []\n",
    "for i in range(5000):\n",
    "    boot_sample = np.random.choice(skewed_sample, size=len(skewed_sample), replace=True)\n",
    "    bootstrap_means.append(np.mean(boot_sample))\n",
    "\n",
    "boot_ci_lower = np.percentile(bootstrap_means, 2.5)\n",
    "boot_ci_upper = np.percentile(bootstrap_means, 97.5)\n",
    "\n",
    "# Traditional CI\n",
    "trad_mean = np.mean(skewed_sample)\n",
    "trad_se = np.std(skewed_sample, ddof=1) / np.sqrt(len(skewed_sample))\n",
    "t_crit = stats.t.ppf(0.975, df=len(skewed_sample)-1)\n",
    "trad_ci_lower = trad_mean - t_crit * trad_se\n",
    "trad_ci_upper = trad_mean + t_crit * trad_se\n",
    "\n",
    "plt.figure(figsize=(12, 5))\n",
    "\n",
    "# Original data\n",
    "plt.subplot(1, 2, 1)\n",
    "plt.hist(skewed_sample, bins=20, density=True, alpha=0.7, edgecolor='black')\n",
    "plt.title(\"Original Exponential Sample\")\n",
    "plt.xlabel(\"Value\")\n",
    "plt.ylabel(\"Density\")\n",
    "\n",
    "# Bootstrap distribution with both CIs\n",
    "plt.subplot(1, 2, 2)\n",
    "plt.hist(bootstrap_means, bins=50, density=True, alpha=0.7, edgecolor='black')\n",
    "plt.axvline(trad_mean, color='red', linestyle='-', linewidth=2, label='Sample mean')\n",
    "plt.axvspan(boot_ci_lower, boot_ci_upper, alpha=0.2, color='green', label='Bootstrap CI')\n",
    "plt.axvspan(trad_ci_lower, trad_ci_upper, alpha=0.2, color='blue', label='Traditional CI')\n",
    "plt.title(\"Bootstrap Distribution with CIs\")\n",
    "plt.xlabel(\"Mean\")\n",
    "plt.ylabel(\"Density\")\n",
    "plt.legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "print(f\"\\nFor exponential data:\")\n",
    "print(f\"Traditional CI: [{trad_ci_lower:.3f}, {trad_ci_upper:.3f}] (symmetric)\")\n",
    "print(f\"Bootstrap CI:   [{boot_ci_lower:.3f}, {boot_ci_upper:.3f}] (adapts to skewness)\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b197b537-1cdb-4097-b938-0f36e4612556",
   "metadata": {},
   "source": [
    "# PART 4: EXTRA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "820adba2-f390-4b69-aa3d-d0ffe1f813c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ============================================\n",
    "# HELPER FUNCTIONS (PROVIDED)\n",
    "# ============================================\n",
    "\n",
    "def generate_samples(dist_type='normal', n_samples=30, param1=0, param2=1):\n",
    "    \"\"\"Generate samples from various distributions\"\"\"\n",
    "    if dist_type == 'normal':\n",
    "        return np.random.normal(param1, param2, n_samples)\n",
    "    elif dist_type == 'lognormal':\n",
    "        return np.random.lognormal(param1, param2, n_samples)\n",
    "    elif dist_type == 't':\n",
    "        return param1 + param2 * np.random.standard_t(df=3, size=n_samples)\n",
    "    elif dist_type == 'exponential':\n",
    "        return np.random.exponential(1/param1, n_samples) if param1 > 0 else np.random.exponential(1, n_samples)\n",
    "    elif dist_type == 'poisson':\n",
    "        return np.random.poisson(param1 if param1 > 0 else 5, n_samples)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6317f6f-6ae2-4f06-91b0-a2bc6d617f83",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Scenario: Analyzing a new dataset\n",
    "print(\"\\n--- Scenario: Analyzing reaction times ---\")\n",
    "print(\"You've collected reaction time data and want to:\")\n",
    "print(\"1. Test if mean reaction time differs from 250ms\")\n",
    "print(\"2. Estimate the mean with confidence interval\")\n",
    "print(\"3. Compare methods on this real-world type of data\")\n",
    "\n",
    "# Reaction times are often right-skewed (lognormal-ish)\n",
    "reaction_times = generate_samples('lognormal', n_samples=45, param1=5.4, param2=0.3)\n",
    "print(f\"\\nSample size: {len(reaction_times)}\")\n",
    "print(f\"Sample mean: {np.mean(reaction_times):.1f}ms\")\n",
    "print(f\"Sample median: {np.median(reaction_times):.1f}ms\")\n",
    "print(f\"Sample std: {np.std(reaction_times, ddof=1):.1f}ms\")\n",
    "\n",
    "# Visualize the data\n",
    "plt.figure(figsize=(10, 6))\n",
    "plt.hist(reaction_times, bins=20, density=True, alpha=0.7, edgecolor='black')\n",
    "plt.xlabel('Reaction Time (ms)')\n",
    "plt.ylabel('Density')\n",
    "plt.title('Reaction Time Distribution')\n",
    "plt.axvline(np.mean(reaction_times), color='red', linestyle='--', \n",
    "            label=f'Mean = {np.mean(reaction_times):.1f}ms')\n",
    "plt.axvline(np.median(reaction_times), color='green', linestyle='--', \n",
    "            label=f'Median = {np.median(reaction_times):.1f}ms')\n",
    "plt.legend()\n",
    "plt.grid(alpha=0.3)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7bb550b3-0564-452a-8b84-32cdccabffd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: Task 1 - Hypothesis test (H0: μ = 250ms)\n",
    "print(\"\\n--- Task 1: Hypothesis Test ---\")\n",
    "null_mean = 250\n",
    "\n",
    "# TODO: Perform t-test\n",
    "t_statistic, p_value = None, None  # FILL THIS IN: use stats.ttest_1samp()\n",
    "print(f\"T-statistic: {t_statistic:.3f}\")\n",
    "print(f\"P-value: {p_value:.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "45459ed3-f1d3-418c-b6af-4fea6149529d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: Task 2 - Traditional confidence interval\n",
    "print(\"\\n--- Task 2: Traditional 95% CI ---\")\n",
    "sample_mean = None  # FILL THIS IN\n",
    "sample_std = None  # FILL THIS IN\n",
    "n = len(reaction_times)\n",
    "standard_error = None  # FILL THIS IN\n",
    "\n",
    "t_critical = stats.t.ppf(0.975, df=n-1)\n",
    "margin_of_error = t_critical * standard_error\n",
    "\n",
    "trad_ci_lower = None  # FILL THIS IN\n",
    "trad_ci_upper = None  # FILL THIS IN\n",
    "\n",
    "print(f\"Traditional 95% CI: [{trad_ci_lower:.1f}, {trad_ci_upper:.1f}]ms\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca6a957c-ad93-4ea1-87c7-be7f617e0f14",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: Task 3 - Bootstrap confidence interval\n",
    "print(\"\\n--- Task 3: Bootstrap 95% CI ---\")\n",
    "n_bootstrap = 5000\n",
    "bootstrap_means = []\n",
    "\n",
    "for i in range(n_bootstrap):\n",
    "    # TODO: Generate bootstrap sample and calculate mean\n",
    "    bootstrap_sample = None  # FILL THIS IN\n",
    "    bootstrap_mean = None  # FILL THIS IN\n",
    "    bootstrap_means.append(bootstrap_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95968349-7860-4a12-9856-1196327f71e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: Calculate bootstrap CI\n",
    "boot_ci_lower = None  # FILL THIS IN: 2.5th percentile\n",
    "boot_ci_upper = None  # FILL THIS IN: 97.5th percentile\n",
    "\n",
    "print(f\"Bootstrap 95% CI: [{boot_ci_lower:.1f}, {boot_ci_upper:.1f}]ms\")\n",
    "\n",
    "# Visualize comparison\n",
    "plt.figure(figsize=(12, 6))\n",
    "\n",
    "# Bootstrap distribution\n",
    "plt.subplot(1, 2, 1)\n",
    "plt.hist(bootstrap_means, bins=50, density=True, alpha=0.7, edgecolor='black')\n",
    "plt.axvline(sample_mean, color='red', linestyle='-', linewidth=2, label='Sample mean')\n",
    "plt.axvspan(boot_ci_lower, boot_ci_upper, alpha=0.2, color='green', label='Bootstrap CI')\n",
    "plt.axvspan(trad_ci_lower, trad_ci_upper, alpha=0.2, color='blue', label='Traditional CI')\n",
    "plt.xlabel('Mean Reaction Time (ms)')\n",
    "plt.ylabel('Density')\n",
    "plt.title('Bootstrap Distribution')\n",
    "plt.legend()\n",
    "plt.grid(alpha=0.3)\n",
    "\n",
    "# CI comparison\n",
    "plt.subplot(1, 2, 2)\n",
    "methods = ['Traditional', 'Bootstrap']\n",
    "lower_bounds = [trad_ci_lower, boot_ci_lower]\n",
    "upper_bounds = [trad_ci_upper, boot_ci_upper]\n",
    "centers = [sample_mean, sample_mean]\n",
    "\n",
    "for i, method in enumerate(methods):\n",
    "    plt.plot([i, i], [lower_bounds[i], upper_bounds[i]], linewidth=4, \n",
    "             label=method, marker='o', markersize=8)\n",
    "    plt.plot(i, centers[i], 'ko', markersize=10)\n",
    "\n",
    "plt.axhline(null_mean, color='red', linestyle='--', label='Null hypothesis (250ms)')\n",
    "plt.xticks(range(len(methods)), methods)\n",
    "plt.ylabel('Reaction Time (ms)')\n",
    "plt.title('Confidence Interval Comparison')\n",
    "plt.legend()\n",
    "plt.grid(alpha=0.3, axis='y')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Summary insights\n",
    "print(\"\\n--- Summary Insights ---\")\n",
    "print(f\"1. P-value ({p_value:.3f}) suggests we {'reject' if p_value < 0.05 else 'fail to reject'} H0: μ = 250ms\")\n",
    "print(f\"2. Traditional CI assumes normality - may not be ideal for skewed data\")\n",
    "print(f\"3. Bootstrap CI: [{boot_ci_lower:.1f}, {boot_ci_upper:.1f}]ms\")\n",
    "print(f\"   - Adapts to actual data distribution\")\n",
    "print(f\"   - Often asymmetric for skewed data\")\n",
    "print(f\"   - Width: {boot_ci_upper - boot_ci_lower:.1f}ms\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "732eb559-c5a0-4840-adeb-591fa0dc3cf1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: Bonus exploration - What about median reaction time?\n",
    "print(\"\\n--- Bonus: Inference for the Median ---\")\n",
    "print(\"What if we're more interested in the median reaction time?\")\n",
    "\n",
    "# TODO: Bootstrap CI for median\n",
    "bootstrap_medians = []\n",
    "\n",
    "for i in range(n_bootstrap):\n",
    "    bootstrap_sample = None  # FILL THIS IN\n",
    "    bootstrap_median = None  # FILL THIS IN: calculate median\n",
    "    bootstrap_medians.append(bootstrap_median)\n",
    "\n",
    "median_ci_lower = None  # FILL THIS IN\n",
    "median_ci_upper = None  # FILL THIS IN\n",
    "\n",
    "print(f\"\\nSample median: {np.median(reaction_times):.1f}ms\")\n",
    "print(f\"Bootstrap 95% CI for median: [{median_ci_lower:.1f}, {median_ci_upper:.1f}]ms\")\n",
    "\n",
    "# Key takeaways\n",
    "print(\"\\n\" + \"=\"*60)\n",
    "print(\"KEY TAKEAWAYS:\")\n",
    "print(\"=\"*60)\n",
    "print(\"1. P-values: Uniform under H0, skewed when H0 is false\")\n",
    "print(\"2. Traditional CIs: Work well for normal data, assume symmetry\")\n",
    "print(\"3. Bootstrap: Distribution-free, works for any statistic\")\n",
    "print(\"4. Always visualize your data before choosing methods!\")\n",
    "print(\"5. Bootstrap is especially valuable for:\")\n",
    "print(\"   - Skewed distributions\")\n",
    "print(\"   - Small samples\")\n",
    "print(\"   - Complex statistics without known formulas\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}