ECON320 Week 10 — Heteroskedasticity (Completed version)¶

Learning goals

  • Know when t/F p-values are dependable.

    • Classical: dependable in small samples if MLR.6 (normal errors), constant spread (homoskedasticity), independent observations, and a correct linear mean hold.

    • Robust (HC0): dependable in large samples even without normality or constant spread; still needs independence and exogeneity. Small-sample p-values are approximate.

  • Spot & diagnose heteroskedasticity.

    • Residuals‑vs‑fitted funnel/uneven pattern as a clue.

    • Use Breusch–Pagan (BP) and White p‑values as formal checks.

  • Conduct OLS analysis with heteroskedasticity-robust standard errors.

    • Fit OLS and report HC0 SEs.

📦 Required libraries¶

In [1]:
!pip install -q numpy pandas statsmodels scipy matplotlib 

import numpy as np, pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

Part A. Intuition¶

🔄 A1) Quick recall: t-tests under MLR.6 and constant spread¶

  • MLR.6 (Normal errors): $$ u_i\mid X \sim \mathcal N(0,\sigma^2) $$.

  • Constant spread (homoskedasticity): $$ \mathrm{Var}(u_i\mid X)=\sigma^2 $$.

  • With these, the standard t/F statistics reported by software are trustworthy even in small samples.

  • Decision (two‑sided @ level $\alpha$). Reject $H_0:\beta_j=0$ if the two‑sided p‑value $<\alpha$. A minimal code sketch follows.
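
A minimal sketch of this decision rule, on simulated data where MLR.6 and constant spread hold by construction (all names below are illustrative, not part of the later experiment):

In [ ]:
# Classical t-test decision under MLR.6 + constant spread (simulated)
rng_a1 = np.random.default_rng(0)
n_a1 = 30                                   # small sample: classical t is exact here
x_a1 = rng_a1.normal(size=n_a1)
y_a1 = 1.0 + 0.5 * x_a1 + rng_a1.normal(scale=1.0, size=n_a1)   # homoskedastic normal u

fit_a1 = sm.OLS(y_a1, sm.add_constant(x_a1)).fit()
alpha = 0.05
p_slope = fit_a1.pvalues[1]                 # two-sided p-value for the slope
print(f"p = {p_slope:.3f} ->",
      "reject H0: beta = 0" if p_slope < alpha else "do not reject H0")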


🧠 A2) Bridge: What if the spread isn’t constant (fan/funnel shape)?¶

  • Real data often show a funnel: residual spread grows/shrinks with $x$.

  • A single pooled noise estimate then misstates the standard errors, so classical t/F tests can over‑ or under‑reject.

  • Fix: same regression and test recipe, but request White/Huber/Eicker (HC0) robust SEs with .fit(cov_type="HC0").

    Software computes them; you read the table (default robust p‑values use a normal “z” reference).


👀 A3) Tiny visual: what a funnel shape means (income story)¶

Story / intuition. Think in percent vs dollars:

  • If errors are about 10% everywhere, then at \$20 income a 10% miss is \$2, but at \$200 it’s \$20.
  • Same percentage error ⇒ bigger absolute miss when income is larger.
  • On the plot, that shows up as a funnel: points spread out as $x$ increases (a possible story — not the only cause).
In [2]:
# Exaggerated funnel: y vs "income"-like x with variance that grows sharply
rng_plot = np.random.default_rng(0)        # seeded so the picture is reproducible
n_plot = 350
x_plot = np.linspace(0.5, 6.0, n_plot)
sigma_plot = 0.10 + 0.70 * x_plot          # exaggerated spread
y_true = 1.0 + 0.8 * x_plot
y_plot = y_true + rng_plot.normal(scale=sigma_plot, size=n_plot)

plt.figure(figsize=(6,4))
plt.scatter(x_plot, y_plot, s=12, alpha=0.6, label="data")
plt.plot(x_plot, y_true, label="true mean")
plt.title("Exaggerated funnel: residual spread rises with income")
plt.xlabel("income-like x"); plt.ylabel("y")
plt.legend(frameon=False); plt.show()
[Figure: exaggerated funnel. Scatter of y vs income-like x with the true mean line; spread rises with x.]

Part B. Experiment¶

B1) Quick demo: OLS when variance grows with $|x_1|$¶

We simulate $$ y_i = 1 + 0\,x_{1i} + 0.8\,x_{2i} - 0.5\,x_{3i} + u_i,\qquad u_i \sim \mathcal N(0,\,\sigma_i^2),\ \ \sigma_i = 0.5 + 1.0\,|x_{1i}|. $$

In [3]:
# --- DGP: income-style funnel (variance rises with x1) ---
seed = 40
rng = np.random.default_rng(seed)
n = 180

# Standard-normal regressors; the funnel will appear in |x1|
x1 = rng.normal(size=n)
x2 = rng.normal(size=n)
x3 = rng.normal(size=n)

# Heteroskedasticity: variance increases with x1 (megaphone/funnel)
sigma_i = 0.5 + 1 * np.abs(x1)
u = rng.normal(scale=sigma_i, size=n)

beta1 = 0.0  # H0 true to illustrate over-rejection (classical)
y = 1.0 + beta1*x1 + 0.8*x2 - 0.5*x3 + u

X = sm.add_constant(pd.DataFrame({"x1": x1, "x2": x2, "x3": x3}))

model       = sm.OLS(y, X)
ols_classic = model.fit()                 # classical (assumes constant spread)
ols_hc0     = model.fit(cov_type="HC0")   # robust (White/Huber/Eicker), z-based p-values

# Compact coefficient tables for quick lecture read
print("=== Classical (assumes constant spread) ===")
print(ols_classic.summary().tables[1])

print("\n=== Robust (HC0, z-based) ===")
print(ols_hc0.summary().tables[1])
=== Classical (assumes constant spread) ===
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9107      0.118      7.702      0.000       0.677       1.144
x1             0.2684      0.119      2.254      0.025       0.033       0.503
x2             0.7932      0.108      7.360      0.000       0.580       1.006
x3            -0.5469      0.117     -4.664      0.000      -0.778      -0.315
==============================================================================

=== Robust (HC0, z-based) ===
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.9107      0.119      7.628      0.000       0.677       1.145
x1             0.2684      0.172      1.560      0.119      -0.069       0.606
x2             0.7932      0.115      6.873      0.000       0.567       1.019
x3            -0.5469      0.101     -5.416      0.000      -0.745      -0.349
==============================================================================
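
Under the hood, HC0 is the Eicker–Huber–White "sandwich": $$ \widehat{\mathrm{Var}}(\hat\beta) \;=\; (X'X)^{-1}\Big(\sum_i e_i^2\, x_i x_i'\Big)(X'X)^{-1}. $$ A by-hand check against the robust column above (reusing X, ols_classic, and ols_hc0 already defined):

In [ ]:
# HC0 sandwich by hand: (X'X)^{-1} (X' diag(e^2) X) (X'X)^{-1}
Xm = X.to_numpy()
e  = ols_classic.resid.to_numpy()
XtX_inv = np.linalg.inv(Xm.T @ Xm)
meat    = Xm.T @ (e[:, None]**2 * Xm)       # X' diag(e_i^2) X without forming the diagonal matrix
V_hc0   = XtX_inv @ meat @ XtX_inv
print("by hand:    ", np.sqrt(np.diag(V_hc0)).round(3))
print("statsmodels:", ols_hc0.bse.round(3).to_numpy())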

B2) Diagnostic: residuals vs $x_1$¶

In [4]:
# Clear funnel diagnostic: residuals vs x1
plt.figure(figsize=(6,4))
plt.scatter(x1, ols_classic.resid, s=12, alpha=0.6)
plt.axhline(0, ls="--")
plt.title("Residuals vs x1 — funnel: spread rises with x1 (by design)")
plt.xlabel("x1"); plt.ylabel("Residuals")
plt.show()
[Figure: residuals vs x1. Spread widens as |x1| grows.]
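
The learning goals also mention the residuals‑vs‑fitted view. A quick companion plot with the same fit; here the funnel is weaker than in the plot above, because the variance tracks $|x_1|$ rather than the fitted mean:

In [ ]:
# Companion diagnostic: residuals vs fitted values
plt.figure(figsize=(6,4))
plt.scatter(ols_classic.fittedvalues, ols_classic.resid, s=12, alpha=0.6)
plt.axhline(0, ls="--")
plt.title("Residuals vs fitted values")
plt.xlabel("Fitted values"); plt.ylabel("Residuals")
plt.show()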

B3) BP & White tests — what they do¶

  • Goal: check homoskedasticity (constant spread).

  • Breusch–Pagan (BP): asks whether the squared residuals vary linearly with the original regressors.

    • Auxiliary regression: $$ e_i^2 \;=\; a_0 \;+\; a_1 x_{1i} \;+\; \cdots \;+\; a_k x_{ki} \;+\; \text{noise}. $$
    • LM statistic and reference: $$ \mathrm{LM} \;=\; n\,R^2_{\text{aux}} \;\sim\; \chi^2_{m}\quad\text{under }H_0, $$ where $m$ is the number of slope terms in the auxiliary regression (do not count the intercept).
      If you include all $k$ original regressors, then $m=k$.
  • White test: allows nonlinear variance by adding squares and cross-products of the regressors.

    • Let $\mathcal W$ be the set containing $x_{1i},\dots,x_{ki}$, their squares $x_{1i}^2,\dots,x_{ki}^2$, and all pairwise products $x_{ri}x_{si}$ ($r<s$).
    • Auxiliary regression: $$ e_i^2 \;=\; b_0 \;+\; \sum_{w\in\mathcal W} b_w\,w_i \;+\; \text{noise}. $$
    • LM statistic and reference: $$ \mathrm{LM} \;=\; n\,R^2_{\text{aux}} \;\sim\; \chi^2_{d}\quad\text{under }H_0, $$ where $d$ is the number of slope terms used from $\mathcal W$ (do not count the intercept).
  • Decision rule (both tests): pick a level $\alpha$ (e.g., 0.05).

    • If the p-value from the test is below $\alpha$, reject homoskedasticity; otherwise, do not reject.
In [5]:
from statsmodels.stats import diagnostic as sm_diagnostic

bp_lm, bp_lm_p, _, _       = sm_diagnostic.het_breuschpagan(ols_classic.resid, X)
white_lm, white_lm_p, _, _ = sm_diagnostic.het_white(ols_classic.resid, X)

print({"BP_p": float(bp_lm_p), "White_p": float(white_lm_p)})

alpha = 0.05
print(f"Decision @ {alpha*100:.0f}%: "+ (
      "Reject homoskedasticity (variance not constant)"
      if (bp_lm_p < alpha or white_lm_p < alpha)
      else "No strong evidence against homoskedasticity"))
{'BP_p': 0.0517526213407861, 'White_p': 1.3035076700422066e-06}
Decision @ 5%: Reject homoskedasticity (variance not constant)
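
Here BP alone is marginal at 5% (p ≈ 0.052) because the variance is roughly symmetric in $x_1$, so a linear term has little to explain; White rejects decisively once squares enter the auxiliary regression. To connect the printed p-values to the LM formula above, a by-hand Breusch–Pagan check (reusing n, X, and ols_classic; the White version just adds squares and cross-products to the auxiliary regressors):

In [ ]:
from scipy import stats

# BP by hand: auxiliary regression of squared residuals on the original regressors
e2  = ols_classic.resid**2
aux = sm.OLS(e2, X).fit()                   # X already contains the constant
LM  = n * aux.rsquared                      # LM = n * R^2 of the auxiliary regression
m   = X.shape[1] - 1                        # slope terms only (exclude the intercept)
p_bp = stats.chi2.sf(LM, df=m)
print({"LM": round(float(LM), 3), "df": m, "p_by_hand": float(p_bp)})   # should match BP_p above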

Takeaways¶

  • If residual spread looks uneven (funnel), prefer robust SEs (White/Huber/Eicker, HC0) for inference.

  • Same workflow: fit OLS once, request HC0 via .fit(cov_type="HC0"), and read the table.

  • Use the plot + BP/White p‑values together for diagnosing heteroskedasticity.

Assumption note — what HC0 fixes (and what it doesn’t):

  • HC0 fixes non‑constant variance (heteroskedasticity).

  • You still need exogeneity ($E[u_i \mid X_i]=0$), independent observations (no serial/cluster correlation — use clustered/HAC SEs if present; see the sketch below), and no perfect multicollinearity.
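
For reference, the same one-line .fit() interface covers those cases in statsmodels. A sketch (the commented cluster line assumes a groups array, one label per observation, which this simulated dataset does not have):

In [ ]:
# Other cov_type options (illustrative; reusing y and X from Part B)
m_hc1 = sm.OLS(y, X).fit(cov_type="HC1")    # HC0 with a small-sample degrees-of-freedom correction
m_hac = sm.OLS(y, X).fit(cov_type="HAC", cov_kwds={"maxlags": 4})   # Newey–West, for serial correlation
# m_cl = sm.OLS(y, X).fit(cov_type="cluster", cov_kwds={"groups": groups})   # hypothetical `groups`
print("HC1:", m_hc1.bse.round(3).to_numpy())
print("HAC:", m_hac.bse.round(3).to_numpy())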


References & Acknowledgments¶

  • This teaching material was prepared with the assistance of OpenAI's ChatGPT (GPT-5).

End of lecture notebook.