Lecture 4: OLS Estimator for Multiple Linear Regression (MLR) (Completed version)¶

Overview

  • Warm up: recall OLS in 2D (line through a scatter)

  • Extend to multiple regression: visualize as a plane in 3D

  • See how residuals look in 2D and 3D (red segments)

  • Slice the plane to understand partial effects (hold one variable fixed)


🌱 Warm-up – Connecting to Last Week¶

Recall: In simple linear regression (SLR) we had dots and a line.

  • What does the OLS line do? It is the line that best fits the scatter, where “best” means it minimizes the squared vertical distances between the data points and the line.

Formally, the SLR model is:

$$ y = \beta_0 + \beta_1 x + u $$

  • $\beta_0$: intercept
  • $\beta_1$: slope (effect of $x$ on $y$ in the model)
  • $u$: error term (everything else not captured by $x$)

OLS chooses $\hat\beta_0, \hat\beta_1$ to minimize the sum of squared errors (SSE):

$$ \min_{\beta_0, \beta_1} \sum_{i=1}^n \big(y_i - (\beta_0 + \beta_1 x_i)\big)^2 $$

⚠️ Caution: This is a statistical effect in the model, not automatically a causal effect — causality needs stronger assumptions.
Example: 🍕 Pizza consumption vs. group project grades → Teams that meet more often might eat more pizza and do better on projects, but the driver is time spent collaborating. Pizza itself isn’t boosting grades.


What happens if we have more regressors?
For today's class, we will focus on the case of two regressors. Do we still have dots and a line?

❌ Short answer: No!
✅ Instead: we have dots in 3D space and OLS fits a plane through them.

👉 Today’s goal: see that line → plane → slice gives us intuition for multiple regression.


📦 Required libraries¶

We’ll use a few standard Python libraries in this lab:

  • numpy : generate data and do calculations.
  • statsmodels : run OLS regressions.
  • matplotlib : make 2D and 3D plots.
In [ ]:
# Let's install and import the required libraries together!
!pip install numpy statsmodels matplotlib --quiet

# Core libraries
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# For 3D plotting
from mpl_toolkits.mplot3d import Axes3D  

# For animations in Jupyter
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

🎯 Tiny Toy Dataset (5 points) — used in all sections¶

We’ll use this small dataset for all the visuals in today’s lab:

$$ \begin{array}{c|c|c|c} \text{Individual }(i) & \text{Education }(x_{1i}) & \text{Experience }(x_{2i}) & \text{Wage }(y_i) \\\hline 1 & 10 & 1 & 20 \\ 2 & 12 & 4 & 24 \\ 3 & 14 & 5 & 27 \\ 4 & 16 & 9 & 31 \\ 5 & 18 & 7 & 34 \\ \end{array} $$


Big picture.

  • In simple linear regression (SLR) with one regressor (e.g., education), OLS fits a line through a scatter of points.
  • In multiple linear regression (MLR) with two regressors (education and experience), each observation is a point in 3D space, and OLS fits a plane.

Partial effect (what a coefficient means).

  • The coefficient on a regressor (say, education) is the slope of the plane in that direction, holding the other regressor fixed (experience).
  • A slice of the plane at a fixed value of the other variable is a line whose slope equals the partial effect.
In [ ]:
# Create the dataset

educ = np.array([10, 12, 14, 16, 18])   # x1
exper = np.array([ 1,  4,  5,  9,  7])   # x2
wage  = np.array([20, 24, 27, 31, 34])   # y

1) Simple OLS in 2D: draw a line through a scatter of points¶

  • Goal. Find a line
    $$ \hat{y}_i = \hat\beta_0 + \hat\beta_1 x_i $$
    that best summarizes the relation between $y$ and $x$.

  • OLS idea. Choose $\hat\beta_0, \hat\beta_1$ to minimize the mean squared error (MSE); dividing by $n$ does not change the minimizers, so this is equivalent to minimizing the SSE from the warm-up:

    $$ \min_{\beta_0,\beta_1} \; \frac{1}{n}\sum_i \big(y_i - (\beta_0 + \beta_1 x_i)\big)^2 $$


  • Residual. For a point $(x_i,y_i)$, the residual (the sample counterpart of the error term $u_i$) is the red vertical segment between the actual point and the fitted line:

    $$ e_i = y_i - \hat y_i $$

    OLS makes these red segments as short as possible on average (squared).


  • In our toy dataset, let’s regress wage on education only:

    $$ \text{wage}_i = \beta_0 + \beta_1 \,\text{educ}_i + u_i $$

    where:

    • $\text{wage}_i$: hourly wage (in dollars) for person $i$
    • $\text{educ}_i$: years of education for person $i$
    • $u_i$: error term (everything else affecting wage not captured by education)

Here’s the subset of data we are using:

$$ \begin{array}{c|c|c} \text{Individual }(i) & \text{Education }(x_i) & \text{Wage }(y_i) \\\hline 1 & 10 & 20 \\ 2 & 12 & 24 \\ 3 & 14 & 27 \\ 4 & 16 & 31 \\ 5 & 18 & 34 \\ \end{array} $$

⚠️ Note: The full dataset also has other variables (like experience).
By running SLR, we are only incorporating part of the data.
Later, in MLR, we’ll see how to bring in those extra columns.

In [ ]:
# SLR: wage on education only

# Step 1. Raw scatter plot
plt.scatter(educ, wage)
plt.title("Toy dataset: Wage vs Education (raw scatter)")
plt.xlabel("Education"); plt.ylabel("Wage")
plt.show()
In [ ]:
# Step 2. Fit OLS line

# Add constant to X matrix (for intercept)
X_slr = sm.add_constant(educ)
# Fit the model
m_slr = sm.OLS(wage, X_slr).fit()
# Get coefficients
b0_slr, b1_slr = m_slr.params

# Step 3. Add the fitted line

# Create line for plotting
xs = np.linspace(educ.min()-0.5, educ.max()+0.5, 200) # synthetic x values for line: from educ.min()-0.5 to educ.max()+0.5, 200 points.
ys = b0_slr + b1_slr*xs # corresponding y values on line (based on estimated coefficients)

plt.scatter(educ, wage, label="Data")
plt.plot(xs, ys, linewidth=2, label="OLS line")

plt.title("SLR: Wage vs Education (with residuals)")
plt.xlabel("Education"); plt.ylabel("Wage"); plt.legend(); plt.show()
In [ ]:
# Step 4. Plot residuals (vertical red lines) on top of the graph we just made

# The graph we made above
plt.scatter(educ, wage, label="Data")
plt.plot(xs, ys, linewidth=2, label="OLS line")

# Predicted values and residuals
yhat_wage_slr = m_slr.predict(X_slr)
resid_slr = wage - yhat_wage_slr

mse_slr = np.mean(resid_slr**2)

# draw residuals
for i in range(len(educ)):
    plt.vlines(educ[i], yhat_wage_slr[i], wage[i], color="red", linewidth=1.5)

plt.title("SLR: Wage vs Education (with residuals)")
plt.xlabel("Education"); plt.ylabel("Wage"); plt.legend(); plt.show()
In [ ]:
# Let's inspect more closely the estimated coefficients and the MSE
print(f"Intercept (β0): {b0_slr:.2f}")
print(f"Slope (β1): {b1_slr:.2f}")
print(f"MSE: {mse_slr:.2f}")

📊 Interpretation of coefficients¶

  • Estimated OLS line:
    $$ \hat{y}_i = 2.70 + 1.75 \, \text{educ}_i $$

  • Intercept ($\hat\beta_0 = 2.70$): Predicted hourly wage when education = 0.
    (Not very meaningful in practice, but that’s the baseline.)

  • Slope ($\hat\beta_1 = 1.75$): Each extra year of education is associated with $1.75 higher hourly wage on average.

  • MSE (0.06): The model’s average squared error is very small, meaning the line fits these 5 data points almost perfectly.

Predicted values and residuals:

$$ \begin{array}{c|c|c|c|c} \text{Obs} & \text{Education }(x_i) & \text{Actual Wage }(y_i) & \text{Predicted Wage }(\hat{y}_i) & \text{Residual }(e_i) \\\hline 1 & 10 & 20 & 20.2 & -0.2 \\ 2 & 12 & 24 & 23.7 & \;\;0.3 \\ 3 & 14 & 27 & 27.2 & -0.2 \\ 4 & 16 & 31 & 30.7 & \;\;0.3 \\ 5 & 18 & 34 & 34.2 & -0.2 \\ \end{array} $$

These residuals ($e_i$) are exactly the red vertical lines drawn in the plot.
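If you want to reproduce this table from the fitted model, here is a minimal sketch reusing the yhat_wage_slr and resid_slr arrays computed in the residual-plot cell above:

In [ ]:
# Reproduce the predicted-value / residual table from the fitted SLR model
for i in range(len(educ)):
    print(f"Obs {i+1}: educ = {educ[i]:>2}, wage = {wage[i]:>2}, "
          f"predicted = {yhat_wage_slr[i]:5.1f}, residual = {resid_slr[i]:5.1f}")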


2) Multiple regression in 3D: fit a plane through points in space¶

  • Setup (intuition). In simple regression, each individual $i$ is a point $(x_i, y_i)$ in 2D (education, wage).
    With two regressors, each individual becomes a point in 3D space:

    $$ (x_{1i}, x_{2i}, y_i) = (\text{educ}_i,\ \text{exper}_i,\ \text{wage}_i) $$

    For example:

    • Individual 1: $(10,\ 1,\ 20)$ — one point in 3D.
    • Individual 2: $(12,\ 4,\ 24)$ — another point.

    Plotting all 5 individuals gives a cloud of dots in 3D.
    OLS chooses the plane (instead of a line) that best fits these dots by minimizing the sum of vertical (in the $y$ direction) squared distances.


Here is our toy dataset with two regressors (education, experience) and outcome (wage):

$$ \begin{array}{c|c|c|c} \text{Individual }(i) & \text{Education }(x_{1i}) & \text{Experience }(x_{2i}) & \text{Wage }(y_i) \\\hline 1 & 10 & 1 & 20 \\ 2 & 12 & 4 & 24 \\ 3 & 14 & 5 & 27 \\ 4 & 16 & 9 & 31 \\ 5 & 18 & 7 & 34 \\ \end{array} $$

  • Each row is one individual.
  • Each individual becomes a point in 3D space: $(\text{educ}_i, \text{exper}_i, \text{wage}_i)$.
In [ ]:
# Intuition: manually enter one point at a time with students
fig = plt.figure(figsize=(10,8))  # create a new figure
ax = fig.add_subplot(1, 1, 1, projection="3d")  # add a 3D subplot

# Individual  1
ax.scatter(10, 1, 20, color="royalblue", s=120, edgecolor="black")
ax.text(10+0.3, 1+0.3, 20+0.3, "Individual 1", color="royalblue")

# Individual  2
ax.scatter(12, 4, 24, color="seagreen", s=120, edgecolor="black")
ax.text(12+0.3, 4+0.3, 24+0.3, "Individual 2", color="seagreen")

# Individual  3
ax.scatter(14, 5, 27, color="darkorange", s=120, edgecolor="black")
ax.text(14+0.3, 5+0.3, 27+0.3, "Individual 3", color="darkorange")

# Individual  4
ax.scatter(16, 9, 31, color="purple", s=120, edgecolor="black")
ax.text(16+0.3, 9+0.3, 31+0.3, "Individual 4", color="purple")

# Individual  5
ax.scatter(18, 7, 34, color="crimson", s=120, edgecolor="black")
ax.text(18+0.3, 7+0.3, 34+0.3, "Individual 5", color="crimson")

# Labels and style
ax.set_xlabel("Education", labelpad=15)
ax.set_ylabel("Experience", labelpad=15)
ax.text(
    x=min(educ)-1, 
    y=min(exper)-1, 
    z=max(wage)+1,
    s="Wage", fontsize=11, rotation=90, color="black"
)

ax.view_init(elev=20, azim=120)

# Keep axis ranges consistent with padding
ax.set_xlim(min(educ)-1, max(educ)+1)
ax.set_ylim(min(exper)-1, max(exper)+1)
ax.set_zlim(min(wage)-1, max(wage)+1)

plt.subplots_adjust(left=0.1, right=0.95, top=0.9, bottom=0.1)
plt.show()
In [ ]:
# Auto-rotate the exact 3D graph you built
import numpy as np
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

fig = plt.figure(figsize=(10,8))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# --- Same points/labels as your static plot ---
ax.scatter(10, 1, 20, color="royalblue", s=120, edgecolor="black"); ax.text(10+0.3, 1+0.3, 20+0.3, "Individual 1", color="royalblue")
ax.scatter(12, 4, 24, color="seagreen", s=120, edgecolor="black");  ax.text(12+0.3, 4+0.3, 24+0.3, "Individual 2", color="seagreen")
ax.scatter(14, 5, 27, color="darkorange", s=120, edgecolor="black"); ax.text(14+0.3, 5+0.3, 27+0.3, "Individual 3", color="darkorange")
ax.scatter(16, 9, 31, color="purple", s=120, edgecolor="black");     ax.text(16+0.3, 9+0.3, 31+0.3, "Individual 4", color="purple")
ax.scatter(18, 7, 34, color="crimson", s=120, edgecolor="black");    ax.text(18+0.3, 7+0.3, 34+0.3, "Individual 5", color="crimson")

# Axes labels (with manual z-label placement)
ax.set_xlabel("Education", labelpad=15)
ax.set_ylabel("Experience", labelpad=15)
ax.text(x=min(educ)-1, y=min(exper)-1, z=max(wage)+1, s="Wage", fontsize=11, rotation=90, color="black")

# Ranges & layout exactly as before
ax.set_xlim(min(educ)-1, max(educ)+1)
ax.set_ylim(min(exper)-1, max(exper)+1)
ax.set_zlim(min(wage)-1, max(wage)+1)
ax.view_init(elev=20, azim=120)
plt.subplots_adjust(left=0.1, right=0.95, top=0.9, bottom=0.1)

# --- Animation: spin around azimuth ---
def update(angle):
    ax.view_init(elev=20, azim=angle)
    return ()

anim = FuncAnimation(fig, update, frames=np.linspace(0, 360, 181), interval=50, blit=False)

plt.close(fig) 
HTML(anim.to_jshtml())  # shows the animation inline in Jupyter

Now that we’ve placed all 5 individuals in 3D, notice something important:

  • In 2D (SLR), a line summarizes how $y$ changes with $x$.

  • In 3D (MLR with two regressors), a single line is no longer enough.

    Each point is $(\text{educ}_i, \text{exper}_i, \text{wage}_i)$ in 3D, and OLS fits a plane to summarize how wage depends on both regressors.

  • Each slope describes change along one axis:

    • $\hat\beta_1$: slope in the education direction (holding experience fixed).

    • $\hat\beta_2$: slope in the experience direction (holding education fixed).

  • If we slice the plane at a fixed level of one regressor, the slice is a line in 2D whose slope equals the partial effect of the other regressor.


OLS in 3D:

  • The fitted surface is:
    $$ \hat y_i = \hat\beta_0 + \hat\beta_1 \,\text{educ}_i + \hat\beta_2 \,\text{exper}_i $$
  • OLS chooses $(\hat\beta_0, \hat\beta_1, \hat\beta_2)$ to minimize the sum of squared vertical distances between actual points and the plane:
    $$ \min_{\beta_0,\beta_1,\beta_2} \sum_i \Big(y_i - (\beta_0 + \beta_1 \,\text{educ}_i + \beta_2 \,\text{exper}_i)\Big)^2 $$

👉 Same idea as before: in 2D OLS found the best-fitting line, in 3D it finds the best-fitting plane.
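Before running the statsmodels version in the next cell, here is an optional minimal sketch that solves the same least-squares problem directly with numpy (np.linalg.lstsq); it should reproduce the plane coefficients that statsmodels reports below.

In [ ]:
# Solve the plane-fitting problem directly with numpy (optional check)
# Columns: constant (intercept), education, experience
X_manual = np.column_stack([np.ones_like(educ, dtype=float), educ, exper])

# lstsq minimizes the sum of squared vertical distances between wage and X_manual @ beta
beta_hat, _, _, _ = np.linalg.lstsq(X_manual, wage, rcond=None)
print("(β̂0, β̂1, β̂2) =", np.round(beta_hat, 2))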

In [ ]:
# MLR: wage on education and experience (3D plane + residual segments)
X_mlr = sm.add_constant(np.column_stack([educ, exper]))  # add constant & combine regressors
m_mlr = sm.OLS(wage, X_mlr).fit()
b0, b1, b2 = m_mlr.params

fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Data points
ax.scatter(educ, exper, wage, label="Data")

# Mesh for the fitted plane
E, K = np.meshgrid(
    np.linspace(educ.min()-0.5, educ.max()+0.5, 20),
    np.linspace(exper.min()-0.5, exper.max()+0.5, 20)
)
W = b0 + b1*E + b2*K
ax.plot_surface(E, K, W, alpha=0.35)

# Residual "vertical" segments
yhat3 = b0 + b1*educ + b2*exper
for i in range(len(educ)):
    ax.plot([educ[i], educ[i]], [exper[i], exper[i]], [yhat3[i], wage[i]], linewidth=1.5, color="red")

# Labels, ranges, view
ax.set_title("MLR: Fit a plane to 5 points (with residual segments)")
ax.set_xlabel("Education", labelpad=12)
ax.set_ylabel("Experience", labelpad=12)
ax.text(
    x=min(educ)-1, 
    y=min(exper)-1, 
    z=max(wage)+1,
    s="Wage", fontsize=11, rotation=90, color="black"
)
ax.set_xlim(educ.min()-1, educ.max()+1)
ax.set_ylim(exper.min()-1, exper.max()+1)
ax.set_zlim(wage.min()-1, wage.max()+1)
ax.view_init(elev=20, azim=120)

plt.show()

# Pretty print the estimated coefficients
print(f"Intercept (β̂0): {b0:.2f}")
print(f"Educ slope (β̂1): {b1:.2f}")
print(f"Exper slope (β̂2): {b2:.2f}")

📊 Interpretation of the MLR fit (5-point dataset)¶

  • Estimated OLS plane:
    $$ \hat{y}_i \;\approx\; 3.80 \;+\; 1.61\,\text{educ}_i \;+\; 0.16\,\text{exper}_i $$

  • Intercept ($\hat\beta_0 = 3.80$): Predicted wage when education = 0 and experience = 0 (baseline; not always meaningful).

  • Education slope ($\hat\beta_1 = 1.61$): Holding experience fixed, +1 year of education is associated with about $1.61 higher wage on average.

  • Experience slope ($\hat\beta_2 = 0.16$): Holding education fixed, +1 year of experience is associated with about $0.16 higher wage on average.

  • Residuals: The red vertical segments in the 3D plot show
    $$ e_i = y_i - \hat{y}_i $$ OLS chooses the plane to make these vertical gaps as small as possible in the squared sense.
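A quick numerical check of these residuals, and of how the fit compares with the SLR from Section 1, can be done with the coefficients just estimated (a sketch reusing b0, b1, b2, and the SLR mse_slr from earlier):

In [ ]:
# Residuals and MSE of the MLR fit, compared with the SLR fit
yhat_mlr = b0 + b1*educ + b2*exper      # fitted values on the plane
resid_mlr = wage - yhat_mlr             # vertical gaps (the red segments)
mse_mlr = np.mean(resid_mlr**2)

print("MLR residuals:", np.round(resid_mlr, 2))
print(f"MLR MSE: {mse_mlr:.2f}   (SLR MSE was {mse_slr:.2f})")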

In [ ]:
# Rotate the fitted plane + points to view from all angles (with ground projections)
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Re-draw the same plot
ax.scatter(educ, exper, wage, label="Data")
E, K = np.meshgrid(
    np.linspace(educ.min()-0.5, educ.max()+0.5, 20),
    np.linspace(exper.min()-0.5, exper.max()+0.5, 20)
)
W = b0 + b1*E + b2*K
ax.plot_surface(E, K, W, alpha=0.35, shade=False)  # shade=False to avoid warnings

yhat3 = b0 + b1*educ + b2*exper
for i in range(len(educ)):
    ax.plot([educ[i], educ[i]], [exper[i], exper[i]], [yhat3[i], wage[i]], linewidth=1.5)

# --- Prediction at (educ*, exper*) and ground projections ---
educ_star  = 15    # <-- change live in class
exper_star = 6     # <-- change live in class
yhat_star  = b0 + b1*educ_star + b2*exper_star

# point on the plane
ax.scatter([educ_star], [exper_star], [yhat_star],
           s=120, color="crimson", edgecolor="black", label="Predicted ŷ on plane")

# choose a ground z (bottom of z-limits) for projections
z0 = wage.min() - 1

# vertical dotted line from ground up to the plane
ax.plot([educ_star, educ_star], [exper_star, exper_star],
        [z0, yhat_star], linestyle=":", linewidth=2, color="black")

# ground projection lines along axes (z = z0)
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()

# from experience axis to (educ*, exper*) on ground (move along education at fixed exper=exper*)
ax.plot([xmax, educ_star], [exper_star, exper_star],
        [z0, z0], linestyle=":", linewidth=1.8, color="gray")

# from education axis to (educ*, exper*) on ground (move along experience at fixed educ=educ*)
ax.plot([educ_star, educ_star], [ymax, exper_star],
        [z0, z0], linestyle=":", linewidth=1.8, color="gray")

# optional: mark the ground base point
ax.scatter([educ_star], [exper_star], [z0], s=50, color="gray")

# annotate predicted value
ax.text(educ_star+0.4, exper_star+0.4, yhat_star+0.4,
        f"ŷ = {yhat_star:.2f}", color="crimson")

# Labels, ranges, view
ax.set_xlabel("Education", labelpad=12)
ax.set_ylabel("Experience", labelpad=12)
ax.set_zlabel("Wage", labelpad=18)
ax.set_xlim(educ.min()-1, educ.max()+1)
ax.set_ylim(exper.min()-1, exper.max()+1)
ax.set_zlim(wage.min()-1, wage.max()+1)
ax.view_init(elev=20, azim=120)

def update(angle):
    ax.view_init(elev=20, azim=angle)
    return ()

anim = FuncAnimation(fig, update, frames=np.linspace(0, 360, 181), interval=50, blit=False)

plt.close(fig)  # hide static frame
HTML(anim.to_jshtml())

🔮 Reading a prediction from the regression plane¶

Suppose we want the predicted wage for an individual with

  • Education = 15 years
  • Experience = 6 years.

OLS gives us a predicted value:
$$ \hat y^* = \hat\beta_0 + \hat\beta_1 \cdot 15 + \hat\beta_2 \cdot 6 . $$

In the graph:

  • The red point is $(15,\;6,\;\hat y^*)$ sitting on the regression plane.
  • The black dotted line shows how we project up from the ground to the plane to find $\hat y^*$.
  • The gray dotted lines on the ground connect back to the education and experience axes, so you can read the input values directly from the axis ticks.

👉 This shows how the regression plane lets us compute and visualize predicted outcomes for any combination of regressors.
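As a quick check, we can compute $\hat y^*$ both by plugging into the fitted plane and with statsmodels' predict method; a minimal sketch (note that the row passed to predict must include the constant, matching the columns of X_mlr):

In [ ]:
# Predicted wage at (educ = 15, exper = 6), computed two ways
educ_star, exper_star = 15, 6

# (a) plug directly into the fitted plane
yhat_star = b0 + b1*educ_star + b2*exper_star

# (b) use statsmodels' predict; the row is [constant, educ, exper]
yhat_star_sm = m_mlr.predict([[1, educ_star, exper_star]])[0]

print(f"ŷ* by formula:    {yhat_star:.2f}")
print(f"ŷ* by .predict(): {yhat_star_sm:.2f}")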


3) Slicing the plane ⇒ a line: reading a partial effect¶

  • Holding constant.
    To isolate the effect of education, we “slice” the regression plane at a fixed level of experience (in the code below, 6 years).

  • What we see.
    That slice is a line in the (education, wage) plane.
    Its slope equals the estimated coefficient on education, $\hat\beta_1$.

  • Interpretation.

    If education increases by 1 year, while holding experience fixed,
    wage is predicted to change by $\hat\beta_1$ on average.

👉 This is what we mean by a partial effect: the slope of the plane in one direction when the other regressor is kept constant.

In [ ]:
# Slice the plane at exper = 6, and rotate to view from all angles

fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(1, 1, 1, projection="3d")

# Data and fitted plane
ax.scatter(educ, exper, wage, label="Data")
E, K = np.meshgrid(
    np.linspace(educ.min()-0.5, educ.max()+0.5, 20),
    np.linspace(exper.min()-0.5, exper.max()+0.5, 20)
)
W = b0 + b1*E + b2*K
ax.plot_surface(E, K, W, alpha=0.35, shade=False)

# --- Slice settings ---
exper_slice = 6.0  # ← choose the fixed experience level for the slice (e.g., median or 6)
z0, z1 = wage.min() - 1, wage.max() + 1
x0, x1 = educ.min() - 1, educ.max() + 1

# Draw a translucent vertical panel at exper = exper_slice (to visualize "holding exper fixed")
Xp, Zp = np.meshgrid(np.linspace(x0, x1, 2), np.linspace(z0, z1, 2))
Yp = np.full_like(Xp, exper_slice)
ax.plot_surface(Xp, Yp, Zp, alpha=0.10, color="gray", edgecolor="none")  # slice panel

# Intersection line: where the OLS plane meets the slice panel
x_line = np.linspace(educ.min()-0.5, educ.max()+0.5, 200)
y_line = np.full_like(x_line, exper_slice)
z_line = b0 + b1*x_line + b2*exper_slice
ax.plot(x_line, y_line, z_line, linewidth=3, color="crimson", label=f"Slice @ exper={exper_slice:g}")

# Optional: label the slice line
ax.text(x_line[-1], exper_slice, z_line[-1], "  slice (partial effect of educ)", color="crimson")

# Residuals (keep or comment out if you want cleaner view)
yhat3 = b0 + b1*educ + b2*exper
for i in range(len(educ)):
    ax.plot([educ[i], educ[i]], [exper[i], exper[i]], [yhat3[i], wage[i]], linewidth=1.2, color="black")

# Axes, limits, view
ax.set_xlabel("Education", labelpad=12)
ax.set_ylabel("Experience", labelpad=12)
ax.set_zlabel("Wage", labelpad=18)
ax.set_xlim(educ.min()-1, educ.max()+1)
ax.set_ylim(exper.min()-1, exper.max()+1)
ax.set_zlim(wage.min()-1, wage.max()+1)
ax.view_init(elev=20, azim=120)

def update(angle):
    ax.view_init(elev=20, azim=angle)
    return ()

anim = FuncAnimation(fig, update, frames=np.linspace(0, 360, 181), interval=50, blit=False)

plt.close(fig)  # hide static frame
HTML(anim.to_jshtml())
In [ ]:
# Slice the plane at a fixed experience level, show the implied line
exper_slice = 6.0
xs2 = np.linspace(educ.min()-0.5, educ.max()+0.5, 200)
w_slice = b0 + b1*xs2 + b2*exper_slice

plt.scatter(educ, wage, label="Data", color="blue")
plt.plot(xs2, w_slice, color="crimson", linewidth=2,
         label=f"Slice @ exper = {exper_slice:g}")

plt.title("Slice of the plane ⇒ line (partial effect of education)")
plt.xlabel("Education")
plt.ylabel("Wage")
plt.legend()
plt.show()

print("Partial effect of education (β̂1) =", round(b1, 6))

💡 Key point:

  • The slope with respect to education ($\hat\beta_1$) is the same across all slices.
  • The intercept of each slice changes with the fixed value of experience (shifted by $\hat\beta_2 \times \text{exper}$).

👉 So when we change the slice, we move the line up/down, but the slope stays constant.
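To see this concretely, here is a small sketch that draws two slices at illustrative experience levels (3 and 8 years): both lines have slope $\hat\beta_1$, so they are parallel, and only the intercept $\hat\beta_0 + \hat\beta_2\,\text{exper}$ shifts.

In [ ]:
# Two slices of the plane at different experience levels: parallel lines
xs3 = np.linspace(educ.min()-0.5, educ.max()+0.5, 200)

for exper_fixed in (3.0, 8.0):                  # illustrative experience levels
    intercept = b0 + b2*exper_fixed             # slice intercept shifts with exper
    plt.plot(xs3, intercept + b1*xs3,
             label=f"Slice @ exper = {exper_fixed:g} (slope = {b1:.2f})")

plt.scatter(educ, wage, color="blue", label="Data")
plt.title("Parallel slices: same slope, different intercepts")
plt.xlabel("Education"); plt.ylabel("Wage"); plt.legend(); plt.show()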


🔎 Comparing MLR vs SLR¶

SLR (wage ~ educ): $$ \hat{y}_i \;\approx\; 2.70 \;+\; 1.75 \,\text{educ}_i $$

  • Slope ($\hat\beta_1 = 1.75$): each extra year of education is associated with $1.75 higher wage on average.
  • But ⚠️ this ignores experience, so part of experience’s effect is bundled into education’s slope.

MLR (wage ~ educ + exper): $$ \hat{y}_i \;\approx\; 3.80 \;+\; 1.61 \,\text{educ}_i \;+\; 0.16 \,\text{exper}_i $$

  • Education effect drops to $1.61 once we account for experience.
  • Experience itself has a small positive slope ($0.16 per year).

Takeaway:

  • In SLR, the slope on education was a bit too high because experience was omitted and is positively correlated with education.
  • In MLR, the regression “splits” the variation correctly: education still matters a lot, but some of the wage differences are explained by experience.
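A quick numerical check of this bundling uses the standard omitted-variable algebra for the two-regressor case: the SLR slope on education equals the MLR slope on education plus the MLR slope on experience times $\hat\delta_1$, the slope from regressing experience on education. A minimal sketch:

In [ ]:
# Omitted-variable check: SLR slope = MLR educ slope + MLR exper slope × δ̂1,
# where δ̂1 is the slope from regressing exper on educ.
delta_hat = sm.OLS(exper, sm.add_constant(educ)).fit().params[1]

print(f"SLR slope on educ:           {b1_slr:.2f}")
print(f"MLR slope + spillover term:  {b1 + b2*delta_hat:.2f}")
print(f"(δ̂1 = slope of exper on educ = {delta_hat:.2f})")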

References & Acknowledgments¶

  • This teaching material was prepared with the assistance of OpenAI's ChatGPT (GPT-5).

End of lecture notebook.