# Let's install and import the required libraries together!
!pip install wooldridge pandas numpy matplotlib seaborn

import wooldridge as wr
import matplotlib.pyplot as plt
import seaborn as sns

Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: wooldridge in /home/kayan/.local/lib/python3.10/site-packages (0.4.5)
Requirement already satisfied: pandas in /home/kayan/.local/lib/python3.10/site-packages (2.2.1)
Requirement already satisfied: numpy in /home/kayan/.local/lib/python3.10/site-packages (1.26.4)
Requirement already satisfied: matplotlib in /home/kayan/.local/lib/python3.10/site-packages (3.8.3)
Requirement already satisfied: seaborn in /home/kayan/.local/lib/python3.10/site-packages (0.13.2)
Requirement already satisfied: python-dateutil>=2.8.2 in /home/kayan/.local/lib/python3.10/site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas) (2022.1)
Requirement already satisfied: tzdata>=2022.7 in /home/kayan/.local/lib/python3.10/site-packages (from pandas) (2024.1)
Requirement already satisfied: contourpy>=1.0.1 in /home/kayan/.local/lib/python3.10/site-packages (from matplotlib) (1.2.0)
Requirement already satisfied: cycler>=0.10 in /home/kayan/.local/lib/python3.10/site-packages (from matplotlib) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /home/kayan/.local/lib/python3.10/site-packages (from matplotlib) (4.49.0)
Requirement already satisfied: kiwisolver>=1.3.1 in /home/kayan/.local/lib/python3.10/site-packages (from matplotlib) (1.4.5)
Requirement already satisfied: packaging>=20.0 in /home/kayan/.local/lib/python3.10/site-packages (from matplotlib) (24.0)
Requirement already satisfied: pillow>=8 in /home/kayan/.local/lib/python3.10/site-packages (from matplotlib) (11.1.0)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/lib/python3/dist-packages (from matplotlib) (2.4.7)
Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)

[notice] A new release of pip is available: 25.0.1 -> 25.2
[notice] To update, run: pip install --upgrade pip

import wooldridge as wr

# Load the dataset.
# NOTE: "<dataset_name>" is a placeholder -- substitute the name of an actual
# Wooldridge dataset (for example "campus") before running this cell.
df = wr.data("<dataset_name>")

# Preview the first few rows
df.head()

# Look at the summary of a specific dataset.
# For example, let's use the "campus" dataset from Wooldridge's collection.
# With description=True the call reports the dataset's name, size, variable
# labels and source notes.
wr.data('campus', description=True)

name of dataset: campus
no of variables: 7
no of observations: 97

+----------+-----------------------+
| variable | label                 |
+----------+-----------------------+
| enroll   | total enrollment      |
| priv     | =1 if private college |
| police   | employed officers     |
| crime    | total campus crimes   |
| lcrime   | log(crime)            |
| lenroll  | log(enroll)           |
| lpolice  | log(police)           |
+----------+-----------------------+

These data were collected by Daniel Martin, a former MSU
undergraduate, for a final project. They come from the FBI Uniform
Crime Reports and are for the year 1992.

# Load the "campus" dataset and keep the unmodified original under a *_raw name.
# NOTE(review): the "ts" in the variable name suggests a time series, but the
# dataset description says the data are for the single year 1992 -- consider
# a more neutral name.

campus_ts_df_raw = wr.data('campus') 
campus_ts_df_raw.head()  # Preview the first few rows of the dataset

# Let's clean up the data a bit and focus on the variables we are interested in:
#   'police' - number of employed police officers
#   'crime'  - total campus crimes
# .copy() makes the subset an independent DataFrame, so adding or editing
# columns on it later does not trigger pandas' SettingWithCopyWarning.

campus_ts_df = campus_ts_df_raw[['police', 'crime']].copy()

# Compute the descriptive statistics for the selected variables
# (transposed so that each variable becomes a row).

campus_ts_df.describe().T

# Visualize the police/crime relationship as a scatter plot.
# The title and axis labels are applied in one call on the Axes that
# seaborn returns, rather than through separate pyplot calls.
plt.figure(figsize=(10, 6))
sns.scatterplot(x='police', y='crime', data=campus_ts_df).set(
    title='Scatter Plot of Police vs Crime',
    xlabel='Number of Police Officers',
    ylabel='Total Campus Crimes',
)
plt.grid(True)
plt.show()

# Scatter plot of police officers vs crime, with a best-fit line
# (semi-transparent points, red regression line).
sns.lmplot(x="police", y="crime", data=campus_ts_df,
           scatter_kws={"alpha":0.6}, line_kws={"color":"red"})

plt.title("Preview: Police Officers vs Crime (with Best-Fit Line)")
plt.xlabel("Number of Police Officers")
# 'crime' is the total number of campus crimes (a count, not a rate), so the
# axis label matches the one used in the plain scatter plot.
plt.ylabel("Total Campus Crimes")
plt.show()

	enroll	police	crime	lcrime	lenroll	lpolice
0	21836.0	24	446	6.100319	9.991315	3.178054
1	6485.0	13	1	0.000000	8.777247	2.564949
2	2123.0	3	1	0.000000	7.660585	1.098612
3	8240.0	17	121	4.795791	9.016756	2.833213
4	19793.0	30	470	6.152733	9.893084	3.401197

	count	mean	std	min	25%	50%	75%	max
police	97.0	20.494845	15.630581	1.0	9.0	16.0	27.0	74.0
crime	97.0	394.453608	460.783866	1.0	85.0	187.0	491.0	2052.0

Lecture 2: Understanding and Presenting Data (Completed version)¶

📦 Required libraries¶

Part A — Major Types of Data in Econometrics 🔍¶

Part B — Identify Data Types & Using the Wooldridge Econometrics Datasets in Python¶

📥 Example: Import a dataset using `wooldridge`¶

Ex. Campus Crime Data¶

Step 1. Inspecting the dataset summary¶

Step 2. Loading the data¶

Step 3. Cleaning the data¶

Step 4. Descriptive statistics¶

Step 5. Visualizing the data¶

Preview: Adding a Best-Fit Line¶

⚠️ Correlation does not equal Causation 🧠🧠🧠¶

Part C — More on: Choosing the Right Visualization 🎨¶

Common Goals and Good Plot Choices¶

🛠️ How to Make These Plots¶

References & Acknowledgments¶

Lecture 2: Understanding and Presenting Data (Completed version)¶

📦 Required libraries¶

Part A — Major Types of Data in Econometrics 🔍¶

Part B — Identify Data Types & Using the Wooldridge Econometrics Datasets in Python¶

📥 Example: Import a dataset using wooldridge¶

Ex. Campus Crime Data¶

Step 1. Inspecting the dataset summary¶

Step 2. Loading the data¶

Step 3. Cleaning the data¶

Step 4. Descriptive statistics¶

Step 5. Visualizing the data¶

Preview: Adding a Best-Fit Line¶

⚠️ Correlation does not equal Causation 🧠🧠🧠¶

Part C — More on: Choosing the Right Visualization 🎨¶

Common Goals and Good Plot Choices¶

🛠️ How to Make These Plots¶

References & Acknowledgments¶

📥 Example: Import a dataset using `wooldridge`¶