DSAN 5650: Causal Inference for Computational Social Science
Summer 2025, Georgetown University
Wednesday, July 9, 2025
Today’s Planned Schedule:
Start | End | Topic | |
---|---|---|---|
Lecture | 6:30pm | 7:00pm | Final Project Pep Talk → |
7:00pm | 7:50pm | Controlling For Things vs. Matching/Weighting → | |
Break! | 7:50pm | 8:00pm | |
8:00pm | 9:00pm | Propensity Score Lab → |
\[ \DeclareMathOperator*{\argmax}{argmax} \DeclareMathOperator*{\argmin}{argmin} \newcommand{\bigexp}[1]{\exp\mkern-4mu\left[ #1 \right]} \newcommand{\bigexpect}[1]{\mathbb{E}\mkern-4mu \left[ #1 \right]} \newcommand{\definedas}{\overset{\small\text{def}}{=}} \newcommand{\definedalign}{\overset{\phantom{\text{defn}}}{=}} \newcommand{\eqeventual}{\overset{\text{eventually}}{=}} \newcommand{\Err}{\text{Err}} \newcommand{\expect}[1]{\mathbb{E}[#1]} \newcommand{\expectsq}[1]{\mathbb{E}^2[#1]} \newcommand{\fw}[1]{\texttt{#1}} \newcommand{\given}{\mid} \newcommand{\green}[1]{\color{green}{#1}} \newcommand{\heads}{\outcome{heads}} \newcommand{\iid}{\overset{\text{\small{iid}}}{\sim}} \newcommand{\lik}{\mathcal{L}} \newcommand{\loglik}{\ell} \DeclareMathOperator*{\maximize}{maximize} \DeclareMathOperator*{\minimize}{minimize} \newcommand{\mle}{\textsf{ML}} \newcommand{\nimplies}{\;\not\!\!\!\!\implies} \newcommand{\orange}[1]{\color{orange}{#1}} \newcommand{\outcome}[1]{\textsf{#1}} \newcommand{\param}[1]{{\color{purple} #1}} \newcommand{\pgsamplespace}{\{\green{1},\green{2},\green{3},\purp{4},\purp{5},\purp{6}\}} \newcommand{\pedge}[2]{\require{enclose}\enclose{circle}{~{#1}~} \rightarrow \; \enclose{circle}{\kern.01em {#2}~\kern.01em}} \newcommand{\pnode}[1]{\require{enclose}\enclose{circle}{\kern.1em {#1} \kern.1em}} \newcommand{\ponode}[1]{\require{enclose}\enclose{box}[background=lightgray]{{#1}}} \newcommand{\pnodesp}[1]{\require{enclose}\enclose{circle}{~{#1}~}} \newcommand{\purp}[1]{\color{purple}{#1}} \newcommand{\sign}{\text{Sign}} \newcommand{\spacecap}{\; \cap \;} \newcommand{\spacewedge}{\; \wedge \;} \newcommand{\tails}{\outcome{tails}} \newcommand{\Var}[1]{\text{Var}[#1]} \newcommand{\bigVar}[1]{\text{Var}\mkern-4mu \left[ #1 \right]} \]
intervention
in the dataset: a seminar on growth mindset for high school studentsachievement_score
in the dataset: performance on state’s standardized test\[ \mathbb{E}[Y \mid T = 1] - \mathbb{E}[Y \mid T = 0] \]
intervention | mean_score |
---|---|
0 | -0.1538030 |
1 | 0.3184686 |
term | estimate |
---|---|
(Intercept) | -0.1538030 |
intervention | 0.4722717 |
library(tidyverse)
library(Rlab)
set.seed(5650)
n <- 250
motiv_vals <- runif(n, 0, 1)
enroll_vals <- ifelse(
motiv_vals < 0.25,
0,
# We know motiv > 0.25
ifelse(
motiv_vals > 0.75,
1,
# We know 0.25 < motiv < 0.75
rbern(n, prob=(motiv_vals - 0.125)*1.5)
)
)
ncigs_vals <- rbinom(n, size=30, prob=0.6-0.2*enroll_vals)
smoke_df <- tibble(
motiv=motiv_vals,
enroll=enroll_vals,
ncigs=ncigs_vals
)
(smoke_mean_df <- smoke_df |> group_by(enroll) |> summarize(mean_ncigs=mean(ncigs)))
enroll | mean_ncigs |
---|---|
0 | 17.88060 |
1 | 12.26724 |
term | estimate |
---|---|
(Intercept) | 17.880597 |
enroll | -5.613356 |
eprop_model <- glm(enroll ~ motiv, family='binomial', data=smoke_df)
eprop_preds <- predict(eprop_model, type="response")
smoke_df <- smoke_df |> mutate(pred=eprop_preds)
# Use the preds to compute IPW
smoke_df <- smoke_df |> rowwise() |> mutate(
ipw=ifelse(enroll, 1/pred, 1/(1-pred))
) |> arrange(pred)
#smoke_df
smoke_df |> mutate(enroll=factor(enroll)) |>
ggplot(aes(x=motiv, y=ncigs, color=enroll)) +
geom_point() +
theme_dsan(base_size=24) +
labs(title="Before Weighting")
smoke_df |>
ggplot(aes(x=motiv)) +
# Predictions
geom_point(
aes(y=enroll, color=factor(enroll))
) +
# Values
geom_point(
aes(y=pred, color=factor(enroll))
) +
labs(color="enroll") +
theme_dsan(base_size=24) +
labs(title="Propensity to Enroll")
ipw_min <- min(smoke_df$ipw)
ipw_max <- max(smoke_df$ipw)
smoke_df <- smoke_df |> mutate(
ipw_scaled = (ipw - ipw_min) / (ipw_max - ipw_min)
)
smoke_df |>
ggplot(aes(x=motiv)) +
# Predictions
geom_point(
aes(y=enroll, color=factor(enroll))
) +
# Values
geom_point(
aes(y=ipw_scaled, color=factor(enroll))
) +
theme_dsan(base_size=24) +
labs(
title="Inverse Probability-of-Treatment Weights (IPTW)",
color="enroll"
)
↓
→
↑
term | estimate | std.error |
---|---|---|
(Intercept) | 18.10133 | 0.2466712 |
enroll | -5.66453 | 0.3571696 |
Call:
lm_weightit(formula = ncigs ~ enroll, data = smoke_df, weightit = W)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 18.1013 0.2479 73.02 <1e-06 ***
enroll -5.6645 0.5463 -10.37 <1e-06 ***
Standard error: HC0 robust
Call:
lm_weightit(formula = ncigs ~ enroll, data = smoke_df, weightit = W_default)
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 18.1013 0.2441 74.16 <1e-06 ***
enroll -5.6645 0.5444 -10.41 <1e-06 ***
Standard error: HC0 robust (adjusted for estimation of weights)
DSAN 5650 Week 8: Propensity Score Weighting