DSAN 5650: Causal Inference for Computational Social Science
Summer 2025, Georgetown University
Wednesday, July 9, 2025
Today’s Planned Schedule:
| Start | End | Topic | |
|---|---|---|---|
| Lecture | 6:30pm | 7:00pm | Final Project Pep Talk → | 
| 7:00pm | 7:50pm | Controlling For Things vs. Matching/Weighting → | |
| Break! | 7:50pm | 8:00pm | |
| 8:00pm | 9:00pm | Propensity Score Lab → | 
\[ \DeclareMathOperator*{\argmax}{argmax} \DeclareMathOperator*{\argmin}{argmin} \newcommand{\bigexp}[1]{\exp\mkern-4mu\left[ #1 \right]} \newcommand{\bigexpect}[1]{\mathbb{E}\mkern-4mu \left[ #1 \right]} \newcommand{\definedas}{\overset{\small\text{def}}{=}} \newcommand{\definedalign}{\overset{\phantom{\text{defn}}}{=}} \newcommand{\eqeventual}{\overset{\text{eventually}}{=}} \newcommand{\Err}{\text{Err}} \newcommand{\expect}[1]{\mathbb{E}[#1]} \newcommand{\expectsq}[1]{\mathbb{E}^2[#1]} \newcommand{\fw}[1]{\texttt{#1}} \newcommand{\given}{\mid} \newcommand{\green}[1]{\color{green}{#1}} \newcommand{\heads}{\outcome{heads}} \newcommand{\iid}{\overset{\text{\small{iid}}}{\sim}} \newcommand{\lik}{\mathcal{L}} \newcommand{\loglik}{\ell} \DeclareMathOperator*{\maximize}{maximize} \DeclareMathOperator*{\minimize}{minimize} \newcommand{\mle}{\textsf{ML}} \newcommand{\nimplies}{\;\not\!\!\!\!\implies} \newcommand{\orange}[1]{\color{orange}{#1}} \newcommand{\outcome}[1]{\textsf{#1}} \newcommand{\param}[1]{{\color{purple} #1}} \newcommand{\pgsamplespace}{\{\green{1},\green{2},\green{3},\purp{4},\purp{5},\purp{6}\}} \newcommand{\pedge}[2]{\require{enclose}\enclose{circle}{~{#1}~} \rightarrow \; \enclose{circle}{\kern.01em {#2}~\kern.01em}} \newcommand{\pnode}[1]{\require{enclose}\enclose{circle}{\kern.1em {#1} \kern.1em}} \newcommand{\ponode}[1]{\require{enclose}\enclose{box}[background=lightgray]{{#1}}} \newcommand{\pnodesp}[1]{\require{enclose}\enclose{circle}{~{#1}~}} \newcommand{\purp}[1]{\color{purple}{#1}} \newcommand{\sign}{\text{Sign}} \newcommand{\spacecap}{\; \cap \;} \newcommand{\spacewedge}{\; \wedge \;} \newcommand{\tails}{\outcome{tails}} \newcommand{\Var}[1]{\text{Var}[#1]} \newcommand{\bigVar}[1]{\text{Var}\mkern-4mu \left[ #1 \right]} \]
intervention in the dataset: a seminar on growth mindset for high school studentsachievement_score in the dataset: performance on state’s standardized test\[ \mathbb{E}[Y \mid T = 1] - \mathbb{E}[Y \mid T = 0] \]
| intervention | mean_score | 
|---|---|
| 0 | -0.1538030 | 
| 1 | 0.3184686 | 
| term | estimate | 
|---|---|
| (Intercept) | -0.1538030 | 
| intervention | 0.4722717 | 

library(tidyverse)
library(Rlab)
set.seed(5650)
n <- 250
motiv_vals <- runif(n, 0, 1)
enroll_vals <- ifelse(
  motiv_vals < 0.25,
  0,
  # We know motiv > 0.25
  ifelse(
    motiv_vals > 0.75,
    1,
    # We know 0.25 < motiv < 0.75
    rbern(n, prob=(motiv_vals - 0.125)*1.5)
  )
)
ncigs_vals <- rbinom(n, size=30, prob=0.6-0.2*enroll_vals)
smoke_df <- tibble(
  motiv=motiv_vals,
  enroll=enroll_vals,
  ncigs=ncigs_vals
)
(smoke_mean_df <- smoke_df |> group_by(enroll) |> summarize(mean_ncigs=mean(ncigs)))| enroll | mean_ncigs | 
|---|---|
| 0 | 17.88060 | 
| 1 | 12.26724 | 
| term | estimate | 
|---|---|
| (Intercept) | 17.880597 | 
| enroll | -5.613356 | 
eprop_model <- glm(enroll ~ motiv, family='binomial', data=smoke_df)
eprop_preds <- predict(eprop_model, type="response")
smoke_df <- smoke_df |> mutate(pred=eprop_preds)
# Use the preds to compute IPW
smoke_df <- smoke_df |> rowwise() |> mutate(
  ipw=ifelse(enroll, 1/pred, 1/(1-pred))
) |> arrange(pred)
#smoke_df
smoke_df |> mutate(enroll=factor(enroll)) |>
  ggplot(aes(x=motiv, y=ncigs, color=enroll)) +
  geom_point() +
  theme_dsan(base_size=24) +
  labs(title="Before Weighting")
smoke_df |>
  ggplot(aes(x=motiv)) +
  # Predictions
  geom_point(
    aes(y=enroll, color=factor(enroll))
  ) +
  # Values
  geom_point(
    aes(y=pred, color=factor(enroll))
  ) +
  labs(color="enroll") +
  theme_dsan(base_size=24) +
  labs(title="Propensity to Enroll")
ipw_min <- min(smoke_df$ipw)
ipw_max <- max(smoke_df$ipw)
smoke_df <- smoke_df |> mutate(
  ipw_scaled = (ipw - ipw_min) / (ipw_max - ipw_min)
)
smoke_df |>
  ggplot(aes(x=motiv)) +
  # Predictions
  geom_point(
    aes(y=enroll, color=factor(enroll))
  ) +
  # Values
  geom_point(
    aes(y=ipw_scaled, color=factor(enroll))
  ) +
  theme_dsan(base_size=24) +
  labs(
    title="Inverse Probability-of-Treatment Weights (IPTW)",
    color="enroll"
  )↓

→
↑

| term | estimate | std.error | 
|---|---|---|
| (Intercept) | 18.10133 | 0.2466712 | 
| enroll | -5.66453 | 0.3571696 | 
Call:
lm_weightit(formula = ncigs ~ enroll, data = smoke_df, weightit = W)
Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  18.1013     0.2479   73.02   <1e-06 ***
enroll       -5.6645     0.5463  -10.37   <1e-06 ***
Standard error: HC0 robust
Call:
lm_weightit(formula = ncigs ~ enroll, data = smoke_df, weightit = W_default)
Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  18.1013     0.2441   74.16   <1e-06 ***
enroll       -5.6645     0.5444  -10.41   <1e-06 ***
Standard error: HC0 robust (adjusted for estimation of weights)DSAN 5650 Week 8: Propensity Score Weighting