Code
source("../dsan-globals/_globals.r")
DSAN 5650: Causal Inference for Computational Social Science
Summer 2025, Georgetown University
source("../dsan-globals/_globals.r")
(There are a lot of words in HW2 that I haven’t had the chance to explain yet!)
0100101
) so computer can…Super-charge your EDA/modeling | Estimate |
---|---|
library(tidyverse)
library(ggExtra)
#' Plot a set of random walks as overlaid, semi-transparent lines
#'
#' @param walk_data A list-like object with the elements read below:
#'   `long_df` (columns `t`, `pos`, `pid`), `end_df` (columns `t`, `endpos`),
#'   `num_steps` and `num_people`.
#' @param a Alpha (opacity) applied to each individual walk line.
#' @return The ggplot object, returned invisibly.
gen_walk_plot <- function(walk_data, a = 0.0075) {
  # print(end_df)
  grid_color <- rgb(0, 0, 0, 0.1)
  # And plot!
  # NOTE(review): g_linewidth and theme_dsan() are not defined in this file;
  # presumably they come from the sourced _globals.r -- confirm.
  walkplot <- ggplot() +
    geom_line(
      data = walk_data$long_df,
      aes(x = t, y = pos, group = pid),
      linewidth = g_linewidth,
      alpha = a,
      #color = cb_palette[2]
      #color = "#cf8f00"
      color = "black"
    ) +
    # Fully transparent end points: drawn with alpha = 0, so they are present
    # in the plot data without being visible
    geom_point(
      data = walk_data$end_df,
      aes(x = t, y = endpos),
      alpha = 0
    ) +
    # Five x-axis breaks: 0, 1/4, 1/2, 3/4 and all of num_steps
    scale_x_continuous(
      breaks = seq(
        0,
        walk_data$num_steps,
        walk_data$num_steps / 4
      )
    ) +
    scale_y_continuous(
      breaks = seq(-20, 20, 10)
    ) +
    theme_dsan(base_size = 24) +
    theme(
      legend.position = "none"
      # title = element_text(size = 16)
    ) +
    theme(
      panel.grid.major.y = element_line(
        color = grid_color,
        linewidth = 1,
        linetype = 1
      )
    ) +
    labs(
      title = paste0(
        walk_data$num_people, " Random Walks, ",
        walk_data$num_steps, " Steps"
      ),
      x = "Number of Steps",
      y = "Position"
    )
  # Return invisibly so a bare call does not auto-print the plot
  invisible(walkplot)
}

# Load the pre-generated walk data used by the plot below
walk_data <- readRDS("assets/walk_data.rds")
# 16 steps
# wp1 <- gen_walkplot(500, 16, 0.05)
# ggMarginal(wp1, margins = "y", type = "histogram", yparams = list(binwidth = 1))

# Build the walk plot and fix the y-range to [-30, 30].
# NOTE(review): ylim() adds a new y scale, which replaces the
# scale_y_continuous(breaks = ...) set inside gen_walk_plot() -- confirm that
# losing those custom breaks is intended.
wp <- gen_walk_plot(walk_data) + ylim(-30, 30)

# Attach a histogram along the y margin, with one bin per unit of position
ggMarginal(
  wp,
  margins = "y",
  type = "histogram",
  yparams = list(binwidth = 1)
)
Prior Distribution:
What can I guess about values of my parameters from background knowledge of the world? e.g.:
Prior Predictive Distribution:
What could the outcomes look like if I ran my guesses through the DGP?
100 simulated heights, none are negative
1K simulated bar-goers; 80% have this haircut
Posterior Distribution:
Now we observe data:
Posterior Predictive Distribution:
Now that we’ve fit
library(tidyverse)
# Flat prior on Pr(Heads): a horizontal line at y = 0 across x in [0, 1].
# NOTE(review): cb_palette, g_linewidth and theme_dsan() are not defined in
# this file; presumably they come from the sourced _globals.r -- confirm.
flat_df <- tibble(x = seq(0, 1, 0.1), y = 0)
flat_df |>
  ggplot(aes(x = x, y = y)) +
  geom_line(
    color = cb_palette[1],
    linewidth = g_linewidth
  ) +
  ylim(0, 1) +
  labs(
    title = "Flat Prior on Pr(Heads)",
    y = "Density"
  ) +
  theme_dsan(base_size = 28) +
  theme(title = element_text(size = 20))
library(tidyverse)
# Observed data: a single point at (1, 1) with a vertical segment from
# (1, 0) up to it.
# NOTE(review): g_linewidth and theme_dsan() are not defined in this file;
# presumably they come from the sourced _globals.r -- confirm.
data_df <- tibble(x = 1, y = 1)
data_df |>
  ggplot(aes(x = x, y = y)) +
  geom_point(size = 5) +
  geom_segment(
    x = 1, y = 0, yend = 1, linewidth = g_linewidth
  ) +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(
    title = "Observed Data",
    y = "Density"
  ) +
  theme_dsan(base_size = 28) +
  theme(title = element_text(size = 20))
library(tidyverse)
library(latex2exp)
# Posterior of Pr(Heads) drawn as a spike at x = 1: a flat segment along
# y = 0 plus a vertical arrow at x = 1, annotated with width/height labels.
#
# The labels are built as plotmath *strings* (output = "character") and parsed
# by the text layer (parse = TRUE). Passing expression objects to annotate()
# triggers "is.na() applied to non-(list or vector) of type 'expression'"
# warnings, one per annotation.
# NOTE(review): cb_palette, g_linewidth and theme_dsan() are not defined in
# this file; presumably they come from the sourced _globals.r -- confirm.
w_label <- TeX("Width = $1/n$", output = "character")
h_label <- TeX("Height = $n$", output = "character")
data_df <- tibble(x = 1, y = 1)
data_df |>
  ggplot(aes(x = x, y = y)) +
  # Vertical arrow from (1, 0) to (1, 1)
  geom_segment(
    x = 1, y = 0, yend = 1, linewidth = g_linewidth,
    color = cb_palette[1], arrow = arrow()
  ) +
  # Flat segment along y = 0 from x = 0 to x = 1
  geom_segment(
    x = 0, y = 0, xend = 1, linewidth = g_linewidth,
    color = cb_palette[1]
  ) +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(
    title = "Posterior of Pr(Heads)",
    y = "Density"
  ) +
  theme_dsan(base_size = 28) +
  theme(title = element_text(size = 20)) +
  annotate(
    geom = "text", x = 0.5, y = 0.8,
    label = w_label, parse = TRUE,
    hjust = 0, vjust = 1, size = 8
  ) +
  annotate(
    geom = "text", x = 0.5, y = 0.7,
    label = h_label, parse = TRUE,
    hjust = 0, vjust = 1, size = 8
  )
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ lubridate 1.9.4 ✔ tibble 3.3.0
✔ purrr 1.0.4 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Warning in is.na(x): is.na() applied to non-(list or vector) of type
'expression'
Warning in is.na(x): is.na() applied to non-(list or vector) of type
'expression'
library(tidyverse)
# Uniform prior on Pr(Heads): a line at y = 1 over x in [0, 1], with the unit
# square underneath shaded in the same colour.
# NOTE(review): g_linewidth and theme_dsan() are not defined in this file;
# presumably they come from the sourced _globals.r -- confirm.
unif_df <- tibble(x = seq(0, 1, 0.1), y = 1)
unif_df |>
  ggplot(aes(x = x, y = y)) +
  geom_line(
    color = "#e69f00", linewidth = g_linewidth
  ) +
  annotate(
    "rect",
    xmin = 0, xmax = 1, ymin = 0, ymax = 1,
    fill = "#e69f00", alpha = 0.3
  ) +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(title = "Uniform Prior on Pr(Heads)") +
  theme_dsan(base_size = 28) +
  theme(title = element_text(size = 20))
library(tidyverse)
# Observed data: a single point at (1, 1) with a vertical segment from
# (1, 0) up to it.
# NOTE(review): g_linewidth and theme_dsan() are not defined in this file;
# presumably they come from the sourced _globals.r -- confirm.
data_df <- tibble(x = 1, y = 1)
data_df |>
  ggplot(aes(x = x, y = y)) +
  geom_point(size = 5) +
  geom_segment(
    x = 1, y = 0, yend = 1, linewidth = g_linewidth
  ) +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(title = "Observed Data") +
  theme_dsan(base_size = 28) +
  theme(title = element_text(size = 20))
library(tidyverse)
# Posterior of Pr(Heads) drawn as the curve exp(1 - 1/x^2) on [0, 1], with the
# area under the curve shaded.
# NOTE(review): cb_palette, g_linewidth and theme_dsan() are not defined in
# this file; presumably they come from the sourced _globals.r -- confirm.
x_vals <- seq(0, 1, 0.01)
# Vectorised over x. At x = 0 this evaluates to exp(1 - Inf) = 0, and at
# x = 1 to exp(0) = 1, so no special-casing of the endpoints is needed.
my_exp <- function(x) exp(1 - 1 / (x^2))
y_vals <- my_exp(x_vals)
data_df <- tibble(x = x_vals, y = y_vals)
# Ribbon spanning from y = 0 up to the curve
rib_df <- tibble(x = x_vals, ymax = y_vals, ymin = 0)
ggplot() +
  # stat_function(fun=my_exp, linewidth=g_linewidth, color=cb_palette[1]) +
  geom_line(
    data = data_df,
    aes(x = x, y = y),
    linewidth = g_linewidth, color = cb_palette[1]
  ) +
  geom_ribbon(
    data = rib_df,
    aes(x = x, ymin = ymin, ymax = ymax),
    fill = cb_palette[1], alpha = 0.3
  ) +
  # geom_segment(
  #   x=1, y=0, yend=1, linewidth=g_linewidth,
  #   color=cb_palette[1], arrow=arrow()
  # ) +
  # geom_segment(
  #   x=0, y=0, xend=1, linewidth=g_linewidth,
  #   color=cb_palette[1]
  # ) +
  xlim(0, 1) +
  ylim(0, 1) +
  labs(title = "Posterior of Pr(Heads)") +
  theme_dsan(base_size = 28) +
  theme(title = element_text(size = 20))