Code
source("../dsan-globals/_globals.r")
set.seed(5300)
DSAN 5300: Statistical Learning
Spring 2025, Georgetown University
Today’s Planned Schedule:
| | Start | End | Topic |
|---|---|---|---|
| Lecture | 6:30pm | 7:00pm | Extended Recap / Clarification → |
| | 7:00pm | 7:20pm | “Manual” Model Selection: Subsets → |
| | 7:20pm | 7:40pm | Key Regularization Building Block → |
| | 7:40pm | 8:00pm | Regularized Regression Intro → |
| Break! | 8:00pm | 8:10pm | |
| | 8:10pm | 8:50pm | Basically Lasso is the Coolest Thing Ever → |
| | 8:50pm | 9:00pm | Scary-Looking But Actually-Fun W07 Preview → |
source("../dsan-globals/_globals.r")
set.seed(5300)
| | Low Variance | High Variance |
|---|---|---|
| **Low Bias** | | |
| **High Bias** | | |
Note the icons! Test set = Lake monster: pulling out of water to evaluate kills it 😵
CV plots will often look like this (model complexity on the x-axis, CV error on the y-axis):
library(tidyverse) |> suppressPackageStartupMessages()
library(latex2exp) |> suppressPackageStartupMessages()
# LaTeX label for the null model M_0 (used for annotating the plot)
cpl_label <- TeX("$M_0$")
# Stylized CV-error curve: error drops sharply, flattens, then rises again
# as complexity grows (the classic U-ish shape)
sim1k_delta_df <- tibble(
  complexity=1:7,
  cv_err=c(8, 2, 1, 1, 1, 1, 2),
  # Only annotate the "elbow" (M_3) and the point where error turns back up (M_6)
  label=c("","",TeX("$M_3$"),"","",TeX("$M_6$"),"")
)
sim1k_delta_df |> ggplot(aes(x=complexity, y=cv_err, label=label)) +
  geom_line(linewidth=1) +
  geom_point(size=(2/3)*g_pointsize) +
  # parse=TRUE renders the TeX-derived plotmath labels
  geom_text(vjust=-0.7, size=10, parse=TRUE) +
  scale_x_continuous(
    breaks=seq(from=1,to=7,by=1)
  ) +
  theme_dsan(base_size=22) +
  labs(
    title="Generic CV Error Plot",
    x = "Complexity",
    y = "CV Error"
  )
(We will take it back out later, I promise!)
| # Variables | Best Subset | Forward Stepwise |
|---|---|---|
| 1 | `rating` | `rating` |
| 2 | `rating`, `income` | `rating`, `income` |
| 3 | `rating`, `income`, `student` | `rating`, `income`, `student` |
| 4 | `cards`, `income`, `student`, `limit` | `rating`, `income`, `student`, `limit` |
General Form: $\beta^* = \underset{\beta}{\operatorname{argmin}}\left[ \text{RSS}(\beta) + \lambda \cdot \text{Penalty}(\beta) \right]$

Ridge Regression: $\beta^*_{\text{ridge}} = \underset{\beta}{\operatorname{argmin}}\left[ \sum_{i=1}^{n}\left(y_i - \beta^\top x_i\right)^2 + \lambda \sum_{j=1}^{p} \beta_j^2 \right]$

LASSO: $\beta^*_{\text{lasso}} = \underset{\beta}{\operatorname{argmin}}\left[ \sum_{i=1}^{n}\left(y_i - \beta^\top x_i\right)^2 + \lambda \sum_{j=1}^{p} |\beta_j| \right]$

Elastic Net: $\beta^*_{\text{EN}} = \underset{\beta}{\operatorname{argmin}}\left[ \sum_{i=1}^{n}\left(y_i - \beta^\top x_i\right)^2 + \lambda\left( \alpha \sum_{j=1}^{p} |\beta_j| + (1-\alpha) \sum_{j=1}^{p} \beta_j^2 \right) \right]$
(Does anyone recognize this penalty term from earlier in the course?)
Ridge Regression: penalty $\lambda \sum_{j=1}^{p} \beta_j^2$ (squared $L^2$ norm)

LASSO: penalty $\lambda \sum_{j=1}^{p} |\beta_j|$ ($L^1$ norm)

Elastic Net: penalty $\lambda\left( \alpha \sum_{j=1}^{p} |\beta_j| + (1-\alpha) \sum_{j=1}^{p} \beta_j^2 \right)$ (mix of the two)
library(tidyverse) |> suppressPackageStartupMessages()
library(latex2exp) |> suppressPackageStartupMessages()
library(ggforce) |> suppressPackageStartupMessages()
library(patchwork) |> suppressPackageStartupMessages()
# Bounding the space
xbound <- c(-1, 1)
ybound <- c(0, 1.65)
stepsize <- 0.05
# (dx, dy): center of the RSS contour "bowl", i.e. the unconstrained optimum
dx <- 0.605
dy <- 1.6
# The actual function we're plotting contours for
b_inter <- 1.5
my_f <- function(x,y) 8^(b_inter*(x-dx)*(y-dy) - (x-dx)^2 - (y-dy)^2)
# Evaluate the contour function on a grid over the bounded region
x_vals <- seq(from=xbound[1], to=xbound[2], by=stepsize)
y_vals <- seq(from=ybound[1], to=ybound[2], by=stepsize)
data_df <- expand_grid(x=x_vals, y=y_vals)
data_df <- data_df |> mutate(
  z = my_f(x, y)
)
# Optimal beta df
beta_opt_df <- tibble(
  x=121/200, y=8/5, label=c(TeX("$\\beta^*_{OLS}$"))
)
# Ridge optimal beta
ridge_opt_df <- tibble(
  x=0.111, y=0.998, label=c(TeX("$\\beta^*_{ridge}$"))
)
# Lasso diamond
lasso_df <- tibble(x=c(1,0,-1,0,1), y=c(0,1,0,-1,0), z=c(1,1,1,1,1))
lasso_opt_df <- tibble(x=0, y=1, label=c(TeX("$\\beta^*_{lasso}$")))

# And plot
base_plot <- ggplot() +
  geom_contour_filled(
    data=data_df, aes(x=x, y=y, z=z),
    alpha=0.8, binwidth = 0.04, color='black', linewidth=0.65
  ) +
  # y-axis
  geom_segment(aes(x=0, xend=0, y=-Inf, yend=Inf), color='white', linewidth=0.5, linetype="solid") +
  # Unconstrained optimal beta
  geom_point(data=beta_opt_df, aes(x=x, y=y), size=2) +
  geom_label(
    data=beta_opt_df, aes(x=x, y=y, label=label),
    hjust=-0.45, vjust=0.65, parse=TRUE, alpha=0.9
  ) +
  scale_fill_viridis_d(option="C") +
  #coord_equal() +
  labs(
    #title = "Model Selection: Ridge vs. Lasso Constraints",
    x = TeX("$\\beta_1$"),
    y = TeX("$\\beta_2$")
  )
# Ridge constraint region: the unit circle (L2 ball)
ridge_plot <- base_plot +
  geom_circle(
    aes(x0=0, y0=0, r=1, alpha=I(0.1), linetype="circ", color='circ'), fill=NA, linewidth=0.5
  )
  # geom_point(
  #   data=data.frame(x=0, y=0), aes(x=x, y=y),
  #   shape=21, size=135.8, color='white', stroke=1.2, linestyle="dashed"
  # )
# Lasso constraint region: the unit diamond (L1 ball), plus both constrained optima
lasso_plot <- ridge_plot +
  geom_polygon(
    data=lasso_df, aes(x=x, y=y, linetype="diamond", color="diamond"),
    fill='white',
    alpha=0.5,
    linewidth=1
  ) +
  # Ridge beta
  geom_point(data=ridge_opt_df, aes(x=x, y=y), size=2) +
  geom_label(
    data=ridge_opt_df, aes(x=x, y=y, label=label),
    hjust=2, vjust=-0.15, parse=TRUE, alpha=0.9
  ) +
  # Lasso beta
  geom_point(data=lasso_opt_df, aes(x=x, y=y), size=2) +
  geom_label(
    data=lasso_opt_df, aes(x=x, y=y, label=label),
    hjust=-0.75, vjust=-0.15, parse=TRUE, alpha=0.9
  ) +
  ylim(ybound[1], ybound[2]) +
  # xlim(xbound[1], xbound[2]) +
  scale_linetype_manual("Line", values=c("diamond"="solid", "circ"="dashed"), labels=c("a","b")) +
  scale_color_manual("Color", values=c("diamond"="white", "circ"="white"), labels=c("c","d")) +
  # scale_fill_manual("Test") +
  # x-axis
  geom_segment(aes(x=-Inf, xend=Inf, y=0, yend=0), color='white') +
  theme_dsan(base_size=16) +
  coord_fixed() +
  theme(
    legend.position = "none",
    axis.line = element_blank(),
    axis.ticks = element_blank()
  )
lasso_plot
library(tidyverse) |> suppressPackageStartupMessages()
library(latex2exp) |> suppressPackageStartupMessages()
# Shared axis labels for the prior-density plots (reused by the next chunk)
prior_labs <- labs(
  x = TeX("$\\beta_j$"),
  y = TeX("$f(\\beta_j)$")
)
# Standard Normal density over beta_j
ggplot() +
  stat_function(fun=dnorm, linewidth=1) +
  xlim(-3, 3) +
  theme_dsan(base_size=28) +
  prior_labs
library(tidyverse) |> suppressPackageStartupMessages()
library(latex2exp) |> suppressPackageStartupMessages()
# extraDistr provides dlaplace()
library(extraDistr) |> suppressPackageStartupMessages()
# Laplace (double-exponential) density over beta_j, mirroring the Normal-density
# plot in the previous chunk; reuses `prior_labs` defined there.
# NOTE(review): presumably paired with the Normal plot to contrast the implied
# priors for ridge vs. lasso — confirm against the surrounding slide text
ggplot() +
  stat_function(fun=dlaplace, linewidth=1) +
  xlim(-3, 3) +
  theme_dsan(base_size=28) +
  prior_labs
Piecewise regression:
Choose
Let
Q: What do all these types of regression have in common?

A: They can all be written in the form

$$\beta^* = \underset{\beta}{\operatorname{argmin}}\left[ \text{RSS}(\beta) + \lambda \cdot \text{Penalty}(\beta) \right]$$

Where $\lambda \geq 0$ is a tuning parameter controlling the strength of the penalty.