Week 8: Null Models and Marked Point Processes

PPOL 6805 / DSAN 6750: GIS for Spatial Data Science
Fall 2024

Class Sessions
Author
Affiliation

Jeff Jacobs

Published

Wednesday, October 16, 2024

Open slides in new tab →

HW4 Tips!

  • ppp Objects
  • Converting between ppp and sf
  • Plotting

[From Last Week] Our New Library: spatstat!

ppp Objects

  • The main datatype used to represent Planar Point Patterns [spatstat book p. 41]
  • Unlike sf objects, which contain data+geometries for any desired collection of \(N\) entities, ppp objects are required to have at least an observation window!

sf Creation:

Code
tree_df <- tibble::tibble(lon=runif(100,0,1), lat=runif(100,0,1), age=runif(100,0,1))
tree_sf <- sf::st_as_sf(
  tree_df,
  coords = c('lon', 'lat')
)
tree_sf |> head(4)
age geometry
0.0233144 POINT (0.1487039 0.5640943)
0.6882371 POINT (0.1248064 0.8612631)
0.3889197 POINT (0.6370261 0.5268693)
0.6638135 POINT (0.5211425 0.4551448)

ppp Creation:

Code
pois_ppp <- spatstat.random::rpoispp(
  lambda=100, win=spatstat.geom::square(1)
)
pois_ppp
Planar point pattern: 91 points
window: rectangle = [0, 1] x [0, 1] units
Code
attributes(pois_ppp)$names
[1] "window"     "n"          "x"          "y"          "markformat"
Code
pois_ppp$x |> head(4)
[1] 0.5735346 0.7526621 0.8129999 0.7419696

ppp \(\leftrightarrow\) sf Conversion

ppp to sf Conversion:

Code
pois_sf <- pois_ppp |> sf::st_as_sf()
pois_sf |> head(4)
label geom
window POLYGON ((0 0, 1 0, 1 1, 0 …
point POINT (0.5735346 0.6204983)
point POINT (0.7526621 0.4205887)
point POINT (0.8129999 0.6844816)
Code
pois_sf |> ggplot() +
  geom_sf(data=pois_sf |> filter(label=="window"), aes(fill='grey')) +
  geom_sf(data=pois_sf |> filter(label != "window"), aes(color='black')) +
  md_theme_classic(base_size=26) +
  scale_fill_manual(name=NULL, values=c("gray90"), labels=c("<span style='font-family: mono'>label == 'window'</span>")) +
  scale_color_manual(name=NULL, values=c("black"), labels=c("<span style='font-family: mono'>label == 'point'</span>")) +
  labs(title = "<span style='font-family: mono'>ppp</span> &rarr; <span style='font-family: mono'>sf</span> Result")

sf to ppp Conversion:

Code
square_sfc <- sf::st_polygon(list(
  matrix(c(0,0,1,0,1,1,0,1,0,0), nrow=5, byrow=TRUE)
)) |> sf::st_sfc()
tree_ppp <- as.ppp(
  sf::st_as_sfc(tree_sf),
  W=as.owin(square_sfc)
)
tree_ppp
Planar point pattern: 100 points
window: polygonal boundary
enclosing rectangle: [0, 1] x [0, 1] units
Code
tree_ppp_sf <- tree_ppp |> sf::st_as_sf()
tree_ppp_sf |> ggplot() +
  geom_sf(aes(fill='gray90')) +
  geom_sf(data=tree_ppp_sf |> filter(label != "window"), aes(color='black')) +
  md_theme_classic(base_size=26) +
  scale_fill_manual(name=NULL, values=c("gray90"), labels=c("<span style='font-family: mono'>tree_ppp$window</span>")) +
  scale_color_manual(name=NULL, values=c("black"), labels=c("<span style='font-family: mono'>tree_ppp${x,y}</span>")) +
  labs(
    title = "<span style='font-family: mono'>sf</span> &rarr; <span style='font-family: mono'>ppp</span> Result",
    x="<span style='font-family: mono'>tree_ppp$x</span>",
    y="<span style='font-family: mono'>tree_ppp$y</span>"
  ) + 
  guides(fill = guide_legend(order = 1), 
              color = guide_legend(order = 2))

Summary Statistics vs. Models

  • Moran’s \(I\) is to GISers what a thermometer is to doctors
  • Measures symptoms; many possible underlying causes!

The Tree-Grid Mystery

  • You’ve been hired as an archaeologist—congratulations! Your job: determine whether arrangement of trees formed:
    • Naturally, via a process of resource competition, or
    • Artificially, via an ancient civilization planting in a grid

Two Possible Histories…

Hypothesis \(\mathcal{H}_{\textsf{Art}}\): Artificial Formation
Code (Step 1: Grid Creation)
ha_base <- 28
square_sf <- sf::st_as_sf(spatstat.geom::square(1))
grid_sf <- sf::st_as_sf(sf::st_make_grid(square_sf))
grid_buffer_sf <- grid_sf |> sf::st_buffer(dist=-0.01, singleSide = TRUE)
grid_buffer_sf |> ggplot() +
  geom_sf() +
  theme_classic(base_size=ha_base)

Code (Step 2: Point Generation)
grid_points <- sf::st_sample(grid_buffer_sf, size=rep(1,100))
grid_buffer_sf |> ggplot() +
  geom_sf() +
  geom_sf(data=grid_points) +
  theme_classic(base_size=ha_base)

Code (Step 3: Observed Result)
grid_ppp <- as.ppp(grid_points, W=spatstat.geom::square(1))
grid_ppp |> sf::st_as_sf() |> ggplot() +
  geom_sf() +
  theme_classic(base_size=ha_base)

Hypothesis \(\mathcal{H}_{\textsf{Nat}}\): Natural Formation
Code (Step 1: Tree Generation)
hn_base <- 28
r <- 0.05
pois_ppp <- rpoispp(150)
pois_sf <- pois_ppp |> sf::st_as_sf()
pois_sf |> ggplot() +
  geom_sf() +
  theme_classic(base_size=hn_base)

Code (Step 2: Competition)
age <- runif(npoints(pois_ppp))
pair_dists <- pairdist(pois_ppp)
close <- (pair_dists < r)
later <- outer(age, age, ">")
killed <- apply(close & later, 1, any)
killed_ppp <- pois_ppp[killed]
alive_ppp <- pois_ppp[!killed]
pois_window_sf <- pois_ppp |> sf::st_as_sf() |> filter(label=="window")
pois_killed_sf <- killed_ppp |> sf::st_as_sf() |> filter(label=="point")
pois_alive_sf <- alive_ppp |> sf::st_as_sf() |> filter(label=="point")
alive_buff_sf <- pois_alive_sf |> sf::st_buffer(r) |> sf::st_union() |> sf::st_intersection(pois_window_sf)
ggplot() +
  geom_sf(data=pois_window_sf) +
  geom_sf(data=alive_buff_sf, aes(color='Inhibition', shape='Inhibition'), linetype='dashed') +
  geom_sf(data=pois_killed_sf, aes(color='Dead', shape='Dead'), size=2, stroke=2) +
  geom_sf(data=pois_alive_sf, aes(color='Alive', shape='Alive'), size=1, stroke=1) +
  scale_shape_manual(name=NULL, values=c("Alive"=19, "Dead"=4, 'Inhibition'=21), labels=c("Alive", "Dead", "Inhibition")) +
  scale_color_manual(name=NULL, values=c("Alive"="black", "Dead"=cb_palette[1], "Inhibition"="black"), labels=c("Alive", "Dead", "Inhibition")) +
  guides(shape=guide_legend(override.aes=list(fill = "white"))) +
  theme_classic(base_size = hn_base) +
  theme(plot.margin = unit(c(0,0,0,0), "cm"))

Code (Step 3: Observed Result)
alive_ppp |> sf::st_as_sf() |> ggplot() +
  geom_sf() +
  theme_classic(base_size=hn_base)

What Tools Do We Have for Distinguishing Between These Cases?

Why Do Events Appear Where They Do?

Code
library(tidyverse)
library(spatstat)
set.seed(6809)
N <- 60
r_core <- 0.05
obs_window <- square(1)
### Clustered data
clust_ppp <- rMatClust(
  kappa=6,
  scale=r_core,
  mu=10
)
clust_sf <- clust_ppp |> sf::st_as_sf()
clust_plot <- clust_sf |>
  ggplot() +
  geom_sf(size=2) +
  theme_classic(base_size=18)
ggsave("images/clust_ppp.png", clust_plot, width=3, height=3)
# Intensity fn
clust_intensity <- density(clust_ppp, sigma = 0.1)
png("images/clust_intensity.png")
par(mar=c(0,0,0,2), las=2, oma=c(0,0,0,0), cex=2)
plot(clust_intensity, main=NULL)
contour(clust_intensity, add = TRUE)
dev.off()
### PCF
clust_pcf <- spatstat.explore::pcf(
  clust_ppp, divisor="d",
  r=seq(from=0.00, to=0.50, by=0.01)
)
clust_pcf_plot <- clust_pcf |> ggplot(aes(x=r, y=iso)) +
  geom_hline(yintercept=1, linetype='dashed', linewidth=1) +
  geom_area(color='black', fill=cb_palette[1], alpha=0.75) +
  scale_x_continuous(breaks=seq(from=0.0, to=1.0, by=0.1)) +
  labs(x="Distance", y="Density") +
  theme_classic(base_size=14)
ggsave("images/clust_pcf.png", clust_pcf_plot, width=3, height=3)
Original Data First-Order Second-Order
\(N = 60\) Events Events modeled individually
\(\implies\) Intensity function \(\lambda(\mathbf{s})\)
Events modeled pairwise \(\implies\) \(K\)-function \(K(\vec{h})\)

What Do These Functions “Detect”?

Code
sq_base <- 16
sq_psize <- 2.5
obs_window <- square(1)
r0 <- 0.2
sq_df <- tibble::tribble(
  ~x, ~y,
  0.5-r0,0.5-r0,
  0.5+r0,0.5+r0,
  0.5-r0,0.5+r0,
  0.5+r0,0.5-r0
)
sq_sf <- sf::st_as_sf(
  sq_df,
  coords = c("x","y")
)
sq_ppp <- as.ppp(sq_sf, W=obs_window)
sq_ppp |> sf::st_as_sf() |> ggplot() +
  geom_sf(size=sq_psize) +
  theme_classic(base_size=sq_base)

Code
par(mar=c(0,0,0,2), las=2, oma=c(0,0,1,0))
sq_density <- density(sq_ppp)
plot(sq_density, main=NULL, xaxs='i', yaxs='i')
contour(sq_density, xaxs='i', yaxs='i', add = TRUE)

Code
### PCF
pcf_result <- spatstat.explore::pcf(
  sq_ppp,
  divisor="d",
  r=seq(from=0.00, to=0.8, by=0.01)
)
pcf_result |> ggplot(aes(x=r, y=iso)) +
  geom_hline(yintercept=1, linetype='dashed', linewidth=1.5) +
  geom_area(color='black', fill=cb_palette[1], alpha=0.75) +
  scale_x_continuous(breaks=seq(from=0.0, to=1.0, by=0.1)) +
  theme_classic(base_size=sq_base)

Code
csr_ppp <- spatstat.random::rpoispp(
  lambda = 60,
  win=obs_window
)
csr_ppp |> sf::st_as_sf() |> ggplot() +
  geom_sf(size=sq_psize) +
  theme_classic(base_size=sq_base)

Code
par(
  mar=c(0,0,0,2),
  las=2,
  oma=c(0,0,1,0)
)
csr_density <- density(csr_ppp)
plot(csr_density, main=NULL, xaxs="i", yaxs="i")
contour(csr_density, xaxs="i", yaxs="i", add = TRUE, lwd=1.5)

Code
csr_pcf_result <- spatstat.explore::pcf(
  csr_ppp,
  divisor="d",
  r=seq(from=0.00, to=0.8, by=0.01)
)
csr_pcf_result |> ggplot(aes(x=r, y=iso)) +
  geom_hline(yintercept=1, linetype='dashed', linewidth=1.5) +
  geom_area(color='black', fill=cb_palette[1], alpha=0.75) +
  scale_x_continuous(breaks=seq(from=0.0, to=1.0, by=0.1)) +
  theme_classic(base_size=sq_base)

A Menagerie of Models

Poisson Point Processes (CSR)

spatstat.random::rpoispp(lambda, win)
  • \(N \sim \text{Pois}(\lambda)\)
  • For \(i \in \{1, \ldots, N\}\):
    • Generate \(X_i, Y_i \sim \mathcal{U}(\texttt{win})\)
Code
sim_base <- 22
sim_psize <- 2
sim_xticks <- seq(from=0.0, to=1.0, by=0.2)
sim_yticks <- seq(from=0.0, to=1.0, by=0.2)
gen_pois_df <- function(num_sims=1) {
  pois_sims <- spatstat.random::rpoispp(
    lambda = 60, nsim=num_sims
  )
  return(tibble::as_tibble(pois_sims))
}
#pois_dfs <- gen_pois_df()
#pois_dfs |> head()
pois_sims <- spatstat.random::rpoispp(
  lambda = 60, nsim=3
)
to_sim_df <- function(cur_sim, sim_name) {
  cur_df <- tibble::as_tibble(cur_sim) |> mutate(sim=sim_name)
  return(cur_df)
}
combined_df <- imap(.x=pois_sims, .f=to_sim_df) |> bind_rows()
combined_df |> ggplot(aes(x=x, y=y)) +
  geom_point(size=sim_psize) +
  facet_wrap(vars(sim)) +
  coord_equal() +
  theme_classic(base_size=sim_base) +
  theme(panel.spacing.x = unit(2, "lines")) +
  scale_x_continuous(breaks=sim_xticks) +
  scale_y_continuous(breaks=sim_yticks)

Simple Sequential Inhibition (SSI)

spatstat.random::rSSI(r, n=Inf, giveup=1000, win)
  • \(\mathbf{S} = \varnothing\)
  • While not done:
    • Generate \(\mathbf{E} = (X, Y) \sim \mathcal{U}(\texttt{win})\)
    • Check if \(\mathbf{E}\) within r units of any existing point in \(\mathbf{S}\)
      • If it is, throw \(\mathbf{E}\) away. Otherwise, add \(\mathbf{E}\) to \(\mathbf{S}\)
    • done=TRUE if \(\mathbf{S}\) has n points OR has been the same for giveup steps
Code
capture.output(ssi_sims <- spatstat.random::rSSI(
  r = 0.05, n=60, nsim=3
), file=nullfile())
combined_df <- imap(.x=ssi_sims, .f=to_sim_df) |> bind_rows()
combined_df |> ggplot(aes(x=x, y=y)) +
  geom_point(size=sim_psize) +
  facet_wrap(vars(sim)) +
  coord_equal() +
  theme_classic(base_size=24) +
  theme(panel.spacing.x = unit(3, "lines")) +
  scale_x_continuous(breaks=sim_xticks) +
  scale_y_continuous(breaks=sim_yticks)

Matérn Cluster Process

spatstat.random::rMatClust(kappa, scale, mu, win)
  • Generate \(K(\kappa)\) parent points via Poisson Point Process with intensity \(\lambda = \kappa\)
  • For each parent point \(\mathbf{s}_i \in \left\{\mathbf{s}_1, \ldots, \mathbf{s}_{K(\kappa)}\right\}\):
    • Generate \(N(\mu)\) offspring points via Poisson Point Process with intensity \(\lambda = \mu\), distributed uniformly within a circle of radius scale centered at \(\mathbf{s}_i\)
  • Offspring points form the outcome (parent points are thrown away)
Code
matclust_sims <- rMatClust(
  kappa = 6,
  scale = 0.075,
  mu = 10,
  nsim = 3
)
matclust_df <- imap(.x=matclust_sims, .f=to_sim_df) |> bind_rows()
matclust_plot <- matclust_df |> ggplot(aes(x=x, y=y)) +
  geom_point(size=sim_psize) +
  facet_wrap(vars(sim), nrow=1) +
  coord_equal() +
  theme_classic(base_size=sim_base) +
  theme(panel.spacing.x = unit(2, "lines")) +
  scale_x_continuous(breaks=sim_xticks) +
  scale_y_continuous(breaks=sim_yticks)
matclust_plot

Matérn Inhibition Process (I and II)

spatstat.random::rMaternI(kappa, r, win)
spatstat.random::rMaternII(kappa, r, win)
rMaternI() [Docs] rMaternII() [Docs]
  • Generate events \(\mathbf{S} = \{\mathbf{s}_1, \ldots, \mathbf{s}_{N(\lambda)}\}\) via Poisson point process with \(\lambda = \kappa\)
  • Generate events \(\mathbf{S} = \{\mathbf{s}_1, \ldots, \mathbf{s}_{N(\lambda)}\}\) via Poisson point process with \(\lambda = \kappa\), plus timestamp \(t_i \sim \mathcal{U}(0,1)\) for each \(\mathbf{s}_i\)
  • Delete all pairs of points \(\mathbf{s}_i\), \(\mathbf{s}_j\) for which \(\textsf{dist}(\mathbf{s}_i, \mathbf{s}_j) < \texttt{r}\)
  • For each pair of points \(\mathbf{s}_i\), \(\mathbf{s}_j\): if \(\textsf{dist}(\mathbf{s}_i, \mathbf{s}_j) < \texttt{r}\), delete the later point
Code
mI_sims <- rMaternI(
  kappa = 60, r = 0.075, nsim=2
)
mII_sims <- rMaternII(
  kappa = 60, r = 0.075, nsim=2
)
mI_combined_df <- imap(.x=mI_sims, .f=to_sim_df) |> bind_rows() |> mutate(sim=paste0("MI ",sim))
mII_combined_df <- imap(.x=mII_sims, .f=to_sim_df) |> bind_rows() |> mutate(sim=paste0("MII ",sim))
m_combined_df <- bind_rows(mI_combined_df, mII_combined_df)
m_plot <- m_combined_df |> ggplot(aes(x=x, y=y)) +
  geom_point(size=sim_psize) +
  facet_wrap(vars(sim), nrow=1) +
  coord_equal() +
  theme_classic(base_size=sim_base) +
  theme(panel.spacing.x = unit(2, "lines")) +
  scale_x_continuous(breaks=sim_xticks) +
  scale_y_continuous(breaks=sim_yticks)
m_plot

Cox Processes: Random Intensity → Random Events

spatstat.random::rLGCP(model, mu, param, win)
models=c("exponential", "gauss", "stable", "gencauchy", "matern")
Code
cox_pcol <- "black"
cox_bg <- "grey90"
cox_pch <- 21
# inhomogeneous LGCP with Gaussian covariance function
m <- as.im(function(x, y){
  5 - 1.5 * (x - 0.5)^2 + 2 * (y - 0.5)^2
}, W=owin())
lgcp_sims <- rLGCP("gauss", m, var=0.15, scale =0.5, nsim=3)
Warning: 32750 out of 65536 terms (50%) in FFT calculation of matrix square
root were negative, and were set to zero. Range: [-2.96, 1910]
Code
# lgcp_combined_df <- imap(.x=ssi_sims, .f=to_sim_df) |> bind_rows()
plot_lgcp <- function(lgcp_sim) {
  plot(attr(lgcp_sim, "Lambda"), main=NULL)
  points(lgcp_sim, col=cox_pcol, bg=cox_bg, pch=cox_pch)
}
par(mfrow=c(1,3), mar=c(0,0,0,2), oma=c(0,0,0,0), las=2)
nulls <- lapply(X=lgcp_sims, FUN=plot_lgcp)

The Point of All This: Null Models via Base Rates!

  • Given an intensity function, we can compute a bunch of simulated point patterns (inhomogeneous Poisson Point Process)…
  • How does an observed point pattern differ from the simulated point patterns?

Constant Risk Hypothesis: Motivation

  • Here is a (fictional) map of flu cases in the US “Midwest”
  • Are people in Chicago and Detroit equally “at risk”?
Code
city_df <- tibble::tribble(
  ~city, ~lon, ~lat, ~pop,
  "Chicago", 41.950567516553896, -87.93011127491978, 2746388,
  "Detroit", 42.45123004999075, -83.18402319217698, 631524
)
city_sf <- sf::st_as_sf(
  city_df,
  coords = c("lat", "lon"),
  crs=4326
)
city_buf_sf <- city_sf |> sf::st_buffer(20000)
city_cases_sf <- city_buf_sf |> sf::st_sample(size=rep(10,2)) |> sf::st_as_sf()
city_cases_sf$city <- "Detroit (10 Cases)"
city_cases_sf[1:10, 'city'] <- "Chicago (10 Cases)"
city_cases_sf$sample <- "Flu"
mapview(city_cases_sf, zcol="city")

Constant Risk Hypothesis: Base Rates

Chicago Detroit
Population 2,746,388 631,524
Code
city_pop_sf <- city_buf_sf |> sf::st_sample(size=c(16, 4)) |> sf::st_as_sf()
city_pop_sf$city <- "Detroit"
city_pop_sf[1:16, 'city'] <- "Chicago"
city_pop_sf$sample <- "People"
city_combined_sf <- bind_rows(city_cases_sf, city_pop_sf)
# mapview(city_combined_sf, zcol="city", marker="sample")
Flu = makeIcon(
    "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c3/Maki2-danger-24.svg/240px-Maki2-danger-24.svg.png",
    "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c3/Maki2-danger-24.svg/24px-Maki2-danger-24.svg.png",
    20,
    20
)
People = makeIcon(
  "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Maki2-pitch-24.svg/24px-Maki2-pitch-24.svg.png",
  "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Maki2-pitch-24.svg/24px-Maki2-pitch-24.svg.png",
  20,
  20
)

city_combined_sf$r <- ifelse(city_combined_sf$sample == "Flu", 0, 4)
city_flu_sf <- city_combined_sf |> filter(sample == "Flu")
city_ppl_sf <- city_combined_sf |> filter(sample == "People")
leaflet(city_flu_sf) |>
  addProviderTiles("CartoDB.Positron") |>
  addMarkers(data=city_flu_sf, icon = Flu) |>
  addMarkers(data=city_ppl_sf, icon = People)

References

Baddeley, Adrian, Ege Rubak, and Rolf Turner. 2015. Spatial Point Patterns: Methodology and Applications with R. CRC Press.