Code
source("../dsan-globals/_globals.r")
<- FALSE final_render
PPOL 6805 / DSAN 6750: GIS for Spatial Data Science
Fall 2025
source("../dsan-globals/_globals.r")
<- FALSE final_render
(In alphabetical order by surname wohoo)
Christy Hsuth1010@georgetown.edu
Yumi Lixl794@georgetown.edu
sf
Cheatsheetsf
and terra
.shp
et al.)A shape“file” is actually (at least) three separate files bundled together:
.shp
: Containing feature geometries.shx
: Positional indices.dbf
: Data attributes.prj
: Coordinate reference system.xml
: MetadataLet’s see what’s inside the shapefile we first saw in Week 1, containing data on DC’s Census Tracts: Census Tracts in 2020
.geojson
).csv
my_data.geojson
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[30, 20], [45, 40],
[10, 40], [30, 20]
]
]
},
"properties": {
"color": "green",
"area": 3565747
}
},
{
"type": "Feature",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[15, 5], [40, 10],
[10, 20], [5, 10],
[15, 5]
]
]
},
"properties": {
"color": "red",
"area": 3272386
}
}
]
}
.gpkg
).tif
or .tiff
)
.nc4
)
EPSG (European Petroleum Survey Group) Registry: Most common way to specify a CRS
PROJ: Rather than opaque numeric code like EPSG, uses plaintext “proj-strings” containing parameter info: datum, ellipsoid, projection, and units (e.g. meters). Example: PROJ4
code EPSG:4326
is represented as
+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs
Using rnaturalearth
with mapview
set.seed(6805)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ lubridate 1.9.4 ✔ tibble 3.3.0
✔ purrr 1.0.4 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
library(rnaturalearth)
library(mapview)
<- ne_countries(country = "France", scale = 50)
france_sf <- mapview(france_sf, label = "geounit", legend = FALSE)) (france_map
<- sf::st_centroid(france_sf) france_cent_sf
Warning: st_centroid assumes attributes are constant over geometries
+ mapview(france_cent_sf, label = "Centroid", legend = FALSE) france_map
Computing the union of all geometries in the sf
via sf::st_union()
library(leaflet.extras2)
Loading required package: leaflet
<- ne_countries(continent = "Africa", scale = 50)
africa_sf <- sf::st_union(africa_sf)
africa_union_sf <- mapview(africa_sf, label="geounit", legend=FALSE)
africa_map <- mapview(africa_union_sf, label="st_union(africa)", legend=FALSE)
africa_union_map | africa_union_map africa_map
<- sf::st_bbox(africa_sf)
africa_bbox_sf <- mapview(africa_bbox_sf, label="st_bbox(africa)", legend=FALSE)
africa_bbox_map | africa_bbox_map africa_map
<- sf::st_convex_hull(africa_sf)
africa_countries_cvx <- mapview(africa_countries_cvx, label="geounit", legend=FALSE)
africa_countries_cvx_map | africa_countries_cvx_map africa_map
Use st_union()
first:
<- africa_sf |> st_union() |> st_convex_hull()
africa_cvx <- mapview(africa_cvx, label="geounit", legend=FALSE)
africa_cvx_map | africa_cvx_map africa_map
Computing the centroid of all geometries in the sf
via sf::st_centroid()
<- sf::st_centroid(africa_sf) africa_cents_sf
Warning: st_centroid assumes attributes are constant over geometries
<- mapview(africa_cents_sf, label="geounit", legend=FALSE)
africa_cents_map | africa_cents_map africa_map
<- system.file("shape/nc.shp", package="sf") |>
nc read_sf() |>
st_transform('EPSG:2264')
<- st_sf(
gr label = apply(expand.grid(1:10, LETTERS[10:1])[,2:1], 1, paste0, collapse = ""),
geom = st_make_grid(nc))
$col <- sf.colors(10, categorical = TRUE, alpha = .3)
gr# cut, to verify that NA's work out:
<- gr[-(1:30),]
gr suppressWarnings(nc_j <- st_join(nc, gr, largest = TRUE))
par(mfrow = c(2,1), mar = rep(0,4))
plot(st_geometry(nc_j), border = 'grey')
plot(st_geometry(gr), add = TRUE, col = gr$col)
text(st_coordinates(st_centroid(st_geometry(gr))), labels = gr$label, cex = .85)
# the joined dataset:
plot(st_geometry(nc_j), border = 'grey', col = nc_j$col)
text(st_coordinates(st_centroid(st_geometry(nc_j))), labels = nc_j$label, cex = .7)
plot(st_geometry(gr), border = '#88ff88aa', add = TRUE)
# Sample random points
<- sf::st_sample(africa_union_sf, 10)
africa_points_list <- sf::st_sf(africa_points_list)
africa_points_sf <- mapview(africa_points_sf, label="Random Point", col.regions=cb_palette[1], legend=FALSE) africa_points_map
Warning in cbind(`Feature ID` = fid, mat): number of rows of result is not a
multiple of vector length (arg 1)
+ africa_points_map africa_map
st_intersects
<- africa_sf[africa_points_sf,]
countries_w_points mapview(countries_w_points, label="geounit", legend=FALSE) + africa_points_map
lengths()
<- sf::st_intersects(africa_sf, africa_points_sf)
country_inter # Computes point counts for each polygon
<- lengths(country_inter)) (num_intersections
[1] 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0
[39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2
<- africa_sf |> mutate(
africa_sf num_points = num_intersections
|> arrange(geounit)
) |> select(geounit, num_points) |> head() africa_sf
geounit | num_points | geometry |
---|---|---|
Algeria | 2 | MULTIPOLYGON (((8.576563 36… |
Angola | 2 | MULTIPOLYGON (((13.07275 -4… |
Benin | 0 | MULTIPOLYGON (((1.622656 6…. |
Botswana | 0 | MULTIPOLYGON (((25.25879 -1… |
Burkina Faso | 0 | MULTIPOLYGON (((0.9004883 1… |
Burundi | 0 | MULTIPOLYGON (((30.55361 -2… |
mapview
mapview(africa_sf, zcol="num_points")
ggplot2
Since we’re starting to get into data attributes rather than geometric features, switching to ggplot2
is recommended!
|> ggplot(aes(fill=num_points)) +
africa_sf geom_sf() +
theme_classic()
library(sf)
<- po <- st_polygon(list(rbind(c(0,0), c(1,0), c(1,1), c(0,1), c(0,0))))
polygon <- st_polygon(list(rbind(c(-1,-1), c(2,-1), c(2,2), c(-1,2), c(-1,-1))))
p0 <- li <- st_linestring(rbind(c(.5, -.5), c(.5, 0.5)))
line <- st_sfc(po, li)
s
par(mfrow = c(3,3))
par(mar = c(1,1,1,1))
# "1020F1102"
# 1: 1
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("I(pol)",intersect(),"I(line) = 1")))
lines(rbind(c(.5,0), c(.5,.495)), col = 'red', lwd = 2)
points(0.5, 0.5, pch = 1)
# 2: 0
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("I(pol)",intersect(),"B(line) = 0")))
points(0.5, 0.5, col = 'red', pch = 16)
# 3: 2
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("I(pol)",intersect(),"E(line) = 2")))
plot(po, col = '#ff8888', add = TRUE)
plot(s, col = c(NA, 'darkgreen'), border = 'blue', add = TRUE)
# 4: 0
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("B(pol)",intersect(),"I(line) = 0")))
points(.5, 0, col = 'red', pch = 16)
# 5: F
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("B(pol)",intersect(),"B(line) = F")))
# 6: 1
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("B(pol)",intersect(),"E(line) = 1")))
plot(po, border = 'red', col = NA, add = TRUE, lwd = 2)
# 7: 1
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("E(pol)",intersect(),"I(line) = 1")))
lines(rbind(c(.5, -.5), c(.5, 0)), col = 'red', lwd = 2)
# 8: 0
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("E(pol)",intersect(),"B(line) = 0")))
points(.5, -.5, col = 'red', pch = 16)
# 9: 2
plot(s, col = c(NA, 'darkgreen'), border = 'blue', main = expression(paste("E(pol)",intersect(),"E(line) = 2")))
plot(p0 / po, col = '#ff8888', add = TRUE)
plot(s, col = c(NA, 'darkgreen'), border = 'blue', add = TRUE)
equals
corresponds to the DE-9IM string "T*F**FFF*"
. If any two geometries obey this relationship, they are (topologically) equal!