"""
Helpers for downloading and categorizing OpenStreetMap data via osmnx.
Two concerns, separated:
1. **Categorized POI downloads** — accessibility analyses typically classify
OSM features into user-defined categories (`groceries`, `schools`, …) that
may bundle multiple OSM `tag:value` pairs, optionally with per-pair
weights (a `convenience` shop might count as 0.5 of a `supermarket` for
the `groceries` category). `fetch_pois` automates the full pipeline:
build the OSM tag query from the category map, download via osmnx, tag
each feature with per-category count + weighted-count columns, drop
features matching no category.
2. **Network downloads** — `fetch_network` wraps
`osmnx.graph_from_polygon` + `osmnx.project_graph` so the standard
"fetch in EPSG:4326, project to a metric CRS" pattern is one line.
Categorization without download (when POIs come from another source —
cached, a different provider, etc.) is also exposed as `categorize_pois`.
`osmnx` is an optional dependency (`aperta[osm]` or `aperta[examples]`);
calls to `fetch_*` import it lazily and raise a helpful error if missing.
Example category map::

    POI_CATEGORIES = {
        'groceries': [
            ('shop:supermarket', 1.0),
            ('shop:convenience', 0.5),
            ('shop:bakery', 0.5),
        ],
        'education_school': [('amenity:school', 1.0)],
        'transit_rail': [('railway:station', 3), ('railway:halt', 1)],
    }
    pois = osm_helpers.fetch_pois(
        polygon=dest_polygon, polygon_crs='EPSG:2056',
        category_map=POI_CATEGORIES, target_crs='EPSG:2056',
    )
    # pois now has columns: geometry, name, amenity, shop, railway, ...
    # plus per-category columns: groceries, groceries_weight,
    # education_school, education_school_weight, transit_rail, transit_rail_weight.
"""
from __future__ import annotations
import geopandas as gpd
import networkx as nx
import shapely.geometry.base
# A category map: `{user_category -> [(osm_tag_pair, weight), ...]}`
# where `osm_tag_pair` is a `'key:value'` string like `'shop:supermarket'`
# and `weight` is the amount a matching feature adds to that category's
# weighted-count column.
CategoryMap = dict[str, list[tuple[str, float]]]
# ---------------------------------------------------------------------------
# Pure logic — no network access
# ---------------------------------------------------------------------------
[docs]
def osm_tag_query_for_categories(category_map: CategoryMap) -> dict[str, bool | str | list[str]]:
    """Collapse a category map into the `tags=` mapping osmnx expects.

    Every `key:value` pair listed under any category is collected and the
    values are grouped by OSM key, so a single query covers all features
    that could match at least one category. Weights are ignored here.

    Args:
        category_map: `{user_category -> [(tag_pair, weight), ...]}` where
            `tag_pair` is a `'key:value'` string. Only the first `:` splits
            key from value, so values may themselves contain colons.

    Returns:
        `{osm_tag_key -> [values, ...]}`. Keys appear in first-seen order and
        each value list is sorted and de-duplicated, which keeps the result
        deterministic (useful for caching / hashing). Intended to be passed
        as the `tags` argument of `osmnx.features_from_polygon` /
        `osmnx.features_from_place`.

    Raises:
        ValueError: if a tag pair contains no `:` separator.
    """
    values_by_key: dict[str, set[str]] = {}
    for pair_list in category_map.values():
        for tag_pair, _ in pair_list:
            # partition() splits on the first ':' only, like split(':', 1).
            osm_key, separator, osm_value = tag_pair.partition(":")
            if not separator:
                raise ValueError(
                    f"Tag pair {tag_pair!r} must be 'key:value' (e.g. 'shop:supermarket')."
                )
            if osm_key not in values_by_key:
                values_by_key[osm_key] = set()
            values_by_key[osm_key].add(osm_value)

    query = {}
    for osm_key, osm_values in values_by_key.items():
        query[osm_key] = sorted(osm_values)
    return query
[docs]
def categorize_pois(
    pois: gpd.GeoDataFrame,
    category_map: CategoryMap,
    *,
    weight_suffix: str = "_weight",
    drop_unmatched: bool = True,
) -> gpd.GeoDataFrame:
    """Add per-category count + weighted-count columns to a POI GeoDataFrame.

    For each `(category, [(tag:value, weight), ...])` entry, two new columns
    are appended:

    - **`{category}`** (int): number of listed `tag:value` pairs this row
      matches. Usually 0 or 1; can be >= 2 if several of the listed pairs
      match one feature (e.g. a feature with both `amenity=school` and
      `school=primary` for a `schools` category that lists both).
    - **`{category}{weight_suffix}`** (float): sum of the weights of all
      matching pairs. Equal to the count if every weight is 1.

    Matching is an exact equality test between the tag column and the
    listed value; missing values never match.

    Args:
        pois: GeoDataFrame containing the OSM tag columns referenced by
            `category_map` (e.g. `amenity`, `shop`, `leisure`). Tag keys
            that are not columns of the frame are treated as never
            matching, so partial input works.
        category_map: `{category -> [(tag:value, weight), ...]}`.
        weight_suffix: suffix for the per-category weight column. Default
            `'_weight'`. Must be non-empty so the count and weight columns
            get distinct names.
        drop_unmatched: drop rows matching no listed `tag:value` pair.
            Default `True`.

    Returns:
        A copy of `pois` with two new columns per category. Original
        columns are kept; the index labels of retained rows are unchanged.
        The input frame is not modified.

    Raises:
        ValueError: if `weight_suffix` is empty, if a tag pair is not of the
            form `'key:value'`, or if a category's count / weight column
            name already exists in the frame.
    """
    if not weight_suffix:
        # With an empty suffix the count and weight columns would share one
        # name and silently overwrite each other.
        raise ValueError(
            "`weight_suffix` must be a non-empty string; otherwise the count "
            "and weight columns of a category would share the same name."
        )

    # Parse and validate every tag pair before touching the data, so a
    # malformed map fails fast with the same message the query builder uses
    # instead of an opaque tuple-unpacking error half-way through.
    parsed: dict[str, list[tuple[str, str, float]]] = {}
    for category, tags in category_map.items():
        entries = []
        for tag_pair, weight in tags:
            key, separator, value = tag_pair.partition(":")
            if not separator:
                raise ValueError(
                    f"Tag pair {tag_pair!r} must be 'key:value' (e.g. 'shop:supermarket')."
                )
            entries.append((key, value, weight))
        parsed[category] = entries

    pois = pois.copy()  # never mutate the caller's frame
    count_cols: list[str] = []
    for category, entries in parsed.items():
        weight_col = f"{category}{weight_suffix}"
        if category in pois.columns or weight_col in pois.columns:
            raise ValueError(
                f"Category {category!r} would overwrite an existing column "
                f"(have {category!r} / {weight_col!r}). Rename the category "
                f"or use a different `weight_suffix`."
            )
        pois[category] = 0
        pois[weight_col] = 0.0
        for key, value, weight in entries:
            if key not in pois.columns:
                # Tag absent from this dataset: nothing can match.
                continue
            match = pois[key] == value
            pois.loc[match, category] += 1
            pois.loc[match, weight_col] += float(weight)
        count_cols.append(category)

    if drop_unmatched and count_cols:
        # A row is kept when at least one category counted a match.
        any_match = pois[count_cols].sum(axis=1) > 0
        pois = pois[any_match]
    return pois
# ---------------------------------------------------------------------------
# End-to-end fetchers — require osmnx
# ---------------------------------------------------------------------------
[docs]
def fetch_pois(
    polygon: shapely.geometry.base.BaseGeometry,
    polygon_crs: str,
    category_map: CategoryMap,
    *,
    target_crs: str | None = None,
    use_centroid: bool = True,
    weight_suffix: str = "_weight",
    drop_unmatched: bool = True,
) -> gpd.GeoDataFrame:
    """Fetch OSM POIs within `polygon` and tag them with category columns.

    End-to-end pipeline, in execution order:

    1. Reproject `polygon` to EPSG:4326 for the osmnx query.
    2. Build the OSM tag query via `osm_tag_query_for_categories`.
    3. Call `osmnx.features_from_polygon`.
    4. Keep only Point / Polygon / MultiPolygon geometries (drops lines, etc.).
    5. (Optional) reproject to `target_crs`.
    6. (Optional) reduce geometries to their centroids.
    7. Categorize via `categorize_pois`.

    Args:
        polygon: shapely polygon (or multipolygon) describing the fetch area.
        polygon_crs: CRS of `polygon` (e.g. `'EPSG:2056'`).
        category_map: `{category -> [(tag:value, weight), ...]}`.
        target_crs: optional CRS to project the result to. `None` keeps the
            CRS of the frame returned by osmnx (the query itself is made in
            EPSG:4326).
        use_centroid: replace each geometry by its centroid. Because this
            runs after the optional reprojection, the centroid is computed
            in `target_crs` when one is given (geometrically meaningful for
            a metric CRS); otherwise it is computed in the unprojected CRS,
            for which geopandas is expected to emit a warning.
        weight_suffix, drop_unmatched: as in `categorize_pois`.

    Returns:
        The GeoDataFrame returned by osmnx (index and OSM tag columns as
        osmnx provides them), filtered and optionally reprojected / reduced
        to centroids, plus per-category count + weight columns.

    Raises:
        ModuleNotFoundError: if the optional dependency `osmnx` is not
            installed (message includes the install hint).
        ValueError: propagated from `osm_tag_query_for_categories` /
            `categorize_pois` for malformed tag pairs or column collisions.
    """
    # osmnx is an optional dependency: import lazily and turn the bare
    # "No module named 'osmnx'" into an actionable message.
    try:
        import osmnx as ox
    except ModuleNotFoundError as exc:
        raise ModuleNotFoundError(
            "fetch_pois requires the optional dependency `osmnx`. Install it "
            "with `pip install 'aperta[osm]'` (or `pip install osmnx`).",
            name=exc.name,
        ) from exc

    # Build the tag query first so a malformed category map fails before any
    # reprojection or network access.
    tags_query = osm_tag_query_for_categories(category_map)
    polygon_4326 = gpd.GeoSeries([polygon], crs=polygon_crs).to_crs("EPSG:4326").iloc[0]

    raw = ox.features_from_polygon(polygon_4326, tags=tags_query)
    raw = raw[raw.geometry.geom_type.isin(["Point", "Polygon", "MultiPolygon"])]

    if target_crs is not None:
        raw = raw.to_crs(target_crs)

    if use_centroid:
        # Now (after the optional reprojection) the centroid is computed in
        # target_crs (typically metric) — geometrically meaningful.
        raw = raw.copy()
        raw["geometry"] = raw.geometry.centroid

    return categorize_pois(
        raw,
        category_map,
        weight_suffix=weight_suffix,
        drop_unmatched=drop_unmatched,
    )
[docs]
def fetch_network(
    polygon: shapely.geometry.base.BaseGeometry,
    polygon_crs: str,
    network_type: str,
    *,
    target_crs: str | None = None,
    simplify: bool = True,
) -> nx.MultiDiGraph:
    """Fetch an OSM network within `polygon` and project to `target_crs`.

    One-line wrapper around `osmnx.graph_from_polygon` + `project_graph`.
    Removes the standard reproject-polygon-to-4326, fetch, reproject-graph
    boilerplate.

    Args:
        polygon: shapely polygon describing the fetch area.
        polygon_crs: CRS of `polygon`.
        network_type: passed through to `osmnx.graph_from_polygon` —
            `'walk'`, `'bike'`, `'drive'`, `'all'`, `'all_public'`, etc.
        target_crs: optional CRS to project the resulting graph to. `None`
            skips the projection step and returns the graph exactly as
            osmnx built it from the EPSG:4326 polygon.
        simplify: passed through to `osmnx.graph_from_polygon`. `True`
            (default) lets osmnx simplify the graph topology — usually what
            you want for routing.

    Returns:
        The graph produced by osmnx, projected with `osmnx.project_graph`
        when `target_crs` is given.

    Raises:
        ModuleNotFoundError: if the optional dependency `osmnx` is not
            installed (message includes the install hint).
    """
    # osmnx is an optional dependency: import lazily and turn the bare
    # "No module named 'osmnx'" into an actionable message.
    try:
        import osmnx as ox
    except ModuleNotFoundError as exc:
        raise ModuleNotFoundError(
            "fetch_network requires the optional dependency `osmnx`. Install "
            "it with `pip install 'aperta[osm]'` (or `pip install osmnx`).",
            name=exc.name,
        ) from exc

    # The polygon is handed to osmnx in lon/lat, whatever CRS the caller uses.
    polygon_4326 = gpd.GeoSeries([polygon], crs=polygon_crs).to_crs("EPSG:4326").iloc[0]
    graph = ox.graph_from_polygon(
        polygon_4326,
        network_type=network_type,
        simplify=simplify,
    )
    if target_crs is not None:
        graph = ox.project_graph(graph, to_crs=target_crs)
    return graph