Changing the zone system for a model

Changing the zone system for a model

In this example we show how to change the zone system for a Polaris model.

import shutil

import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import Polygon, MultiPolygon
from polaris import Polaris
from polaris.network.network import Network
from polaris.project.project_restorer import create_db_from_csv
from polaris.utils.database.db_utils import commit_and_close
from polaris.utils.database.standard_database import DatabaseType
from polaris.utils.path_utils import tempdirpath
from polaris.utils.testing.temp_model import TempModel

# Thumbnail shown in the Sphinx-Gallery index for this example.
# (Fixed: typographic quotes are not valid Python string delimiters.)
sphinx_gallery_thumbnail_path = "../../examples/editing_models/zoning.png"

# Work on a throwaway copy of the demo model so the original stays untouched
original_model = TempModel("Bloomington")
target = original_model.parent / "bloomington_new_zones_from_census_block"

# The model with the replaced zone system will live in *target*
shutil.copytree(original_model, target, dirs_exist_ok=True)

# Census block groups that will become the new zone system
tract_path = tempdirpath() / "illinois_census_block_groups_portion.gpkg"
model = Polaris.from_dir(original_model)
model.upgrade()  # bring the databases up to the latest schema before editing
from_net = model.network
db_name = model.run_config.db_name
zones = from_net.tables.get("zone")  # current zone layer (GeoDataFrame -- used with .crs/.overlay below)

Read the tracts and put them in the same CRS as the zones

# Load the census block-group geometries and reproject them to the zone CRS
tracts = gpd.read_file(tract_path).to_crs(zones.crs)

# GEOID is known to be a unique identifier, but any unique field in this table would do
id_field = "GEOID"

Compute the overlay between the zone and the block group geometries

# Intersect zones with block groups; each row is one (zone, block-group) overlap piece
ovrl = zones.overlay(tracts, how="intersection")[["zone", id_field, "geometry"]]

We compute the area for each of the overlaps. We will use the largest overlap for each new zone to copy the data for a few fields from the original zones. Most fields will be aggregated with a sum; however, some fields are averaged weighted by overlap area, and others take the value from the most significant (largest) overlap.

# Area of each overlap piece. The ratio area_overlay / zone_area ("factor")
# says how much of each original zone falls inside each block group.
# (Fixed: the previous version first assigned a duplicate "overlay_area"
# column that was never used anywhere downstream.)
ovrl_data = ovrl.assign(area_overlay=ovrl.area)[["zone", id_field, "area_overlay"]]
zones_with_tract = zones.merge(ovrl_data, on="zone", how="left")
zones_with_tract = zones_with_tract.assign(factor=zones_with_tract.area_overlay / zones_with_tract.area)
# Geometry/centroid/id columns of the old zones are meaningless for the new zone system
zones_with_tract = zones_with_tract.drop(columns=["geo", "x", "y", "z", "zone"])
# Field groups by aggregation rule:
sumproduct = ["employment_", "_area", "pop_"]  # apportioned by zone share, then summed
weighted = ["percent_", "hh_inc"]  # averaged, weighted by overlap area
most_significative = ["electric_grid_transmission", "electricity_provider", "area_type"]  # value from largest overlap

Sumproduct

# Apportion the additive fields (employment, population, areas) by the share of
# each original zone falling inside the block group, then sum the pieces.
prorate_cols = [col for col in zones_with_tract.columns if any(key in col for key in sumproduct)]
prorated = zones_with_tract[[id_field, "factor"] + prorate_cols].copy()
prorated[prorate_cols] = prorated[prorate_cols].astype(float).mul(prorated.factor, axis=0)
data1 = prorated.drop(columns=["factor"]).groupby(id_field).sum()

Weighted average

cols = [x for x in zones_with_tract.columns if any(y in x for y in weighted)]
# .copy() so the in-place scaling below operates on an independent frame
# rather than on a view of zones_with_tract (the original slice triggered
# SettingWithCopyWarning and could silently fail to update).
data = zones_with_tract[[id_field, "area_overlay"] + cols].copy()

# Weighted average: scale each value by its weight (the overlap area)...
for col in cols:
    data.loc[:, col] *= data.area_overlay
# ...sum within each block group...

data = data.groupby(id_field).sum()
# ...and divide by the total weight
for col in cols:
    data.loc[:, col] /= data.area_overlay

data2 = data.drop(columns=["area_overlay"])
data2.head()
percent_white percent_black hh_inc_avg
GEOID
171130001051 0.891415 0.065061 83934.343297
171130001052 0.947425 0.028648 93090.236681
171130001061 0.838100 0.111241 62294.852121
171130001062 0.830827 0.118077 62852.794114
171130001071 0.830957 0.118020 62852.875579


Most significant

# For the categorical fields, keep the value of the old zone with the
# largest overlap with each block group
largest_overlap = zones_with_tract.groupby(id_field)["area_overlay"].idxmax()
dominant = zones_with_tract.loc[largest_overlap].reset_index(drop=True)
data3 = dominant.set_index(id_field)[most_significative]

# Assemble the new zone table: aggregated data joined back onto the
# block-group geometries, with fresh 1-based zone ids
zone_data = data1.join(data2).join(data3).reset_index()
new_zones = tracts[[id_field, "geometry"]].merge(zone_data, on=id_field)
new_zones = new_zones.rename(columns={"geometry": "geo"}).assign(zone=np.arange(1, new_zones.shape[0] + 1))
new_zones = new_zones.drop(columns=[id_field])

# Promote plain Polygons to MultiPolygons so every zone geometry shares one type
new_zones.geo = new_zones.geo.apply(
    lambda geom: geom if not isinstance(geom, Polygon) else MultiPolygon([geom])
)
# Export geometry as WKT plus derived area and centroid coordinates (z is always 0)
centroids = new_zones.geo.centroid
zone_data = new_zones.assign(
    geo_wkt=new_zones.geo.to_wkt(),
    area=new_zones.geo.area.round(8),
    x=centroids.x.round(8),
    y=centroids.y.round(8),
    z=0,
).drop(columns=["geo"])

with commit_and_close(from_net.path_to_file, spatial=True) as conn:
    # Read the zone table's column order from the existing supply database;
    # the geometry column "geo" is exported as WKT, hence the rename to "geo_wkt"
    schema = pd.read_sql_query("pragma table_info(zone)", conn)
    fields = ["geo_wkt" if fld == "geo" else fld for fld in schema["name"].to_list()]

# Write the new zone table where the CSV-based model rebuild expects to find it
zone_data[fields].to_csv(target / "supply" / "zone.csv", index=False)

We rebuild the new supply file

# Rebuild the supply database from the CSV files. Using the original supply
# file's name (instead of the previous hard-coded f-string with no
# placeholder, "Bloomington-Supply.sqlite") guarantees we create exactly the
# file that is opened right below.
create_db_from_csv(
    target / from_net.path_to_file.name, target / "supply", DatabaseType.Supply, overwrite=True, upgrade=True
)
new_net = Network.from_file(target / from_net.path_to_file.name)
new_net.tables.get("zone").explore()
# It is HIGHLY advisable to inspect the result of this procedure, as
# undesirable artifacts from the spatial operations may have been introduced
Make this Notebook Trusted to load map: File -> Trust Notebook


and update geo_consistency -> THIS IS A CRITICAL STEP

# CRITICAL: recompute all geo-consistency fields so the rest of the model
# agrees with the new zone system
new_net.geo_consistency.update_all()
/venv-py312/lib/python3.12/site-packages/geopandas/tools/overlay.py:358: UserWarning: `keep_geom_type=True` in overlay resulted in 11 dropped geometries of different geometry types than df1 has. Set `keep_geom_type=False` to retain all geometries
  result = _collection_extract(result, geom_type, keep_geom_type_warning)

Total running time of the script: (1 minutes 16.736 seconds)

Gallery generated by Sphinx-Gallery