Note
Go to the end to download the full example code.
Changing the zone system for a model#
In this example we show how to change the zone system for a Polaris model.
import shutil
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import Polygon, MultiPolygon
from polaris import Polaris
from polaris.network.network import Network
from polaris.project.project_restorer import create_db_from_csv
from polaris.utils.database.db_utils import commit_and_close
from polaris.utils.database.standard_database import DatabaseType
from polaris.utils.path_utils import tempdirpath
from polaris.utils.testing.temp_model import TempModel
# Thumbnail used by sphinx-gallery for this example page.
# NOTE: the original line used typographic quotes (‘…’), which is a
# SyntaxError in Python; they must be plain ASCII quotes.
sphinx_gallery_thumbnail_path = '../../examples/editing_models/zoning.png'
# Work on a throwaway copy of the Bloomington demo model, and create a
# target directory that will hold the model rebuilt on the new zone system.
original_model = TempModel("Bloomington")
target = original_model.parent / "bloomington_new_zones_from_census_block"
shutil.copytree(original_model, target, dirs_exist_ok=True)
# Census block-group geometries that will become the new zone system
tract_path = tempdirpath() / "illinois_census_block_groups_portion.gpkg"
model = Polaris.from_dir(original_model)
model.upgrade()
from_net = model.network
db_name = model.run_config.db_name  # NOTE(review): db_name looks unused below -- confirm
zones = from_net.tables.get("zone")
Read the tracts and put them in the same CRS as the zones
# Load the block-group polygons and reproject them to the zone CRS so the
# overlay below is computed in a single coordinate system.
tracts = gpd.read_file(tract_path)
tracts = tracts.to_crs(zones.crs)
# We will use GEOID because we know it is a unique identifier, but we could use any unique field in this table.
id_field = "GEOID"
Compute the overlay between the zone and the block group geometries
# Intersect zones with block groups; each row is one zone/block-group overlap
ovrl = zones.overlay(tracts, how="intersection")[["zone", id_field, "geometry"]]
We compute the area for each of the overlaps. We will use the largest overlap for each new zone to copy the data for a few fields from the original zones. Most fields will be aggregated with a sum; however, some will be area-weighted and others taken from the zone with the largest overlap.
# Area of each overlap polygon. The original code assigned this same area
# twice under two names (``overlay_area`` and ``area_overlay``); only
# ``area_overlay`` is ever used, so the duplicate assignment is dropped.
ovrl_data = ovrl.assign(area_overlay=ovrl.area)[["zone", id_field, "area_overlay"]]
# Attach the overlaps to the original zones and compute, for each overlap,
# the fraction of the source zone's area it covers.
zones_with_tract = zones.merge(ovrl_data, on="zone", how="left")
zones_with_tract = zones_with_tract.assign(factor=zones_with_tract.area_overlay / zones_with_tract.area)
# The old geometry/centroid fields and the old zone id are not carried over
zones_with_tract = zones_with_tract.drop(columns=["geo", "x", "y", "z", "zone"])

# How each group of fields is transferred to the new zones:
#   * sumproduct fields: scaled by the covered fraction, then summed
#   * weighted fields: averaged, weighted by overlap area
#   * "most significative" fields: copied from the zone with largest overlap
sumproduct = ["employment_", "_area", "pop_"]
weighted = ["percent_", "hh_inc"]
most_significative = ["electric_grid_transmission", "electricity_provider", "area_type"]
Sumproduct
# Fields transferred as a sum-product: scale each zone value by the share of
# the source zone covered by the overlap, then add the pieces up per new zone.
sp_cols = [c for c in zones_with_tract.columns if any(key in c for key in sumproduct)]
sp_data = zones_with_tract[[id_field, "factor"] + sp_cols].copy()
sp_data[sp_cols] = sp_data[sp_cols].astype(float).mul(sp_data.factor, axis=0)
data1 = sp_data.drop(columns=["factor"]).groupby(id_field).sum()
Weighted
# Fields transferred as an area-weighted average: multiply by the overlap
# area, sum per new zone, then divide by the total overlap area.
cols = [x for x in zones_with_tract.columns if any(y in x for y in weighted)]
# .copy() so the in-place scaling below cannot write back into
# zones_with_tract (the original sliced without copying, triggering a
# pandas SettingWithCopyWarning and risking mutation of the parent frame)
data = zones_with_tract[[id_field, "area_overlay"] + cols].copy()
for col in cols:
    data.loc[:, col] *= data.area_overlay
# Group by and calculate the area-weighted sums
data = data.groupby(id_field).sum()
for col in cols:
    data.loc[:, col] /= data.area_overlay
data2 = data.drop(columns=["area_overlay"])
data2.head()
Most significant
# Categorical fields are copied from the single source zone with the
# largest overlap area for each new zone.
largest_overlap = zones_with_tract.loc[
    zones_with_tract.groupby(id_field)["area_overlay"].idxmax()
]
data3 = largest_overlap.reset_index(drop=True).set_index(id_field)[most_significative]
# Combine the three aggregation results and attach the new zone geometries
zone_data = data1.join(data2).join(data3).reset_index()
new_zones = tracts[[id_field, "geometry"]].merge(zone_data, on=id_field)
# Sequential 1-based zone ids; the zone table stores geometry in "geo"
new_zones = new_zones.rename(columns={"geometry": "geo"}).assign(zone=1 + np.arange(new_zones.shape[0]))
new_zones.drop(columns=[id_field], inplace=True)
# Promote plain Polygons to MultiPolygon so all geometries share one type
new_zones.geo = new_zones.geo.apply(lambda geom: MultiPolygon([geom]) if isinstance(geom, Polygon) else geom)
# Derive the WKT, area and centroid columns the zone.csv export expects
# (rounded to 8 decimal places)
zone_data = new_zones.assign(geo_wkt=new_zones.geo.to_wkt(), area=round(new_zones.geo.area, 8))
zone_data = zone_data.assign(x=round(zone_data.geo.centroid.x, 8), y=round(zone_data.geo.centroid.y, 8), z=0)
zone_data.drop(columns=["geo"], inplace=True)
# Export the new zone table as CSV, preserving the column order of the
# existing zone table ("geo" is written out as WKT via the "geo_wkt" column)
with commit_and_close(from_net.path_to_file, spatial=True) as conn:
    schema = pd.read_sql_query("pragma table_info(zone)", conn)
    fields = ["geo_wkt" if fld == "geo" else fld for fld in schema["name"].to_list()]
    zone_data[fields].to_csv(target / "supply" / "zone.csv", index=False)
We rebuild the new supply file
# Rebuild the supply database from the CSV files, overwriting the copy made
# earlier. (The file name was an f-string with no placeholders -- F541 --
# so it is now a plain string literal.)
create_db_from_csv(
    target / "Bloomington-Supply.sqlite", target / "supply", DatabaseType.Supply, overwrite=True, upgrade=True
)
new_net = Network.from_file(target / from_net.path_to_file.name)
# Visual sanity check of the resulting zone system
new_net.tables.get("zone").explore()
# It is HIGHLY advisable to inspect the result of this procedure, as
# undesirable artifacts from the spatial operations may have been introduced
and update geo_consistency -> THIS IS A CRITICAL STEP
# Recompute geo-consistency fields -- required after any geometry change
new_net.geo_consistency.update_all()
/venv-py312/lib/python3.12/site-packages/geopandas/tools/overlay.py:358: UserWarning: `keep_geom_type=True` in overlay resulted in 11 dropped geometries of different geometry types than df1 has. Set `keep_geom_type=False` to retain all geometries
result = _collection_extract(result, geom_type, keep_geom_type_warning)
Total running time of the script: (0 minutes 31.712 seconds)