Data Pipeline performance improvements for Census GeoJSON and Score file

This commit is contained in:
Carlos Felix 2025-01-13 09:28:14 -05:00 committed by Carlos Felix
commit c32bd1f363
37 changed files with 1305 additions and 1413 deletions

View file

@@ -26,10 +26,7 @@ def get_tract_geojson(
census_etl.extract()
census_etl.transform()
census_etl.load()
tract_data = gpd.read_file(
GEOJSON_PATH,
include_fields=["GEOID10"],
)
tract_data = gpd.read_parquet(GEOJSON_PATH)
tract_data = tract_data.rename(
columns={"GEOID10": "GEOID10_TRACT"}, errors="raise"
)