updating with tract area

This commit is contained in:
Emma Nechamkin 2022-10-13 12:20:21 -04:00
parent bfb08e455e
commit 74bf4979f8
2 changed files with 17 additions and 1 deletions

View file

@ -49,6 +49,7 @@ class GeoCorrAlternativesETL(ExtractTransformLoad):
ZIP_CODE_INPUT_FIELD = "ZCTA5CE20"
AREA_JOINED_FIELD = "area_joined"
AREA_ZIP_FIELD = "area_zip"
TRACT_AREA = "area_tract"
def __init__(self):
self.COLUMNS_TO_KEEP = [
@ -118,4 +119,15 @@ class GeoCorrAlternativesETL(ExtractTransformLoad):
joined_gdf[self.AREA_JOINED_FIELD] / joined_gdf[self.AREA_ZIP_FIELD]
)
# Calculating the "size of tract" that is relevant, i.e., the sum of all
# overlapping area between the tract and the zip codes, taken over every row
# of joined_gdf that shares the same tract GEOID.
# The aggregation is named by the string "sum" rather than the builtin `sum`:
# passing the builtin callable to a groupby transform is deprecated in
# pandas (FutureWarning since 2.1), while the string alias keeps the
# grouped-sum behaviour.
joined_gdf[self.TRACT_AREA] = joined_gdf.groupby(
    field_names.GEOID_TRACT_FIELD
)[self.AREA_JOINED_FIELD].transform("sum")
# Calculating share of tract in the zipcode (ordered at tract, zip level):
# each row's joined area divided by the summed joined area of its tract.
# NOTE(review): presumably each row is one tract/zip intersection, so the
# shares within a tract sum to 1 -- confirm against the overlay step.
joined_gdf[field_names.PERCENT_OF_TRACT_IN_ZIP] = (
    joined_gdf[self.AREA_JOINED_FIELD] / joined_gdf[self.TRACT_AREA]
)
self.output_df = joined_gdf

View file

@ -363,7 +363,11 @@ PERCENT_OF_TRIBAL_AREA_IN_TRACT = (
# GeoCorr alternatives variables
ZIP_CODE = "Zip code tabulation area (ZCTA)"
PERCENT_OF_ZIP_CODE_IN_TRACT = "Percent of zip code tabulation area (ZCTA) in tract"
PERCENT_OF_ZIP_CODE_IN_TRACT = (
"Percent of zip code tabulation area (ZCTA) in tract"
)
PERCENT_OF_TRACT_IN_ZIP = "Percent of tract in zip code tabulation area (ZCTA)"
#####
# Names for individual factors being exceeded