Adding HOLC indicator (#1579)

Added the HOLC indicator (Historic Redlining Score) from NCRC's work; included the 3.25 cutoff and low income as part of the housing burden category.
Emma Nechamkin authored on 2022-05-12 12:07:08 -04:00; committed by Emma Nechamkin
commit 1782d022a9
10 changed files with 202 additions and 40 deletions
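The diff below threads the Historic Redlining Score through the score ETL and the tile export. For orientation, a minimal sketch of what the 3.25 cutoff amounts to, assuming a raw HRS column named HRS2010 (this diff does not show the actual field name):

import pandas as pd

# Sketch only: flag tracts at or above the 3.25 cutoff named in the commit
# message. "HRS2010" is an assumed column name, not necessarily the repo's.
def historic_redlining_exceeded(df: pd.DataFrame, hrs_column: str = "HRS2010") -> pd.Series:
    # NaN scores compare as False, so unscored tracts are not flagged.
    return df[hrs_column] >= 3.25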


@@ -205,7 +205,8 @@ TILES_SCORE_COLUMNS = {
field_names.M_HEALTH: "M_HLTH",
# temporarily update this so that it's the Narwhal score that gets visualized on the map
field_names.SCORE_N_COMMUNITIES: "SM_C",
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
field_names.SCORE_N_COMMUNITIES
+ field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
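TILES_SCORE_COLUMNS maps verbose internal field names to short codes for the map tiles (and for the Esri shapefile, whose DBF format caps column names at 10 characters). Applying the map is a plain rename-and-subset; a minimal sketch, with the input frame assumed:

import pandas as pd

def subset_for_tiles(score_df: pd.DataFrame, tiles_columns: dict) -> pd.DataFrame:
    # Rename internal field names to their short tile codes, then keep only
    # those columns. tiles_columns is a mapping like TILES_SCORE_COLUMNS.
    return score_df.rename(columns=tiles_columns)[list(tiles_columns.values())]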


@@ -1,5 +1,6 @@
import functools
from collections import namedtuple
from attr import field
import numpy as np
import pandas as pd
@@ -36,6 +37,7 @@ class ScoreETL(ExtractTransformLoad):
self.census_decennial_df: pd.DataFrame
self.census_2010_df: pd.DataFrame
self.child_opportunity_index_df: pd.DataFrame
self.hrs_df: pd.DataFrame
def extract(self) -> None:
logger.info("Loading data sets from disk.")
@@ -172,6 +174,17 @@ class ScoreETL(ExtractTransformLoad):
low_memory=False,
)
# Load HRS data
hrs_csv = (
constants.DATA_PATH / "dataset" / "historic_redlining" / "usa.csv"
)
self.hrs_df = pd.read_csv(
hrs_csv,
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
low_memory=False,
)
def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
logger.info("Joining Census Tract dataframes")
@@ -376,6 +389,7 @@ class ScoreETL(ExtractTransformLoad):
self.census_decennial_df,
self.census_2010_df,
self.child_opportunity_index_df,
self.hrs_df,
]
# Sanity check each data frame before merging.
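With hrs_df appended here, _join_tract_dfs merges every frame on the tract GEOID. The method body is outside this diff; a minimal sketch of the usual reduce-and-merge pattern, consistent with the functools import added above (the outer join is an assumption):

import functools
import pandas as pd

def join_tract_dfs(census_tract_dfs: list, geoid_field: str) -> pd.DataFrame:
    # Fold the tract-level frames into one wide frame, outer-joining on the
    # shared GEOID so tracts missing from any single source are kept.
    return functools.reduce(
        lambda left, right: pd.merge(left, right, on=geoid_field, how="outer"),
        census_tract_dfs,
    )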
@@ -405,7 +419,6 @@ class ScoreETL(ExtractTransformLoad):
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
)
# QQ: why don't we just filter to the numeric columns by type?
numeric_columns = [
field_names.HOUSING_BURDEN_FIELD,
field_names.TOTAL_POP_FIELD,
@@ -465,6 +478,7 @@ class ScoreETL(ExtractTransformLoad):
non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
field_names.PERSISTENT_POVERTY_FIELD,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
]
# For some columns, high values are "good", so we want to reverse the percentile
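The trailing comment refers to fields where a high raw value is desirable (median income, for example), whose percentiles get flipped before scoring. A short sketch with illustrative names:

import pandas as pd

# Rank to a [0, 1] percentile, then flip it so high "good" raw values land
# at low percentiles. Column names here are illustrative only.
df = pd.DataFrame({"median_income": [30_000, 55_000, 90_000]})
df["median_income_reversed_pfs"] = 1 - df["median_income"].rank(pct=True)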


@@ -46,10 +46,11 @@ class GeoScoreETL(ExtractTransformLoad):
self.DATA_PATH / "census" / "geojson" / "us.json"
)
# Import the shortened name for Score M percentile ("SM_PFS") that's used on the
# Import the shortened name for Score N percentile ("SM_PFS") that's used on the
# tiles.
## TEMPORARY update
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX
field_names.SCORE_N + field_names.PERCENTILE_FIELD_SUFFIX
]
self.TARGET_SCORE_RENAME_TO = "M_SCORE"
@@ -284,21 +285,28 @@ class GeoScoreETL(ExtractTransformLoad):
def create_esri_codebook(codebook):
"""temporary: helper to make a codebook for esri shapefile only"""
shapefile_column_field = "shapefile_column"
internal_column_name_field = "column_name"
column_description_field = "column_description"
logger.info("Creating a codebook that uses the csv names")
codebook = (
pd.Series(codebook)
.reset_index()
.rename(
# kept as strings because no downstream impacts
columns={
0: internal_column_name_field,
"index": shapefile_column_field,
}
)
)
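The Series round-trip above turns a plain dict into a two-column lookup pairing each shortened shapefile name with its internal CSV column. A hypothetical input and result (the excerpt cuts off before the function returns):

import pandas as pd

# Hypothetical codebook: shapefile short name -> internal CSV column name.
codebook = {"SM_C": "Score N (communities)", "SM_PFS": "Score N (percentile)"}
codebook_df = (
    pd.Series(codebook)
    .reset_index()
    .rename(columns={"index": "shapefile_column", 0: "column_name"})
)
#   shapefile_column            column_name
# 0             SM_C  Score N (communities)
# 1           SM_PFS   Score N (percentile)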
@@ -374,7 +382,7 @@ class GeoScoreETL(ExtractTransformLoad):
for task in [
write_high_to_file,
write_low_to_file,
write_esri_shapefile,
# write_esri_shapefile,
]
}
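The comprehension above collects named writer tasks, with the Esri shapefile writer temporarily disabled. How the tasks run is outside this hunk; a minimal sketch of one way to fan them out, assuming each value is a zero-argument callable:

from concurrent.futures import ThreadPoolExecutor

def run_write_tasks(tasks: dict) -> None:
    # Fan the writer callables out to worker threads, then block on each
    # result so any exception raised inside a task surfaces here.
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(task) for task in tasks.values()]
        for future in futures:
            future.result()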