mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 13:41:17 -07:00
Adding HOLC indicator (#1579)
Added the HOLC indicator (Historic Redlining Score) from NCRC's work; included a 3.25 cutoff and low income as part of the housing burden category.
This commit is contained in:
parent
f047ca9d83
commit
1782d022a9
10 changed files with 202 additions and 40 deletions
|
@ -205,7 +205,8 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.M_HEALTH: "M_HLTH",
|
||||
# temporarily update this so that it's the Narwhal score that gets visualized on the map
|
||||
field_names.SCORE_N_COMMUNITIES: "SM_C",
|
||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
|
||||
field_names.SCORE_N_COMMUNITIES
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import functools
|
||||
from collections import namedtuple
|
||||
from attr import field
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
@ -36,6 +37,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.census_decennial_df: pd.DataFrame
|
||||
self.census_2010_df: pd.DataFrame
|
||||
self.child_opportunity_index_df: pd.DataFrame
|
||||
self.hrs_df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Loading data sets from disk.")
|
||||
|
@ -172,6 +174,17 @@ class ScoreETL(ExtractTransformLoad):
|
|||
low_memory=False,
|
||||
)
|
||||
|
||||
# Load HRS data
|
||||
hrs_csv = (
|
||||
constants.DATA_PATH / "dataset" / "historic_redlining" / "usa.csv"
|
||||
)
|
||||
|
||||
self.hrs_df = pd.read_csv(
|
||||
hrs_csv,
|
||||
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
|
||||
def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
|
||||
logger.info("Joining Census Tract dataframes")
|
||||
|
||||
|
@ -376,6 +389,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.census_decennial_df,
|
||||
self.census_2010_df,
|
||||
self.child_opportunity_index_df,
|
||||
self.hrs_df,
|
||||
]
|
||||
|
||||
# Sanity check each data frame before merging.
|
||||
|
@ -405,7 +419,6 @@ class ScoreETL(ExtractTransformLoad):
|
|||
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
|
||||
)
|
||||
|
||||
# QQ: why don't we just filter to the numeric columns by type?
|
||||
numeric_columns = [
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
|
@ -465,6 +478,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
non_numeric_columns = [
|
||||
self.GEOID_TRACT_FIELD_NAME,
|
||||
field_names.PERSISTENT_POVERTY_FIELD,
|
||||
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
|
||||
]
|
||||
|
||||
# For some columns, high values are "good", so we want to reverse the percentile
|
||||
|
|
|
@ -46,10 +46,11 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
self.DATA_PATH / "census" / "geojson" / "us.json"
|
||||
)
|
||||
|
||||
# Import the shortened name for Score M percentile ("SM_PFS") that's used on the
|
||||
# Import the shortened name for Score N percentile ("SM_PFS") that's used on the
|
||||
# tiles.
|
||||
## TEMPORARY update
|
||||
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
field_names.SCORE_N + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
self.TARGET_SCORE_RENAME_TO = "M_SCORE"
|
||||
|
||||
|
@ -284,21 +285,28 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
|
||||
def create_esri_codebook(codebook):
|
||||
"""temporary: helper to make a codebook for esri shapefile only"""
|
||||
<<<<<<< HEAD
|
||||
|
||||
shapefile_column_field = "shapefile_column"
|
||||
internal_column_name_field = "column_name"
|
||||
column_description_field = "column_description"
|
||||
|
||||
=======
|
||||
>>>>>>> 8c255f0e (Adding HOLC indicator (#1579))
|
||||
logger.info("Creating a codebook that uses the csv names")
|
||||
codebook = (
|
||||
pd.Series(codebook)
|
||||
.reset_index()
|
||||
.rename(
|
||||
# kept as strings because no downstream impacts
|
||||
<<<<<<< HEAD
|
||||
columns={
|
||||
0: internal_column_name_field,
|
||||
"index": shapefile_column_field,
|
||||
}
|
||||
=======
|
||||
columns={0: "column_name", "index": "shapefile_column"}
|
||||
>>>>>>> 8c255f0e (Adding HOLC indicator (#1579))
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -374,7 +382,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
for task in [
|
||||
write_high_to_file,
|
||||
write_low_to_file,
|
||||
write_esri_shapefile,
|
||||
# write_esri_shapefile,
|
||||
]
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue