mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-29 20:41:17 -07:00
Score F, testing methodology (#510)
* fixing dependency issue
* fixing more dependencies
* including fraction of state AMI
* wip
* nitpick whitespace
* etl working now
* wip on scoring
* fix rename error
* reducing metrics
* fixing score f
* fixing readme
* adding dependency
* passing tests
* linting/black
* removing unnecessary sample
* fixing error
* adding verify flag on etl/base

Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
This commit is contained in:
parent
043ed983ea
commit
65ceb7900f
23 changed files with 557 additions and 153 deletions
|
@ -9,16 +9,16 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
def __init__(self):
|
||||
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "hud_housing"
|
||||
self.GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT"
|
||||
self.HOUSING_FTP_URL = (
|
||||
"https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip"
|
||||
)
|
||||
self.HOUSING_FTP_URL = "https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip"
|
||||
self.HOUSING_ZIP_FILE_DIR = self.TMP_PATH / "hud_housing"
|
||||
|
||||
# We measure households earning less than 80% of HUD Area Median Family Income by county
|
||||
# and paying greater than 30% of their income to housing costs.
|
||||
self.HOUSING_BURDEN_FIELD_NAME = "Housing burden (percent)"
|
||||
self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME = "HOUSING_BURDEN_NUMERATOR"
|
||||
self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = "HOUSING_BURDEN_DENOMINATOR"
|
||||
self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = (
|
||||
"HOUSING_BURDEN_DENOMINATOR"
|
||||
)
|
||||
|
||||
# Note: some variable definitions.
|
||||
# HUD-adjusted median family income (HAMFI).
|
||||
|
@ -55,7 +55,9 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
)
|
||||
|
||||
# Rename and reformat census tract ID
|
||||
self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
|
||||
self.df.rename(
|
||||
columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True
|
||||
)
|
||||
|
||||
# The CHAS data has census tract ids such as `14000US01001020100`
|
||||
# Whereas the rest of our data uses, for the same tract, `01001020100`.
|
||||
|
@ -273,7 +275,9 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
# TODO: add small sample size checks
|
||||
self.df[self.HOUSING_BURDEN_FIELD_NAME] = self.df[
|
||||
self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME
|
||||
].astype(float) / self.df[self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME].astype(
|
||||
].astype(float) / self.df[
|
||||
self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME
|
||||
].astype(
|
||||
float
|
||||
)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue