mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00
Add median house value to Definition L (#882)
* Added house value to ETL * Adding house value to score formula and comp tool
This commit is contained in:
parent
54bdda0f02
commit
05ebf9b48c
7 changed files with 90 additions and 48 deletions
|
@ -312,6 +312,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||
field_names.UNEMPLOYMENT_FIELD,
|
||||
field_names.HT_INDEX_FIELD,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
||||
]
|
||||
|
||||
non_numeric_columns = [
|
||||
|
|
|
@ -50,6 +50,11 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
"Percent of individuals < 200% Federal Poverty Line"
|
||||
)
|
||||
|
||||
self.MEDIAN_HOUSE_VALUE_FIELD = "B25077_001E"
|
||||
self.MEDIAN_HOUSE_VALUE_FIELD_NAME = (
|
||||
"Median value ($) of owner-occupied housing units"
|
||||
)
|
||||
|
||||
self.STATE_GEOID_FIELD_NAME = "GEOID2"
|
||||
self.df: pd.DataFrame
|
||||
|
||||
|
@ -78,7 +83,10 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
# Employment fields
|
||||
"B23025_005E",
|
||||
"B23025_003E",
|
||||
# Income field
|
||||
self.MEDIAN_INCOME_FIELD,
|
||||
# House value
|
||||
self.MEDIAN_HOUSE_VALUE_FIELD,
|
||||
]
|
||||
+ self.LINGUISTIC_ISOLATION_FIELDS
|
||||
+ self.POVERTY_FIELDS,
|
||||
|
@ -94,22 +102,27 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
def transform(self) -> None:
|
||||
logger.info("Starting Census ACS Transform")
|
||||
|
||||
# Rename median income
|
||||
self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
|
||||
self.MEDIAN_INCOME_FIELD
|
||||
]
|
||||
# Rename two fields.
|
||||
self.df = self.df.rename(
|
||||
columns={
|
||||
self.MEDIAN_HOUSE_VALUE_FIELD: self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
||||
self.MEDIAN_INCOME_FIELD: self.MEDIAN_INCOME_FIELD_NAME,
|
||||
}
|
||||
)
|
||||
|
||||
# Handle null values for CBG median income, which are `-666666666`.
|
||||
missing_value_count = sum(
|
||||
self.df[self.MEDIAN_INCOME_FIELD_NAME] == -666666666
|
||||
)
|
||||
# Handle null values for various fields, which are `-666666666`.
|
||||
for field in [
|
||||
self.MEDIAN_INCOME_FIELD_NAME,
|
||||
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
||||
]:
|
||||
missing_value_count = sum(self.df[field] == -666666666)
|
||||
logger.info(
|
||||
f"There are {missing_value_count} ({int(100*missing_value_count/self.df[self.MEDIAN_INCOME_FIELD_NAME].count())}%) values of "
|
||||
+ f"`{self.MEDIAN_INCOME_FIELD_NAME}` being marked as null values."
|
||||
f"There are {missing_value_count} ({int(100*missing_value_count/self.df[field].count())}%) values of "
|
||||
+ f"`{field}` being marked as null values."
|
||||
)
|
||||
self.df[field] = self.df[field].replace(
|
||||
to_replace=-666666666, value=None
|
||||
)
|
||||
self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
|
||||
self.MEDIAN_INCOME_FIELD_NAME
|
||||
].replace(to_replace=-666666666, value=None)
|
||||
|
||||
# Calculate percent unemployment.
|
||||
# TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
|
||||
|
@ -133,8 +146,6 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
/ self.df["C16002_001E"]
|
||||
)
|
||||
|
||||
self.df[self.LINGUISTIC_ISOLATION_FIELD_NAME].describe()
|
||||
|
||||
# Calculate percent at different poverty thresholds
|
||||
self.df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME] = (
|
||||
self.df["C17002_002E"] + self.df["C17002_003E"]
|
||||
|
@ -170,6 +181,7 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
|
||||
self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
|
||||
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
|
||||
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
||||
]
|
||||
|
||||
self.df[columns_to_include].to_csv(
|
||||
|
|
|
@ -56,13 +56,17 @@ class CensusDecennialETL(ExtractTransformLoad):
|
|||
|
||||
self.MALE_HIGH_SCHOOL_ED_FIELD = "PBG026005"
|
||||
self.MALE_HIGH_SCHOOL_ED_VI_FIELD = "PCT032011"
|
||||
self.MALE_HIGH_SCHOOL_ED_FIELD_NAME = "Total!!Male!!High school graduate, GED, or alternative; "\
|
||||
self.MALE_HIGH_SCHOOL_ED_FIELD_NAME = (
|
||||
"Total!!Male!!High school graduate, GED, or alternative; "
|
||||
"SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"
|
||||
)
|
||||
|
||||
self.FEMALE_HIGH_SCHOOL_ED_FIELD = "PBG026012"
|
||||
self.FEMALE_HIGH_SCHOOL_ED_VI_FIELD = "PCT032028"
|
||||
self.FEMALE_HIGH_SCHOOL_ED_FIELD_NAME = "Total!!Female!!High school graduate, GED, or alternative; "\
|
||||
self.FEMALE_HIGH_SCHOOL_ED_FIELD_NAME = (
|
||||
"Total!!Female!!High school graduate, GED, or alternative; "
|
||||
"SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"
|
||||
)
|
||||
|
||||
self.PERCENTAGE_HIGH_SCHOOL_ED_FIELD_NAME = (
|
||||
"PERCENTAGE_HIGH_SCHOOL_ED_FIELD_NAME"
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4899d2ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import censusdata\n",
|
||||
|
@ -29,28 +32,34 @@
|
|||
"# Some display settings to make pandas outputs more readable.\n",
|
||||
"pd.set_option(\"display.expand_frame_repr\", False)\n",
|
||||
"pd.set_option(\"display.precision\", 2)"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4dd8feec",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Following the tutorial at https://jtleider.github.io/censusdata/example1.html.\n",
|
||||
"# Full list of fields is at https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx\n",
|
||||
"censusdata.printtable(\n",
|
||||
" censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"B19013\")\n",
|
||||
")"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
}
|
||||
" censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"B25077\")\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# censusdata.search(src=\"acs5\", year=ACS_YEAR, field='label', criterion='Owner-occupied units!!Median')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7b40afd3",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:\n",
|
||||
" \"\"\"Create a FIPS code from the proprietary censusgeo index.\"\"\"\n",
|
||||
|
@ -82,15 +91,16 @@
|
|||
")\n",
|
||||
"\n",
|
||||
"df.head()"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "caa0b502",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"columns_to_include = [\"GEOID2\", \"Median household income (State)\"]\n",
|
||||
"\n",
|
||||
|
@ -103,18 +113,15 @@
|
|||
")\n",
|
||||
"\n",
|
||||
"# df[columns_to_include].to_csv(path_or_buf= \"/Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/needs_to_be_moved_to_s3/2014_to_2019_state_median_income.csv\", index=False)"
|
||||
],
|
||||
"outputs": [],
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"source": [],
|
||||
"id": "f2bddf6a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"metadata": {}
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
@ -86,6 +86,7 @@
|
|||
"BAD_HEALTH_FIELD = (\n",
|
||||
" \"Physical health not good for >=14 days among adults aged >=18 years\"\n",
|
||||
")\n",
|
||||
"MEDIAN_HOUSE_VALUE_FIELD = \"Median value ($) of owner-occupied housing units\"\n",
|
||||
"\n",
|
||||
"# Define some suffixes\n",
|
||||
"POPULATION_SUFFIX = \" (priority population)\""
|
||||
|
@ -186,6 +187,7 @@
|
|||
" \"Particulate matter (PM2.5) (percentile)\",\n",
|
||||
" \"Traffic proximity and volume (percentile)\",\n",
|
||||
" \"Percent of individuals < 200% Federal Poverty Line (percentile)\",\n",
|
||||
" MEDIAN_HOUSE_VALUE_FIELD,\n",
|
||||
"]:\n",
|
||||
" print(f\"\\n~~~~Analysis for field `{field}`~~~~\")\n",
|
||||
" print(cejst_df[field].describe())\n",
|
||||
|
|
|
@ -114,6 +114,8 @@ OVER_64_FIELD = "Individuals over 64 years old"
|
|||
# Urban Rural Map
|
||||
URBAN_HERUISTIC_FIELD = "Urban Heuristic Flag"
|
||||
|
||||
# Housing value
|
||||
MEDIAN_HOUSE_VALUE_FIELD = "Median value ($) of owner-occupied housing units"
|
||||
|
||||
# EJSCREEN Areas of Concern
|
||||
EJSCREEN_AREAS_OF_CONCERN_NATIONAL_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
|
|
|
@ -11,6 +11,7 @@ class ScoreL(Score):
|
|||
def __init__(self, df: pd.DataFrame) -> None:
|
||||
self.LOW_INCOME_THRESHOLD: float = 0.65
|
||||
self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
|
||||
self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90
|
||||
super().__init__(df)
|
||||
|
||||
def add_columns(self) -> pd.DataFrame:
|
||||
|
@ -135,8 +136,12 @@ class ScoreL(Score):
|
|||
) & transportation_criteria
|
||||
|
||||
def _housing_factor(self) -> bool:
|
||||
# (
|
||||
# In Xth percentile or above for lead paint (Source: Census's American Community Survey’s
|
||||
# percent of housing units built pre-1960, used as an indicator of potential lead paint exposure in homes)
|
||||
# AND
|
||||
# In Yth percentile or below for Median House Value (Source: Census's American Community Survey)
|
||||
# )
|
||||
# or
|
||||
# In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset)
|
||||
# AND
|
||||
|
@ -144,11 +149,20 @@ class ScoreL(Score):
|
|||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
housing_criteria = (
|
||||
(
|
||||
self.df[
|
||||
field_names.LEAD_PAINT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
& (
|
||||
self.df[
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
< self.MEDIAN_HOUSE_VALUE_THRESHOLD
|
||||
)
|
||||
) | (
|
||||
self.df[
|
||||
field_names.HOUSING_BURDEN_FIELD
|
||||
|
|
Loading…
Add table
Reference in a new issue