mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 01:31:25 -08:00
Issue 1141: Definition M (#1151)
This commit is contained in:
parent
a07bf752b0
commit
18f299c5f8
21 changed files with 1000 additions and 143 deletions
|
@ -256,57 +256,57 @@ const AreaDetail = ({properties}:IAreaDetailProps) => {
|
|||
id: 'climate-change',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.CLIMATE),
|
||||
indicators: [expAgLoss, expBldLoss, expPopLoss, lowInc],
|
||||
isDisadvagtaged: properties[constants.IS_CLIMATE_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_CLIMATE_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_CLIMATE_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_CLIMATE_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
{
|
||||
id: 'clean-energy',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.CLEAN_ENERGY),
|
||||
indicators: [energyBurden, pm25, lowInc],
|
||||
isDisadvagtaged: properties[constants.IS_ENERGY_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_ENERGY_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_ENERGY_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_ENERGY_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
{
|
||||
id: 'clean-transport',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.CLEAN_TRANSPORT),
|
||||
indicators: [dieselPartMatter, trafficVolume, lowInc],
|
||||
isDisadvagtaged: properties[constants.IS_TRANSPORT_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_TRANSPORT_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_TRANSPORT_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_TRANSPORT_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
{
|
||||
id: 'sustain-house',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.SUSTAIN_HOUSE),
|
||||
indicators: [houseBurden, leadPaint, lowInc],
|
||||
isDisadvagtaged: properties[constants.IS_HOUSING_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_HOUSING_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_HOUSING_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_HOUSING_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
{
|
||||
id: 'leg-pollute',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.LEG_POLLUTE),
|
||||
indicators: [proxHaz, proxNPL, proxRMP, lowInc],
|
||||
isDisadvagtaged: properties[constants.IS_POLLUTION_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_POLLUTION_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_POLLUTION_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_POLLUTION_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
{
|
||||
id: 'clean-water',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.CLEAN_WATER),
|
||||
indicators: [wasteWater, lowInc],
|
||||
isDisadvagtaged: properties[constants.IS_WATER_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_WATER_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_WATER_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_WATER_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
{
|
||||
id: 'health-burdens',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.HEALTH_BURDEN),
|
||||
indicators: [asthma, diabetes, heartDisease, lifeExpect, lowInc],
|
||||
isDisadvagtaged: properties[constants.IS_HEALTH_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_HEALTH_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_HEALTH_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_HEALTH_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
{
|
||||
id: 'work-dev',
|
||||
titleText: intl.formatMessage(EXPLORE_COPY.SIDE_PANEL_CATEGORY.WORK_DEV),
|
||||
indicators: [lowMedInc, lingIso, unemploy, poverty, highSchool],
|
||||
isDisadvagtaged: properties[constants.IS_WORKFORCE_FACTOR_DISADVANTAGED_L] ?
|
||||
properties[constants.IS_WORKFORCE_FACTOR_DISADVANTAGED_L] : null,
|
||||
isDisadvagtaged: properties[constants.IS_WORKFORCE_FACTOR_DISADVANTAGED_M] ?
|
||||
properties[constants.IS_WORKFORCE_FACTOR_DISADVANTAGED_M] : null,
|
||||
},
|
||||
];
|
||||
|
||||
|
|
|
@ -35,8 +35,8 @@ export const PERFORMANCE_MARKER_MAP_IDLE = 'MAP_IDLE';
|
|||
export type J40Properties = { [key: string]: any };
|
||||
|
||||
// Properties
|
||||
export const SCORE_PROPERTY_HIGH = 'SL_PFS';
|
||||
export const SCORE_PROPERTY_LOW = 'L_SCORE';
|
||||
export const SCORE_PROPERTY_HIGH = 'SM_PFS';
|
||||
export const SCORE_PROPERTY_LOW = 'M_SCORE';
|
||||
export const GEOID_PROPERTY = 'GEOID10';
|
||||
|
||||
// Indicator values:
|
||||
|
@ -70,14 +70,14 @@ export const PROXIMITY_RMP_SITES_PERCENTILE = 'RMP_PFS';
|
|||
export const PROXIMITY_TSDF_SITES_PERCENTILE = 'TSDF_PFS';
|
||||
|
||||
// Category booleans (disadvantaged or not):
|
||||
export const IS_CLIMATE_FACTOR_DISADVANTAGED_L = 'L_CLT';
|
||||
export const IS_ENERGY_FACTOR_DISADVANTAGED_L = 'L_ENY';
|
||||
export const IS_TRANSPORT_FACTOR_DISADVANTAGED_L = 'L_TRN';
|
||||
export const IS_HOUSING_FACTOR_DISADVANTAGED_L = 'L_HSG';
|
||||
export const IS_POLLUTION_FACTOR_DISADVANTAGED_L = 'L_PLN';
|
||||
export const IS_WATER_FACTOR_DISADVANTAGED_L = 'L_WTR';
|
||||
export const IS_HEALTH_FACTOR_DISADVANTAGED_L = 'L_HLTH';
|
||||
export const IS_WORKFORCE_FACTOR_DISADVANTAGED_L = 'L_WKFC';
|
||||
export const IS_CLIMATE_FACTOR_DISADVANTAGED_M = 'M_CLT';
|
||||
export const IS_ENERGY_FACTOR_DISADVANTAGED_M = 'M_ENY';
|
||||
export const IS_TRANSPORT_FACTOR_DISADVANTAGED_M = 'M_TRN';
|
||||
export const IS_HOUSING_FACTOR_DISADVANTAGED_M = 'M_HSG';
|
||||
export const IS_POLLUTION_FACTOR_DISADVANTAGED_M = 'M_PLN';
|
||||
export const IS_WATER_FACTOR_DISADVANTAGED_M = 'M_WTR';
|
||||
export const IS_HEALTH_FACTOR_DISADVANTAGED_M = 'M_HLTH';
|
||||
export const IS_WORKFORCE_FACTOR_DISADVANTAGED_M = 'M_WKFC';
|
||||
|
||||
// Total indicators values:
|
||||
export const TOTAL_NUMBER_OF_DISADVANTAGE_INDICATORS = 'TC';
|
||||
|
|
|
@ -90,8 +90,8 @@ DATASET_LIST = [
|
|||
"class_name": "HudRecapETL",
|
||||
},
|
||||
{
|
||||
"name": "epa_rsei_aggregate",
|
||||
"module_dir": "epa_rsei_aggregate",
|
||||
"name": "epa_rsei",
|
||||
"module_dir": "epa_rsei",
|
||||
"class_name": "EPARiskScreeningEnvironmentalIndicatorsETL",
|
||||
},
|
||||
{
|
||||
|
|
|
@ -120,16 +120,16 @@ TILES_SCORE_COLUMNS = {
|
|||
+ field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS",
|
||||
field_names.WASTEWATER_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
|
||||
field_names.L_WATER: "L_WTR",
|
||||
field_names.L_WORKFORCE: "L_WKFC",
|
||||
field_names.L_CLIMATE: "L_CLT",
|
||||
field_names.L_ENERGY: "L_ENY",
|
||||
field_names.L_TRANSPORTATION: "L_TRN",
|
||||
field_names.L_HOUSING: "L_HSG",
|
||||
field_names.L_POLLUTION: "L_PLN",
|
||||
field_names.L_HEALTH: "L_HLTH",
|
||||
field_names.SCORE_L_COMMUNITIES: "SL_C",
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX: "SL_PFS",
|
||||
field_names.M_WATER: "M_WTR",
|
||||
field_names.M_WORKFORCE: "M_WKFC",
|
||||
field_names.M_CLIMATE: "M_CLT",
|
||||
field_names.M_ENERGY: "M_ENY",
|
||||
field_names.M_TRANSPORTATION: "M_TRN",
|
||||
field_names.M_HOUSING: "M_HSG",
|
||||
field_names.M_POLLUTION: "M_PLN",
|
||||
field_names.M_HEALTH: "M_HLTH",
|
||||
field_names.SCORE_M_COMMUNITIES: "SM_C",
|
||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
|
||||
|
@ -151,8 +151,8 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE",
|
||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE",
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE",
|
||||
field_names.LOW_HS_EDUCATION_FIELD: "LHE",
|
||||
field_names.FPL_200_SERIES: "FPL200S",
|
||||
field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD: "LHE",
|
||||
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "FPL200S",
|
||||
field_names.THRESHOLD_COUNT: "TC",
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE",
|
||||
|
@ -191,10 +191,10 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD,
|
||||
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
|
||||
# Finally we augment with the GEOID10, county, and state
|
||||
|
@ -203,9 +203,9 @@ DOWNLOADABLE_SCORE_COLUMNS = [
|
|||
field_names.COUNTY_FIELD,
|
||||
field_names.STATE_FIELD,
|
||||
field_names.THRESHOLD_COUNT,
|
||||
field_names.SCORE_L_COMMUNITIES,
|
||||
field_names.SCORE_M_COMMUNITIES,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
field_names.FPL_200_SERIES,
|
||||
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
|
|
@ -33,12 +33,12 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
self.DATA_PATH / "census" / "geojson" / "us.json"
|
||||
)
|
||||
|
||||
# Import the shortened name for Score L percentile ("SL_PFS") that's used on the
|
||||
# Import the shortened name for Score M percentile ("SM_PFS") that's used on the
|
||||
# tiles.
|
||||
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
self.TARGET_SCORE_RENAME_TO = "L_SCORE"
|
||||
self.TARGET_SCORE_RENAME_TO = "M_SCORE"
|
||||
|
||||
# Import the shortened name for tract ("GTF") that's used on the tiles.
|
||||
self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||
|
|
|
@ -323,7 +323,7 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
# Rename score column
|
||||
downloadable_df_copy = downloadable_df.rename(
|
||||
columns={
|
||||
field_names.SCORE_L_COMMUNITIES: "Identified as disadvantaged (v0.1)"
|
||||
field_names.SCORE_M_COMMUNITIES: "Identified as disadvantaged (v0.1)"
|
||||
},
|
||||
inplace=False,
|
||||
)
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -42,7 +42,7 @@ class CDCSVIIndex(ExtractTransformLoad):
|
|||
self.df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Extracting 43 MB CDC SVI INDEX")
|
||||
logger.info("Downloading 43 MB CDC SVI INDEX")
|
||||
self.df = pd.read_csv(
|
||||
filepath_or_buffer=self.CDC_SVI_INDEX_URL,
|
||||
dtype={self.CDC_SVI_INDEX_TRACTS_FIPS_CODE: "string"},
|
||||
|
|
|
@ -22,9 +22,7 @@ class EPARiskScreeningEnvironmentalIndicatorsETL(ExtractTransformLoad):
|
|||
def __init__(self):
|
||||
self.AGGREGATED_RSEI_SCORE_FILE_URL = "http://abt-rsei.s3.amazonaws.com/microdata2019/census_agg/CensusMicroTracts2019_2019_aggregated.zip"
|
||||
|
||||
self.OUTPUT_PATH: Path = (
|
||||
self.DATA_PATH / "dataset" / "epa_rsei_aggregated"
|
||||
)
|
||||
self.OUTPUT_PATH: Path = self.DATA_PATH / "dataset" / "epa_rsei"
|
||||
self.EPA_RSEI_SCORE_THRESHOLD_CUTOFF = 0.75
|
||||
self.TRACT_INPUT_COLUMN_NAME = "GEOID10"
|
||||
self.NUMBER_FACILITIES_INPUT_FIELD = "NUMFACS"
|
||||
|
@ -74,12 +72,12 @@ class EPARiskScreeningEnvironmentalIndicatorsETL(ExtractTransformLoad):
|
|||
unzip_file_from_url(
|
||||
file_url=self.AGGREGATED_RSEI_SCORE_FILE_URL,
|
||||
download_path=self.TMP_PATH,
|
||||
unzipped_file_path=self.TMP_PATH / "epa_rsei_aggregated",
|
||||
unzipped_file_path=self.TMP_PATH / "epa_rsei",
|
||||
)
|
||||
|
||||
self.df = pd.read_csv(
|
||||
filepath_or_buffer=self.TMP_PATH
|
||||
/ "epa_rsei_aggregated"
|
||||
/ "epa_rsei"
|
||||
/ "CensusMicroTracts2019_2019_aggregated.csv",
|
||||
# The following need to remain as strings for all of their digits, not get
|
||||
# converted to numbers.
|
|
@ -33,7 +33,7 @@ class MarylandEJScreenETL(ExtractTransformLoad):
|
|||
self.df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Downloading Maryland EJSCREEN Data")
|
||||
logger.info("Downloading 207MB Maryland EJSCREEN Data")
|
||||
super().extract(
|
||||
self.MARYLAND_EJSCREEN_URL,
|
||||
self.TMP_PATH,
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
"import requests\n",
|
||||
"import string\n",
|
||||
"import sys\n",
|
||||
"import time\n",
|
||||
"import typing\n",
|
||||
"import us\n",
|
||||
"import zipfile\n",
|
||||
|
@ -61,7 +62,10 @@
|
|||
"# Set some global parameters\n",
|
||||
"DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
|
||||
"TEMP_DATA_DIR = DATA_DIR / \"tmp\"\n",
|
||||
"COMPARISON_OUTPUTS_DIR = DATA_DIR / \"comparison_outputs\"\n",
|
||||
"\n",
|
||||
"time_str = time.strftime(\"%Y%m%d-%H%M%S\")\n",
|
||||
"\n",
|
||||
"COMPARISON_OUTPUTS_DIR = DATA_DIR / \"comparison_outputs\" / time_str\n",
|
||||
"\n",
|
||||
"# Make the dirs if they don't exist\n",
|
||||
"TEMP_DATA_DIR.mkdir(parents=True, exist_ok=True)\n",
|
||||
|
@ -109,7 +113,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a251a0fb",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load EJSCREEN Areas of Concern data.\n",
|
||||
|
@ -143,7 +149,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e43a9e23",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Merge EJSCREEN AoCs into CEJST data.\n",
|
||||
|
@ -173,10 +181,13 @@
|
|||
"source": [
|
||||
"# Analyze one field at a time (useful for setting thresholds)\n",
|
||||
"\n",
|
||||
"quantile = 0.9\n",
|
||||
"quantile = 0.95\n",
|
||||
"\n",
|
||||
"for field in [\n",
|
||||
" field_names.MEDIAN_HOUSE_VALUE_FIELD,\n",
|
||||
" field_names.COLLEGE_ATTENDANCE_FIELD,\n",
|
||||
" field_names.HIGH_SCHOOL_ED_FIELD,\n",
|
||||
" field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,\n",
|
||||
" field_names.POVERTY_LESS_THAN_200_FPL_FIELD,\n",
|
||||
"]:\n",
|
||||
" print(f\"\\n~~~~Analysis for field `{field}`~~~~\")\n",
|
||||
" print(cejst_df[field].describe())\n",
|
||||
|
@ -223,7 +234,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d8ec43dc",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load persistent poverty data\n",
|
||||
|
@ -256,7 +269,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "81826d29",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load mapping inequality data\n",
|
||||
|
@ -314,7 +329,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "605af1ff",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load alternative energy-related definition\n",
|
||||
|
@ -333,7 +350,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fe4a2939",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load Michigan EJSCREEN\n",
|
||||
|
@ -356,15 +375,13 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# Load EPA RSEI EJSCREEN\n",
|
||||
"epa_rsei_aggregate_data_path = (\n",
|
||||
" DATA_DIR / \"dataset\" / \"epa_rsei_aggregated\" / \"usa.csv\"\n",
|
||||
")\n",
|
||||
"epa_rsei_aggregate_df = pd.read_csv(\n",
|
||||
" epa_rsei_aggregate_data_path,\n",
|
||||
"epa_rsei_data_path = DATA_DIR / \"dataset\" / \"epa_rsei\" / \"usa.csv\"\n",
|
||||
"epa_rsei_df = pd.read_csv(\n",
|
||||
" epa_rsei_data_path,\n",
|
||||
" dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"epa_rsei_aggregate_df.head()"
|
||||
"epa_rsei_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -382,7 +399,7 @@
|
|||
" calenviroscreen_df,\n",
|
||||
" persistent_poverty_df,\n",
|
||||
" mapping_inequality_df,\n",
|
||||
" epa_rsei_aggregate_df,\n",
|
||||
" epa_rsei_df,\n",
|
||||
" maryland_ejscreen_df,\n",
|
||||
" energy_definition_alternative_draft_df,\n",
|
||||
" michigan_ejscreen_df,\n",
|
||||
|
@ -416,7 +433,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2de78f71",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Special handling for HOLC.\n",
|
||||
|
@ -461,13 +480,41 @@
|
|||
" field_names.L_NON_WORKFORCE,\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"definition_m_factors = [\n",
|
||||
" field_names.M_CLIMATE,\n",
|
||||
" field_names.M_ENERGY,\n",
|
||||
" field_names.M_TRANSPORTATION,\n",
|
||||
" field_names.M_HOUSING,\n",
|
||||
" field_names.M_POLLUTION,\n",
|
||||
" field_names.M_WATER,\n",
|
||||
" field_names.M_HEALTH,\n",
|
||||
" field_names.M_WORKFORCE,\n",
|
||||
" # Also include a combined factor for all the non-workforce elements.\n",
|
||||
" field_names.M_NON_WORKFORCE,\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"census_tract_indices = (\n",
|
||||
" [\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Definition M\",\n",
|
||||
" priority_communities_field=field_names.SCORE_M_COMMUNITIES,\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" + [\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Definition L\",\n",
|
||||
" priority_communities_field=field_names.SCORE_L_COMMUNITIES,\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" # Insert indices for each of the factors from Definition M.\n",
|
||||
" # Note: since these involve no renaming, we write them using list comprehension.\n",
|
||||
" + [\n",
|
||||
" Index(\n",
|
||||
" method_name=factor,\n",
|
||||
" priority_communities_field=factor,\n",
|
||||
" )\n",
|
||||
" for factor in definition_m_factors\n",
|
||||
" ]\n",
|
||||
" # Insert indices for each of the factors from Definition L.\n",
|
||||
" # Note: since these involve no renaming, we write them using list comprehension.\n",
|
||||
" + [\n",
|
||||
|
@ -575,6 +622,7 @@
|
|||
"comparison_fields = [\n",
|
||||
" field_names.POVERTY_LESS_THAN_100_FPL_FIELD,\n",
|
||||
" field_names.POVERTY_LESS_THAN_200_FPL_FIELD,\n",
|
||||
" field_names.COLLEGE_ATTENDANCE_FIELD,\n",
|
||||
" field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,\n",
|
||||
" field_names.LINGUISTIC_ISO_FIELD,\n",
|
||||
" field_names.UNEMPLOYMENT_FIELD,\n",
|
||||
|
@ -584,6 +632,8 @@
|
|||
" field_names.LIFE_EXPECTANCY_FIELD,\n",
|
||||
" field_names.HEALTH_INSURANCE_FIELD,\n",
|
||||
" field_names.PHYS_HEALTH_NOT_GOOD_FIELD,\n",
|
||||
" field_names.DIABETES_FIELD,\n",
|
||||
" field_names.LOW_READING_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
|
@ -874,7 +924,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2bcbcabf",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"directory = COMPARISON_OUTPUTS_DIR / \"tracts_basic_stats\"\n",
|
||||
|
@ -1001,24 +1053,28 @@
|
|||
" E.g., it might show that tracts prioritized by A but not B have a higher average income,\n",
|
||||
" or that tracts prioritized by B but not A have a lower percent of unemployed people.\n",
|
||||
" \"\"\"\n",
|
||||
" df_subset = df[\n",
|
||||
" [\n",
|
||||
" method_a_priority_census_tracts_field,\n",
|
||||
" method_b_priority_census_tracts_field,\n",
|
||||
" ]\n",
|
||||
" + comparison_fields\n",
|
||||
" fields_to_group_by = [\n",
|
||||
" method_a_priority_census_tracts_field,\n",
|
||||
" method_b_priority_census_tracts_field,\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" df_subset = df[fields_to_group_by + comparison_fields]\n",
|
||||
"\n",
|
||||
" grouped_df = df_subset.groupby(\n",
|
||||
" [\n",
|
||||
" method_a_priority_census_tracts_field,\n",
|
||||
" method_b_priority_census_tracts_field,\n",
|
||||
" ],\n",
|
||||
" fields_to_group_by,\n",
|
||||
" dropna=False,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Run the comparison function on the groups.\n",
|
||||
" comparison_df = grouped_df.mean().reset_index()\n",
|
||||
" # Take the mean of all fields.\n",
|
||||
" comparison_df = grouped_df.mean()\n",
|
||||
"\n",
|
||||
" # Also add in the count of census tracts.\n",
|
||||
" count_field_name = \"Count of census tracts\"\n",
|
||||
" comparison_df[count_field_name] = grouped_df.size().to_frame(\n",
|
||||
" count_field_name\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" comparison_df = comparison_df.reset_index()\n",
|
||||
"\n",
|
||||
" criteria_description_field_name = \"Description of criteria\"\n",
|
||||
" comparison_df[criteria_description_field_name] = comparison_df.apply(\n",
|
||||
|
@ -1030,10 +1086,13 @@
|
|||
" )\n",
|
||||
"\n",
|
||||
" # Put criteria description column first.\n",
|
||||
" new_column_order = [criteria_description_field_name] + [\n",
|
||||
" col\n",
|
||||
" for col in comparison_df.columns\n",
|
||||
" if col != criteria_description_field_name\n",
|
||||
" columns_to_put_first = (\n",
|
||||
" [criteria_description_field_name]\n",
|
||||
" + fields_to_group_by\n",
|
||||
" + [count_field_name]\n",
|
||||
" )\n",
|
||||
" new_column_order = columns_to_put_first + [\n",
|
||||
" col for col in comparison_df.columns if col not in columns_to_put_first\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" comparison_df = comparison_df[new_column_order]\n",
|
||||
|
@ -1356,7 +1415,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7d095ebd",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: this is helpful because this file is long-running, so it alerts the user when the\n",
|
||||
|
@ -1369,7 +1430,7 @@
|
|||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -1383,7 +1444,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -28,6 +28,8 @@ SCORE_I = "Score I"
|
|||
SCORE_I_COMMUNITIES = "Score I (communities)"
|
||||
SCORE_K = "NMTC (communities)"
|
||||
SCORE_K_COMMUNITIES = "Score K (communities)"
|
||||
|
||||
# Definition L fields
|
||||
SCORE_L = "Definition L"
|
||||
SCORE_L_COMMUNITIES = "Definition L (communities)"
|
||||
L_CLIMATE = "Climate Factor (Definition L)"
|
||||
|
@ -39,6 +41,20 @@ L_WATER = "Water Factor (Definition L)"
|
|||
L_HEALTH = "Health Factor (Definition L)"
|
||||
L_WORKFORCE = "Workforce Factor (Definition L)"
|
||||
L_NON_WORKFORCE = "Any Non-Workforce Factor (Definition L)"
|
||||
|
||||
# Definition M fields
|
||||
SCORE_M = "Definition M"
|
||||
SCORE_M_COMMUNITIES = "Definition M (communities)"
|
||||
M_CLIMATE = "Climate Factor (Definition M)"
|
||||
M_ENERGY = "Energy Factor (Definition M)"
|
||||
M_TRANSPORTATION = "Transportation Factor (Definition M)"
|
||||
M_HOUSING = "Housing Factor (Definition M)"
|
||||
M_POLLUTION = "Pollution Factor (Definition M)"
|
||||
M_WATER = "Water Factor (Definition M)"
|
||||
M_HEALTH = "Health Factor (Definition M)"
|
||||
M_WORKFORCE = "Workforce Factor (Definition M)"
|
||||
M_NON_WORKFORCE = "Any Non-Workforce Factor (Definition M)"
|
||||
|
||||
PERCENTILE = 90
|
||||
MEDIAN_HOUSE_VALUE_PERCENTILE = 90
|
||||
|
||||
|
@ -297,6 +313,8 @@ TRANSPORTATION_COSTS = "Transportation Costs"
|
|||
|
||||
#####
|
||||
# Names for individual factors being exceeded
|
||||
# TODO: for Definition M, create new output field names (different than those used by
|
||||
# Definition L) and change all output fields to say low income and low college
|
||||
# Climate Change
|
||||
EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile"
|
||||
|
@ -352,6 +370,8 @@ LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD = (
|
|||
)
|
||||
|
||||
# Workforce
|
||||
# TODO: for Definition M, create new output field names (different than those used by
|
||||
# Definition L) and change all output fields to say low HS and low college
|
||||
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for unemployment"
|
||||
" and has low HS education"
|
||||
|
@ -373,6 +393,9 @@ LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
|||
)
|
||||
|
||||
LOW_HS_EDUCATION_FIELD = "Low high school education"
|
||||
LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD = (
|
||||
"Low high school education and low college attendance"
|
||||
)
|
||||
|
||||
# Workforce for island areas
|
||||
ISLAND_AREAS_SUFFIX = " in 2009 (island areas)"
|
||||
|
@ -420,5 +443,8 @@ LOW_READING_LOW_HS_EDUCATION_FIELD = (
|
|||
THRESHOLD_COUNT = "Total threshold criteria exceeded"
|
||||
|
||||
FPL_200_SERIES = "Is low income?"
|
||||
FPL_200_AND_COLLEGE_ATTENDANCE_SERIES = (
|
||||
"Is low income and low college attendance?"
|
||||
)
|
||||
# End of names for individual factors being exceeded
|
||||
####
|
||||
|
|
|
@ -120,54 +120,6 @@ class ScoreL(Score):
|
|||
axis=1, skipna=True
|
||||
)
|
||||
|
||||
def add_columns(self) -> pd.DataFrame:
|
||||
logger.info("Adding Score L")
|
||||
|
||||
self.df[field_names.THRESHOLD_COUNT] = 0
|
||||
self.df[field_names.FPL_200_SERIES] = self._create_low_income_threshold(
|
||||
self.df
|
||||
)
|
||||
self.df[field_names.L_CLIMATE] = self._climate_factor()
|
||||
self.df[field_names.L_ENERGY] = self._energy_factor()
|
||||
self.df[field_names.L_TRANSPORTATION] = self._transportation_factor()
|
||||
self.df[field_names.L_HOUSING] = self._housing_factor()
|
||||
self.df[field_names.L_POLLUTION] = self._pollution_factor()
|
||||
self.df[field_names.L_WATER] = self._water_factor()
|
||||
self.df[field_names.L_HEALTH] = self._health_factor()
|
||||
self.df[field_names.L_WORKFORCE] = self._workforce_factor()
|
||||
|
||||
factors = [
|
||||
field_names.L_CLIMATE,
|
||||
field_names.L_ENERGY,
|
||||
field_names.L_TRANSPORTATION,
|
||||
field_names.L_HOUSING,
|
||||
field_names.L_POLLUTION,
|
||||
field_names.L_WATER,
|
||||
field_names.L_HEALTH,
|
||||
field_names.L_WORKFORCE,
|
||||
]
|
||||
self.df[field_names.SCORE_L_COMMUNITIES] = self.df[factors].any(axis=1)
|
||||
|
||||
# Note: this is purely used for comparison tool analysis, and can be removed at a later date. - LMB.
|
||||
non_workforce_factors = [
|
||||
field_names.L_CLIMATE,
|
||||
field_names.L_ENERGY,
|
||||
field_names.L_TRANSPORTATION,
|
||||
field_names.L_HOUSING,
|
||||
field_names.L_POLLUTION,
|
||||
field_names.L_WATER,
|
||||
field_names.L_HEALTH,
|
||||
]
|
||||
self.df[field_names.L_NON_WORKFORCE] = self.df[
|
||||
non_workforce_factors
|
||||
].any(axis=1)
|
||||
|
||||
self.df[
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
] = self.df[field_names.SCORE_L_COMMUNITIES].astype(int)
|
||||
|
||||
return self.df
|
||||
|
||||
def _climate_factor(self) -> bool:
|
||||
# In Xth percentile or above for FEMA’s Risk Index (Source: FEMA
|
||||
# AND
|
||||
|
@ -689,3 +641,51 @@ class ScoreL(Score):
|
|||
workforce_combined_criteria_for_states
|
||||
| workforce_combined_criteria_for_island_areas
|
||||
)
|
||||
|
||||
def add_columns(self) -> pd.DataFrame:
|
||||
logger.info("Adding Score L")
|
||||
|
||||
self.df[field_names.THRESHOLD_COUNT] = 0
|
||||
self.df[field_names.FPL_200_SERIES] = self._create_low_income_threshold(
|
||||
self.df
|
||||
)
|
||||
self.df[field_names.L_CLIMATE] = self._climate_factor()
|
||||
self.df[field_names.L_ENERGY] = self._energy_factor()
|
||||
self.df[field_names.L_TRANSPORTATION] = self._transportation_factor()
|
||||
self.df[field_names.L_HOUSING] = self._housing_factor()
|
||||
self.df[field_names.L_POLLUTION] = self._pollution_factor()
|
||||
self.df[field_names.L_WATER] = self._water_factor()
|
||||
self.df[field_names.L_HEALTH] = self._health_factor()
|
||||
self.df[field_names.L_WORKFORCE] = self._workforce_factor()
|
||||
|
||||
factors = [
|
||||
field_names.L_CLIMATE,
|
||||
field_names.L_ENERGY,
|
||||
field_names.L_TRANSPORTATION,
|
||||
field_names.L_HOUSING,
|
||||
field_names.L_POLLUTION,
|
||||
field_names.L_WATER,
|
||||
field_names.L_HEALTH,
|
||||
field_names.L_WORKFORCE,
|
||||
]
|
||||
self.df[field_names.SCORE_L_COMMUNITIES] = self.df[factors].any(axis=1)
|
||||
|
||||
# Note: this is purely used for comparison tool analysis, and can be removed at a later date. - LMB.
|
||||
non_workforce_factors = [
|
||||
field_names.L_CLIMATE,
|
||||
field_names.L_ENERGY,
|
||||
field_names.L_TRANSPORTATION,
|
||||
field_names.L_HOUSING,
|
||||
field_names.L_POLLUTION,
|
||||
field_names.L_WATER,
|
||||
field_names.L_HEALTH,
|
||||
]
|
||||
self.df[field_names.L_NON_WORKFORCE] = self.df[
|
||||
non_workforce_factors
|
||||
].any(axis=1)
|
||||
|
||||
self.df[
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
] = self.df[field_names.SCORE_L_COMMUNITIES].astype(int)
|
||||
|
||||
return self.df
|
||||
|
|
770
data/data-pipeline/data_pipeline/score/score_m.py
Normal file
770
data/data-pipeline/data_pipeline/score/score_m.py
Normal file
|
@ -0,0 +1,770 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from data_pipeline.score.score import Score
|
||||
import data_pipeline.score.field_names as field_names
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreM(Score):
|
||||
"""Very similar to Score L, with a few minor modifications."""
|
||||
|
||||
def __init__(self, df: pd.DataFrame) -> None:
|
||||
self.LOW_INCOME_THRESHOLD: float = 0.65
|
||||
self.MAX_COLLEGE_ATTENDANCE_THRESHOLD: float = 0.20
|
||||
self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
|
||||
self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90
|
||||
self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10
|
||||
|
||||
super().__init__(df)
|
||||
|
||||
def _combine_island_areas_with_states_and_set_thresholds(
|
||||
self,
|
||||
df: pd.DataFrame,
|
||||
column_from_island_areas: str,
|
||||
column_from_decennial_census: str,
|
||||
combined_column_name: str,
|
||||
threshold_cutoff_for_island_areas: float,
|
||||
) -> (pd.DataFrame, str):
|
||||
"""Steps to set thresholds for island areas.
|
||||
|
||||
This function is fairly logically complicated. It takes the following steps:
|
||||
|
||||
1. Combine the two different fields into a single field.
|
||||
2. Calculate the 90th percentile cutoff raw value for the combined field.
|
||||
3. Create a boolean series that is true for any census tract in the island
|
||||
areas (and only the island areas) that exceeds this cutoff.
|
||||
|
||||
For step one, it combines data that is either the island area's Decennial Census
|
||||
value in 2009 or the state's value in 5-year ACS ending in 2010.
|
||||
|
||||
This will be used to generate the percentile cutoff for the 90th percentile.
|
||||
|
||||
The stateside decennial census stopped asking economic comparisons,
|
||||
so this is as close to apples-to-apples as we get. We use 5-year ACS for data
|
||||
robustness over 1-year ACS.
|
||||
"""
|
||||
# Create the combined field.
|
||||
# TODO: move this combined field percentile calculation to `etl_score`,
|
||||
# since most other percentile logic is there.
|
||||
# There should only be one entry in either 2009 or 2019 fields, not one in both.
|
||||
# But just to be safe, we take the mean and ignore null values so if there
|
||||
# *were* entries in both, this result would make sense.
|
||||
df[combined_column_name] = df[
|
||||
[column_from_island_areas, column_from_decennial_census]
|
||||
].mean(axis=1, skipna=True)
|
||||
|
||||
logger.info(
|
||||
f"Combined field `{combined_column_name}` has "
|
||||
f"{df[combined_column_name].isnull().sum()} "
|
||||
f"({df[combined_column_name].isnull().sum() * 100 / len(df):.2f}%) "
|
||||
f"missing values for census tracts. "
|
||||
)
|
||||
|
||||
# Calculate the percentile threshold raw value.
|
||||
raw_threshold = np.nanquantile(
|
||||
a=df[combined_column_name], q=threshold_cutoff_for_island_areas
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"For combined field `{combined_column_name}`, "
|
||||
f"the {threshold_cutoff_for_island_areas*100:.0f} percentile cutoff is a "
|
||||
f"raw value of {raw_threshold:.3f}."
|
||||
)
|
||||
|
||||
threshold_column_name = (
|
||||
f"{column_from_island_areas} exceeds "
|
||||
f"{threshold_cutoff_for_island_areas*100:.0f}th percentile"
|
||||
)
|
||||
|
||||
df[threshold_column_name] = (
|
||||
df[column_from_island_areas] >= raw_threshold
|
||||
)
|
||||
|
||||
percent_of_tracts_highlighted = (
|
||||
100
|
||||
* df[threshold_column_name].sum()
|
||||
/ df[column_from_island_areas].notnull().sum()
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"For `{threshold_column_name}`, "
|
||||
f"{df[threshold_column_name].sum()} ("
|
||||
f"{percent_of_tracts_highlighted:.2f}% of tracts that have non-null data "
|
||||
f"in the column) have a value of TRUE."
|
||||
)
|
||||
|
||||
return df, threshold_column_name
|
||||
|
||||
def _create_low_income_and_low_college_attendance_threshold(
|
||||
self, df: pd.DataFrame
|
||||
) -> pd.Series:
|
||||
"""
|
||||
Returns a pandas series (really a numpy array)
|
||||
of booleans based on the condition of the FPL at 200%
|
||||
is at or more than some established threshold
|
||||
"""
|
||||
return (
|
||||
(
|
||||
df[
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.LOW_INCOME_THRESHOLD
|
||||
)
|
||||
) & (
|
||||
(
|
||||
df[field_names.COLLEGE_ATTENDANCE_FIELD]
|
||||
<= self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
|
||||
)
|
||||
| (
|
||||
# If college attendance data is null for this tract, just rely on the
|
||||
# poverty data
|
||||
df[field_names.COLLEGE_ATTENDANCE_FIELD].isna()
|
||||
)
|
||||
)
|
||||
|
||||
def _increment_total_eligibility_exceeded(
|
||||
self, columns_for_subset: list
|
||||
) -> None:
|
||||
"""
|
||||
Increments the total eligible factors for a given tract
|
||||
"""
|
||||
|
||||
self.df[field_names.THRESHOLD_COUNT] += self.df[columns_for_subset].sum(
|
||||
axis=1, skipna=True
|
||||
)
|
||||
|
||||
def _climate_factor(self) -> bool:
|
||||
# In Xth percentile or above for FEMA’s Risk Index (Source: FEMA
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
climate_eligibility_columns = [
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
# field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
expected_population_loss_threshold = (
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
expected_agriculture_loss_threshold = (
|
||||
self.df[
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
expected_building_loss_threshold = (
|
||||
self.df[
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
extreme_heat_and_median_house_value_threshold = (
|
||||
self.df[
|
||||
field_names.EXTREME_HEAT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
) & (
|
||||
self.df[
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
<= self.MEDIAN_HOUSE_VALUE_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
|
||||
expected_population_loss_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD] = (
|
||||
expected_agriculture_loss_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD] = (
|
||||
expected_building_loss_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD
|
||||
] = (
|
||||
extreme_heat_and_median_house_value_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(climate_eligibility_columns)
|
||||
|
||||
return self.df[climate_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _energy_factor(self) -> bool:
|
||||
# In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
energy_eligibility_columns = [
|
||||
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
|
||||
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
energy_burden_threshold = (
|
||||
self.df[
|
||||
field_names.ENERGY_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
pm25_threshold = (
|
||||
self.df[
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.PM25_EXPOSURE_LOW_INCOME_FIELD] = (
|
||||
pm25_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[field_names.ENERGY_BURDEN_LOW_INCOME_FIELD] = (
|
||||
energy_burden_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(energy_eligibility_columns)
|
||||
|
||||
return self.df[energy_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _transportation_factor(self) -> bool:
|
||||
# In Xth percentile or above for diesel particulate matter (Source: EPA National Air Toxics Assessment (NATA)
|
||||
# or
|
||||
# In Xth percentile or above for PM 2.5 (Source: EPA, Office of Air and Radiation (OAR) fusion of model and monitor data)]
|
||||
# or
|
||||
# In Xth percentile or above traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
transportion_eligibility_columns = [
|
||||
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
|
||||
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
diesel_threshold = (
|
||||
self.df[
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
traffic_threshold = (
|
||||
self.df[
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD] = (
|
||||
diesel_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD] = (
|
||||
traffic_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
transportion_eligibility_columns
|
||||
)
|
||||
|
||||
return self.df[transportion_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _housing_factor(self) -> bool:
|
||||
# (
|
||||
# In Xth percentile or above for lead paint (Source: Census's American Community Survey’s
|
||||
# percent of housing units built pre-1960, used as an indicator of potential lead paint exposure in homes)
|
||||
# AND
|
||||
# In Yth percentile or below for Median House Value (Source: Census's American Community Survey)
|
||||
# )
|
||||
# or
|
||||
# In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
housing_eligibility_columns = [
|
||||
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
lead_paint_median_home_value_threshold = (
|
||||
self.df[
|
||||
field_names.LEAD_PAINT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
) & (
|
||||
self.df[
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
<= self.MEDIAN_HOUSE_VALUE_THRESHOLD
|
||||
)
|
||||
|
||||
housing_burden_threshold = (
|
||||
self.df[
|
||||
field_names.HOUSING_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
# series by series indicators
|
||||
self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = (
|
||||
lead_paint_median_home_value_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[field_names.HOUSING_BURDEN_LOW_INCOME_FIELD] = (
|
||||
housing_burden_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(housing_eligibility_columns)
|
||||
|
||||
return self.df[housing_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _pollution_factor(self) -> bool:
|
||||
# Proximity to Risk Management Plan sites is > X
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
pollution_eligibility_columns = [
|
||||
field_names.RMP_LOW_INCOME_FIELD,
|
||||
field_names.SUPERFUND_LOW_INCOME_FIELD,
|
||||
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
rmp_sites_threshold = (
|
||||
self.df[field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
npl_sites_threshold = (
|
||||
self.df[field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
tsdf_sites_threshold = (
|
||||
self.df[
|
||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
# individual series-by-series
|
||||
self.df[field_names.RMP_LOW_INCOME_FIELD] = (
|
||||
rmp_sites_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
self.df[field_names.SUPERFUND_LOW_INCOME_FIELD] = (
|
||||
npl_sites_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
self.df[field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD] = (
|
||||
tsdf_sites_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
pollution_eligibility_columns
|
||||
)
|
||||
|
||||
return self.df[pollution_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _water_factor(self) -> bool:
|
||||
# In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
wastewater_threshold = (
|
||||
self.df[
|
||||
field_names.WASTEWATER_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] = (
|
||||
wastewater_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]
|
||||
)
|
||||
|
||||
return self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]
|
||||
|
||||
def _health_factor(self) -> bool:
|
||||
# In Xth percentile or above for diabetes (Source: CDC Places)
|
||||
# or
|
||||
# In Xth percentile or above for asthma (Source: CDC Places)
|
||||
# or
|
||||
# In Xth percentile or above for heart disease
|
||||
# or
|
||||
# In Xth percentile or above for low life expectancy (Source: CDC Places)
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level. Source: Census's American Community Survey]
|
||||
|
||||
health_eligibility_columns = [
|
||||
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||
# field_names.HEALTHY_FOOD_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
diabetes_threshold = (
|
||||
self.df[
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
asthma_threshold = (
|
||||
self.df[
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
heart_disease_threshold = (
|
||||
self.df[
|
||||
field_names.HEART_DISEASE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
low_life_expectancy_threshold = (
|
||||
self.df[
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
healthy_food_threshold = (
|
||||
self.df[
|
||||
field_names.HEALTHY_FOOD_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
|
||||
diabetes_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
self.df[field_names.ASTHMA_LOW_INCOME_FIELD] = (
|
||||
asthma_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
|
||||
heart_disease_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
self.df[field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
||||
low_life_expectancy_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = (
|
||||
healthy_food_threshold
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(health_eligibility_columns)
|
||||
|
||||
return self.df[health_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _workforce_factor(self) -> bool:
|
||||
# Where unemployment is above Xth percentile
|
||||
# or
|
||||
# Where median income as a percent of area median income is above Xth percentile
|
||||
# or
|
||||
# Where the percent of households at or below 100% of the federal poverty level
|
||||
# is above Xth percentile
|
||||
# or
|
||||
# Where linguistic isolation is above Xth percentile
|
||||
# AND
|
||||
# Where the high school degree achievement rates for adults 25 years and older
|
||||
# is less than Y%
|
||||
# (necessary to screen out university tracts)
|
||||
|
||||
# Workforce criteria for states fields.
|
||||
workforce_eligibility_columns = [
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
]
|
||||
|
||||
self.df[field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD] = (
|
||||
self.df[field_names.HIGH_SCHOOL_ED_FIELD]
|
||||
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||
) & (
|
||||
(
|
||||
self.df[field_names.COLLEGE_ATTENDANCE_FIELD]
|
||||
<= self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
|
||||
)
|
||||
| (
|
||||
# If college attendance data is null for this tract, just rely on the
|
||||
# poverty/AMI data
|
||||
self.df[field_names.COLLEGE_ATTENDANCE_FIELD].isna()
|
||||
)
|
||||
)
|
||||
|
||||
unemployment_threshold = (
|
||||
self.df[
|
||||
field_names.UNEMPLOYMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
low_median_income_threshold = (
|
||||
self.df[
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
linguistic_isolation_threshold = (
|
||||
self.df[
|
||||
field_names.LINGUISTIC_ISO_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
poverty_threshold = (
|
||||
self.df[
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
|
||||
linguistic_isolation_threshold
|
||||
& self.df[field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD]
|
||||
)
|
||||
|
||||
self.df[field_names.POVERTY_LOW_HS_EDUCATION_FIELD] = (
|
||||
poverty_threshold
|
||||
& self.df[field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD]
|
||||
)
|
||||
|
||||
self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
|
||||
low_median_income_threshold
|
||||
& self.df[field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD]
|
||||
)
|
||||
|
||||
self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
|
||||
unemployment_threshold
|
||||
& self.df[field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD]
|
||||
)
|
||||
|
||||
workforce_combined_criteria_for_states = self.df[
|
||||
workforce_eligibility_columns
|
||||
].any(axis="columns")
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
workforce_eligibility_columns
|
||||
)
|
||||
|
||||
# Now, calculate workforce criteria for island territories.
|
||||
island_areas_workforce_eligibility_columns = [
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
]
|
||||
|
||||
# First, combine unemployment.
|
||||
(
|
||||
self.df,
|
||||
island_areas_unemployment_criteria_field_name,
|
||||
) = self._combine_island_areas_with_states_and_set_thresholds(
|
||||
df=self.df,
|
||||
column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
|
||||
column_from_decennial_census=field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
|
||||
combined_column_name=field_names.COMBINED_UNEMPLOYMENT_2010,
|
||||
threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
|
||||
)
|
||||
|
||||
# Next, combine poverty.
|
||||
(
|
||||
self.df,
|
||||
island_areas_poverty_criteria_field_name,
|
||||
) = self._combine_island_areas_with_states_and_set_thresholds(
|
||||
df=self.df,
|
||||
column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
|
||||
column_from_decennial_census=field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||
combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||
threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
|
||||
)
|
||||
|
||||
# Also check whether low area median income is 90th percentile or higher
|
||||
# within the islands.
|
||||
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = (
|
||||
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
||||
f"{field_names.PERCENTILE}th percentile"
|
||||
)
|
||||
self.df[
|
||||
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name
|
||||
] = (
|
||||
self.df[
|
||||
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD] = (
|
||||
self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
|
||||
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD
|
||||
] = (
|
||||
self.df[island_areas_unemployment_criteria_field_name]
|
||||
& self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
|
||||
)
|
||||
|
||||
self.df[field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD] = (
|
||||
self.df[island_areas_poverty_criteria_field_name]
|
||||
& self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD
|
||||
] = (
|
||||
self.df[
|
||||
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name
|
||||
]
|
||||
& self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
|
||||
)
|
||||
|
||||
workforce_combined_criteria_for_island_areas = self.df[
|
||||
island_areas_workforce_eligibility_columns
|
||||
].any(axis="columns")
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
island_areas_workforce_eligibility_columns
|
||||
)
|
||||
|
||||
percent_of_island_tracts_highlighted = (
|
||||
100
|
||||
* workforce_combined_criteria_for_island_areas.sum()
|
||||
# Choosing a random column from island areas to calculate the denominator.
|
||||
/ self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009]
|
||||
.notnull()
|
||||
.sum()
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"For workforce criteria in island areas, "
|
||||
f"{workforce_combined_criteria_for_island_areas.sum()} ("
|
||||
f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
|
||||
f"in the column) have a value of TRUE."
|
||||
)
|
||||
|
||||
# A tract is included if it meets either the states tract criteria or the
|
||||
# island areas tract criteria.
|
||||
return (
|
||||
workforce_combined_criteria_for_states
|
||||
| workforce_combined_criteria_for_island_areas
|
||||
)
|
||||
|
||||
def add_columns(self) -> pd.DataFrame:
    """Run every Score M factor and attach the resulting columns to `self.df`.

    The threshold count is reset to zero and the shared low income / low
    college attendance series is stored first, because each factor method
    reads that series and increments the count. Factors are then evaluated in
    a fixed order and combined into the Score M community flag, a
    non-workforce flag, and an integer copy of the community flag.

    Returns:
        `self.df` with the Score M columns added.
    """
    logger.info("Adding Score M")

    self.df[field_names.THRESHOLD_COUNT] = 0
    self.df[
        field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES
    ] = self._create_low_income_and_low_college_attendance_threshold(self.df)

    # (factor column, method that computes it), evaluated in this order.
    # Workforce is kept last so the non-workforce list is a simple slice.
    factor_builders = (
        (field_names.M_CLIMATE, self._climate_factor),
        (field_names.M_ENERGY, self._energy_factor),
        (field_names.M_TRANSPORTATION, self._transportation_factor),
        (field_names.M_HOUSING, self._housing_factor),
        (field_names.M_POLLUTION, self._pollution_factor),
        (field_names.M_WATER, self._water_factor),
        (field_names.M_HEALTH, self._health_factor),
        (field_names.M_WORKFORCE, self._workforce_factor),
    )
    for factor_column, build_factor in factor_builders:
        self.df[factor_column] = build_factor()

    factors = [factor_column for factor_column, _ in factor_builders]
    self.df[field_names.SCORE_M_COMMUNITIES] = self.df[factors].any(axis=1)

    # Note: this is purely used for comparison tool analysis, and can be removed at a later date. - LMB.
    non_workforce_factors = factors[:-1]
    self.df[field_names.M_NON_WORKFORCE] = self.df[non_workforce_factors].any(
        axis=1
    )

    score_m_percentile_column = (
        field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX
    )
    self.df[score_m_percentile_column] = self.df[
        field_names.SCORE_M_COMMUNITIES
    ].astype(int)

    return self.df
|
|
@ -9,6 +9,7 @@ from data_pipeline.score.score_h import ScoreH
|
|||
from data_pipeline.score.score_i import ScoreI
|
||||
from data_pipeline.score.score_k import ScoreK
|
||||
from data_pipeline.score.score_l import ScoreL
|
||||
from data_pipeline.score.score_m import ScoreM
|
||||
from data_pipeline.score import field_names
|
||||
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
@ -33,6 +34,7 @@ class ScoreRunner:
|
|||
self.df = ScoreI(df=self.df).add_columns()
|
||||
self.df = ScoreK(df=self.df).add_columns()
|
||||
self.df = ScoreL(df=self.df).add_columns()
|
||||
self.df = ScoreM(df=self.df).add_columns()
|
||||
|
||||
# TODO do this with each score instead of in a bundle
|
||||
# Create percentiles for these index scores
|
||||
|
|
Loading…
Add table
Reference in a new issue