mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Issue 970: reverse percentiles for AMI and life expectancy (#1018)
* switching to low * fixing score-etl-post * updating comments * fixing comparison * create separate field for clarity * comment fix * removing healthy food * fixing bug in score post * running black and adding comment * Update pickles and add a helpful notes to README Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
This commit is contained in:
parent
24bac56d9e
commit
7fcecaee42
11 changed files with 144 additions and 100 deletions
|
@ -309,7 +309,7 @@ If you update the score in any way, it is necessary to create new pickles so tha
|
||||||
|
|
||||||
It starts with the `data_pipeline/etl/score/tests/sample_data/score_data_initial.csv`, which is the first two rows of the `score/full/usa.csv`.
|
It starts with the `data_pipeline/etl/score/tests/sample_data/score_data_initial.csv`, which is the first two rows of the `score/full/usa.csv`.
|
||||||
|
|
||||||
To update this file, run a full score generation and then update the file as follows:
|
To update this file, run a full score generation, then open a Python shell from the `data-pipeline` directory (e.g. `poetry run python3`), and then update the file with the following commands:
|
||||||
```
|
```
|
||||||
import pickle
|
import pickle
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -322,6 +322,8 @@ score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10_TRACT": "string"}
|
||||||
score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False)
|
score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Now you can move on to updating individual pickles for the tests. Note that it is helpful to do them in this order:
|
||||||
|
|
||||||
We have four pickle files that correspond to expected files:
|
We have four pickle files that correspond to expected files:
|
||||||
- `score_data_expected.pkl`: Initial score without counties
|
- `score_data_expected.pkl`: Initial score without counties
|
||||||
- `score_transformed_expected.pkl`: Intermediate score with `etl._extract_score` and `etl._transform_score` applied. There's no file for this intermediate process, so we need to capture the pickle mid-process.
|
- `score_transformed_expected.pkl`: Intermediate score with `etl._extract_score` and `etl._transform_score` applied. There's no file for this intermediate process, so we need to capture the pickle mid-process.
|
||||||
|
|
|
@ -81,7 +81,7 @@ TILES_SCORE_COLUMNS = [
|
||||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
@ -89,7 +89,7 @@ TILES_SCORE_COLUMNS = [
|
||||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
@ -115,7 +115,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
||||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
@ -123,7 +123,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
||||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
@ -137,7 +137,6 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
|
||||||
field_names.DIABETES_FIELD,
|
field_names.DIABETES_FIELD,
|
||||||
field_names.ASTHMA_FIELD,
|
field_names.ASTHMA_FIELD,
|
||||||
field_names.HEART_DISEASE_FIELD,
|
field_names.HEART_DISEASE_FIELD,
|
||||||
field_names.LIFE_EXPECTANCY_FIELD,
|
|
||||||
field_names.TRAFFIC_FIELD,
|
field_names.TRAFFIC_FIELD,
|
||||||
field_names.FEMA_RISK_FIELD,
|
field_names.FEMA_RISK_FIELD,
|
||||||
field_names.ENERGY_BURDEN_FIELD,
|
field_names.ENERGY_BURDEN_FIELD,
|
||||||
|
@ -149,11 +148,11 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
|
||||||
field_names.TOTAL_POP_FIELD,
|
field_names.TOTAL_POP_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
# For every indicator above, we want to include percentile and min-max normalized variants also
|
# For every indicator above, we want to include percentile also.
|
||||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
|
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
|
||||||
pd.core.common.flatten(
|
pd.core.common.flatten(
|
||||||
[
|
[
|
||||||
[p, f"{p} (percentile)"]
|
[p, f"{p}{field_names.PERCENTILE_FIELD_SUFFIX}"]
|
||||||
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
|
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
@ -165,8 +164,15 @@ DOWNLOADABLE_SCORE_COLUMNS = [
|
||||||
field_names.COUNTY_FIELD,
|
field_names.COUNTY_FIELD,
|
||||||
field_names.STATE_FIELD,
|
field_names.STATE_FIELD,
|
||||||
field_names.SCORE_G_COMMUNITIES,
|
field_names.SCORE_G_COMMUNITIES,
|
||||||
|
# Note: the reverse percentile fields get moved down here because
|
||||||
|
# we put the raw value in the download along with the *reversed* percentile.
|
||||||
|
# All other fields we put in f"{field_name}" and
|
||||||
|
# f"{field_name}{field_names.PERCENTILE_FIELD_SUFFIX}", which doesn't work for the
|
||||||
|
# reversed percentile fields.
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD
|
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.LIFE_EXPECTANCY_FIELD,
|
||||||
|
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
|
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
|
||||||
]
|
]
|
||||||
|
|
|
@ -404,9 +404,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.POVERTY_LESS_THAN_150_FPL_FIELD,
|
field_names.POVERTY_LESS_THAN_150_FPL_FIELD,
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||||
field_names.AMI_FIELD,
|
field_names.AMI_FIELD,
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
|
||||||
field_names.MEDIAN_INCOME_FIELD,
|
field_names.MEDIAN_INCOME_FIELD,
|
||||||
field_names.LIFE_EXPECTANCY_FIELD,
|
|
||||||
field_names.ENERGY_BURDEN_FIELD,
|
field_names.ENERGY_BURDEN_FIELD,
|
||||||
field_names.FEMA_RISK_FIELD,
|
field_names.FEMA_RISK_FIELD,
|
||||||
field_names.URBAN_HEURISTIC_FIELD,
|
field_names.URBAN_HEURISTIC_FIELD,
|
||||||
|
@ -439,7 +437,6 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
|
field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
|
||||||
field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||||
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
|
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
|
||||||
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
|
||||||
field_names.EXTREME_HEAT_FIELD,
|
field_names.EXTREME_HEAT_FIELD,
|
||||||
field_names.HEALTHY_FOOD_FIELD,
|
field_names.HEALTHY_FOOD_FIELD,
|
||||||
field_names.IMPENETRABLE_SURFACES_FIELD,
|
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||||
|
@ -468,7 +465,19 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
ReversePercentile(
|
ReversePercentile(
|
||||||
field_name=field_names.READING_FIELD,
|
field_name=field_names.READING_FIELD,
|
||||||
low_field_name=field_names.LOW_READING_FIELD,
|
low_field_name=field_names.LOW_READING_FIELD,
|
||||||
)
|
),
|
||||||
|
ReversePercentile(
|
||||||
|
field_name=field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||||
|
low_field_name=field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||||
|
),
|
||||||
|
ReversePercentile(
|
||||||
|
field_name=field_names.LIFE_EXPECTANCY_FIELD,
|
||||||
|
low_field_name=field_names.LOW_LIFE_EXPECTANCY_FIELD,
|
||||||
|
),
|
||||||
|
ReversePercentile(
|
||||||
|
field_name=field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
||||||
|
low_field_name=field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
columns_to_keep = (
|
columns_to_keep = (
|
||||||
|
@ -505,10 +514,6 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
max_value = df_copy[numeric_column].max(skipna=True)
|
max_value = df_copy[numeric_column].max(skipna=True)
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"For data set {numeric_column}, the min value is {min_value} and the max value is {max_value}."
|
|
||||||
)
|
|
||||||
|
|
||||||
df_copy[f"{numeric_column}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
|
df_copy[f"{numeric_column}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
|
||||||
df_copy[numeric_column] - min_value
|
df_copy[numeric_column] - min_value
|
||||||
) / (max_value - min_value)
|
) / (max_value - min_value)
|
||||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -8,7 +8,10 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import IPython\n",
|
"import IPython\n",
|
||||||
|
"import os\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
|
"import pathlib\n",
|
||||||
|
"import sys\n",
|
||||||
"\n",
|
"\n",
|
||||||
"module_path = os.path.abspath(os.path.join(\"../..\"))\n",
|
"module_path = os.path.abspath(os.path.join(\"../..\"))\n",
|
||||||
"if module_path not in sys.path:\n",
|
"if module_path not in sys.path:\n",
|
||||||
|
@ -28,12 +31,8 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Load\n",
|
"# Load\n",
|
||||||
"path_to_score_file_1 = (\n",
|
"path_to_score_file_1 = DATA_DIR / \"compare_two_score_csvs/usa (pre 970).csv\"\n",
|
||||||
" DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa1.csv\"\n",
|
"path_to_score_file_2 = DATA_DIR / \"compare_two_score_csvs/usa (post 970).csv\"\n",
|
||||||
")\n",
|
|
||||||
"path_to_score_file_2 = (\n",
|
|
||||||
" DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa2.csv\"\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"score_1_df = pd.read_csv(\n",
|
"score_1_df = pd.read_csv(\n",
|
||||||
" path_to_score_file_1,\n",
|
" path_to_score_file_1,\n",
|
||||||
|
@ -68,11 +67,16 @@
|
||||||
"source": [
|
"source": [
|
||||||
"# List rows in one but not the other\n",
|
"# List rows in one but not the other\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if len(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) != len(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]):\n",
|
"if len(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) != len(\n",
|
||||||
|
" score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]\n",
|
||||||
|
"):\n",
|
||||||
" print(\"Different lengths!\")\n",
|
" print(\"Different lengths!\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Difference in tract IDs:\")\n",
|
"print(\"Difference in tract IDs:\")\n",
|
||||||
"print(set(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) ^ set(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]))\n"
|
"print(\n",
|
||||||
|
" set(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME])\n",
|
||||||
|
" ^ set(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME])\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -83,7 +87,12 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Join\n",
|
"# Join\n",
|
||||||
"merged_df = score_1_df.merge(score_2_df, how=\"outer\", on=ExtractTransformLoad.GEOID_TRACT_FIELD_NAME, suffixes=('_1', '_2'))\n",
|
"merged_df = score_1_df.merge(\n",
|
||||||
|
" score_2_df,\n",
|
||||||
|
" how=\"outer\",\n",
|
||||||
|
" on=ExtractTransformLoad.GEOID_TRACT_FIELD_NAME,\n",
|
||||||
|
" suffixes=(\"_1\", \"_2\"),\n",
|
||||||
|
")\n",
|
||||||
"merged_df"
|
"merged_df"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -95,13 +104,31 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Check each duplicate column:\n",
|
"# Check each duplicate column:\n",
|
||||||
|
"# Remove the suffix \"_1\"\n",
|
||||||
"duplicate_columns = [x[:-2] for x in merged_df.columns if \"_1\" in x]\n",
|
"duplicate_columns = [x[:-2] for x in merged_df.columns if \"_1\" in x]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for duplicate_column in duplicate_columns:\n",
|
"columns_to_exclude_from_duplicates_check = [\n",
|
||||||
" print(f\"Checking duplicate column {duplicate_column}\")\n",
|
" \"Total threshold criteria exceeded\"\n",
|
||||||
" if not merged_df[f\"{duplicate_column}_1\"].equals(merged_df[f\"{duplicate_column}_2\"]):\n",
|
"]\n",
|
||||||
" print(merged_df[f\"{duplicate_column}_1\"].compare(merged_df[f\"{duplicate_column}_2\"]))\n",
|
"\n",
|
||||||
" raise ValueError(f\"Error! Different values in {duplicate_column}\")"
|
"columns_to_check = [column for column in duplicate_columns if column not in columns_to_exclude_from_duplicates_check]\n",
|
||||||
|
"\n",
|
||||||
|
"any_errors_found = False\n",
|
||||||
|
"for column_to_check in columns_to_check:\n",
|
||||||
|
" print(f\"Checking duplicate column {column_to_check}\")\n",
|
||||||
|
" if not merged_df[f\"{column_to_check}_1\"].equals(\n",
|
||||||
|
" merged_df[f\"{column_to_check}_2\"]\n",
|
||||||
|
" ):\n",
|
||||||
|
" print(f\"Error! Different values in {column_to_check}\")\n",
|
||||||
|
" print(\n",
|
||||||
|
" merged_df[f\"{column_to_check}_1\"].compare(\n",
|
||||||
|
" merged_df[f\"{column_to_check}_2\"]\n",
|
||||||
|
" )\n",
|
||||||
|
" )\n",
|
||||||
|
" any_errors_found = True\n",
|
||||||
|
"\n",
|
||||||
|
"if any_errors_found:\n",
|
||||||
|
" raise ValueError(f\"Error! Different values in one or more columns.\")"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|
|
@ -60,11 +60,15 @@ MEDIAN_INCOME_FIELD = "Median household income in the past 12 months"
|
||||||
MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD = (
|
MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD = (
|
||||||
"Median household income (% of state median household income)"
|
"Median household income (% of state median household income)"
|
||||||
)
|
)
|
||||||
MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)"
|
|
||||||
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
||||||
AMI_FIELD = "Area Median Income (State or metropolitan)"
|
AMI_FIELD = "Area Median Income (State or metropolitan)"
|
||||||
|
|
||||||
COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
|
COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
|
||||||
|
MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = (
|
||||||
|
"Median household income as a percent of area median income"
|
||||||
|
)
|
||||||
|
LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = (
|
||||||
|
"Low median household income as a percent of area median income"
|
||||||
|
)
|
||||||
|
|
||||||
# Climate
|
# Climate
|
||||||
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
||||||
|
@ -105,7 +109,6 @@ ENERGY_BURDEN_FIELD = "Energy burden"
|
||||||
DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years"
|
DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years"
|
||||||
ASTHMA_FIELD = "Current asthma among adults aged >=18 years"
|
ASTHMA_FIELD = "Current asthma among adults aged >=18 years"
|
||||||
HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years"
|
HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years"
|
||||||
LIFE_EXPECTANCY_FIELD = "Life expectancy (years)"
|
|
||||||
CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years"
|
CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years"
|
||||||
HEALTH_INSURANCE_FIELD = (
|
HEALTH_INSURANCE_FIELD = (
|
||||||
"Current lack of health insurance among adults aged 18-64 years"
|
"Current lack of health insurance among adults aged 18-64 years"
|
||||||
|
@ -113,6 +116,8 @@ HEALTH_INSURANCE_FIELD = (
|
||||||
PHYS_HEALTH_NOT_GOOD_FIELD = (
|
PHYS_HEALTH_NOT_GOOD_FIELD = (
|
||||||
"Physical health not good for >=14 days among adults aged >=18 years"
|
"Physical health not good for >=14 days among adults aged >=18 years"
|
||||||
)
|
)
|
||||||
|
LIFE_EXPECTANCY_FIELD = "Life expectancy (years)"
|
||||||
|
LOW_LIFE_EXPECTANCY_FIELD = "Low life expectancy"
|
||||||
|
|
||||||
# Other Demographics
|
# Other Demographics
|
||||||
TOTAL_POP_FIELD = "Total population"
|
TOTAL_POP_FIELD = "Total population"
|
||||||
|
@ -130,9 +135,6 @@ OVER_64_FIELD = "Individuals over 64 years old"
|
||||||
|
|
||||||
# Fields from 2010 decennial census (generally only loaded for the territories)
|
# Fields from 2010 decennial census (generally only loaded for the territories)
|
||||||
CENSUS_DECENNIAL_MEDIAN_INCOME_2009 = "Median household income in 2009 ($)"
|
CENSUS_DECENNIAL_MEDIAN_INCOME_2009 = "Median household income in 2009 ($)"
|
||||||
CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
|
|
||||||
"Median household income as a percent of territory median income in 2009"
|
|
||||||
)
|
|
||||||
CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 = (
|
CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 = (
|
||||||
"Percentage households below 100% of federal poverty line in 2009"
|
"Percentage households below 100% of federal poverty line in 2009"
|
||||||
)
|
)
|
||||||
|
@ -141,7 +143,10 @@ CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 = (
|
||||||
"Unemployed civilians (percent) in 2009"
|
"Unemployed civilians (percent) in 2009"
|
||||||
)
|
)
|
||||||
CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009"
|
CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009"
|
||||||
|
CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
|
||||||
|
"Median household income as a percent of territory median income in 2009"
|
||||||
|
)
|
||||||
|
LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = "Low median household income as a percent of territory median income in 2009"
|
||||||
# Fields from 2010 ACS (loaded for comparison with the territories)
|
# Fields from 2010 ACS (loaded for comparison with the territories)
|
||||||
CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployed civilians (percent) in 2010"
|
CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployed civilians (percent) in 2010"
|
||||||
CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
|
CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
|
||||||
|
@ -265,7 +270,10 @@ ASTHMA_LOW_INCOME_FIELD = (
|
||||||
)
|
)
|
||||||
HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for heart disease and is low income"
|
HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for heart disease and is low income"
|
||||||
|
|
||||||
LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income"
|
LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile "
|
||||||
|
f"for low life expectancy and is low income"
|
||||||
|
)
|
||||||
|
|
||||||
# Workforce
|
# Workforce
|
||||||
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
||||||
|
@ -288,9 +296,9 @@ LOW_READING_LOW_HS_EDUCATION_FIELD = (
|
||||||
" and has low HS education"
|
" and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
||||||
f"At or below the {PERCENTILE}th percentile for median income"
|
f"At or below the {PERCENTILE}th percentile for low median household income as a "
|
||||||
" and has low HS education"
|
f"percent of area median income and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Not currently used in a factor
|
# Not currently used in a factor
|
||||||
|
|
|
@ -44,6 +44,8 @@ class ScoreL(Score):
|
||||||
robustness over 1-year ACS.
|
robustness over 1-year ACS.
|
||||||
"""
|
"""
|
||||||
# Create the combined field.
|
# Create the combined field.
|
||||||
|
# TODO: move this combined field percentile calculation to `etl_score`,
|
||||||
|
# since most other percentile logic is there.
|
||||||
# There should only be one entry in either 2009 or 2019 fields, not one in both.
|
# There should only be one entry in either 2009 or 2019 fields, not one in both.
|
||||||
# But just to be safe, we take the mean and ignore null values so if there
|
# But just to be safe, we take the mean and ignore null values so if there
|
||||||
# *were* entries in both, this result would make sense.
|
# *were* entries in both, this result would make sense.
|
||||||
|
@ -169,7 +171,7 @@ class ScoreL(Score):
|
||||||
def _climate_factor(self) -> bool:
|
def _climate_factor(self) -> bool:
|
||||||
# In Xth percentile or above for FEMA’s Risk Index (Source: FEMA
|
# In Xth percentile or above for FEMA’s Risk Index (Source: FEMA
|
||||||
# AND
|
# AND
|
||||||
# Low income: In 60th percentile or above for percent of block group population
|
# Low income: In Nth percentile or above for percent of block group population
|
||||||
# of households where household income is less than or equal to twice the federal
|
# of households where household income is less than or equal to twice the federal
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
|
@ -225,7 +227,7 @@ class ScoreL(Score):
|
||||||
def _energy_factor(self) -> bool:
|
def _energy_factor(self) -> bool:
|
||||||
# In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
|
# In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
|
||||||
# AND
|
# AND
|
||||||
# Low income: In 60th percentile or above for percent of block group population
|
# Low income: In Nth percentile or above for percent of block group population
|
||||||
# of households where household income is less than or equal to twice the federal
|
# of households where household income is less than or equal to twice the federal
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
|
@ -268,7 +270,7 @@ class ScoreL(Score):
|
||||||
# or
|
# or
|
||||||
# In Xth percentile or above traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data
|
# In Xth percentile or above traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data
|
||||||
# AND
|
# AND
|
||||||
# Low income: In 60th percentile or above for percent of block group population
|
# Low income: In Nth percentile or above for percent of block group population
|
||||||
# of households where household income is less than or equal to twice the federal
|
# of households where household income is less than or equal to twice the federal
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
|
@ -315,7 +317,7 @@ class ScoreL(Score):
|
||||||
# or
|
# or
|
||||||
# In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset
|
# In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset
|
||||||
# AND
|
# AND
|
||||||
# Low income: In 60th percentile or above for percent of block group population
|
# Low income: In Nth percentile or above for percent of block group population
|
||||||
# of households where household income is less than or equal to twice the federal
|
# of households where household income is less than or equal to twice the federal
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
|
@ -363,7 +365,7 @@ class ScoreL(Score):
|
||||||
def _pollution_factor(self) -> bool:
|
def _pollution_factor(self) -> bool:
|
||||||
# Proximity to Risk Management Plan sites is > X
|
# Proximity to Risk Management Plan sites is > X
|
||||||
# AND
|
# AND
|
||||||
# Low income: In 60th percentile or above for percent of block group population
|
# Low income: In Nth percentile or above for percent of block group population
|
||||||
# of households where household income is less than or equal to twice the federal
|
# of households where household income is less than or equal to twice the federal
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
|
@ -410,7 +412,7 @@ class ScoreL(Score):
|
||||||
def _water_factor(self) -> bool:
|
def _water_factor(self) -> bool:
|
||||||
# In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
|
# In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
|
||||||
# AND
|
# AND
|
||||||
# Low income: In 60th percentile or above for percent of block group population
|
# Low income: In Nth percentile or above for percent of block group population
|
||||||
# of households where household income is less than or equal to twice the federal
|
# of households where household income is less than or equal to twice the federal
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
|
@ -441,7 +443,7 @@ class ScoreL(Score):
|
||||||
# or
|
# or
|
||||||
# In Xth percentile or above for low life expectancy (Source: CDC Places)
|
# In Xth percentile or above for low life expectancy (Source: CDC Places)
|
||||||
# AND
|
# AND
|
||||||
# Low income: In 60th percentile or above for percent of block group population
|
# Low income: In Nth percentile or above for percent of block group population
|
||||||
# of households where household income is less than or equal to twice the federal
|
# of households where household income is less than or equal to twice the federal
|
||||||
# poverty level. Source: Census's American Community Survey]
|
# poverty level. Source: Census's American Community Survey]
|
||||||
|
|
||||||
|
@ -449,8 +451,7 @@ class ScoreL(Score):
|
||||||
field_names.DIABETES_LOW_INCOME_FIELD,
|
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||||
field_names.ASTHMA_LOW_INCOME_FIELD,
|
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||||
field_names.HEALTHY_FOOD_LOW_INCOME_FIELD,
|
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||||
field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
|
||||||
]
|
]
|
||||||
|
|
||||||
diabetes_threshold = (
|
diabetes_threshold = (
|
||||||
|
@ -475,24 +476,14 @@ class ScoreL(Score):
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
healthy_food_threshold = (
|
low_life_expectancy_threshold = (
|
||||||
self.df[
|
self.df[
|
||||||
field_names.HEALTHY_FOOD_FIELD
|
field_names.LOW_LIFE_EXPECTANCY_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
]
|
]
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
life_expectancy_threshold = (
|
|
||||||
self.df[
|
|
||||||
field_names.LIFE_EXPECTANCY_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
|
||||||
]
|
|
||||||
# Note: a high life expectancy is good, so take 1 minus the threshold to invert it,
|
|
||||||
# and then look for life expectancies lower than that (not greater than).
|
|
||||||
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
|
||||||
)
|
|
||||||
|
|
||||||
self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
|
self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
|
||||||
diabetes_threshold & self.df[field_names.FPL_200_SERIES]
|
diabetes_threshold & self.df[field_names.FPL_200_SERIES]
|
||||||
)
|
)
|
||||||
|
@ -502,11 +493,8 @@ class ScoreL(Score):
|
||||||
self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
|
self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
|
||||||
heart_disease_threshold & self.df[field_names.FPL_200_SERIES]
|
heart_disease_threshold & self.df[field_names.FPL_200_SERIES]
|
||||||
)
|
)
|
||||||
self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
self.df[field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
|
||||||
life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
|
low_life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
|
||||||
)
|
|
||||||
self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = (
|
|
||||||
healthy_food_threshold & self.df[field_names.FPL_200_SERIES]
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self._increment_total_eligibility_exceeded(health_eligibility_columns)
|
self._increment_total_eligibility_exceeded(health_eligibility_columns)
|
||||||
|
@ -514,23 +502,25 @@ class ScoreL(Score):
|
||||||
return self.df[health_eligibility_columns].any(axis="columns")
|
return self.df[health_eligibility_columns].any(axis="columns")
|
||||||
|
|
||||||
def _workforce_factor(self) -> bool:
|
def _workforce_factor(self) -> bool:
|
||||||
# Where unemployment is above X%
|
# Where unemployment is above Xth percentile
|
||||||
# or
|
# or
|
||||||
# Where median income is less than Y% of the area median income
|
# Where low median income as a percent of area median income is above Xth percentile
|
||||||
# or
|
# or
|
||||||
# Where the percent of households at or below 100% of the federal poverty level is greater than Z%
|
# Where the percent of households at or below 100% of the federal poverty level
|
||||||
|
# is above Xth percentile
|
||||||
# or
|
# or
|
||||||
# Where linguistic isolation is greater than Y%
|
# Where linguistic isolation is above Xth percentile
|
||||||
# AND
|
# AND
|
||||||
# Where the high school degree achievement rates for adults 25 years and older is less than 95%
|
# Where the high school degree achievement rates for adults 25 years and older
|
||||||
# (necessary to screen out university block groups)
|
# is less than Y%
|
||||||
|
# (necessary to screen out university tracts)
|
||||||
|
|
||||||
# Workforce criteria for states fields.
|
# Workforce criteria for states fields.
|
||||||
workforce_eligibility_columns = [
|
workforce_eligibility_columns = [
|
||||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||||
field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
high_scool_achievement_rate_threshold = (
|
high_scool_achievement_rate_threshold = (
|
||||||
|
@ -546,14 +536,12 @@ class ScoreL(Score):
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
median_income_threshold = (
|
low_median_income_threshold = (
|
||||||
self.df[
|
self.df[
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
]
|
]
|
||||||
# Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it,
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
# and then look for median income lower than that (not greater than).
|
|
||||||
<= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
|
||||||
)
|
)
|
||||||
|
|
||||||
linguistic_isolation_threshold = (
|
linguistic_isolation_threshold = (
|
||||||
|
@ -581,8 +569,8 @@ class ScoreL(Score):
|
||||||
poverty_threshold & high_scool_achievement_rate_threshold
|
poverty_threshold & high_scool_achievement_rate_threshold
|
||||||
)
|
)
|
||||||
|
|
||||||
self.df[field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
|
self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
|
||||||
median_income_threshold & high_scool_achievement_rate_threshold
|
low_median_income_threshold & high_scool_achievement_rate_threshold
|
||||||
)
|
)
|
||||||
|
|
||||||
self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
|
self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
|
||||||
|
@ -624,23 +612,31 @@ class ScoreL(Score):
|
||||||
threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
|
threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Also check whether low area median income is 90th percentile or higher
|
||||||
|
# within the islands.
|
||||||
|
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name = (
|
||||||
|
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
||||||
|
f"{field_names.PERCENTILE}th percentile"
|
||||||
|
)
|
||||||
|
self.df[
|
||||||
|
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
|
||||||
|
] = (
|
||||||
|
self.df[
|
||||||
|
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
]
|
||||||
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
|
)
|
||||||
|
|
||||||
workforce_combined_criteria_for_island_areas = (
|
workforce_combined_criteria_for_island_areas = (
|
||||||
self.df[unemployment_island_areas_criteria_field_name]
|
self.df[unemployment_island_areas_criteria_field_name]
|
||||||
| self.df[poverty_island_areas_criteria_field_name]
|
| self.df[poverty_island_areas_criteria_field_name]
|
||||||
# Also check whether area median income is 10th percentile or lower
|
| self.df[
|
||||||
# within the islands.
|
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
|
||||||
| (
|
|
||||||
self.df[
|
|
||||||
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
|
||||||
]
|
]
|
||||||
# Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it,
|
|
||||||
# and then look for median income lower than that (not greater than).
|
|
||||||
< 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
|
||||||
)
|
|
||||||
) & (
|
) & (
|
||||||
self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
|
self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
|
||||||
> self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
percent_of_island_tracts_highlighted = (
|
percent_of_island_tracts_highlighted = (
|
||||||
|
|
Loading…
Add table
Reference in a new issue