Add EJSCREEN Areas of Concern (#843)

* Adding ej screen areas of concern

* Uses it where user has local files, but not otherwise

Co-authored-by: VincentLaUSDS <vincent.la@omb.eop.gov>
This commit is contained in:
Lucas Merrill Brown 2021-11-02 15:38:42 -04:00 committed by GitHub
commit 1d541be447
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 2546 additions and 18 deletions

File diff suppressed because it is too large Load diff

View file

@ -34,7 +34,9 @@
"\n",
"from data_pipeline.utils import remove_all_from_dir, get_excel_column_name\n",
"from data_pipeline.etl.sources.census.etl_utils import get_state_information\n",
"\n",
"from data_pipeline.etl.sources.ejscreen_areas_of_concern.etl import (\n",
" EJSCREENAreasOfConcernETL,\n",
")\n",
"\n",
"# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n",
"tqdm_notebook.pandas()"
@ -77,6 +79,14 @@
"CEJST_PERCENTILE_FIELD = \"cejst_percentile\"\n",
"CEJST_PRIORITY_COMMUNITY_FIELD = \"cejst_priority_community\"\n",
"\n",
"LIFE_EXPECTANCY_FIELD = \"Life expectancy (years)\"\n",
"HEALTH_INSURANCE_FIELD = (\n",
" \"Current lack of health insurance among adults aged 18-64 years\"\n",
")\n",
"BAD_HEALTH_FIELD = (\n",
" \"Physical health not good for >=14 days among adults aged >=18 years\"\n",
")\n",
"\n",
"# Define some suffixes\n",
"POPULATION_SUFFIX = \" (priority population)\""
]
@ -108,6 +118,55 @@
"cejst_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b1083e8",
"metadata": {},
"outputs": [],
"source": [
"# Load EJSCREEN Areas of Concern (AoC) data.\n",
"\n",
"# Before attempting, check whether or not the EJSCREEN AoC data is available locally.\n",
"# If it is not, the DataFrame below is left as `None`.\n",
"ejscreen_areas_of_concern_df: pd.DataFrame = None\n",
"\n",
"if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
" print(\"Loading EJSCREEN Areas of Concern data for score pipeline.\")\n",
" ejscreen_areas_of_concern_csv = (\n",
" DATA_DIR / \"dataset\" / \"ejscreen_areas_of_concern\" / \"usa.csv\"\n",
" )\n",
" ejscreen_areas_of_concern_df = pd.read_csv(\n",
" ejscreen_areas_of_concern_csv,\n",
" dtype={GEOID_FIELD_NAME: \"string\"},\n",
" low_memory=False,\n",
" )\n",
"else:\n",
" print(\n",
" \"EJSCREEN areas of concern data does not exist locally. Not attempting to load data into comparison tool.\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fec0ed63",
"metadata": {},
"outputs": [],
"source": [
"# Merge EJSCREEN AoCs into CEJST data.\n",
"# Before attempting, check whether or not the EJSCREEN AoC data is available locally.\n",
"if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
" # If available, merge EJSCREEN AoC data into CBG dfs.\n",
" cejst_df = cejst_df.merge(\n",
" ejscreen_areas_of_concern_df, on=GEOID_FIELD_NAME, how=\"outer\"\n",
" )\n",
"else:\n",
" pass\n",
"\n",
"cejst_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -343,11 +402,6 @@
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Poverty\",\n",
" priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Persistent Poverty (CBG)\",\n",
" priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
" other_census_tract_fields_to_keep=[],\n",
@ -355,6 +409,34 @@
" ]\n",
")\n",
"\n",
"\n",
"ejscreen_areas_of_concern_census_block_group_indices = [\n",
" Index(\n",
" method_name=\"EJSCREEN Areas of Concern, National, 80th percentile\",\n",
" priority_communities_field=\"EJSCREEN Areas of Concern, National, 80th percentile (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"EJSCREEN Areas of Concern, National, 90th percentile\",\n",
" priority_communities_field=\"EJSCREEN Areas of Concern, National, 90th percentile (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"EJSCREEN Areas of Concern, National, 95th percentile\",\n",
" priority_communities_field=\"EJSCREEN Areas of Concern, National, 95th percentile (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
"]\n",
"\n",
"# Before including the EJSCREEN AoC indicators, check whether or not the EJSCREEN AoC data is available locally.\n",
"if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
" # Add EJSCREEN AoCs to all of the CBG indices.\n",
" census_block_group_indices.extend(\n",
" ejscreen_areas_of_concern_census_block_group_indices\n",
" )\n",
"else:\n",
" pass\n",
"\n",
"census_tract_indices = [\n",
" Index(\n",
" method_name=\"Persistent Poverty\",\n",
@ -620,6 +702,17 @@
" for index in census_block_group_indices + census_tract_indices\n",
"]\n",
"\n",
"# Convert the EJSCREEN Areas of Concern indices to boolean (missing values become False).\n",
"for field_to_analyze in fields_to_analyze:\n",
" if \"Areas of Concern\" in field_to_analyze:\n",
" print(f\"Converting {field_to_analyze} to boolean.\")\n",
"\n",
" merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(\n",
" value=0\n",
" )\n",
" merged_df[field_to_analyze] = merged_df[field_to_analyze].astype(bool)\n",
"\n",
"\n",
"state_fips_codes = get_state_information(DATA_DIR)\n",
"\n",
"merged_with_state_information_df = merged_df.merge(\n",
@ -835,6 +928,9 @@
" \"Unemployed civilians (percent)\",\n",
" \"Median household income in the past 12 months\",\n",
" URBAN_HEURISTIC_FIELD,\n",
" LIFE_EXPECTANCY_FIELD,\n",
" HEALTH_INSURANCE_FIELD,\n",
" BAD_HEALTH_FIELD,\n",
"]\n",
"\n",
"for (index_a, index_b) in itertools.combinations(census_block_group_indices, 2):\n",
@ -1495,7 +1591,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@ -1509,7 +1605,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.9.6"
}
},
"nbformat": 4,