mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-26 16:41:16 -07:00
Add EJSCREEN Areas of Concern (#843)
* Add EJSCREEN Areas of Concern data. * Use it when the user has the data files locally, but not otherwise. Co-authored-by: VincentLaUSDS <vincent.la@omb.eop.gov>
This commit is contained in:
parent
1795be6cb4
commit
1d541be447
10 changed files with 2546 additions and 18 deletions
2308
data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb
Normal file
2308
data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb
Normal file
File diff suppressed because it is too large
Load diff
|
@ -34,7 +34,9 @@
|
|||
"\n",
|
||||
"from data_pipeline.utils import remove_all_from_dir, get_excel_column_name\n",
|
||||
"from data_pipeline.etl.sources.census.etl_utils import get_state_information\n",
|
||||
"\n",
|
||||
"from data_pipeline.etl.sources.ejscreen_areas_of_concern.etl import (\n",
|
||||
" EJSCREENAreasOfConcernETL,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n",
|
||||
"tqdm_notebook.pandas()"
|
||||
|
@ -77,6 +79,14 @@
|
|||
"CEJST_PERCENTILE_FIELD = \"cejst_percentile\"\n",
|
||||
"CEJST_PRIORITY_COMMUNITY_FIELD = \"cejst_priority_community\"\n",
|
||||
"\n",
|
||||
"LIFE_EXPECTANCY_FIELD = \"Life expectancy (years)\"\n",
|
||||
"HEALTH_INSURANCE_FIELD = (\n",
|
||||
" \"Current lack of health insurance among adults aged 18-64 years\"\n",
|
||||
")\n",
|
||||
"BAD_HEALTH_FIELD = (\n",
|
||||
" \"Physical health not good for >=14 days among adults aged >=18 years\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Define some suffixes\n",
|
||||
"POPULATION_SUFFIX = \" (priority population)\""
|
||||
]
|
||||
|
@ -108,6 +118,55 @@
|
|||
"cejst_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b1083e8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load EJSCREEN Areas of Concern data.\n",
|
||||
"\n",
|
||||
"# Load EJ Screen Areas of Concern\n",
|
||||
"# Before attempting, check whether or not the EJSCREEN AoC data is available locally.\n",
|
||||
"ejscreen_areas_of_concern_df: pd.DataFrame = None\n",
|
||||
"\n",
|
||||
"if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
|
||||
" print(\"Loading EJSCREEN Areas of Concern data for score pipeline.\")\n",
|
||||
" ejscreen_areas_of_concern_csv = (\n",
|
||||
" DATA_DIR / \"dataset\" / \"ejscreen_areas_of_concern\" / \"usa.csv\"\n",
|
||||
" )\n",
|
||||
" ejscreen_areas_of_concern_df = pd.read_csv(\n",
|
||||
" ejscreen_areas_of_concern_csv,\n",
|
||||
" dtype={GEOID_FIELD_NAME: \"string\"},\n",
|
||||
" low_memory=False,\n",
|
||||
" )\n",
|
||||
"else:\n",
|
||||
" print(\n",
|
||||
" \"EJSCREEN areas of concern data does not exist locally. Not attempting to load data into comparison tool.\"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fec0ed63",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Merge EJSCREEN AoCs into CEJST data.\n",
|
||||
"# Before attempting, check whether or not the EJSCREEN AoC data is available locally.\n",
|
||||
"if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
|
||||
" # If available, merge EJSCREEN AoC data into CBG dfs.\n",
|
||||
" cejst_df = cejst_df.merge(\n",
|
||||
" ejscreen_areas_of_concern_df, on=GEOID_FIELD_NAME, how=\"outer\"\n",
|
||||
" )\n",
|
||||
"else:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"cejst_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
@ -343,11 +402,6 @@
|
|||
" other_census_tract_fields_to_keep=[],\n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Poverty\",\n",
|
||||
" priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
|
||||
" other_census_tract_fields_to_keep=[],\n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Persistent Poverty (CBG)\",\n",
|
||||
" priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
|
||||
" other_census_tract_fields_to_keep=[],\n",
|
||||
|
@ -355,6 +409,34 @@
|
|||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"ejscreen_areas_of_concern_census_block_group_indices = [\n",
|
||||
" Index(\n",
|
||||
" method_name=\"EJSCREEN Areas of Concern, National, 80th percentile\",\n",
|
||||
" priority_communities_field=\"EJSCREEN Areas of Concern, National, 80th percentile (communities)\",\n",
|
||||
" other_census_tract_fields_to_keep=[],\n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"EJSCREEN Areas of Concern, National, 90th percentile\",\n",
|
||||
" priority_communities_field=\"EJSCREEN Areas of Concern, National, 90th percentile (communities)\",\n",
|
||||
" other_census_tract_fields_to_keep=[],\n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"EJSCREEN Areas of Concern, National, 95th percentile\",\n",
|
||||
" priority_communities_field=\"EJSCREEN Areas of Concern, National, 95th percentile (communities)\",\n",
|
||||
" other_census_tract_fields_to_keep=[],\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Before EJSCREEN AoC indicators are included, check whether or not the EJSCREEN AoC data is available locally.\n",
|
||||
"if EJSCREENAreasOfConcernETL.ejscreen_areas_of_concern_data_exists():\n",
|
||||
" # Add EJSCREEN AoCs to all of the CBG indices.\n",
|
||||
" census_block_group_indices.extend(\n",
|
||||
" ejscreen_areas_of_concern_census_block_group_indices\n",
|
||||
" )\n",
|
||||
"else:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"census_tract_indices = [\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Persistent Poverty\",\n",
|
||||
|
@ -620,6 +702,17 @@
|
|||
" for index in census_block_group_indices + census_tract_indices\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Convert all indices to boolean\n",
|
||||
"for field_to_analyze in fields_to_analyze:\n",
|
||||
" if \"Areas of Concern\" in field_to_analyze:\n",
|
||||
" print(f\"Converting {field_to_analyze} to boolean.\")\n",
|
||||
"\n",
|
||||
" merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(\n",
|
||||
" value=0\n",
|
||||
" )\n",
|
||||
" merged_df[field_to_analyze] = merged_df[field_to_analyze].astype(bool)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"state_fips_codes = get_state_information(DATA_DIR)\n",
|
||||
"\n",
|
||||
"merged_with_state_information_df = merged_df.merge(\n",
|
||||
|
@ -835,6 +928,9 @@
|
|||
" \"Unemployed civilians (percent)\",\n",
|
||||
" \"Median household income in the past 12 months\",\n",
|
||||
" URBAN_HEURISTIC_FIELD,\n",
|
||||
" LIFE_EXPECTANCY_FIELD,\n",
|
||||
" HEALTH_INSURANCE_FIELD,\n",
|
||||
" BAD_HEALTH_FIELD,\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for (index_a, index_b) in itertools.combinations(census_block_group_indices, 2):\n",
|
||||
|
@ -1495,7 +1591,7 @@
|
|||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -1509,7 +1605,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.5"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue