mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 07:21:18 -07:00
Issue 1141: Definition M (#1151)
This commit is contained in:
parent
a07bf752b0
commit
18f299c5f8
21 changed files with 1000 additions and 143 deletions
|
@ -21,6 +21,7 @@
|
|||
"import requests\n",
|
||||
"import string\n",
|
||||
"import sys\n",
|
||||
"import time\n",
|
||||
"import typing\n",
|
||||
"import us\n",
|
||||
"import zipfile\n",
|
||||
|
@ -61,7 +62,10 @@
|
|||
"# Set some global parameters\n",
|
||||
"DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
|
||||
"TEMP_DATA_DIR = DATA_DIR / \"tmp\"\n",
|
||||
"COMPARISON_OUTPUTS_DIR = DATA_DIR / \"comparison_outputs\"\n",
|
||||
"\n",
|
||||
"time_str = time.strftime(\"%Y%m%d-%H%M%S\")\n",
|
||||
"\n",
|
||||
"COMPARISON_OUTPUTS_DIR = DATA_DIR / \"comparison_outputs\" / time_str\n",
|
||||
"\n",
|
||||
"# Make the dirs if they don't exist\n",
|
||||
"TEMP_DATA_DIR.mkdir(parents=True, exist_ok=True)\n",
|
||||
|
@ -109,7 +113,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a251a0fb",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load EJSCREEN Areas of Concern data.\n",
|
||||
|
@ -143,7 +149,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e43a9e23",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Merge EJSCREEN AoCs into CEJST data.\n",
|
||||
|
@ -173,10 +181,13 @@
|
|||
"source": [
|
||||
"# Analyze one field at a time (useful for setting thresholds)\n",
|
||||
"\n",
|
||||
"quantile = 0.9\n",
|
||||
"quantile = 0.95\n",
|
||||
"\n",
|
||||
"for field in [\n",
|
||||
" field_names.MEDIAN_HOUSE_VALUE_FIELD,\n",
|
||||
" field_names.COLLEGE_ATTENDANCE_FIELD,\n",
|
||||
" field_names.HIGH_SCHOOL_ED_FIELD,\n",
|
||||
" field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,\n",
|
||||
" field_names.POVERTY_LESS_THAN_200_FPL_FIELD,\n",
|
||||
"]:\n",
|
||||
" print(f\"\\n~~~~Analysis for field `{field}`~~~~\")\n",
|
||||
" print(cejst_df[field].describe())\n",
|
||||
|
@ -223,7 +234,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d8ec43dc",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load persistent poverty data\n",
|
||||
|
@ -256,7 +269,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "81826d29",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load mapping inequality data\n",
|
||||
|
@ -314,7 +329,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "605af1ff",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load alternative energy-related definition\n",
|
||||
|
@ -333,7 +350,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fe4a2939",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load Michigan EJSCREEN\n",
|
||||
|
@ -356,15 +375,13 @@
|
|||
"outputs": [],
|
||||
"source": [
|
||||
"# Load EPA RSEI EJSCREEN\n",
|
||||
"epa_rsei_aggregate_data_path = (\n",
|
||||
" DATA_DIR / \"dataset\" / \"epa_rsei_aggregated\" / \"usa.csv\"\n",
|
||||
")\n",
|
||||
"epa_rsei_aggregate_df = pd.read_csv(\n",
|
||||
" epa_rsei_aggregate_data_path,\n",
|
||||
"epa_rsei_data_path = DATA_DIR / \"dataset\" / \"epa_rsei\" / \"usa.csv\"\n",
|
||||
"epa_rsei_df = pd.read_csv(\n",
|
||||
" epa_rsei_data_path,\n",
|
||||
" dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"epa_rsei_aggregate_df.head()"
|
||||
"epa_rsei_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -382,7 +399,7 @@
|
|||
" calenviroscreen_df,\n",
|
||||
" persistent_poverty_df,\n",
|
||||
" mapping_inequality_df,\n",
|
||||
" epa_rsei_aggregate_df,\n",
|
||||
" epa_rsei_df,\n",
|
||||
" maryland_ejscreen_df,\n",
|
||||
" energy_definition_alternative_draft_df,\n",
|
||||
" michigan_ejscreen_df,\n",
|
||||
|
@ -416,7 +433,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2de78f71",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Special handling for HOLC.\n",
|
||||
|
@ -461,13 +480,41 @@
|
|||
" field_names.L_NON_WORKFORCE,\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"definition_m_factors = [\n",
|
||||
" field_names.M_CLIMATE,\n",
|
||||
" field_names.M_ENERGY,\n",
|
||||
" field_names.M_TRANSPORTATION,\n",
|
||||
" field_names.M_HOUSING,\n",
|
||||
" field_names.M_POLLUTION,\n",
|
||||
" field_names.M_WATER,\n",
|
||||
" field_names.M_HEALTH,\n",
|
||||
" field_names.M_WORKFORCE,\n",
|
||||
" # Also include a combined factor for all the non-workforce elements.\n",
|
||||
" field_names.M_NON_WORKFORCE,\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"census_tract_indices = (\n",
|
||||
" [\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Definition M\",\n",
|
||||
" priority_communities_field=field_names.SCORE_M_COMMUNITIES,\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" + [\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Definition L\",\n",
|
||||
" priority_communities_field=field_names.SCORE_L_COMMUNITIES,\n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" # Insert indices for each of the factors from Definition M.\n",
|
||||
" # Note: since these involve no renaming, we write them using list comprehension.\n",
|
||||
" + [\n",
|
||||
" Index(\n",
|
||||
" method_name=factor,\n",
|
||||
" priority_communities_field=factor,\n",
|
||||
" )\n",
|
||||
" for factor in definition_m_factors\n",
|
||||
" ]\n",
|
||||
" # Insert indices for each of the factors from Definition L.\n",
|
||||
" # Note: since these involve no renaming, we write them using list comprehension.\n",
|
||||
" + [\n",
|
||||
|
@ -575,6 +622,7 @@
|
|||
"comparison_fields = [\n",
|
||||
" field_names.POVERTY_LESS_THAN_100_FPL_FIELD,\n",
|
||||
" field_names.POVERTY_LESS_THAN_200_FPL_FIELD,\n",
|
||||
" field_names.COLLEGE_ATTENDANCE_FIELD,\n",
|
||||
" field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,\n",
|
||||
" field_names.LINGUISTIC_ISO_FIELD,\n",
|
||||
" field_names.UNEMPLOYMENT_FIELD,\n",
|
||||
|
@ -584,6 +632,8 @@
|
|||
" field_names.LIFE_EXPECTANCY_FIELD,\n",
|
||||
" field_names.HEALTH_INSURANCE_FIELD,\n",
|
||||
" field_names.PHYS_HEALTH_NOT_GOOD_FIELD,\n",
|
||||
" field_names.DIABETES_FIELD,\n",
|
||||
" field_names.LOW_READING_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
|
@ -874,7 +924,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2bcbcabf",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"directory = COMPARISON_OUTPUTS_DIR / \"tracts_basic_stats\"\n",
|
||||
|
@ -1001,24 +1053,28 @@
|
|||
" E.g., it might show that tracts prioritized by A but not B have a higher average income,\n",
|
||||
" or that tracts prioritized by B but not A have a lower percent of unemployed people.\n",
|
||||
" \"\"\"\n",
|
||||
" df_subset = df[\n",
|
||||
" [\n",
|
||||
" method_a_priority_census_tracts_field,\n",
|
||||
" method_b_priority_census_tracts_field,\n",
|
||||
" ]\n",
|
||||
" + comparison_fields\n",
|
||||
" fields_to_group_by = [\n",
|
||||
" method_a_priority_census_tracts_field,\n",
|
||||
" method_b_priority_census_tracts_field,\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" df_subset = df[fields_to_group_by + comparison_fields]\n",
|
||||
"\n",
|
||||
" grouped_df = df_subset.groupby(\n",
|
||||
" [\n",
|
||||
" method_a_priority_census_tracts_field,\n",
|
||||
" method_b_priority_census_tracts_field,\n",
|
||||
" ],\n",
|
||||
" fields_to_group_by,\n",
|
||||
" dropna=False,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # Run the comparison function on the groups.\n",
|
||||
" comparison_df = grouped_df.mean().reset_index()\n",
|
||||
" # Take the mean of all fields.\n",
|
||||
" comparison_df = grouped_df.mean()\n",
|
||||
"\n",
|
||||
" # Also add in the count of census tracts.\n",
|
||||
" count_field_name = \"Count of census tracts\"\n",
|
||||
" comparison_df[count_field_name] = grouped_df.size().to_frame(\n",
|
||||
" count_field_name\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" comparison_df = comparison_df.reset_index()\n",
|
||||
"\n",
|
||||
" criteria_description_field_name = \"Description of criteria\"\n",
|
||||
" comparison_df[criteria_description_field_name] = comparison_df.apply(\n",
|
||||
|
@ -1030,10 +1086,13 @@
|
|||
" )\n",
|
||||
"\n",
|
||||
" # Put criteria description column first.\n",
|
||||
" new_column_order = [criteria_description_field_name] + [\n",
|
||||
" col\n",
|
||||
" for col in comparison_df.columns\n",
|
||||
" if col != criteria_description_field_name\n",
|
||||
" columns_to_put_first = (\n",
|
||||
" [criteria_description_field_name]\n",
|
||||
" + fields_to_group_by\n",
|
||||
" + [count_field_name]\n",
|
||||
" )\n",
|
||||
" new_column_order = columns_to_put_first + [\n",
|
||||
" col for col in comparison_df.columns if col not in columns_to_put_first\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" comparison_df = comparison_df[new_column_order]\n",
|
||||
|
@ -1356,7 +1415,9 @@
|
|||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7d095ebd",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: this is helpful because this file is long-running, so it alerts the user when the\n",
|
||||
|
@ -1369,7 +1430,7 @@
|
|||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -1383,7 +1444,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue