Add Score L (#812)

* Create ScoreCalculator

This calculates all the factors for score L for now (with placeholder
formulae because this is a WIP). I think ideally we'll want to
refactor all the score code to be extracted into this or similar
classes.

* Add factor logic for score L

Updated factor logic to match score L factors methodology.
Still need to get the Score L field itself working.

Cleanup needed: Pull field names into constants file, extract all score
calculation into score calculator

Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
Shelby Switzer 2021-10-28 16:07:41 -04:00 committed by GitHub
commit 7b87e0ec99
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 385 additions and 73 deletions

View file

@ -119,13 +119,14 @@
"source": [
"# Analyze one field at a time (useful for setting thresholds)\n",
"\n",
"quantile = 0.8\n",
"quantile = 0.9\n",
"\n",
"for field in [\n",
" \"Percent of individuals < 200% Federal Poverty Line\",\n",
" \"Life expectancy (years)\",\n",
" \"Energy burden\",\n",
" URBAN_HEURISTIC_FIELD,\n",
" \"Linguistic isolation (percent)\",\n",
" \"Diesel particulate matter (percentile)\",\n",
" \"Particulate matter (PM2.5) (percentile)\",\n",
" \"Traffic proximity and volume (percentile)\",\n",
" \"Percent of individuals < 200% Federal Poverty Line (percentile)\",\n",
"]:\n",
" print(f\"\\n~~~~Analysis for field `{field}`~~~~\")\n",
" print(cejst_df[field].describe())\n",
@ -234,7 +235,7 @@
"execution_count": null,
"id": "8da016db",
"metadata": {
"scrolled": false
"scrolled": true
},
"outputs": [],
"source": [
@ -278,64 +279,83 @@
"\n",
"# Define the indices used for CEJST scoring (`census_block_group_indices`) as well as comparison\n",
"# (`census_tract_indices`).\n",
"census_block_group_indices = [\n",
" Index(\n",
" method_name=\"Score G\",\n",
" priority_communities_field=\"Score G (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score H\",\n",
" priority_communities_field=\"Score H (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score I\",\n",
" priority_communities_field=\"Score I (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"NMTC\",\n",
" priority_communities_field=\"NMTC (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score A\",\n",
" priority_communities_field=\"Score A (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score B\",\n",
" priority_communities_field=\"Score B (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score C\",\n",
" priority_communities_field=\"Score C (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score D (25th percentile)\",\n",
" priority_communities_field=\"Score D (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score F\",\n",
" priority_communities_field=\"Score F (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Poverty\",\n",
" priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Persistent Poverty (CBG)\",\n",
" priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
"\n",
"definition_l_factors = [\n",
" \"Climate Factor (Definition L)\",\n",
" \"Energy Factor (Definition L)\",\n",
" \"Transportation Factor (Definition L)\",\n",
" \"Housing Factor (Definition L)\",\n",
" \"Pollution Factor (Definition L)\",\n",
" \"Water Factor (Definition L)\",\n",
" \"Health Factor (Definition L)\",\n",
" \"Workforce Factor (Definition L)\",\n",
" # Also include a combined factor for all the non-workforce elements.\n",
" \"Any Non-Workforce Factor (Definition L)\",\n",
"]\n",
"\n",
"census_block_group_indices = (\n",
" [\n",
" Index(\n",
" method_name=\"Definition L\",\n",
" priority_communities_field=\"Definition L (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" ]\n",
" # Insert indices for each of the factors from Definition L.\n",
" # Note: since these involve no renaming, we write them using list comprehension.\n",
" + [\n",
" Index(\n",
" method_name=factor,\n",
" priority_communities_field=factor,\n",
" other_census_tract_fields_to_keep=[],\n",
" )\n",
" for factor in definition_l_factors\n",
" ]\n",
" + [\n",
" Index(\n",
" # Note: we're renaming Score G as NMTC Modified for clarity, since that's what Score G is under the hood.\n",
" method_name=\"NMTC Modified\",\n",
" priority_communities_field=\"Score G (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"NMTC\",\n",
" priority_communities_field=\"NMTC (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score C\",\n",
" priority_communities_field=\"Score C (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score D (30th percentile)\",\n",
" priority_communities_field=\"Score D (top 30th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score D (25th percentile)\",\n",
" priority_communities_field=\"Score D (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Score F\",\n",
" priority_communities_field=\"Score F (communities)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Poverty\",\n",
" priority_communities_field=\"Poverty (Less than 200% of federal poverty line) (top 25th percentile)\",\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"Persistent Poverty (CBG)\",\n",
" priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" ]\n",
")\n",
"\n",
"census_tract_indices = [\n",
" Index(\n",
" method_name=\"Persistent Poverty\",\n",
@ -623,9 +643,7 @@
"write_state_distribution_excel(\n",
" state_distribution_df=state_distribution_df,\n",
" file_path=COMPARISON_OUTPUTS_DIR / f\"{file_prefix}.xlsx\",\n",
")\n",
"\n",
"state_distribution_df.head()"
")"
]
},
{
@ -633,7 +651,7 @@
"execution_count": null,
"id": "8790cd64",
"metadata": {
"scrolled": false
"scrolled": true
},
"outputs": [],
"source": [
@ -1461,7 +1479,7 @@
"execution_count": null,
"id": "908e0ad4",
"metadata": {
"scrolled": true
"scrolled": false
},
"outputs": [],
"source": [