From a1a988da46bf67340ee4ec09dcc1f5f89075f5c4 Mon Sep 17 00:00:00 2001
From: Lucas Merrill Brown <lucas.m.brown@omb.eop.gov>
Date: Thu, 16 Sep 2021 14:06:33 -0500
Subject: [PATCH] Minor updates to scoring comparison tool (#686)

* Formatting updates for output XLSX
---
 .../ipython/scoring_comparison.ipynb          | 48 +++++++++++++++----
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
index fb64db3d..9206e0ea 100644
--- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@@ -263,21 +263,26 @@
     "# (`census_tract_indices`).\n",
     "census_block_group_indices = [\n",
     "    Index(\n",
-    "        method_name=\"Score H\",\n",
-    "        priority_communities_field=\"Score H (communities)\",\n",
-    "        other_census_tract_fields_to_keep=[],\n",
-    "    ),\n",
-    "    Index(\n",
     "        method_name=\"Score G\",\n",
     "        priority_communities_field=\"Score G (communities)\",\n",
     "        other_census_tract_fields_to_keep=[],\n",
     "    ),\n",
     "    Index(\n",
+    "        method_name=\"Score H\",\n",
+    "        priority_communities_field=\"Score H (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
     "        method_name=\"NMTC\",\n",
     "        priority_communities_field=\"NMTC (communities)\",\n",
     "        other_census_tract_fields_to_keep=[],\n",
     "    ),\n",
     "    Index(\n",
+    "        method_name=\"NMTC modified\",\n",
+    "        priority_communities_field=\"NMTC modified (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
     "        method_name=\"Score F\",\n",
     "        priority_communities_field=\"Score F (communities)\",\n",
     "        other_census_tract_fields_to_keep=[],\n",
@@ -383,8 +388,8 @@
     "            summary_dict[\"division\"] = division_id\n",
     "            summary_dict[\"Geography name\"] = division_id\n",
     "\n",
-    "        summary_dict[\"Total CBGs in geography\"] = len(frame)\n",
-    "        summary_dict[\"Total population in geography\"] = frame[\n",
+    "        total_cbgs_in_geography = len(frame)\n",
+    "        total_population_in_geography = frame[\n",
     "            CENSUS_BLOCK_GROUP_POPULATION_FIELD\n",
     "        ].sum()\n",
     "\n",
@@ -400,14 +405,23 @@
     "            # Calculate some combinations of other variables.\n",
     "            summary_dict[f\"{priority_communities_field} (percent CBGs)\"] = (\n",
     "                summary_dict[f\"{priority_communities_field} (total CBGs)\"]\n",
-    "                / summary_dict[\"Total CBGs in geography\"]\n",
+    "                / total_cbgs_in_geography\n",
     "            )\n",
     "\n",
     "            summary_dict[f\"{priority_communities_field} (percent population)\"] = (\n",
     "                summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"]\n",
-    "                / summary_dict[\"Total population in geography\"]\n",
+    "                / total_population_in_geography\n",
     "            )\n",
     "\n",
+    "            unwanted_keys = [\n",
+    "                f\"{priority_communities_field}{POPULATION_SUFFIX}\",\n",
+    "                f\"{priority_communities_field} (total CBGs)\",\n",
+    "            ]\n",
+    "\n",
+    "            # Remove unneeded columns:\n",
+    "            for unwanted_key in unwanted_keys:\n",
+    "                del summary_dict[unwanted_key]\n",
+    "\n",
     "        df = pd.DataFrame(summary_dict, index=[0])\n",
     "\n",
     "        return df\n",
@@ -609,6 +623,22 @@
     "    # Run the comparison function on the groups.\n",
     "    comparison_df = grouped_df.mean().reset_index()\n",
     "\n",
+    "    criteria_description_field_name = \"Description of criteria\"  # Column holding a sentence that describes each row's two priority flags.\n",
+    "    comparison_df[criteria_description_field_name] = comparison_df.apply(\n",
+    "        func=lambda row: f\"CBGs that are {'not ' if row[method_a_priority_census_block_groups_field] is False else ''}\"\n",
+    "        f\"prioritized by {method_a_priority_census_block_groups_field} \"\n",
+    "        f\"and are {'not ' if row[method_b_priority_census_block_groups_field] is False else ''}\"\n",
+    "        f\"prioritized by {method_b_priority_census_block_groups_field}\",\n",
+    "        axis=1,  # Row-wise; 'not ' carries its own trailing space so the affirmative wording has no double space.\n",
+    "    )\n",
+    "\n",
+    "    # Put criteria description column first.\n",
+    "    new_column_order = [criteria_description_field_name] + [\n",
+    "        col for col in comparison_df.columns if col != criteria_description_field_name\n",
+    "    ]\n",
+    "    \n",
+    "    comparison_df = comparison_df[new_column_order]\n",
+    "\n",
     "    # Rename fields to reflect the mean aggregation\n",
     "    comparison_df.rename(\n",
     "        mapper={\n",