From a1a988da46bf67340ee4ec09dcc1f5f89075f5c4 Mon Sep 17 00:00:00 2001 From: Lucas Merrill Brown Date: Thu, 16 Sep 2021 14:06:33 -0500 Subject: [PATCH] Minor updates to scoring comparison tool (#686) * Formatting updates for output XLSX --- .../ipython/scoring_comparison.ipynb | 48 +++++++++++++++---- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index fb64db3d..9206e0ea 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -263,21 +263,26 @@ "# (`census_tract_indices`).\n", "census_block_group_indices = [\n", " Index(\n", - " method_name=\"Score H\",\n", - " priority_communities_field=\"Score H (communities)\",\n", - " other_census_tract_fields_to_keep=[],\n", - " ),\n", - " Index(\n", " method_name=\"Score G\",\n", " priority_communities_field=\"Score G (communities)\",\n", " other_census_tract_fields_to_keep=[],\n", " ),\n", " Index(\n", + " method_name=\"Score H\",\n", + " priority_communities_field=\"Score H (communities)\",\n", + " other_census_tract_fields_to_keep=[],\n", + " ),\n", + " Index(\n", " method_name=\"NMTC\",\n", " priority_communities_field=\"NMTC (communities)\",\n", " other_census_tract_fields_to_keep=[],\n", " ),\n", " Index(\n", + " method_name=\"NMTC modified\",\n", + " priority_communities_field=\"NMTC modified (communities)\",\n", + " other_census_tract_fields_to_keep=[],\n", + " ),\n", + " Index(\n", " method_name=\"Score F\",\n", " priority_communities_field=\"Score F (communities)\",\n", " other_census_tract_fields_to_keep=[],\n", @@ -383,8 +388,8 @@ " summary_dict[\"division\"] = division_id\n", " summary_dict[\"Geography name\"] = division_id\n", "\n", - " summary_dict[\"Total CBGs in geography\"] = len(frame)\n", - " summary_dict[\"Total population in geography\"] = frame[\n", + " total_cbgs_in_geography = len(frame)\n", + " total_population_in_geography = frame[\n", " CENSUS_BLOCK_GROUP_POPULATION_FIELD\n", " ].sum()\n", "\n", @@ -400,14 +405,23 @@ " # Calculate some combinations of other variables.\n", " summary_dict[f\"{priority_communities_field} (percent CBGs)\"] = (\n", " summary_dict[f\"{priority_communities_field} (total CBGs)\"]\n", - " / summary_dict[\"Total CBGs in geography\"]\n", + " / total_cbgs_in_geography\n", " )\n", "\n", " summary_dict[f\"{priority_communities_field} (percent population)\"] = (\n", " summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"]\n", - " / summary_dict[\"Total population in geography\"]\n", + " / total_population_in_geography\n", " )\n", "\n", + " unwanted_keys = [\n", + " f\"{priority_communities_field}{POPULATION_SUFFIX}\",\n", + " f\"{priority_communities_field} (total CBGs)\",\n", + " ]\n", + "\n", + " # Remove unneeded columns:\n", + " for unwanted_key in unwanted_keys:\n", + " del summary_dict[unwanted_key]\n", + "\n", " df = pd.DataFrame(summary_dict, index=[0])\n", "\n", " return df\n", @@ -609,6 +623,22 @@ " # Run the comparison function on the groups.\n", " comparison_df = grouped_df.mean().reset_index()\n", "\n", + " criteria_description_field_name = \"Description of criteria\"\n", + " comparison_df[criteria_description_field_name] = comparison_df.apply(\n", + " func=lambda row: f\"CBGs that are {'not' if row[method_a_priority_census_block_groups_field] is False else ''} \" + \n", + " f\"prioritized by {method_a_priority_census_block_groups_field} \" + \n", + " f\"and are {'not' if row[method_b_priority_census_block_groups_field] is False else ''} \" + \n", + " f\"prioritized by {method_b_priority_census_block_groups_field}\",\n", + " axis=1,\n", + " )\n", + "\n", + " # Put criteria description column first.\n", + " new_column_order = [criteria_description_field_name] + [\n", + " col for col in comparison_df.columns if col != criteria_description_field_name\n", + " ]\n", + " \n", + " comparison_df = comparison_df[new_column_order]\n", + "\n", " # Rename fields to reflect the mean aggregation\n", " comparison_df.rename(\n", " mapper={\n",