From c9f9f02770d1834a959491af12b69bc41a0f1010 Mon Sep 17 00:00:00 2001
From: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
Date: Wed, 22 Sep 2021 17:49:15 -0500
Subject: [PATCH] Fix scoring comparison tool bug (State field names, boolean conversion)

---
 .../data_pipeline/etl/score/etl_score.py      | 12 ++++-----
 .../ipython/scoring_comparison.ipynb          | 25 +++++++++++++++----
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index c9cc2498..44c318f3 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -106,22 +106,22 @@ class ScoreETL(ExtractTransformLoad):
             "EJSCREEN Areas of Concern, National, 95th percentile (communities)"
         )
         self.EJSCREEN_AREAS_OF_CONCERN_STATE_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
-            "EJSCREEN Areas of Concern, National, 70th percentile (communities)"
+            "EJSCREEN Areas of Concern, State, 70th percentile (communities)"
         )
         self.EJSCREEN_AREAS_OF_CONCERN_STATE_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
-            "EJSCREEN Areas of Concern, National, 75th percentile (communities)"
+            "EJSCREEN Areas of Concern, State, 75th percentile (communities)"
         )
         self.EJSCREEN_AREAS_OF_CONCERN_STATE_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
-            "EJSCREEN Areas of Concern, National, 80th percentile (communities)"
+            "EJSCREEN Areas of Concern, State, 80th percentile (communities)"
         )
         self.EJSCREEN_AREAS_OF_CONCERN_STATE_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
-            "EJSCREEN Areas of Concern, National, 85th percentile (communities)"
+            "EJSCREEN Areas of Concern, State, 85th percentile (communities)"
         )
         self.EJSCREEN_AREAS_OF_CONCERN_STATE_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
-            "EJSCREEN Areas of Concern, National, 90th percentile (communities)"
+            "EJSCREEN Areas of Concern, State, 90th percentile (communities)"
         )
         self.EJSCREEN_AREAS_OF_CONCERN_STATE_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
-            "EJSCREEN Areas of Concern, National, 95th percentile (communities)"
+            "EJSCREEN Areas of Concern, State, 95th percentile (communities)"
         )
 
         # dataframes
diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
index 6e76e46b..bda9aa3d 100644
--- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@@ -337,10 +337,20 @@
     "        other_census_tract_fields_to_keep=[],\n",
     "    ),\n",
     "    Index(\n",
-    "        method_name=\"EJSCREEN Areas of Concern, National, 80th percentile (communities)\",\n",
+    "        method_name=\"EJSCREEN Areas of Concern, National, 80th percentile\",\n",
     "        priority_communities_field=\"EJSCREEN Areas of Concern, National, 80th percentile (communities)\",\n",
     "        other_census_tract_fields_to_keep=[],\n",
     "    ),\n",
+    "    Index(\n",
+    "        method_name=\"EJSCREEN Areas of Concern, National, 90th percentile\",\n",
+    "        priority_communities_field=\"EJSCREEN Areas of Concern, National, 90th percentile (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
+    "    Index(\n",
+    "        method_name=\"EJSCREEN Areas of Concern, National, 95th percentile\",\n",
+    "        priority_communities_field=\"EJSCREEN Areas of Concern, National, 95th percentile (communities)\",\n",
+    "        other_census_tract_fields_to_keep=[],\n",
+    "    ),\n",
     "]\n",
     "\n",
     "census_tract_indices = [\n",
@@ -376,9 +386,6 @@
     "\n",
     "    # Ensure each field is boolean.\n",
     "    for priority_communities_field in priority_communities_fields:\n",
-    "        if df[priority_communities_field].dtype != bool:\n",
-    "            print(f\"Converting {priority_communities_field} to boolean.\")\n",
-    "\n",
     "        # Calculate the population included as priority communities per CBG. Will either be 0 or the population.\n",
     "        df[f\"{priority_communities_field}{POPULATION_SUFFIX}\"] = (\n",
     "            df[priority_communities_field]\n",
@@ -608,6 +615,14 @@
     "    for index in census_block_group_indices + census_tract_indices\n",
     "]\n",
     "\n",
+    "# Convert all indices to boolean\n",
+    "for field_to_analyze in fields_to_analyze: \n",
+    "    if \"Areas of Concern\" in field_to_analyze:\n",
+    "        print(f\"Converting {field_to_analyze} to boolean.\")\n",
+    "\n",
+    "        merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(value=0)\n",
+    "        merged_df[field_to_analyze] = merged_df[field_to_analyze].astype(bool)\n",
+    "\n",
     "state_fips_codes = get_state_information(DATA_DIR)\n",
     "\n",
     "merged_with_state_information_df = merged_df.merge(\n",
@@ -1499,7 +1514,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.12"
+   "version": "3.9.6"
   }
  },
  "nbformat": 4,