From c9f9f02770d1834a959491af12b69bc41a0f1010 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Wed, 22 Sep 2021 17:49:15 -0500 Subject: [PATCH] fixing comp tool bug --- .../data_pipeline/etl/score/etl_score.py | 12 ++++----- .../ipython/scoring_comparison.ipynb | 25 +++++++++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index c9cc2498..44c318f3 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -106,22 +106,22 @@ class ScoreETL(ExtractTransformLoad): "EJSCREEN Areas of Concern, National, 95th percentile (communities)" ) self.EJSCREEN_AREAS_OF_CONCERN_STATE_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME = ( - "EJSCREEN Areas of Concern, National, 70th percentile (communities)" + "EJSCREEN Areas of Concern, State, 70th percentile (communities)" ) self.EJSCREEN_AREAS_OF_CONCERN_STATE_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME = ( - "EJSCREEN Areas of Concern, National, 75th percentile (communities)" + "EJSCREEN Areas of Concern, State, 75th percentile (communities)" ) self.EJSCREEN_AREAS_OF_CONCERN_STATE_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME = ( - "EJSCREEN Areas of Concern, National, 80th percentile (communities)" + "EJSCREEN Areas of Concern, State, 80th percentile (communities)" ) self.EJSCREEN_AREAS_OF_CONCERN_STATE_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME = ( - "EJSCREEN Areas of Concern, National, 85th percentile (communities)" + "EJSCREEN Areas of Concern, State, 85th percentile (communities)" ) self.EJSCREEN_AREAS_OF_CONCERN_STATE_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME = ( - "EJSCREEN Areas of Concern, National, 90th percentile (communities)" + "EJSCREEN Areas of Concern, State, 90th percentile (communities)" ) self.EJSCREEN_AREAS_OF_CONCERN_STATE_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME = ( - "EJSCREEN Areas of Concern, National, 95th percentile (communities)" + "EJSCREEN Areas of Concern, State, 95th percentile (communities)" ) # dataframes diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index 6e76e46b..bda9aa3d 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -337,10 +337,20 @@ " other_census_tract_fields_to_keep=[],\n", " ),\n", " Index(\n", - " method_name=\"EJSCREEN Areas of Concern, National, 80th percentile (communities)\",\n", + " method_name=\"EJSCREEN Areas of Concern, National, 80th percentile\",\n", " priority_communities_field=\"EJSCREEN Areas of Concern, National, 80th percentile (communities)\",\n", " other_census_tract_fields_to_keep=[],\n", " ),\n", + " Index(\n", + " method_name=\"EJSCREEN Areas of Concern, National, 90th percentile\",\n", + " priority_communities_field=\"EJSCREEN Areas of Concern, National, 90th percentile (communities)\",\n", + " other_census_tract_fields_to_keep=[],\n", + " ),\n", + " Index(\n", + " method_name=\"EJSCREEN Areas of Concern, National, 95th percentile\",\n", + " priority_communities_field=\"EJSCREEN Areas of Concern, National, 95th percentile (communities)\",\n", + " other_census_tract_fields_to_keep=[],\n", + " ),\n", "]\n", "\n", "census_tract_indices = [\n", @@ -376,9 +386,6 @@ "\n", " # Ensure each field is boolean.\n", " for priority_communities_field in priority_communities_fields:\n", - " if df[priority_communities_field].dtype != bool:\n", - " print(f\"Converting {priority_communities_field} to boolean.\")\n", - "\n", " # Calculate the population included as priority communities per CBG. Will either be 0 or the population.\n", " df[f\"{priority_communities_field}{POPULATION_SUFFIX}\"] = (\n", " df[priority_communities_field]\n", @@ -608,6 +615,14 @@ " for index in census_block_group_indices + census_tract_indices\n", "]\n", "\n", + "# Convert all indices to boolean\n", + "for field_to_analyze in fields_to_analyze: \n", + " if \"Areas of Concern\" in field_to_analyze:\n", + " print(f\"Converting {field_to_analyze} to boolean.\")\n", + "\n", + " merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(value=0)\n", + " merged_df[field_to_analyze] = merged_df[field_to_analyze].astype(bool)\n", + "\n", "state_fips_codes = get_state_information(DATA_DIR)\n", "\n", "merged_with_state_information_df = merged_df.merge(\n", @@ -1499,7 +1514,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.6" } }, "nbformat": 4,