From 6a00b29f5dcdfac6858f71a0cefb7b740dbb6f1f Mon Sep 17 00:00:00 2001 From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com> Date: Fri, 4 Feb 2022 10:00:41 -0500 Subject: [PATCH] Adding VA and CO ETL from mapping for environmental justice (#1177) Adding the mapping for environmental justice data, which contains information about VA and CO, to the ETL pipeline. --- .../data_pipeline/etl/constants.py | 5 + .../etl/sources/mapping_for_ej/README.md | 0 .../etl/sources/mapping_for_ej/__init__.py | 0 .../etl/sources/mapping_for_ej/etl.py | 99 +++++++++ .../ipython/scoring_comparison.ipynb | 208 ++++++++---------- .../data_pipeline/score/field_names.py | 12 + 6 files changed, 209 insertions(+), 115 deletions(-) create mode 100644 data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/README.md create mode 100644 data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/__init__.py create mode 100644 data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/etl.py diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py index fc6adf1f..9ddd1210 100644 --- a/data/data-pipeline/data_pipeline/etl/constants.py +++ b/data/data-pipeline/data_pipeline/etl/constants.py @@ -1,4 +1,9 @@ DATASET_LIST = [ + { + "name": "mapping_for_ej", + "module_dir": "mapping_for_ej", + "class_name": "MappingForEJETL", + }, { "name": "census_acs", "module_dir": "census_acs", diff --git a/data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/README.md b/data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/README.md new file mode 100644 index 00000000..e69de29b diff --git a/data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/__init__.py b/data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data/data-pipeline/data_pipeline/etl/sources/mapping_for_ej/etl.py 
import pandas as pd
import geopandas as gpd

from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
from data_pipeline.config import settings

logger = get_module_logger(__name__)


class MappingForEJETL(ExtractTransformLoad):
    """ETL for the "Mapping for Environmental Justice" dataset (VA and CO).

    Downloads the two state shapefile archives, concatenates them into a
    single frame, normalizes the census-tract GEOID to 11 digits, renames
    the final rank/score columns to their canonical field names, and
    derives a boolean priority-community flag from the final percentile.
    The combined result is written to ``dataset/mapping_for_ej/co_va.csv``.
    """

    def __init__(self):
        # Output directory for the combined CO/VA CSV.
        self.CSV_PATH = self.DATA_PATH / "dataset" / "mapping_for_ej"

        self.MAPPING_FOR_EJ_VA_URL = (
            settings.AWS_JUSTICE40_DATASOURCES_URL + "/VA_mej.zip"
        )
        self.MAPPING_FOR_EJ_CO_URL = (
            settings.AWS_JUSTICE40_DATASOURCES_URL + "/CO_mej.zip"
        )
        # Shapefile names as they appear inside the downloaded zip archives.
        self.VA_SHP_FILE_PATH = self.TMP_PATH / "mej_virginia_7_1.shp"
        self.CO_SHP_FILE_PATH = self.TMP_PATH / "mej_colorado_final.shp"

        # Defining variables
        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            field_names.MAPPING_FOR_EJ_FINAL_PERCENTILE_FIELD,
            field_names.MAPPING_FOR_EJ_FINAL_SCORE_FIELD,
            field_names.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD,
        ]

        # Choosing constants.
        # In our current score implementation, about 17% of CO and 20% of VA
        # tracts are identified as disadvantaged. Consequently, the rank-based
        # threshold is 20%. Using the scores to calculate which are priority
        # communities doesn't quite track with this distribution, and so I've
        # opted to choose roughly 20% of both states.
        self.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_PERCENTILE_THRESHOLD = 80

        # Populated by transform(); annotation only, no value assigned here.
        self.df: pd.DataFrame

    def extract(self) -> None:
        """Download and unzip the VA and CO archives into the temp dir."""
        logger.info("Downloading Mapping for EJ Data")
        super().extract(
            self.MAPPING_FOR_EJ_VA_URL,
            self.TMP_PATH,
        )
        super().extract(
            self.MAPPING_FOR_EJ_CO_URL,
            self.TMP_PATH,
        )

    def transform(self) -> None:
        """Combine the state shapefiles and derive the output columns."""
        logger.info("Transforming Mapping for EJ Data")

        # Join (here, it's just concatenating) the two dataframes from
        # CO and VA
        self.df = pd.concat(
            [
                gpd.read_file(self.VA_SHP_FILE_PATH),
                gpd.read_file(self.CO_SHP_FILE_PATH),
            ]
        )

        # Fill Census tract to get it to be 11 digits, incl. leading 0s
        # Note that VA and CO should never have leading 0s, so this isn't
        # strictly necessary, but if in the future, there are more states
        # this seems like a reasonable thing to include.
        self.df[self.GEOID_TRACT_FIELD_NAME] = (
            self.df["fips_tract"].astype(str).str.zfill(11)
        )

        # Note that there are tracts in this dataset that do not have a final
        # ranking because they are missing data. I've retained them to be
        # consistent with other ETLs.
        self.df = self.df.rename(
            columns={
                "fin_rank": field_names.MAPPING_FOR_EJ_FINAL_PERCENTILE_FIELD,
                "fin_score": field_names.MAPPING_FOR_EJ_FINAL_SCORE_FIELD,
            }
        )

        # Calculate prioritized communities based on percentile, only
        # for tracts that have complete data
        self.df[field_names.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD] = (
            self.df[field_names.MAPPING_FOR_EJ_FINAL_PERCENTILE_FIELD]
            >= self.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_PERCENTILE_THRESHOLD
        )

    def load(self) -> None:
        """Write the selected columns for both states to one CSV."""
        logger.info("Saving Mapping for EJ CSV")
        # write selected states csv
        self.CSV_PATH.mkdir(parents=True, exist_ok=True)
        self.df[self.COLUMNS_TO_KEEP].to_csv(
            self.CSV_PATH / "co_va.csv", index=False
        )

    def validate(self) -> None:
        """No dataset-specific validation implemented yet.

        Logs so the run history shows the step executed. (The redundant
        trailing ``pass`` from the original has been removed; the method
        body is already non-empty.)
        """
        logger.info("Validating Mapping For EJ Data")
[], "source": [ "# Load EJSCREEN Areas of Concern data.\n", @@ -149,9 +146,7 @@ "cell_type": "code", "execution_count": null, "id": "e43a9e23", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Merge EJSCREEN AoCs into CEJST data.\n", @@ -174,9 +169,7 @@ "cell_type": "code", "execution_count": null, "id": "38c0dc2f", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Analyze one field at a time (useful for setting thresholds)\n", @@ -214,35 +207,71 @@ "CALENVIROSCREEN_PERCENTILE_FIELD = \"calenviroscreen_percentile\"\n", "CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD = \"calenviroscreen_priority_community\"\n", "\n", - "calenviroscreen_data_path = (\n", - " DATA_DIR / \"dataset\" / \"calenviroscreen4\" / \"data06.csv\"\n", - ")\n", + "calenviroscreen_data_path = DATA_DIR / \"dataset\" / \"calenviroscreen4\" / \"data06.csv\"\n", "calenviroscreen_df = pd.read_csv(\n", " calenviroscreen_data_path,\n", " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n", ")\n", "\n", "# Convert priority community field to a bool.\n", - "calenviroscreen_df[\n", + "calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD] = calenviroscreen_df[\n", " CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD\n", - "] = calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD].astype(bool)\n", + "].astype(bool)\n", "\n", "calenviroscreen_df.head()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1ac2854-80c8-42a8-85e8-84c5684bbe43", + "metadata": {}, + "outputs": [], + "source": [ + "# Mapping for EJ\n", + "mapping_for_ej_path = DATA_DIR / \"dataset\" / \"mapping_for_ej\" / \"co_va.csv\"\n", + "\n", + "mapping_for_ej_df = pd.read_csv(\n", + " mapping_for_ej_path,\n", + " dtype={\n", + " ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\",\n", + " field_names.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD: \"bool\",\n", + " },\n", + ")\n", + "\n", + "mapping_for_ej_df.head()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "b1ac2854-80c8-42a8-85e8-84c5684bbe43", + "metadata": {}, + "outputs": [], + "source": [ + "# Mapping for EJ\n", + "mapping_for_ej_path = DATA_DIR / \"dataset\" / \"mapping_for_ej\" / \"co_va.csv\"\n", + "\n", + "mapping_for_ej_df = pd.read_csv(\n", + " mapping_for_ej_path,\n", + " dtype={\n", + " ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\",\n", + " field_names.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD: \"bool\",\n", + " },\n", + ")\n", + "\n", + "mapping_for_ej_df.head()" + ] + }, { "cell_type": "code", "execution_count": null, "id": "d8ec43dc", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Load persistent poverty data\n", - "persistent_poverty_path = (\n", - " DATA_DIR / \"dataset\" / \"persistent_poverty\" / \"usa.csv\"\n", - ")\n", + "persistent_poverty_path = DATA_DIR / \"dataset\" / \"persistent_poverty\" / \"usa.csv\"\n", "persistent_poverty_df = pd.read_csv(\n", " persistent_poverty_path,\n", " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n", @@ -255,9 +284,7 @@ "PERSISTENT_POVERTY_CBG_LEVEL_FIELD = \"Persistent Poverty Census Tract\"\n", "\n", "persistent_poverty_df.rename(\n", - " columns={\n", - " PERSISTENT_POVERTY_CBG_LEVEL_FIELD: PERSISTENT_POVERTY_TRACT_LEVEL_FIELD\n", - " },\n", + " columns={PERSISTENT_POVERTY_CBG_LEVEL_FIELD: PERSISTENT_POVERTY_TRACT_LEVEL_FIELD},\n", " inplace=True,\n", " errors=\"raise\",\n", ")\n", @@ -269,9 +296,7 @@ "cell_type": "code", "execution_count": null, "id": "81826d29", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Load mapping inequality data\n", @@ -280,9 +305,7 @@ " field_names.HOLC_GRADE_D_TRACT_50_PERCENT_FIELD,\n", " field_names.HOLC_GRADE_D_TRACT_75_PERCENT_FIELD,\n", "]\n", - "mapping_inequality_path = (\n", - " DATA_DIR / \"dataset\" / \"mapping_inequality\" / \"usa.csv\"\n", - ")\n", + "mapping_inequality_path = DATA_DIR / \"dataset\" / 
\"mapping_inequality\" / \"usa.csv\"\n", "mapping_inequality_df = pd.read_csv(\n", " mapping_inequality_path,\n", " dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n", @@ -329,9 +352,7 @@ "cell_type": "code", "execution_count": null, "id": "605af1ff", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Load alternative energy-related definition\n", @@ -350,9 +371,7 @@ "cell_type": "code", "execution_count": null, "id": "fe4a2939", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Load Michigan EJSCREEN\n", @@ -404,6 +423,7 @@ " energy_definition_alternative_draft_df,\n", " michigan_ejscreen_df,\n", " cdc_svi_index_df,\n", + " mapping_for_ej_df,\n", "]\n", "\n", "merged_df = functools.reduce(\n", @@ -416,9 +436,7 @@ " census_tract_dfs,\n", ")\n", "\n", - "tract_values = (\n", - " merged_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME].str.len().unique()\n", - ")\n", + "tract_values = merged_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME].str.len().unique()\n", "if any(tract_values != [11]):\n", " print(tract_values)\n", " raise ValueError(\"Some of the census tract data has the wrong length.\")\n", @@ -433,9 +451,7 @@ "cell_type": "code", "execution_count": null, "id": "2de78f71", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Special handling for HOLC.\n", @@ -555,6 +571,10 @@ " priority_communities_field=\"calenviroscreen_priority_community\",\n", " ),\n", " Index(\n", + " method_name=\"Mapping for EJ\",\n", + " priority_communities_field=field_names.MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD,\n", + " ),\n", + " Index(\n", " method_name=\"EPA RSEI Aggregate Microdata\",\n", " priority_communities_field=field_names.EPA_RSEI_SCORE_THRESHOLD_FIELD,\n", " ),\n", @@ -708,13 +728,13 @@ " summary_dict[\"Geography name\"] = summary_dict[\"Urban vs Rural\"]\n", "\n", " for priority_communities_field in priority_communities_fields:\n", - " 
summary_dict[\n", + " summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"] = frame[\n", " f\"{priority_communities_field}{POPULATION_SUFFIX}\"\n", - " ] = frame[f\"{priority_communities_field}{POPULATION_SUFFIX}\"].sum()\n", + " ].sum()\n", "\n", - " summary_dict[\n", - " f\"{priority_communities_field} (total tracts)\"\n", - " ] = frame[f\"{priority_communities_field}\"].sum()\n", + " summary_dict[f\"{priority_communities_field} (total tracts)\"] = frame[\n", + " f\"{priority_communities_field}\"\n", + " ].sum()\n", "\n", " # Calculate some combinations of other variables.\n", " summary_dict[f\"{priority_communities_field} (percent tracts)\"] = (\n", @@ -722,9 +742,7 @@ " / total_tracts_in_geography\n", " )\n", "\n", - " summary_dict[\n", - " f\"{priority_communities_field} (percent population)\"\n", - " ] = (\n", + " summary_dict[f\"{priority_communities_field} (percent population)\"] = (\n", " summary_dict[f\"{priority_communities_field}{POPULATION_SUFFIX}\"]\n", " / total_population_in_geography\n", " )\n", @@ -770,9 +788,7 @@ "\n", " # Run the comparison function on the groups.\n", " region_distribution_df = region_grouped_df.progress_apply(\n", - " lambda frame: calculate_state_comparison(\n", - " frame, geography_field=\"region\"\n", - " )\n", + " lambda frame: calculate_state_comparison(frame, geography_field=\"region\")\n", " )\n", "\n", " # Next, run the comparison by division\n", @@ -780,9 +796,7 @@ "\n", " # Run the comparison function on the groups.\n", " division_distribution_df = division_grouped_df.progress_apply(\n", - " lambda frame: calculate_state_comparison(\n", - " frame, geography_field=\"division\"\n", - " )\n", + " lambda frame: calculate_state_comparison(frame, geography_field=\"division\")\n", " )\n", "\n", " # Next, run the comparison by urban/rural\n", @@ -837,9 +851,7 @@ " column_character = get_excel_column_name(column_index)\n", "\n", " # Set all columns to larger width\n", - " worksheet.set_column(\n", - " 
f\"{column_character}:{column_character}\", column_width\n", - " )\n", + " worksheet.set_column(f\"{column_character}:{column_character}\", column_width)\n", "\n", " # Special formatting for all percent columns\n", " # Note: we can't just search for `percent`, because that's included in the word `percentile`.\n", @@ -854,7 +866,9 @@ "\n", " # Special formatting for columns that capture the percent of population considered priority.\n", " if \"(percent population)\" in column:\n", - " column_ranges = f\"{column_character}2:{column_character}{len(state_distribution_df)+1}\"\n", + " column_ranges = (\n", + " f\"{column_character}2:{column_character}{len(state_distribution_df)+1}\"\n", + " )\n", "\n", " # Add green to red conditional formatting.\n", " worksheet.conditional_format(\n", @@ -880,18 +894,14 @@ " writer.save()\n", "\n", "\n", - "fields_to_analyze = [\n", - " index.priority_communities_field for index in census_tract_indices\n", - "]\n", + "fields_to_analyze = [index.priority_communities_field for index in census_tract_indices]\n", "\n", "# Convert all indices to boolean\n", "for field_to_analyze in fields_to_analyze:\n", " if \"Areas of Concern\" in field_to_analyze:\n", " print(f\"Converting {field_to_analyze} to boolean.\")\n", "\n", - " merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(\n", - " value=0\n", - " )\n", + " merged_df[field_to_analyze] = merged_df[field_to_analyze].fillna(value=0)\n", " merged_df[field_to_analyze] = merged_df[field_to_analyze].astype(bool)\n", "\n", "\n", @@ -924,9 +934,7 @@ "cell_type": "code", "execution_count": null, "id": "2bcbcabf", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "directory = COMPARISON_OUTPUTS_DIR / \"tracts_basic_stats\"\n", @@ -960,14 +968,10 @@ " column_character = get_excel_column_name(column_index)\n", "\n", " # Set all columns to larger width\n", - " worksheet.set_column(\n", - " f\"{column_character}:{column_character}\", column_width\n", - " 
)\n", + " worksheet.set_column(f\"{column_character}:{column_character}\", column_width)\n", "\n", " # Add green to red conditional formatting.\n", - " column_ranges = (\n", - " f\"{column_character}2:{column_character}{len(basic_stats_df)+1}\"\n", - " )\n", + " column_ranges = f\"{column_character}2:{column_character}{len(basic_stats_df)+1}\"\n", " worksheet.conditional_format(\n", " column_ranges,\n", " # Min: green, max: red.\n", @@ -980,11 +984,7 @@ "\n", " # Special formatting for all percent columns\n", " # Note: we can't just search for `percent`, because that's included in the word `percentile`.\n", - " if (\n", - " \"percent \" in column\n", - " or \"(percent)\" in column\n", - " or \"Percent \" in column\n", - " ):\n", + " if \"percent \" in column or \"(percent)\" in column or \"Percent \" in column:\n", " # Make these columns percentages.\n", " percentage_format = workbook.add_format({\"num_format\": \"0%\"})\n", " worksheet.set_column(\n", @@ -1013,15 +1013,9 @@ " temp_df[index.priority_communities_field] == True\n", " )\n", "\n", - " grouped_df = (\n", - " temp_df.groupby(index.priority_communities_field).mean().reset_index()\n", - " )\n", - " result_df = grouped_df[\n", - " [index.priority_communities_field] + comparison_fields\n", - " ]\n", - " result_df.to_csv(\n", - " directory / f\"{index.method_name} Basic Stats.csv\", index=False\n", - " )\n", + " grouped_df = temp_df.groupby(index.priority_communities_field).mean().reset_index()\n", + " result_df = grouped_df[[index.priority_communities_field] + comparison_fields]\n", + " result_df.to_csv(directory / f\"{index.method_name} Basic Stats.csv\", index=False)\n", " write_basic_stats_excel(\n", " basic_stats_df=result_df,\n", " file_path=directory / f\"{index.method_name} Basic Stats.xlsx\",\n", @@ -1070,9 +1064,7 @@ "\n", " # Also add in the count of census tracts.\n", " count_field_name = \"Count of census tracts\"\n", - " comparison_df[count_field_name] = grouped_df.size().to_frame(\n", - " 
count_field_name\n", - " )\n", + " comparison_df[count_field_name] = grouped_df.size().to_frame(count_field_name)\n", "\n", " comparison_df = comparison_df.reset_index()\n", "\n", @@ -1087,9 +1079,7 @@ "\n", " # Put criteria description column first.\n", " columns_to_put_first = (\n", - " [criteria_description_field_name]\n", - " + fields_to_group_by\n", - " + [count_field_name]\n", + " [criteria_description_field_name] + fields_to_group_by + [count_field_name]\n", " )\n", " new_column_order = columns_to_put_first + [\n", " col for col in comparison_df.columns if col not in columns_to_put_first\n", @@ -1120,9 +1110,7 @@ "\n", " # Convert the dataframe to an XlsxWriter Excel object. We also turn off the\n", " # index column at the left of the output dataframe.\n", - " census_tracts_score_comparison_df.to_excel(\n", - " writer, sheet_name=\"Sheet1\", index=False\n", - " )\n", + " census_tracts_score_comparison_df.to_excel(writer, sheet_name=\"Sheet1\", index=False)\n", "\n", " # Get the xlsxwriter workbook and worksheet objects.\n", " workbook = writer.book\n", @@ -1144,9 +1132,7 @@ " column_character = get_excel_column_name(column_index)\n", "\n", " # Set all columns to larger width\n", - " worksheet.set_column(\n", - " f\"{column_character}:{column_character}\", column_width\n", - " )\n", + " worksheet.set_column(f\"{column_character}:{column_character}\", column_width)\n", "\n", " # Add green to red conditional formatting.\n", " column_ranges = f\"{column_character}2:{column_character}{len(census_tracts_score_comparison_df)+1}\"\n", @@ -1162,11 +1148,7 @@ "\n", " # Special formatting for all percent columns\n", " # Note: we can't just search for `percent`, because that's included in the word `percentile`.\n", - " if (\n", - " \"percent \" in column\n", - " or \"(percent)\" in column\n", - " or \"Percent \" in column\n", - " ):\n", + " if \"percent \" in column or \"(percent)\" in column or \"Percent \" in column:\n", " # Make these columns percentages.\n", " 
percentage_format = workbook.add_format({\"num_format\": \"0%\"})\n", " worksheet.set_column(\n", @@ -1182,9 +1164,7 @@ " # Overwrite both the value and the format of each header cell\n", " # This is because xlsxwriter / pandas has a known bug where it can't wrap text for a dataframe.\n", " # See https://stackoverflow.com/questions/42562977/xlsxwriter-text-wrap-not-working.\n", - " for col_num, value in enumerate(\n", - " census_tracts_score_comparison_df.columns.values\n", - " ):\n", + " for col_num, value in enumerate(census_tracts_score_comparison_df.columns.values):\n", " worksheet.write(0, col_num, value, header_format)\n", "\n", " writer.save()\n", @@ -1415,9 +1395,7 @@ "cell_type": "code", "execution_count": null, "id": "7d095ebd", - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "# Note: this is helpful because this file is long-running, so it alerts the user when the\n", @@ -1444,7 +1422,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.9.10" } }, "nbformat": 4, diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 448d0e52..0641844f 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -553,5 +553,17 @@ FPL_200_SERIES = "Is low income?" FPL_200_AND_COLLEGE_ATTENDANCE_SERIES = ( "Is low income and has a low percent of higher ed students?" ) + +# Mapping for Environmental Justice columns +MAPPING_FOR_EJ_FINAL_PERCENTILE_FIELD = ( + "Mapping for Environmental Justice Final Percentile" +) +MAPPING_FOR_EJ_FINAL_SCORE_FIELD = ( + "Mapping for Environmental Justice Final Score" +) +MAPPING_FOR_EJ_PRIORITY_COMMUNITY_FIELD = ( + "Mapping for Environmental Justice Priority Community" +) + # End of names for individual factors being exceeded ####