Revised Columns on Download File + PDF (#701)

* Revised Columns on Download File + PDF

* finishing ticket
2025-07-28 19:51:17 -07:00 · 2021-09-17 13:11:23 -04:00 · 2021-09-17 13:11:23 -04:00 · cd33f323c8
commit cd33f323c8
parent b6789c4d0d
3 changed files with 3524 additions and 15 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -6,6 +6,7 @@ from data_pipeline.config import settings
 # Base Paths
 DATA_PATH = Path(settings.APP_ROOT) / "data"
 TMP_PATH = DATA_PATH / "tmp"
+FILES_PATH = Path(settings.APP_ROOT) / "files"

 # Remote Paths
 CENSUS_COUNTIES_ZIP_URL = "https://www2.census.gov/geo/docs/maps-data/data/gazetteer/Gaz_counties_national.zip"
@@ -42,6 +43,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
 SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
 SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv"
 SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx"
+SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / "Draft_Communities_List.pdf"
 SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
    SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
 )
@@ -77,7 +79,6 @@ TILES_SCORE_COLUMNS = [
    "Particulate matter (PM2.5) (percentile)",
    "Median household income (% of AMI) (percentile)",
    "Percent of individuals < 200% Federal Poverty Line (percentile)",
-    "Percent individuals age 25 or over with less than high school degree (percentile)",
 ]

 # columns to round floats to 2 decimals
@@ -113,18 +114,21 @@ TILES_SCORE_FLOAT_COLUMNS = [
 TILES_ROUND_NUM_DECIMALS = 2

 DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
+    "Area Median Income (State or metropolitan)",
+    "Percent of individuals < 100% Federal Poverty Line",
    "Percent individuals age 25 or over with less than high school degree",
-    "Linguistic isolation (percent)",
-    "Poverty (Less than 200% of federal poverty line)",
-    "Unemployed civilians (percent)",
-    "Housing burden (percent)",
-    "Respiratory hazard index",
-    "Diesel particulate matter",
-    "Particulate matter (PM2.5)",
+    "Diagnosed diabetes among adults aged >=18 years",
+    "Current asthma among adults aged >=18 years",
+    "Coronary heart disease among adults aged >=18 years",
+    "Life expectancy (years)",
    "Traffic proximity and volume",
-    "Proximity to RMP sites",
+    "FEMA Risk Index Expected Annual Loss Score",
+    "Energy burden",
+    "Housing burden (percent)",
    "Wastewater discharge",
    "Percent pre-1960s housing (lead paint indicator)",
+    "Diesel particulate matter",
+    "Particulate matter (PM2.5)",
    "Total population",
 ]

@@ -132,7 +136,7 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
 DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
    pd.core.common.flatten(
        [
-            [p, f"{p} (percentile)", f"{p} (min-max normalized)"]
+            [p, f"{p} (percentile)"]
            for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
        ]
    )
@@ -143,7 +147,8 @@ DOWNLOADABLE_SCORE_COLUMNS = [
    "GEOID10",
    "County Name",
    "State Name",
-    "Score D (percentile)",
-    "Score D (top 25th percentile)",
+    "Score G (communities)",
+    "Median household income (% of AMI)",
+    "Median household income (% of state median household income) (percentile)",
    *DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
 ]
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@@ -230,14 +230,18 @@ class PostScoreETL(ExtractTransformLoad):
    ) -> None:
        logger.info("Saving Full Score CSV with County Information")
        score_csv_path.parent.mkdir(parents=True, exist_ok=True)
-        score_county_state_merged.to_csv(score_csv_path, index=False)
+        score_county_state_merged.to_csv(
+            score_csv_path,
+            index=False,
+            encoding="utf-8-sig",  # windows compat https://stackoverflow.com/a/43684587
+        )

    def _load_tile_csv(
        self, score_tiles_df: pd.DataFrame, tile_score_path: Path
    ) -> None:
        logger.info("Saving Tile Score CSV")
        tile_score_path.parent.mkdir(parents=True, exist_ok=True)
-        score_tiles_df.to_csv(tile_score_path, index=False)
+        score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")

    def _load_downloadable_zip(
        self, downloadable_df: pd.DataFrame, downloadable_info_path: Path
@@ -248,6 +252,13 @@ class PostScoreETL(ExtractTransformLoad):
        csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
        excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
        zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH
+        pdf_path = constants.SCORE_DOWNLOADABLE_PDF_FILE_PATH
+
+        # Rename score column
+        downloadable_df.rename(
+            columns={"Score G (communities)": "Community of focus (v0.1)"},
+            inplace=True,
+        )

        logger.info("Writing downloadable csv")
        downloadable_df.to_csv(csv_path, index=False)
@ -256,7 +267,7 @@ class PostScoreETL(ExtractTransformLoad):
        downloadable_df.to_excel(excel_path, index=False)

        logger.info("Compressing files")
-        files_to_compress = [csv_path, excel_path]
+        files_to_compress = [csv_path, excel_path, pdf_path]
        with zipfile.ZipFile(zip_path, "w") as zf:
            for f in files_to_compress:
                zf.write(f, arcname=Path(f).name, compress_type=compression)
--- a/data/data-pipeline/data_pipeline/files/Draft_Communities_List.pdf
+++ b/data/data-pipeline/data_pipeline/files/Draft_Communities_List.pdf