Update to use new FSF files (#1838)

backend is partially done!
2025-09-10 04:01:00 -07:00 · 2022-08-18 15:54:44 -04:00 · 2022-08-18 15:54:44 -04:00 · 3ba1c620f5
commit 3ba1c620f5
parent cb4866b93f
8 changed files with 24 additions and 28 deletions
--- a/data/data-pipeline/data_pipeline/content/config/csv.yml
+++ b/data/data-pipeline/data_pipeline/content/config/csv.yml
@@ -334,4 +334,10 @@ fields:
    format: bool
  - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
    label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
-    format: bool
+    format: bool
+  - score_name:  Tract-level redlining score meets or exceeds 3.25 and is low income
+    label: Tract experienced historic underinvestment and remains low income
+    format: bool
+  - score_name: Tract-level redlining score meets or exceeds 3.25
+    label: Tract experienced historic underinvestment
+    format: bool
--- a/data/data-pipeline/data_pipeline/content/config/excel.yml
+++ b/data/data-pipeline/data_pipeline/content/config/excel.yml
@@ -338,4 +338,10 @@ sheets:
        format: bool
      - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
        label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
-        format: bool
+        format: bool
+      - score_name:  Tract-level redlining score meets or exceeds 3.25 and is low income
+        label: Tract experienced historic underinvestment and remains low income
+        format: bool
+      - score_name: Tract-level redlining score meets or exceeds 3.25
+        label: Tract experienced historic underinvestment
+        format: bool
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -303,9 +303,9 @@ TILES_SCORE_COLUMNS = {
    field_names.FUTURE_FLOOD_RISK_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX: "FLD_PFS",
    field_names.FUTURE_WILDFIRE_RISK_FIELD
-    + field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
+    + field_names.PERCENTILE_FIELD_SUFFIX: "WFR_PFS",
    field_names.HIGH_FUTURE_FLOOD_RISK_FIELD: "FLD_ET",
-    field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WF_ET",
+    field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WFR_ET",
    field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD: "ADJ_ET",
    field_names.SCORE_N_COMMUNITIES
    + field_names.ADJACENCY_INDEX_SUFFIX: "ADJ_PFS",
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py
@@ -27,7 +27,7 @@ class FloodRiskETL(ExtractTransformLoad):
    def __init__(self):
        # define the full path for the input CSV file
        self.INPUT_CSV = (
-            self.get_tmp_path() / "fsf_flood" / "flood_tract_2010.csv"
+            self.get_tmp_path() / "fsf_flood" / "flood-tract2010.csv"
        )

        # this is the main dataframe
@@ -50,24 +50,16 @@ class FloodRiskETL(ExtractTransformLoad):

        # read in the unzipped csv data source then rename the
        # Census Tract column for merging
-        df_fsf_flood_disagg: pd.DataFrame = pd.read_csv(
+        df_fsf_flood: pd.DataFrame = pd.read_csv(
            self.INPUT_CSV,
            dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
            low_memory=False,
        )

-        df_fsf_flood_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_flood_disagg[
+        df_fsf_flood[self.GEOID_TRACT_FIELD_NAME] = df_fsf_flood[
            self.INPUT_GEOID_TRACT_FIELD_NAME
        ].str.zfill(11)

-        # Because we have some tracts that are listed twice, we aggregate based on
-        # GEOID10_TRACT. Note that I haven't confirmed this with the FSF boys -- to do!
-        df_fsf_flood = (
-            df_fsf_flood_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
-            .sum()
-            .reset_index()
-        )
-
        df_fsf_flood[self.COUNT_PROPERTIES] = df_fsf_flood[
            self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
        ].clip(lower=self.CLIP_PROPERTIES_COUNT)
--- a/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py
@@ -26,9 +26,7 @@ class WildfireRiskETL(ExtractTransformLoad):

    def __init__(self):
        # define the full path for the input CSV file
-        self.INPUT_CSV = (
-            self.get_tmp_path() / "fsf_fire" / "fire_tract_2010.csv"
-        )
+        self.INPUT_CSV = self.get_tmp_path() / "fsf_fire" / "fire-tract2010.csv"

        # this is the main dataframe
        self.df: pd.DataFrame
@@ -49,24 +47,16 @@ class WildfireRiskETL(ExtractTransformLoad):
        logger.info("Transforming National Risk Index Data")
        # read in the unzipped csv data source then rename the
        # Census Tract column for merging
-        df_fsf_fire_disagg: pd.DataFrame = pd.read_csv(
+        df_fsf_fire: pd.DataFrame = pd.read_csv(
            self.INPUT_CSV,
            dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
            low_memory=False,
        )

-        df_fsf_fire_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_fire_disagg[
+        df_fsf_fire[self.GEOID_TRACT_FIELD_NAME] = df_fsf_fire[
            self.INPUT_GEOID_TRACT_FIELD_NAME
        ].str.zfill(11)

-        # Because we have some tracts that are listed twice, we aggregate based on
-        # GEOID10_TRACT. Note that I haven't confirmed this with the FSF boys -- to do!
-        df_fsf_fire = (
-            df_fsf_fire_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
-            .sum()
-            .reset_index()
-        )
-
        df_fsf_fire[self.COUNT_PROPERTIES] = df_fsf_fire[
            self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
        ].clip(lower=self.CLIP_PROPERTIES_COUNT)
--- a/data/data-pipeline/data_pipeline/utils.py
+++ b/data/data-pipeline/data_pipeline/utils.py
@@ -1409,6 +1409,8 @@ def get_excel_column_name(index: int) -> str:
        "ALI",
        "ALJ",
        "ALK",
+        "ALL",
+        "ALM",
    ]

    return excel_column_names[index]