Adding EAMLIS and FUDS data to legacy pollution in score (#1832)

Update to add EAMLIS and FUDS data to score
2025-09-30 15:03:17 -07:00 · 2022-08-18 13:32:29 -04:00 · 2022-08-18 13:32:29 -04:00 · cb4866b93f
commit cb4866b93f
parent 6e41e0d9f0
14 changed files with 93 additions and 24 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -312,6 +312,8 @@ TILES_SCORE_COLUMNS = {
    field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
    + field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
    field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
+    field_names.AML_BOOLEAN: "AML_ET",
+    field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET"
    ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
    ## FPL_200 (there is no higher ed in narwhal)
 }
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -14,6 +14,8 @@ from data_pipeline.etl.sources.dot_travel_composite.etl import (
 from data_pipeline.etl.sources.fsf_flood_risk.etl import (
    FloodRiskETL,
 )
+from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL
+from data_pipeline.etl.sources.us_army_fuds.etl import USArmyFUDS
 from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL
 from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
 from data_pipeline.score.score_runner import ScoreRunner
@@ -49,6 +51,8 @@ class ScoreETL(ExtractTransformLoad):
        self.fsf_flood_df: pd.DataFrame
        self.fsf_fire_df: pd.DataFrame
        self.nature_deprived_df: pd.DataFrame
+        self.eamlis_df: pd.DataFrame
+        self.fuds_df: pd.DataFrame

    def extract(self) -> None:
        logger.info("Loading data sets from disk.")
@@ -139,6 +143,12 @@ class ScoreETL(ExtractTransformLoad):
        # Load NLCD Nature-Deprived Communities data
        self.nature_deprived_df = NatureDeprivedETL.get_data_frame()

+        # Load eAMLIS dataset
+        self.eamlis_df = AbandonedMineETL.get_data_frame()
+
+        # Load FUDS dataset
+        self.fuds_df = USArmyFUDS.get_data_frame()
+
        # Load GeoCorr Urban Rural Map
        geocorr_urban_rural_csv = (
            constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
@@ -362,6 +372,8 @@ class ScoreETL(ExtractTransformLoad):
            self.fsf_flood_df,
            self.fsf_fire_df,
            self.nature_deprived_df,
+            self.eamlis_df,
+            self.fuds_df,
        ]

        # Sanity check each data frame before merging.
@@ -457,6 +469,8 @@ class ScoreETL(ExtractTransformLoad):
            field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
            field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
            field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
+            field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
+            field_names.AML_BOOLEAN,
        ]

        # For some columns, high values are "good", so we want to reverse the percentile
--- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl