Fast flag update (#1844)

Added additional flags for the front end, based on our conversation in stand-up this morning.
2025-07-25 07:20:18 -07:00 · 2022-08-19 13:14:44 -04:00 · 2022-08-19 13:14:44 -04:00 · d892bce6cf
commit d892bce6cf
parent 1ee26bf30d
14 changed files with 63 additions and 31 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@ -208,9 +208,10 @@ TILES_SCORE_COLUMNS = {
    field_names.M_HEALTH: "M_HLTH",
    # temporarily update this so that it's the Narwhal score that gets visualized on the map
    # The NEW final score value INCLUDES the adjacency index.
-    field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX: "SM_C",
+    field_names.FINAL_SCORE_N_BOOLEAN: "SM_C",
    field_names.SCORE_N_COMMUNITIES
-    + field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
+    + field_names.ADJACENT_MEAN_SUFFIX: "SM_DON",
+    field_names.SCORE_N_COMMUNITIES: "SM_NO_DON",
    field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
    field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
    field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
@ -313,7 +314,8 @@ TILES_SCORE_COLUMNS = {
    + field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
    field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
    field_names.AML_BOOLEAN: "AML_ET",
-    field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET"
+    field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET",
+    field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG"
    ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
    ## FPL_200 (there is no higher ed in narwhal)
 }
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@ -471,6 +471,7 @@ class ScoreETL(ExtractTransformLoad):
            field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
            field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
            field_names.AML_BOOLEAN,
+            field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
        ]

        # For some columns, high values are "good", so we want to reverse the percentile
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@ -521,8 +521,6 @@ class PostScoreETL(ExtractTransformLoad):
        score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")

    def _load_downloadable_zip(self, downloadable_info_path: Path) -> None:
-        logger.info("Saving Downloadable CSV")
-
        downloadable_info_path.mkdir(parents=True, exist_ok=True)
        csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
        excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
--- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
@ -227,6 +227,7 @@ class CensusACSETL(ExtractTransformLoad):
                self.COLLEGE_ATTENDANCE_FIELD,
                self.COLLEGE_NON_ATTENDANCE_FIELD,
                self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
+                field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
            ]
            + self.RE_OUTPUT_FIELDS
            + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
@ -503,6 +504,13 @@ class CensusACSETL(ExtractTransformLoad):
            }
        )

+        # We generate a boolean that is TRUE when the imputed below-200%-FPL value is present but the baseline value is missing, and FALSE otherwise.
+        # This allows us to see which tracts rely on an imputed income measure.
+        df[field_names.IMPUTED_INCOME_FLAG_FIELD_NAME] = (
+            df[field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD].notna()
+            & df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna()
+        )
+
        # Strip columns and save results to self.
        self.df = df[self.COLUMNS_TO_KEEP]

--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py
@ -92,12 +92,17 @@ def calculate_income_measures(
    )

    # Iterate through the dataframe to impute in place
+    ## TODO: We should probably convert this loop to a spatial join: now that we run more than one imputation, it takes
+    ## a lot of time. Working out how to do that while maintaining the masking will take some thought. The best approach
+    ## is probably to (1) spatial join each tract to all of its neighbors, and then (2) iterate to take the "smallest"
+    ## set of neighbors. This has not been implemented yet.
    for index, row in geo_df.iterrows():
        if row[geoid_field] in tract_list:
            neighbor_mask = _get_neighbor_mask(geo_df, row)
            county_mask = _get_fips_mask(
                geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field
            )
+            ## TODO: Did CEQ decide to cut this state-level mask? Confirm before relying on it.
            state_mask = _get_fips_mask(
                geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field
            )