Fast flag update (#1844)

Added additional flags for the front end, based on our conversation at stand-up this morning.
2025-07-28 14:31:16 -07:00 · 2022-08-19 13:14:44 -04:00 · 2022-08-19 13:14:44 -04:00 · d892bce6cf
commit d892bce6cf
parent 1ee26bf30d
14 changed files with 63 additions and 31 deletions
--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
@@ -227,6 +227,7 @@ class CensusACSETL(ExtractTransformLoad):
                self.COLLEGE_ATTENDANCE_FIELD,
                self.COLLEGE_NON_ATTENDANCE_FIELD,
                self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
+                field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
            ]
            + self.RE_OUTPUT_FIELDS
            + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
@@ -503,6 +504,13 @@ class CensusACSETL(ExtractTransformLoad):
            }
        )

+        # Flag tracts whose income is imputed: the boolean is TRUE when there is an imputed income
+        # but no baseline income, and FALSE otherwise.
+        df[field_names.IMPUTED_INCOME_FLAG_FIELD_NAME] = (
+            df[field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD].notna()
+            & df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna()
+        )
+
        # Strip columns and save results to self.
        self.df = df[self.COLUMNS_TO_KEEP]

--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py
@@ -92,12 +92,17 @@ def calculate_income_measures(
    )

    # Iterate through the dataframe to impute in place
+    # TODO: We should probably convert this loop to a spatial join: now that we run more than one
+    # imputation, iterating row by row takes a lot of time. Working out how to do this while
+    # maintaining the masking will take some thought. The best approach is probably to (1) spatial
+    # join each tract to all of its neighbors, and then (2) iterate to take the "smallest" set of
+    # neighbors. This has not been implemented yet.
    for index, row in geo_df.iterrows():
        if row[geoid_field] in tract_list:
            neighbor_mask = _get_neighbor_mask(geo_df, row)
            county_mask = _get_fips_mask(
                geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field
            )
+            # TODO: Did CEQ decide to cut this (presumably the state-level mask below)? Confirm.
            state_mask = _get_fips_mask(
                geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field
            )