@staticmethod
def _backfill_island_data(df: pd.DataFrame) -> pd.DataFrame:
    """Backfill total population for island-area tracts from the 2010 census.

    A row counts as an island-area tract when the first two characters of
    its ``field_names.GEOID_TRACT_FIELD`` value are one of
    ``constants.TILES_ISLAND_AREA_FIPS_CODES``. For those rows,
    ``field_names.TOTAL_POP_FIELD`` is replaced with
    ``field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010`` wherever a 2010
    value is present. Island rows that have no 2010 value keep whatever
    population they already had instead of being overwritten with a
    missing value.

    Args:
        df: Dataframe holding the tract GEOID column and both population
            columns. It is modified in place.

    Returns:
        The same dataframe object that was passed in.
    """
    logger.info("Backfilling island data")
    island_index = (
        df[field_names.GEOID_TRACT_FIELD]
        .str[:2]
        .isin(constants.TILES_ISLAND_AREA_FIPS_CODES)
    )
    # Only backfill "where we have it": a missing 2010 figure must not
    # clobber a population value that is already present.
    has_2010_population = df[
        field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010
    ].notna()
    backfill_index = island_index & has_2010_population
    df.loc[backfill_index, field_names.TOTAL_POP_FIELD] = df.loc[
        backfill_index, field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010
    ]
    return df