Prototype G (#672)

* wip * cleanup * cleanup 2 * fixing import ordering linter error * updating backend to use score G * adding percentile to score output * update tippecanoe compression Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
2025-07-28 14:11:17 -07:00 · 2021-09-14 09:48:11 -05:00 · 2021-09-14 09:48:11 -05:00 · 1083e953da
commit 1083e953da
parent 92d7f40004
6 changed files with 123 additions and 39 deletions
--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
@@ -99,6 +99,16 @@ class CensusACSETL(ExtractTransformLoad):
            self.MEDIAN_INCOME_FIELD
        ]

+        # Handle null values for CBG median income, which are `-666666666`.
+        missing_value_count = sum(self.df[self.MEDIAN_INCOME_FIELD_NAME]==-666666666)
+        logger.info(
+            f"There are {missing_value_count} ({int(100*missing_value_count/self.df[self.MEDIAN_INCOME_FIELD_NAME].count())}%) values of "
+            + f"`{self.MEDIAN_INCOME_FIELD_NAME}` being marked as null values."
+        )
+        self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
+            self.MEDIAN_INCOME_FIELD_NAME
+        ].replace(to_replace=-666666666, value=None)
+
        # Calculate percent unemployment.
        # TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
        self.df[self.UNEMPLOYED_FIELD_NAME] = (