Prototype G (#672)

* wip

* cleanup

* cleanup 2

* fixing import ordering linter error

* updating backend to use score G

* adding percentile to score output

* update tippecanoe compression

Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
This commit is contained in:
Lucas Merrill Brown 2021-09-14 09:48:11 -05:00 committed by GitHub
commit 1083e953da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 123 additions and 39 deletions

View file

@ -99,6 +99,16 @@ class CensusACSETL(ExtractTransformLoad):
self.MEDIAN_INCOME_FIELD
]
# Handle null values for CBG median income, which are `-666666666`.
missing_value_count = sum(self.df[self.MEDIAN_INCOME_FIELD_NAME]==-666666666)
logger.info(
f"There are {missing_value_count} ({int(100*missing_value_count/self.df[self.MEDIAN_INCOME_FIELD_NAME].count())}%) values of "
+ f"`{self.MEDIAN_INCOME_FIELD_NAME}` being marked as null values."
)
self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
self.MEDIAN_INCOME_FIELD_NAME
].replace(to_replace=-666666666, value=None)
# Calculate percent unemployment.
# TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
self.df[self.UNEMPLOYED_FIELD_NAME] = (