mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 14:11:17 -07:00
Prototype G (#672)
* wip * cleanup * cleanup 2 * fixing import ordering linter error * updating backend to use score G * adding percentile to score output * update tippecanoe compression Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
This commit is contained in:
parent
92d7f40004
commit
1083e953da
6 changed files with 123 additions and 39 deletions
|
@ -99,6 +99,16 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.MEDIAN_INCOME_FIELD
|
||||
]
|
||||
|
||||
# Handle null values for CBG median income, which are `-666666666`.
|
||||
missing_value_count = sum(self.df[self.MEDIAN_INCOME_FIELD_NAME]==-666666666)
|
||||
logger.info(
|
||||
f"There are {missing_value_count} ({int(100*missing_value_count/self.df[self.MEDIAN_INCOME_FIELD_NAME].count())}%) values of "
|
||||
+ f"`{self.MEDIAN_INCOME_FIELD_NAME}` being marked as null values."
|
||||
)
|
||||
self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
|
||||
self.MEDIAN_INCOME_FIELD_NAME
|
||||
].replace(to_replace=-666666666, value=None)
|
||||
|
||||
# Calculate percent unemployment.
|
||||
# TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
|
||||
self.df[self.UNEMPLOYED_FIELD_NAME] = (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue