hotfix on float cols (#526)

This commit is contained in:
Jorge Escobar 2021-08-13 15:48:31 -04:00 committed by GitHub
parent 1dbb1018d6
commit c19cd3ee55
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -32,7 +32,9 @@ class PostScoreETL(ExtractTransformLoad):
self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv" self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
self.DOWNLOADABLE_INFO_PATH = self.DATA_PATH / "score" / "downloadable" self.DOWNLOADABLE_INFO_PATH = self.DATA_PATH / "score" / "downloadable"
self.STATE_CSV = self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv" self.STATE_CSV = (
self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
)
self.FULL_SCORE_CSV = self.SCORE_CSV_PATH / "full" / "usa.csv" self.FULL_SCORE_CSV = self.SCORE_CSV_PATH / "full" / "usa.csv"
self.FULL_SCORE_CSV_PLUS_COUNTIES = ( self.FULL_SCORE_CSV_PLUS_COUNTIES = (
@ -59,11 +61,11 @@ class PostScoreETL(ExtractTransformLoad):
self.TILES_SCORE_FLOAT_COLUMNS = [ self.TILES_SCORE_FLOAT_COLUMNS = [
"Score E (percentile)", "Score E (percentile)",
"Score E (top 25th percentile)", "Score E (top 25th percentile)",
"Poverty (Less than 200% of federal poverty line)", "Poverty (Less than 200% of federal poverty line) (percentile)",
"Percent individuals age 25 or over with less than high school degree", "Percent individuals age 25 or over with less than high school degree (percentile)",
"Linguistic isolation (percent)", "Linguistic isolation (percent) (percentile)",
"Unemployed civilians (percent)", "Unemployed civilians (percent) (percentile)",
"Housing burden (percent)", "Housing burden (percent) (percentile)",
] ]
self.TILES_ROUND_NUM_DECIMALS = 2 self.TILES_ROUND_NUM_DECIMALS = 2
@ -203,9 +205,9 @@ class PostScoreETL(ExtractTransformLoad):
# subtract data sets # subtract data sets
# this follows the XOR pattern outlined here: # this follows the XOR pattern outlined here:
# https://stackoverflow.com/a/37313953 # https://stackoverflow.com/a/37313953
removed_df = pd.concat([merged_df, null_cbg_df, null_cbg_df]).drop_duplicates( removed_df = pd.concat(
keep=False [merged_df, null_cbg_df, null_cbg_df]
) ).drop_duplicates(keep=False)
# set the score to the new df # set the score to the new df
self.score_county_state_merged = removed_df self.score_county_state_merged = removed_df
@ -222,7 +224,8 @@ class PostScoreETL(ExtractTransformLoad):
score_tiles = self.score_county_state_merged[self.TILES_SCORE_COLUMNS] score_tiles = self.score_county_state_merged[self.TILES_SCORE_COLUMNS]
decimals = pd.Series( decimals = pd.Series(
[self.TILES_ROUND_NUM_DECIMALS] * len(self.TILES_SCORE_FLOAT_COLUMNS), [self.TILES_ROUND_NUM_DECIMALS]
* len(self.TILES_SCORE_FLOAT_COLUMNS),
index=self.TILES_SCORE_FLOAT_COLUMNS, index=self.TILES_SCORE_FLOAT_COLUMNS,
) )
score_tiles = score_tiles.round(decimals) score_tiles = score_tiles.round(decimals)