hotfix on float cols (#526)

2025-07-09 07:39:58 -07:00 · 2021-08-13 15:48:31 -04:00 · 2021-08-13 15:48:31 -04:00 · c19cd3ee55
commit c19cd3ee55
parent 1dbb1018d6
1 changed files with 13 additions and 10 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@ -32,7 +32,9 @@ class PostScoreETL(ExtractTransformLoad):
        self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
        self.DOWNLOADABLE_INFO_PATH = self.DATA_PATH / "score" / "downloadable"
-        self.STATE_CSV = self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
+        self.STATE_CSV = (
            self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
        )
        self.FULL_SCORE_CSV = self.SCORE_CSV_PATH / "full" / "usa.csv"
        self.FULL_SCORE_CSV_PLUS_COUNTIES = (
@ -59,11 +61,11 @@ class PostScoreETL(ExtractTransformLoad):
        self.TILES_SCORE_FLOAT_COLUMNS = [
            "Score E (percentile)",
            "Score E (top 25th percentile)",
-            "Poverty (Less than 200% of federal poverty line)",
+            "Poverty (Less than 200% of federal poverty line) (percentile)",
-            "Percent individuals age 25 or over with less than high school degree",
+            "Percent individuals age 25 or over with less than high school degree (percentile)",
-            "Linguistic isolation (percent)",
+            "Linguistic isolation (percent) (percentile)",
-            "Unemployed civilians (percent)",
+            "Unemployed civilians (percent) (percentile)",
-            "Housing burden (percent)",
+            "Housing burden (percent) (percentile)",
        ]
        self.TILES_ROUND_NUM_DECIMALS = 2
@ -203,9 +205,9 @@ class PostScoreETL(ExtractTransformLoad):
        # subtract data sets
        # this follows the XOR pattern outlined here:
        # https://stackoverflow.com/a/37313953
-        removed_df = pd.concat([merged_df, null_cbg_df, null_cbg_df]).drop_duplicates(
+        removed_df = pd.concat(
-            keep=False
+            [merged_df, null_cbg_df, null_cbg_df]
-        )
+        ).drop_duplicates(keep=False)
        # set the score to the new df
        self.score_county_state_merged = removed_df
@ -222,7 +224,8 @@ class PostScoreETL(ExtractTransformLoad):
        score_tiles = self.score_county_state_merged[self.TILES_SCORE_COLUMNS]
        decimals = pd.Series(
-            [self.TILES_ROUND_NUM_DECIMALS] * len(self.TILES_SCORE_FLOAT_COLUMNS),
+            [self.TILES_ROUND_NUM_DECIMALS]
            * len(self.TILES_SCORE_FLOAT_COLUMNS),
            index=self.TILES_SCORE_FLOAT_COLUMNS,
        )
        score_tiles = score_tiles.round(decimals)