adding median income field and running black

2025-07-28 23:11:16 -07:00 · 2021-08-09 20:47:51 -05:00 · 2021-08-09 20:47:51 -05:00 · 4ae7eff4c4
commit 4ae7eff4c4
parent 6c986adfe4
6 changed files with 33 additions and 10 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -59,7 +59,9 @@ class ScoreETL(ExtractTransformLoad):
        # Load census data
        census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
        self.census_df = pd.read_csv(
-            census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False,
+            census_csv,
+            dtype={self.GEOID_FIELD_NAME: "string"},
+            low_memory=False,
        )

        # Load housing and transportation data
@@ -121,7 +123,8 @@ class ScoreETL(ExtractTransformLoad):

        # Define a named tuple that will be used for each data set input.
        DataSet = collections.namedtuple(
-            typename="DataSet", field_names=["input_field", "renamed_field", "bucket"],
+            typename="DataSet",
+            field_names=["input_field", "renamed_field", "bucket"],
        )

        data_sets = [
@@ -138,7 +141,9 @@ class ScoreETL(ExtractTransformLoad):
                bucket=None,
            ),
            DataSet(
-                input_field="ACSTOTPOP", renamed_field="Total population", bucket=None,
+                input_field="ACSTOTPOP",
+                renamed_field="Total population",
+                bucket=None,
            ),
            # The following data sets have buckets, because they're used in the score
            DataSet(
@@ -244,7 +249,9 @@ class ScoreETL(ExtractTransformLoad):
        }

        self.df.rename(
-            columns=renaming_dict, inplace=True, errors="raise",
+            columns=renaming_dict,
+            inplace=True,
+            errors="raise",
        )

        columns_to_keep = [data_set.renamed_field for data_set in data_sets]
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py
@@ -46,7 +46,9 @@ class GeoScoreETL(ExtractTransformLoad):

        logger.info("Reading score CSV")
        self.score_usa_df = pd.read_csv(
-            self.TILE_SCORE_CSV, dtype={"GEOID10": "string"}, low_memory=False,
+            self.TILE_SCORE_CSV,
+            dtype={"GEOID10": "string"},
+            low_memory=False,
        )

    def transform(self) -> None:
@@ -68,7 +70,8 @@ class GeoScoreETL(ExtractTransformLoad):
        ].reset_index(drop=True)

        usa_simplified.rename(
-            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO}, inplace=True,
+            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
+            inplace=True,
        )

        logger.info("Aggregating into tracts (~5 minutes)")