mirror of https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00

Running Black

This commit is contained in:
parent dd8c37e06d
commit 508925618b

11 changed files with 30 additions and 87 deletions
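This commit applies the Black autoformatter across the ETL pipeline. Every hunk below follows the same pattern: a call or literal that fits within Black's default 88-character line length is collapsed from several lines onto one. Notably, the collapsed one-liners keep their trailing commas (e.g. `@cli.command(help="...",)`), which points to an early Black release: since Black 20.8b0, the "magic trailing comma" makes Black keep such calls exploded instead. As a minimal sketch, assuming Black is installed (pip install black), this kind of reformatting can be reproduced through Black's public Python API:

# A minimal sketch, assuming Black is installed; format_str() and FileMode
# are part of Black's public Python API.
import black

source = '''@cli.command(
    help="Clean up all census data folders",
)
def census_cleanup():
    """CLI command to clean up the census data folder"""
'''

# Uses Black's default 88-character line length; whether the call collapses
# to one line depends on the installed version's handling of the magic
# trailing comma.
formatted = black.format_str(source, mode=black.FileMode())
print(formatted)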
@@ -22,9 +22,7 @@ def cli():
     pass


-@cli.command(
-    help="Clean up all census data folders",
-)
+@cli.command(help="Clean up all census data folders",)
 def census_cleanup():
     """CLI command to clean up the census data folder"""

@@ -37,9 +35,7 @@ def census_cleanup():
     logger.info("Cleaned up all census data files")


-@cli.command(
-    help="Clean up all data folders",
-)
+@cli.command(help="Clean up all data folders",)
 def data_cleanup():
     """CLI command to clean up the all the data folders"""

@@ -50,9 +46,7 @@ def data_cleanup():
     logger.info("Cleaned up all data folders")


-@cli.command(
-    help="Census data download",
-)
+@cli.command(help="Census data download",)
 def census_data_download():
     """CLI command to download all census shape files from the Census FTP and extract the geojson
     to generate national and by state Census Block Group CSVs"""
@@ -64,9 +58,7 @@ def census_data_download():
     logger.info("Completed downloading census data")


-@cli.command(
-    help="Run all ETL processes or a specific one",
-)
+@cli.command(help="Run all ETL processes or a specific one",)
 @click.option("-d", "--dataset", required=False, type=str)
 def etl_run(dataset: str):
     """Run a specific or all ETL processes
@@ -81,27 +73,21 @@ def etl_run(dataset: str):
     etl_runner(dataset)


-@cli.command(
-    help="Generate Score",
-)
+@cli.command(help="Generate Score",)
 def score_run():
     """CLI command to generate the score"""

     score_generate()


-@cli.command(
-    help="Generate Geojson files with scores baked in",
-)
+@cli.command(help="Generate Geojson files with scores baked in",)
 def geo_score():
     """CLI command to generate the score"""

     score_geo()


-@cli.command(
-    help="Generate map tiles",
-)
+@cli.command(help="Generate map tiles",)
 def generate_map_tiles():
     """CLI command to generate the map tiles"""

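For reference, here is a self-contained sketch of the click pattern this file uses, with the collapsed decorator style from the new side of the diff; the `click.echo` body is illustrative, not from the repository:

# A self-contained sketch of the click group/command pattern above, assuming
# click is installed. The echo body is illustrative, not from the repository.
import click


@click.group()
def cli():
    pass


@cli.command(help="Clean up all census data folders",)
def census_cleanup():
    """CLI command to clean up the census data folder"""
    click.echo("Cleaned up all census data files")


if __name__ == "__main__":
    cli()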
@@ -27,11 +27,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "census_acs",
             "class_name": "CensusACSETL",
         },
-        {
-            "name": "ejscreen",
-            "module_dir": "ejscreen",
-            "class_name": "EJScreenETL",
-        },
+        {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL",},
         {
             "name": "housing_and_transportation",
             "module_dir": "housing_and_transportation",
@@ -47,17 +43,12 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "calenviroscreen",
             "class_name": "CalEnviroScreenETL",
         },
-        {
-            "name": "hud_recap",
-            "module_dir": "hud_recap",
-            "class_name": "HudRecapETL",
-        },
+        {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL",},
     ]

     if dataset_to_run:
         dataset_element = next(
-            (item for item in dataset_list if item["name"] == dataset_to_run),
-            None,
+            (item for item in dataset_list if item["name"] == dataset_to_run), None,
         )
         if not dataset_list:
             raise ValueError("Invalid dataset name")
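One thing worth flagging in the hunk above: after looking up `dataset_element`, the source guards with `if not dataset_list`, which is always truthy once the registry is populated; presumably the intended check is on `dataset_element`. A minimal sketch of the lookup with that assumed fix (registry entries copied from the diff):

# Registry lookup as in etl_runner, with the guard moved to dataset_element
# (an assumption about the intended logic; the diff checks dataset_list,
# which is always truthy here). Entries are copied from the diff.
dataset_list = [
    {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL"},
    {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL"},
]


def find_dataset(dataset_to_run: str) -> dict:
    dataset_element = next(
        (item for item in dataset_list if item["name"] == dataset_to_run), None
    )
    if dataset_element is None:
        raise ValueError("Invalid dataset name")
    return dataset_element


print(find_dataset("ejscreen")["class_name"])  # EJScreenETL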
@@ -59,9 +59,7 @@ class ScoreETL(ExtractTransformLoad):
         # Load census data
         census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
         self.census_df = pd.read_csv(
-            census_csv,
-            dtype={self.GEOID_FIELD_NAME: "string"},
-            low_memory=False,
+            census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False,
         )

         # Load housing and transportation data
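The `dtype={...: "string"}` argument here (and in the GeoScoreETL hunk below) matters for census data: GEOIDs carry leading zeros that a default numeric parse would silently drop. A small self-contained illustration, with invented GEOID values:

# Why dtype={...: "string"} matters: census GEOIDs have leading zeros that a
# default integer parse would drop. The column name matches the diff; the
# GEOID values are invented for the example.
import io

import pandas as pd

csv_data = "GEOID10,score\n010010201001,0.5\n060750179021,0.7\n"

as_int = pd.read_csv(io.StringIO(csv_data))
as_str = pd.read_csv(io.StringIO(csv_data), dtype={"GEOID10": "string"})

print(as_int["GEOID10"].iloc[0])  # 10010201001  (leading zero lost)
print(as_str["GEOID10"].iloc[0])  # 010010201001 (preserved)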
@@ -123,8 +121,7 @@ class ScoreETL(ExtractTransformLoad):

         # Define a named tuple that will be used for each data set input.
         DataSet = collections.namedtuple(
-            typename="DataSet",
-            field_names=["input_field", "renamed_field", "bucket"],
+            typename="DataSet", field_names=["input_field", "renamed_field", "bucket"],
         )

         data_sets = [
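As a quick illustration of the named tuple defined above (field values taken from the data-set entries later in this diff):

# A quick illustration of the DataSet namedtuple above; the field values are
# copied from the ACSTOTPOP entry later in this diff.
import collections

DataSet = collections.namedtuple(
    typename="DataSet", field_names=["input_field", "renamed_field", "bucket"]
)

ds = DataSet(input_field="ACSTOTPOP", renamed_field="Total population", bucket=None)
print(ds.renamed_field)  # Total population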
@@ -141,9 +138,7 @@ class ScoreETL(ExtractTransformLoad):
                 bucket=None,
             ),
             DataSet(
-                input_field="ACSTOTPOP",
-                renamed_field="Total population",
-                bucket=None,
+                input_field="ACSTOTPOP", renamed_field="Total population", bucket=None,
             ),
             # The following data sets have buckets, because they're used in the score
             DataSet(
@@ -249,9 +244,7 @@ class ScoreETL(ExtractTransformLoad):
         }

         self.df.rename(
-            columns=renaming_dict,
-            inplace=True,
-            errors="raise",
+            columns=renaming_dict, inplace=True, errors="raise",
         )

         columns_to_keep = [data_set.renamed_field for data_set in data_sets]
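`errors="raise"` in `DataFrame.rename` makes a typo in `renaming_dict` fail fast with a `KeyError` instead of being silently ignored. A tiny demonstration, with column names borrowed from this diff:

# errors="raise" makes DataFrame.rename fail fast on a missing source column
# instead of skipping it silently. Column names are borrowed from this diff.
import pandas as pd

df = pd.DataFrame({"ACSTOTPOP": [100, 250]})
df.rename(columns={"ACSTOTPOP": "Total population"}, inplace=True, errors="raise")
print(df.columns.tolist())  # ['Total population']

try:
    df.rename(columns={"no_such_column": "x"}, errors="raise")
except KeyError as err:
    print("caught:", err)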
@@ -46,9 +46,7 @@ class GeoScoreETL(ExtractTransformLoad):

         logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
-            self.TILE_SCORE_CSV,
-            dtype={"GEOID10": "string"},
-            low_memory=False,
+            self.TILE_SCORE_CSV, dtype={"GEOID10": "string"}, low_memory=False,
         )

     def transform(self) -> None:
@@ -70,8 +68,7 @@ class GeoScoreETL(ExtractTransformLoad):
         ].reset_index(drop=True)

         usa_simplified.rename(
-            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
-            inplace=True,
+            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO}, inplace=True,
         )

         logger.info("Aggregating into tracts (~5 minutes)")
@@ -156,4 +153,4 @@ class GeoScoreETL(ExtractTransformLoad):

         logger.info("Writing usa-low (~9 minutes)")
         self.geojson_score_usa_low.to_file(self.SCORE_LOW_GEOJSON, driver="GeoJSON")
-        logger.info("Completed writing usa-low")
\ No newline at end of file
+        logger.info("Completed writing usa-low")
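For context on the write step above, a minimal sketch of writing a GeoDataFrame to GeoJSON, assuming geopandas and shapely are installed; the geometry and score values are invented:

# A minimal GeoJSON write mirroring the to_file call above. Assumes geopandas
# and shapely are installed; the geometry and score values are invented.
import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame(
    {"Score": [0.5]}, geometry=[Point(-122.4, 37.8)], crs="EPSG:4326"
)
gdf.to_file("usa-low.geojson", driver="GeoJSON")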
@@ -43,8 +43,7 @@ class PostScoreETL(ExtractTransformLoad):

     def extract(self) -> None:
         super().extract(
-            self.CENSUS_COUNTIES_ZIP_URL,
-            self.TMP_PATH,
+            self.CENSUS_COUNTIES_ZIP_URL, self.TMP_PATH,
         )

         logger.info("Reading Counties CSV")
@@ -68,8 +67,7 @@ class PostScoreETL(ExtractTransformLoad):
         # rename some of the columns to prepare for merge
         self.counties_df = self.counties_df[["USPS", "GEOID", "NAME"]]
         self.counties_df.rename(
-            columns={"USPS": "State Abbreviation", "NAME": "County Name"},
-            inplace=True,
+            columns={"USPS": "State Abbreviation", "NAME": "County Name"}, inplace=True,
         )

         # remove unnecessary columns
@@ -28,8 +28,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading CalEnviroScreen Data")
         super().extract(
-            self.CALENVIROSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.CALENVIROSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:
@@ -39,9 +39,7 @@ def download_census_csvs(data_path: Path) -> None:
         # But using 2010 for now
         cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
         unzip_file_from_url(
-            cbg_state_url,
-            data_path / "tmp",
-            data_path / "census" / "shp" / fips,
+            cbg_state_url, data_path / "tmp", data_path / "census" / "shp" / fips,
         )

         cmd = (
@@ -80,32 +78,22 @@ def download_census_csvs(data_path: Path) -> None:
             csv_dir_path / f"{state_id}.csv", mode="w", newline=""
         ) as cbg_csv_file:
             cbg_csv_file_writer = csv.writer(
-                cbg_csv_file,
-                delimiter=",",
-                quotechar='"',
-                quoting=csv.QUOTE_MINIMAL,
+                cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
             )

             for geoid10 in geoid10_list:
                 cbg_csv_file_writer.writerow(
-                    [
-                        geoid10,
-                    ]
+                    [geoid10,]
                 )

     ## write US csv
     with open(csv_dir_path / "us.csv", mode="w", newline="") as cbg_csv_file:
         cbg_csv_file_writer = csv.writer(
-            cbg_csv_file,
-            delimiter=",",
-            quotechar='"',
-            quoting=csv.QUOTE_MINIMAL,
+            cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
         )
         for geoid10 in cbg_national:
             cbg_csv_file_writer.writerow(
-                [
-                    geoid10,
-                ]
+                [geoid10,]
             )

     ## create national geojson
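The CSV-writing pattern above, extracted into a self-contained form (writing to an in-memory buffer; the GEOID values are invented):

# The csv.writer pattern from the hunk above, made self-contained by writing
# to an in-memory buffer. The GEOID values are invented for illustration.
import csv
import io

cbg_national = ["010010201001", "060750179021"]

buf = io.StringIO()
cbg_csv_file_writer = csv.writer(
    buf, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
)
for geoid10 in cbg_national:
    cbg_csv_file_writer.writerow([geoid10])

print(buf.getvalue())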
@@ -18,8 +18,7 @@ class EJScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading EJScreen Data")
         super().extract(
-            self.EJSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.EJSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:
@@ -34,8 +34,7 @@ class HudHousingETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Extracting HUD Housing Data")
         super().extract(
-            self.HOUSING_FTP_URL,
-            self.HOUSING_ZIP_FILE_DIR,
+            self.HOUSING_FTP_URL, self.HOUSING_ZIP_FILE_DIR,
         )

     def transform(self) -> None:
@@ -49,10 +48,7 @@ class HudHousingETL(ExtractTransformLoad):
             / "140"
             / "Table8.csv"
         )
-        self.df = pd.read_csv(
-            filepath_or_buffer=tmp_csv_file_path,
-            encoding="latin-1",
-        )
+        self.df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path, encoding="latin-1",)

         # Rename and reformat block group ID
         self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
@@ -70,8 +70,7 @@ class TreeEquityScoreETL(ExtractTransformLoad):
         logger.info("Downloading Tree Equity Score Data")
         for state in self.states:
             super().extract(
-                f"{self.TES_URL}{state}.zip.zip",
-                f"{self.TMP_PATH}/{state}",
+                f"{self.TES_URL}{state}.zip.zip", f"{self.TMP_PATH}/{state}",
             )

     def transform(self) -> None:
@@ -97,10 +97,7 @@ def remove_all_dirs_from_dir(dir_path: Path) -> None:


 def unzip_file_from_url(
-    file_url: str,
-    download_path: Path,
-    unzipped_file_path: Path,
-    verify: bool = False,
+    file_url: str, download_path: Path, unzipped_file_path: Path, verify: bool = False,
 ) -> None:
     """Downloads a zip file from a remote URL location and unzips it in a specific directory, removing the temporary file after

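Finally, since only the signature and docstring of `unzip_file_from_url` appear in this diff, here is a hedged sketch of what a function with that signature plausibly does; the body is an assumption, not the repository's implementation (the `verify` flag likely controls SSL verification in the real code and is left unused here). The various `super().extract(url, path)` calls in the ETL classes above plausibly delegate to a helper like this one.

# A hedged sketch based only on the signature and docstring in the diff; this
# body is an assumption, not the repository's implementation.
import urllib.request
import zipfile
from pathlib import Path


def unzip_file_from_url(
    file_url: str, download_path: Path, unzipped_file_path: Path, verify: bool = False
) -> None:
    """Download a zip file, extract it, and remove the temporary file after."""
    download_path.mkdir(parents=True, exist_ok=True)
    zip_file_path = download_path / "downloaded.zip"
    # The verify flag is unused in this sketch; in the real code it likely
    # controls SSL certificate verification for the download.
    urllib.request.urlretrieve(file_url, zip_file_path)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(unzipped_file_path)
    zip_file_path.unlink()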