Issue 308 python linting (#443)

* Adds flake8, pylint, and liccheck to dependencies for data-pipeline

* Sets up and runs black autoformatting

* Adds flake8 to tox linting

* Fixes flake8 error F541 f string missing placeholders

* Fixes flake8 E501 line too long

* Fixes flake8 F401 imported but not used

* Adds pylint to tox and disables the following pylint errors:
- C0114: module docstrings
- R0201: method could have been a function
- R0903: too few public methods
- C0103: name case styling
- W0511: fix me
- W1203: f-string interpolation in logging

* Adds utils.py to tox.ini linting, runs black on utils.py

* Fixes import related pylint errors: C0411 and C0412

* Fixes or ignores remaining pylint errors (for discussion later)

* Adds safety and liccheck to tox.ini
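The tox.ini changes themselves are not reproduced in the hunks below. As a rough, hypothetical sketch only (the environment name, target paths, and flags are assumptions, not the repository's actual configuration), the lint environment described by the bullets above could be wired up along these lines:

[testenv:lint]
# Illustrative sketch; the real tox.ini from this commit is not shown in this diff.
deps =
    black
    flake8
    pylint
    safety
    liccheck
commands =
    black --check .
    flake8
    # The --disable list mirrors the pylint checks listed above.
    pylint --disable=C0114,R0201,R0903,C0103,W0511,W1203 etl utils.py config.py
    safety check
    liccheck -s liccheck.ini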
Billy Daly, 2021-08-02 12:16:38 -04:00, committed by GitHub
parent 51f7666062
commit 5504528fdf
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 709 additions and 228 deletions

View file

@@ -0,0 +1,7 @@
+[flake8]
+ignore =
+    E266,  # too many leading '#' for block comment
+    W503  # line break before binary operator
+max-line-length = 150
+max-complexity = 18
+select = B,C,E,F,W,T4,B9

View file

@@ -1,7 +1,6 @@
 import click
 from config import settings
-from etl.sources.census.etl_utils import reset_data_directories as census_reset
 from utils import (
     get_module_logger,
     data_folder_cleanup,
@@ -9,6 +8,7 @@ from utils import (
     temp_folder_cleanup,
 )
 from etl.sources.census.etl import download_census_csvs
+from etl.sources.census.etl_utils import reset_data_directories as census_reset
 from etl.runner import etl_runner, score_generate, score_geo
 logger = get_module_logger(__name__)
@@ -30,7 +30,7 @@ def census_cleanup():
     data_path = settings.APP_ROOT / "data"
     # census directories
-    logger.info(f"Initializing all census data")
+    logger.info("Initializing all census data")
     census_reset(data_path)
     logger.info("Cleaned up all census data files")

View file

@@ -1,6 +1,7 @@
-from dynaconf import Dynaconf
 from pathlib import Path
+from dynaconf import Dynaconf
 settings = Dynaconf(
     envvar_prefix="DYNACONF",
     settings_files=["settings.toml", ".secrets.toml"],

View file

@@ -1,11 +1,10 @@
 from pathlib import Path
-import pathlib
 from config import settings
 from utils import unzip_file_from_url, remove_all_from_dir
-class ExtractTransformLoad(object):
+class ExtractTransformLoad:
     """
     A class used to instantiate an ETL object to retrieve and process data from
     datasets.
@@ -34,9 +33,7 @@ class ExtractTransformLoad(object):
         pass
-    def extract(
-        self, source_url: str = None, extract_path: Path = None
-    ) -> None:
+    def extract(self, source_url: str = None, extract_path: Path = None) -> None:
         """Extract the data from
         a remote source. By default it provides code to get the file from a source url,
         unzips it and stores it on an extract_path."""

View file

@@ -67,9 +67,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
     # Run the ETLs for the dataset_list
     for dataset in dataset_list:
-        etl_module = importlib.import_module(
-            f"etl.sources.{dataset['module_dir']}.etl"
-        )
+        etl_module = importlib.import_module(f"etl.sources.{dataset['module_dir']}.etl")
         etl_class = getattr(etl_module, dataset["class_name"])
         etl_instance = etl_class()

View file

@@ -4,7 +4,6 @@ import pandas as pd
 from etl.base import ExtractTransformLoad
 from utils import get_module_logger
-from etl.sources.census.etl_utils import get_state_fips_codes
 logger = get_module_logger(__name__)
@@ -28,10 +27,10 @@ class ScoreETL(ExtractTransformLoad):
         self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)"
         self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
         self.HOUSING_BURDEN_FIELD_NAME = "Housing burden (percent)"
-        self.POVERTY_FIELD_NAME = (
-            "Poverty (Less than 200% of federal poverty line)"
-        )
-        self.HIGH_SCHOOL_FIELD_NAME = "Percent individuals age 25 or over with less than high school degree"
+        self.POVERTY_FIELD_NAME = "Poverty (Less than 200% of federal poverty line)"
+        self.HIGH_SCHOOL_FIELD_NAME = (
+            "Percent individuals age 25 or over with less than high school degree"
+        )
         # There's another aggregation level (a second level of "buckets").
         self.AGGREGATION_POLLUTION = "Pollution Burden"
@@ -55,9 +54,7 @@ class ScoreETL(ExtractTransformLoad):
         self.ejscreen_df = pd.read_csv(
             ejscreen_csv, dtype={"ID": "string"}, low_memory=False
         )
-        self.ejscreen_df.rename(
-            columns={"ID": self.GEOID_FIELD_NAME}, inplace=True
-        )
+        self.ejscreen_df.rename(columns={"ID": self.GEOID_FIELD_NAME}, inplace=True)
         # Load census data
         census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
@@ -69,10 +66,7 @@ class ScoreETL(ExtractTransformLoad):
         # Load housing and transportation data
         housing_and_transportation_index_csv = (
-            self.DATA_PATH
-            / "dataset"
-            / "housing_and_transportation_index"
-            / "usa.csv"
+            self.DATA_PATH / "dataset" / "housing_and_transportation_index" / "usa.csv"
         )
         self.housing_and_transportation_df = pd.read_csv(
             housing_and_transportation_index_csv,
@@ -89,7 +83,7 @@ class ScoreETL(ExtractTransformLoad):
         )
     def transform(self) -> None:
-        logger.info(f"Transforming Score Data")
+        logger.info("Transforming Score Data")
         # Join all the data sources that use census block groups
         census_block_group_dfs = [
@@ -106,10 +100,7 @@ class ScoreETL(ExtractTransformLoad):
         )
         # Sanity check the join.
-        if (
-            len(census_block_group_df[self.GEOID_FIELD_NAME].str.len().unique())
-            != 1
-        ):
+        if len(census_block_group_df[self.GEOID_FIELD_NAME].str.len().unique()) != 1:
             raise ValueError(
                 f"One of the input CSVs uses {self.GEOID_FIELD_NAME} with a different length."
             )
@@ -119,9 +110,9 @@ class ScoreETL(ExtractTransformLoad):
         census_tract_df = self.hud_housing_df
         # Calculate the tract for the CBG data.
-        census_block_group_df[
-            self.GEOID_TRACT_FIELD_NAME
-        ] = census_block_group_df[self.GEOID_FIELD_NAME].str[0:11]
+        census_block_group_df[self.GEOID_TRACT_FIELD_NAME] = census_block_group_df[
+            self.GEOID_FIELD_NAME
+        ].str[0:11]
         self.df = census_block_group_df.merge(
             census_tract_df, on=self.GEOID_TRACT_FIELD_NAME
@@ -254,8 +245,7 @@ class ScoreETL(ExtractTransformLoad):
         # Rename columns:
         renaming_dict = {
-            data_set.input_field: data_set.renamed_field
-            for data_set in data_sets
+            data_set.input_field: data_set.renamed_field for data_set in data_sets
         }
         self.df.rename(
@@ -310,7 +300,7 @@ class ScoreETL(ExtractTransformLoad):
         ) / (max_value - min_value)
         # Graph distributions and correlations.
-        min_max_fields = [
+        min_max_fields = [  # noqa: F841
             f"{data_set.renamed_field}{self.MIN_MAX_FIELD_SUFFIX}"
             for data_set in data_sets
             if data_set.renamed_field != self.GEOID_FIELD_NAME
@@ -324,9 +314,7 @@ class ScoreETL(ExtractTransformLoad):
            ]
         ].mean(axis=1)
         self.df["Score B"] = (
-            self.df[
-                "Poverty (Less than 200% of federal poverty line) (percentile)"
-            ]
+            self.df["Poverty (Less than 200% of federal poverty line) (percentile)"]
             * self.df[
                 "Percent individuals age 25 or over with less than high school degree (percentile)"
             ]
@@ -342,21 +330,26 @@ class ScoreETL(ExtractTransformLoad):
             ]
             self.df[f"{bucket}"] = self.df[fields_in_bucket].mean(axis=1)
-        # Combine the score from the two Exposures and Environmental Effects buckets into a single score called "Pollution Burden". The math for this score is: (1.0 * Exposures Score + 0.5 * Environment Effects score) / 1.5.
+        # Combine the score from the two Exposures and Environmental Effects buckets
+        # into a single score called "Pollution Burden".
+        # The math for this score is:
+        # (1.0 * Exposures Score + 0.5 * Environment Effects score) / 1.5.
         self.df[self.AGGREGATION_POLLUTION] = (
             1.0 * self.df[f"{self.BUCKET_EXPOSURES}"]
             + 0.5 * self.df[f"{self.BUCKET_ENVIRONMENTAL}"]
         ) / 1.5
-        # Average the score from the two Sensitive populations and Socioeconomic factors buckets into a single score called "Population Characteristics".
+        # Average the score from the two Sensitive populations and
+        # Socioeconomic factors buckets into a single score called
+        # "Population Characteristics".
         self.df[self.AGGREGATION_POPULATION] = self.df[
             [f"{self.BUCKET_SENSITIVE}", f"{self.BUCKET_SOCIOECONOMIC}"]
         ].mean(axis=1)
-        # Multiply the "Pollution Burden" score and the "Population Characteristics" together to produce the cumulative impact score.
+        # Multiply the "Pollution Burden" score and the "Population Characteristics"
+        # together to produce the cumulative impact score.
         self.df["Score C"] = (
-            self.df[self.AGGREGATION_POLLUTION]
-            * self.df[self.AGGREGATION_POPULATION]
+            self.df[self.AGGREGATION_POLLUTION] * self.df[self.AGGREGATION_POPULATION]
         )
         if len(census_block_group_df) > 220333:
@@ -371,12 +364,10 @@ class ScoreETL(ExtractTransformLoad):
         ]
         fields_min_max = [
-            f"{field}{self.MIN_MAX_FIELD_SUFFIX}"
-            for field in fields_to_use_in_score
+            f"{field}{self.MIN_MAX_FIELD_SUFFIX}" for field in fields_to_use_in_score
         ]
         fields_percentile = [
-            f"{field}{self.PERCENTILE_FIELD_SUFFIX}"
-            for field in fields_to_use_in_score
+            f"{field}{self.PERCENTILE_FIELD_SUFFIX}" for field in fields_to_use_in_score
         ]
         # Calculate "Score D", which uses min-max normalization
@@ -396,17 +387,22 @@ class ScoreETL(ExtractTransformLoad):
             "Score E",
             "Poverty (Less than 200% of federal poverty line)",
         ]:
-            self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] = self.df[score_field].rank(pct=True)
+            self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] = self.df[
+                score_field
+            ].rank(pct=True)
             for threshold in [0.25, 0.3, 0.35, 0.4]:
                 fraction_converted_to_percent = int(100 * threshold)
-                self.df[f"{score_field} (top {fraction_converted_to_percent}th percentile)"] = (
-                    self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] >= 1 - threshold
-                )
+                self.df[
+                    f"{score_field} (top {fraction_converted_to_percent}th percentile)"
+                ] = (
+                    self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"]
+                    >= 1 - threshold
+                )
     def load(self) -> None:
-        logger.info(f"Saving Score CSV")
+        logger.info("Saving Score CSV")
         # write nationwide csv
         self.SCORE_CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.SCORE_CSV_PATH / f"usa.csv", index=False)
+        self.df.to_csv(self.SCORE_CSV_PATH / "usa.csv", index=False)
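For readers skimming the hunk above: pandas' Series.rank(pct=True) converts a score column into percentile ranks in (0, 1], and each threshold then yields a boolean "top Nth percentile" column. A minimal, self-contained sketch with made-up scores (the column name and values are illustrative, not the ETL's real inputs):

import pandas as pd

# Hypothetical scores; the real ETL reads these from the joined census CSVs.
df = pd.DataFrame({"Score E": [0.12, 0.47, 0.89, 0.33, 0.71]})

# rank(pct=True) maps each value to its percentile rank within the column.
df["Score E (percentile)"] = df["Score E"].rank(pct=True)

for threshold in [0.25, 0.3, 0.35, 0.4]:
    pct = int(100 * threshold)
    # True when a row sits within the top `pct` percent of the distribution.
    df[f"Score E (top {pct}th percentile)"] = (
        df["Score E (percentile)"] >= 1 - threshold
    )

print(df)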

View file

@@ -1,6 +1,7 @@
+import math
 import pandas as pd
 import geopandas as gpd
-import math
 from etl.base import ExtractTransformLoad
 from utils import get_module_logger
@@ -21,9 +22,7 @@ class GeoScoreETL(ExtractTransformLoad):
         self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
         self.TILE_SCORE_CSV = self.SCORE_CSV_PATH / "tiles" / "usa.csv"
-        self.CENSUS_USA_GEOJSON = (
-            self.DATA_PATH / "census" / "geojson" / "us.json"
-        )
+        self.CENSUS_USA_GEOJSON = self.DATA_PATH / "census" / "geojson" / "us.json"
         self.TARGET_SCORE_NAME = "Score E (percentile)"
         self.TARGET_SCORE_RENAME_TO = "E_SCORE"
@@ -36,7 +35,7 @@ class GeoScoreETL(ExtractTransformLoad):
         self.geojson_score_usa_low: gpd.GeoDataFrame
     def extract(self) -> None:
-        logger.info(f"Reading US GeoJSON (~6 minutes)")
+        logger.info("Reading US GeoJSON (~6 minutes)")
         self.geojson_usa_df = gpd.read_file(
             self.CENSUS_USA_GEOJSON,
             dtype={"GEOID10": "string"},
@@ -45,7 +44,7 @@ class GeoScoreETL(ExtractTransformLoad):
         )
         self.geojson_usa_df.head()
-        logger.info(f"Reading score CSV")
+        logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
             self.TILE_SCORE_CSV,
             dtype={"GEOID10": "string"},
@@ -53,11 +52,11 @@ class GeoScoreETL(ExtractTransformLoad):
         )
     def transform(self) -> None:
-        logger.info(f"Pruning Census GeoJSON")
+        logger.info("Pruning Census GeoJSON")
         fields = ["GEOID10", "geometry"]
         self.geojson_usa_df = self.geojson_usa_df[fields]
-        logger.info(f"Merging and compressing score CSV with USA GeoJSON")
+        logger.info("Merging and compressing score CSV with USA GeoJSON")
         self.geojson_score_usa_high = self.score_usa_df.merge(
             self.geojson_usa_df, on="GEOID10", how="left"
         )
@@ -75,7 +74,7 @@ class GeoScoreETL(ExtractTransformLoad):
             inplace=True,
         )
-        logger.info(f"Aggregating into tracts (~5 minutes)")
+        logger.info("Aggregating into tracts (~5 minutes)")
         usa_tracts = self._aggregate_to_tracts(usa_simplified)
         usa_tracts = gpd.GeoDataFrame(
@@ -84,17 +83,15 @@ class GeoScoreETL(ExtractTransformLoad):
             crs="EPSG:4326",
         )
-        logger.info(f"Creating buckets from tracts")
+        logger.info("Creating buckets from tracts")
         usa_bucketed = self._create_buckets_from_tracts(
             usa_tracts, self.NUMBER_OF_BUCKETS
         )
-        logger.info(f"Aggregating buckets")
+        logger.info("Aggregating buckets")
         usa_aggregated = self._aggregate_buckets(usa_bucketed, agg_func="mean")
-        compressed = self._breakup_multipolygons(
-            usa_aggregated, self.NUMBER_OF_BUCKETS
-        )
+        compressed = self._breakup_multipolygons(usa_aggregated, self.NUMBER_OF_BUCKETS)
         self.geojson_score_usa_low = gpd.GeoDataFrame(
             compressed,
@@ -118,9 +115,7 @@ class GeoScoreETL(ExtractTransformLoad):
         # assign tracts to buckets by D_SCORE
         state_tracts.sort_values(self.TARGET_SCORE_RENAME_TO, inplace=True)
         SCORE_bucket = []
-        bucket_size = math.ceil(
-            len(state_tracts.index) / self.NUMBER_OF_BUCKETS
-        )
+        bucket_size = math.ceil(len(state_tracts.index) / self.NUMBER_OF_BUCKETS)
         for i in range(len(state_tracts.index)):
             SCORE_bucket.extend([math.floor(i / bucket_size)])
         state_tracts[f"{self.TARGET_SCORE_RENAME_TO}_bucket"] = SCORE_bucket
@@ -155,14 +150,10 @@ class GeoScoreETL(ExtractTransformLoad):
         return compressed
     def load(self) -> None:
-        logger.info(f"Writing usa-high (~9 minutes)")
-        self.geojson_score_usa_high.to_file(
-            self.SCORE_HIGH_GEOJSON, driver="GeoJSON"
-        )
-        logger.info(f"Completed writing usa-high")
-        logger.info(f"Writing usa-low (~9 minutes)")
-        self.geojson_score_usa_low.to_file(
-            self.SCORE_LOW_GEOJSON, driver="GeoJSON"
-        )
-        logger.info(f"Completed writing usa-low")
+        logger.info("Writing usa-high (~9 minutes)")
+        self.geojson_score_usa_high.to_file(self.SCORE_HIGH_GEOJSON, driver="GeoJSON")
+        logger.info("Completed writing usa-high")
+        logger.info("Writing usa-low (~9 minutes)")
+        self.geojson_score_usa_low.to_file(self.SCORE_LOW_GEOJSON, driver="GeoJSON")
+        logger.info("Completed writing usa-low")

View file

@@ -19,9 +19,7 @@ class PostScoreETL(ExtractTransformLoad):
         self.CENSUS_USA_CSV = self.DATA_PATH / "census" / "csv" / "us.csv"
         self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
-        self.STATE_CSV = (
-            self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
-        )
+        self.STATE_CSV = self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
         self.FULL_SCORE_CSV = self.SCORE_CSV_PATH / "full" / "usa.csv"
         self.TILR_SCORE_CSV = self.SCORE_CSV_PATH / "tile" / "usa.csv"
@@ -49,7 +47,7 @@ class PostScoreETL(ExtractTransformLoad):
             self.TMP_PATH,
         )
-        logger.info(f"Reading Counties CSV")
+        logger.info("Reading Counties CSV")
         self.counties_df = pd.read_csv(
             self.CENSUS_COUNTIES_TXT,
             sep="\t",
@@ -58,16 +56,14 @@ class PostScoreETL(ExtractTransformLoad):
             encoding="latin-1",
         )
-        logger.info(f"Reading States CSV")
+        logger.info("Reading States CSV")
         self.states_df = pd.read_csv(
             self.STATE_CSV, dtype={"fips": "string", "state_code": "string"}
         )
-        self.score_df = pd.read_csv(
-            self.FULL_SCORE_CSV, dtype={"GEOID10": "string"}
-        )
+        self.score_df = pd.read_csv(self.FULL_SCORE_CSV, dtype={"GEOID10": "string"})
     def transform(self) -> None:
-        logger.info(f"Transforming data sources for Score + County CSV")
+        logger.info("Transforming data sources for Score + County CSV")
         # rename some of the columns to prepare for merge
         self.counties_df = self.counties_df[["USPS", "GEOID", "NAME"]]
@@ -101,7 +97,7 @@ class PostScoreETL(ExtractTransformLoad):
         )
         # check if there are census cbgs without score
-        logger.info(f"Removing CBG rows without score")
+        logger.info("Removing CBG rows without score")
         ## load cbgs
         cbg_usa_df = pd.read_csv(
@@ -121,19 +117,19 @@ class PostScoreETL(ExtractTransformLoad):
         null_cbg_df = merged_df[merged_df["Score E (percentile)"].isnull()]
         # subtract data sets
-        removed_df = pd.concat(
-            [merged_df, null_cbg_df, null_cbg_df]
-        ).drop_duplicates(keep=False)
+        removed_df = pd.concat([merged_df, null_cbg_df, null_cbg_df]).drop_duplicates(
+            keep=False
+        )
         # set the score to the new df
         self.score_county_state_merged = removed_df
     def load(self) -> None:
-        logger.info(f"Saving Full Score CSV with County Information")
+        logger.info("Saving Full Score CSV with County Information")
         self.SCORE_CSV_PATH.mkdir(parents=True, exist_ok=True)
         self.score_county_state_merged.to_csv(self.FULL_SCORE_CSV, index=False)
-        logger.info(f"Saving Tile Score CSV")
+        logger.info("Saving Tile Score CSV")
         # TODO: check which are the columns we'll use
         # Related to: https://github.com/usds/justice40-tool/issues/302
         score_tiles = self.score_county_state_merged[self.TILES_SCORE_COLUMNS]

View file

@@ -9,16 +9,12 @@ logger = get_module_logger(__name__)
 class CalEnviroScreenETL(ExtractTransformLoad):
     def __init__(self):
         self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
-        self.CALENVIROSCREEN_CSV = (
-            self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
-        )
+        self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
         self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
         # Defining some variable names
         self.CALENVIROSCREEN_SCORE_FIELD_NAME = "calenviroscreen_score"
-        self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = (
-            "calenviroscreen_percentile"
-        )
+        self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = "calenviroscreen_percentile"
         self.CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = (
             "calenviroscreen_priority_community"
         )
@@ -30,14 +26,14 @@ class CalEnviroScreenETL(ExtractTransformLoad):
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Downloading CalEnviroScreen Data")
+        logger.info("Downloading CalEnviroScreen Data")
         super().extract(
             self.CALENVIROSCREEN_FTP_URL,
             self.TMP_PATH,
         )
     def transform(self) -> None:
-        logger.info(f"Transforming CalEnviroScreen Data")
+        logger.info("Transforming CalEnviroScreen Data")
         # Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:
         # https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip
@@ -67,7 +63,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
         )
     def load(self) -> None:
-        logger.info(f"Saving CalEnviroScreen CSV")
+        logger.info("Saving CalEnviroScreen CSV")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.CSV_PATH / f"data06.csv", index=False)
+        self.df.to_csv(self.CSV_PATH / "data06.csv", index=False)

View file

@@ -1,11 +1,12 @@
-import csv
 import os
+import csv
 import json
 from pathlib import Path
 import geopandas as gpd
-from .etl_utils import get_state_fips_codes
 from utils import unzip_file_from_url, get_module_logger
+from .etl_utils import get_state_fips_codes
 logger = get_module_logger(__name__)
@@ -29,9 +30,7 @@ def download_census_csvs(data_path: Path) -> None:
     for fips in state_fips_codes:
         # check if file exists
-        shp_file_path = (
-            data_path / "census" / "shp" / fips / f"tl_2010_{fips}_bg10.shp"
-        )
+        shp_file_path = data_path / "census" / "shp" / fips / f"tl_2010_{fips}_bg10.shp"
         logger.info(f"Checking if {fips} file exists")
         if not os.path.isfile(shp_file_path):
@@ -110,7 +109,7 @@ def download_census_csvs(data_path: Path) -> None:
     )
     ## create national geojson
-    logger.info(f"Generating national geojson file")
+    logger.info("Generating national geojson file")
     usa_df = gpd.GeoDataFrame()
     for file_name in geojson_dir_path.rglob("*.json"):
@@ -119,7 +118,7 @@ def download_census_csvs(data_path: Path) -> None:
         usa_df = usa_df.append(state_gdf)
     usa_df = usa_df.to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
-    logger.info(f"Writing national geojson file")
+    logger.info("Writing national geojson file")
     usa_df.to_file(geojson_dir_path / "us.json", driver="GeoJSON")
     logger.info("Census block groups downloading complete")

View file

@@ -1,7 +1,8 @@
-from pathlib import Path
-import csv
-import pandas as pd
 import os
+import csv
+from pathlib import Path
+import pandas as pd
 from config import settings
 from utils import (
@@ -35,7 +36,7 @@ def get_state_fips_codes(data_path: Path) -> list:
     # check if file exists
     if not os.path.isfile(fips_csv_path):
-        logger.info(f"Downloading fips from S3 repository")
+        logger.info("Downloading fips from S3 repository")
         unzip_file_from_url(
             settings.AWS_JUSTICE40_DATA_URL + "/Census/fips_states_2010.zip",
             data_path / "tmp",

View file

@@ -11,14 +11,10 @@ logger = get_module_logger(__name__)
 class CensusACSETL(ExtractTransformLoad):
     def __init__(self):
         self.ACS_YEAR = 2019
-        self.OUTPUT_PATH = (
-            self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}"
-        )
+        self.OUTPUT_PATH = self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}"
         self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)"
         self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
-        self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = (
-            "Linguistic isolation (total)"
-        )
+        self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = "Linguistic isolation (total)"
         self.LINGUISTIC_ISOLATION_FIELDS = [
             "C16002_001E",
             "C16002_004E",
@@ -28,9 +24,7 @@ class CensusACSETL(ExtractTransformLoad):
         ]
         self.df: pd.DataFrame
-    def _fips_from_censusdata_censusgeo(
-        self, censusgeo: censusdata.censusgeo
-    ) -> str:
+    def _fips_from_censusdata_censusgeo(self, censusgeo: censusdata.censusgeo) -> str:
         """Create a FIPS code from the proprietary censusgeo index."""
         fips = "".join([value for (key, value) in censusgeo.params()])
         return fips
@@ -38,9 +32,7 @@ class CensusACSETL(ExtractTransformLoad):
     def extract(self) -> None:
         dfs = []
         for fips in get_state_fips_codes(self.DATA_PATH):
-            logger.info(
-                f"Downloading data for state/territory with FIPS code {fips}"
-            )
+            logger.info(f"Downloading data for state/territory with FIPS code {fips}")
             dfs.append(
                 censusdata.download(
@@ -65,13 +57,11 @@ class CensusACSETL(ExtractTransformLoad):
             )
     def transform(self) -> None:
-        logger.info(f"Starting Census ACS Transform")
+        logger.info("Starting Census ACS Transform")
         # Calculate percent unemployment.
         # TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
-        self.df[self.UNEMPLOYED_FIELD_NAME] = (
-            self.df.B23025_005E / self.df.B23025_003E
-        )
+        self.df[self.UNEMPLOYED_FIELD_NAME] = self.df.B23025_005E / self.df.B23025_003E
         # Calculate linguistic isolation.
         individual_limited_english_fields = [
@@ -92,7 +82,7 @@ class CensusACSETL(ExtractTransformLoad):
         self.df[self.LINGUISTIC_ISOLATION_FIELD_NAME].describe()
     def load(self) -> None:
-        logger.info(f"Saving Census ACS Data")
+        logger.info("Saving Census ACS Data")
         # mkdir census
         self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
@@ -108,6 +98,6 @@ class CensusACSETL(ExtractTransformLoad):
         )
     def validate(self) -> None:
-        logger.info(f"Validating Census ACS Data")
+        logger.info("Validating Census ACS Data")
         pass

View file

@@ -8,20 +8,22 @@ logger = get_module_logger(__name__)
 class EJScreenETL(ExtractTransformLoad):
     def __init__(self):
-        self.EJSCREEN_FTP_URL = "https://gaftp.epa.gov/EJSCREEN/2019/EJSCREEN_2019_StatePctile.csv.zip"
+        self.EJSCREEN_FTP_URL = (
+            "https://gaftp.epa.gov/EJSCREEN/2019/EJSCREEN_2019_StatePctile.csv.zip"
+        )
         self.EJSCREEN_CSV = self.TMP_PATH / "EJSCREEN_2019_StatePctiles.csv"
         self.CSV_PATH = self.DATA_PATH / "dataset" / "ejscreen_2019"
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Downloading EJScreen Data")
+        logger.info("Downloading EJScreen Data")
         super().extract(
             self.EJSCREEN_FTP_URL,
             self.TMP_PATH,
         )
     def transform(self) -> None:
-        logger.info(f"Transforming EJScreen Data")
+        logger.info("Transforming EJScreen Data")
         self.df = pd.read_csv(
             self.EJSCREEN_CSV,
             dtype={"ID": "string"},
@@ -31,7 +33,7 @@ class EJScreenETL(ExtractTransformLoad):
         )
     def load(self) -> None:
-        logger.info(f"Saving EJScreen CSV")
+        logger.info("Saving EJScreen CSV")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.CSV_PATH / f"usa.csv", index=False)
+        self.df.to_csv(self.CSV_PATH / "usa.csv", index=False)

View file

@@ -35,9 +35,7 @@ class HousingTransportationETL(ExtractTransformLoad):
             )
             # New file name:
-            tmp_csv_file_path = (
-                zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
-            )
+            tmp_csv_file_path = zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
             tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
             dfs.append(tmp_df)
@@ -45,16 +43,16 @@ class HousingTransportationETL(ExtractTransformLoad):
         self.df = pd.concat(dfs)
     def transform(self) -> None:
-        logger.info(f"Transforming Housing and Transportation Data")
+        logger.info("Transforming Housing and Transportation Data")
         # Rename and reformat block group ID
         self.df.rename(columns={"blkgrp": self.GEOID_FIELD_NAME}, inplace=True)
-        self.df[self.GEOID_FIELD_NAME] = self.df[
-            self.GEOID_FIELD_NAME
-        ].str.replace('"', "")
+        self.df[self.GEOID_FIELD_NAME] = self.df[self.GEOID_FIELD_NAME].str.replace(
+            '"', ""
+        )
     def load(self) -> None:
-        logger.info(f"Saving Housing and Transportation Data")
+        logger.info("Saving Housing and Transportation Data")
         self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
         self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)

View file

@@ -1,8 +1,7 @@
 import pandas as pd
 from etl.base import ExtractTransformLoad
-from etl.sources.census.etl_utils import get_state_fips_codes
-from utils import get_module_logger, unzip_file_from_url, remove_all_from_dir
+from utils import get_module_logger
 logger = get_module_logger(__name__)
@@ -11,33 +10,37 @@ class HudHousingETL(ExtractTransformLoad):
     def __init__(self):
         self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "hud_housing"
         self.GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT"
-        self.HOUSING_FTP_URL = "https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip"
+        self.HOUSING_FTP_URL = (
+            "https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip"
+        )
         self.HOUSING_ZIP_FILE_DIR = self.TMP_PATH / "hud_housing"
         # We measure households earning less than 80% of HUD Area Median Family Income by county
         # and paying greater than 30% of their income to housing costs.
         self.HOUSING_BURDEN_FIELD_NAME = "Housing burden (percent)"
         self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME = "HOUSING_BURDEN_NUMERATOR"
-        self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = (
-            "HOUSING_BURDEN_DENOMINATOR"
-        )
+        self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = "HOUSING_BURDEN_DENOMINATOR"
         # Note: some variable definitions.
         # HUD-adjusted median family income (HAMFI).
-        # The four housing problems are: incomplete kitchen facilities, incomplete plumbing facilities, more than 1 person per room, and cost burden greater than 30%.
+        # The four housing problems are:
+        # - incomplete kitchen facilities,
+        # - incomplete plumbing facilities,
+        # - more than 1 person per room,
+        # - cost burden greater than 30%.
         # Table 8 is the desired table.
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Extracting HUD Housing Data")
+        logger.info("Extracting HUD Housing Data")
         super().extract(
             self.HOUSING_FTP_URL,
             self.HOUSING_ZIP_FILE_DIR,
         )
     def transform(self) -> None:
-        logger.info(f"Transforming HUD Housing Data")
+        logger.info("Transforming HUD Housing Data")
         # New file name:
         tmp_csv_file_path = (
@@ -53,9 +56,7 @@ class HudHousingETL(ExtractTransformLoad):
         )
         # Rename and reformat block group ID
-        self.df.rename(
-            columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True
-        )
+        self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
         # The CHAS data has census tract ids such as `14000US01001020100`
         # Whereas the rest of our data uses, for the same tract, `01001020100`.
@@ -70,69 +71,177 @@ class HudHousingETL(ExtractTransformLoad):
         # Owner occupied numerator fields
         OWNER_OCCUPIED_NUMERATOR_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est7 Subtotal Owner occupied less than or equal to 30% of HAMFI greater than 30% but less than or equal to 50% All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
             "T8_est7",
-            # T8_est10 Subtotal Owner occupied less than or equal to 30% of HAMFI greater than 50% All
+            # Subtotal
+            # Owner occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
             "T8_est10",
-            # T8_est20 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Owner occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 50%
+            # All
            "T8_est20",
-            # T8_est23 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI greater than 50% All
+            # Subtotal
+            # Owner occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est23",
-            # T8_est33 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Owner occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 50%
+            # All
            "T8_est33",
-            # T8_est36 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI greater than 50% All
+            # Subtotal
+            # Owner occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est36",
+            # Subtotal
+            # Owner occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 50%
+            # All
        ]
        # These rows have the values where HAMFI was not computed, b/c of no or negative income.
        OWNER_OCCUPIED_NOT_COMPUTED_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est13 Subtotal Owner occupied less than or equal to 30% of HAMFI not computed (no/negative income) All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
            "T8_est13",
-            # T8_est26 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # less than or equal to 30% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est26",
-            # T8_est39 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est39",
-            # T8_est52 Subtotal Owner occupied greater than 80% but less than or equal to 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est52",
-            # T8_est65 Subtotal Owner occupied greater than 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # greater than 80% but less than or equal to 100% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est65",
+            # Subtotal
+            # Owner occupied
+            # greater than 100% of HAMFI
+            # not computed (no/negative income)
+            # All
        ]
-        # T8_est2 Subtotal Owner occupied All All All
        OWNER_OCCUPIED_POPULATION_FIELD = "T8_est2"
+        # Subtotal
+        # Owner occupied
+        # All
+        # All
+        # All
        # Renter occupied numerator fields
        RENTER_OCCUPIED_NUMERATOR_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est73 Subtotal Renter occupied less than or equal to 30% of HAMFI greater than 30% but less than or equal to 50% All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
            "T8_est73",
-            # T8_est76 Subtotal Renter occupied less than or equal to 30% of HAMFI greater than 50% All
+            # Subtotal
+            # Renter occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est76",
-            # T8_est86 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Renter occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 50%
+            # All
            "T8_est86",
-            # T8_est89 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI greater than 50% All
+            # Subtotal
+            # Renter occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est89",
-            # T8_est99 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Renter occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 50%
+            # All
            "T8_est99",
-            # T8_est102 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI greater than 50% All
+            # Subtotal
+            # Renter occupied greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est102",
+            # Subtotal
+            # Renter occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 50%
+            # All
        ]
        # These rows have the values where HAMFI was not computed, b/c of no or negative income.
        RENTER_OCCUPIED_NOT_COMPUTED_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est79 Subtotal Renter occupied less than or equal to 30% of HAMFI not computed (no/negative income) All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
            "T8_est79",
-            # T8_est92 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied less than or equal to 30% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est92",
-            # T8_est105 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied greater than 30% but less than or equal to 50% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est105",
-            # T8_est118 Subtotal Renter occupied greater than 80% but less than or equal to 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est118",
-            # T8_est131 Subtotal Renter occupied greater than 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied greater than 80% but less than or equal to 100% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est131",
+            # Subtotal
+            # Renter occupied
+            # greater than 100% of HAMFI
+            # not computed (no/negative income)
+            # All
        ]
        # T8_est68 Subtotal Renter occupied All All All
@@ -165,14 +274,12 @@ class HudHousingETL(ExtractTransformLoad):
        # TODO: add small sample size checks
        self.df[self.HOUSING_BURDEN_FIELD_NAME] = self.df[
            self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME
-        ].astype(float) / self.df[
-            self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME
-        ].astype(
+        ].astype(float) / self.df[self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME].astype(
            float
        )
    def load(self) -> None:
-        logger.info(f"Saving HUD Housing Data")
+        logger.info("Saving HUD Housing Data")
        self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

View file

@@ -9,7 +9,8 @@ logger = get_module_logger(__name__)
 class HudRecapETL(ExtractTransformLoad):
     def __init__(self):
-        self.HUD_RECAP_CSV_URL = "https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326"
+        # pylint: disable=line-too-long
+        self.HUD_RECAP_CSV_URL = "https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326"  # noqa: E501
         self.HUD_RECAP_CSV = (
             self.TMP_PATH
             / "Racially_or_Ethnically_Concentrated_Areas_of_Poverty__R_ECAPs_.csv"
@@ -22,7 +23,7 @@ class HudRecapETL(ExtractTransformLoad):
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Downloading HUD Recap Data")
+        logger.info("Downloading HUD Recap Data")
         download = requests.get(self.HUD_RECAP_CSV_URL, verify=None)
         file_contents = download.content
         csv_file = open(self.HUD_RECAP_CSV, "wb")
@@ -30,7 +31,7 @@ class HudRecapETL(ExtractTransformLoad):
         csv_file.close()
     def transform(self) -> None:
-        logger.info(f"Transforming HUD Recap Data")
+        logger.info("Transforming HUD Recap Data")
         # Load comparison index (CalEnviroScreen 4)
         self.df = pd.read_csv(self.HUD_RECAP_CSV, dtype={"GEOID": "string"})
@@ -57,7 +58,7 @@ class HudRecapETL(ExtractTransformLoad):
         self.df.sort_values(by=self.GEOID_TRACT_FIELD_NAME, inplace=True)
     def load(self) -> None:
-        logger.info(f"Saving HUD Recap CSV")
+        logger.info("Saving HUD Recap CSV")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.CSV_PATH / f"usa.csv", index=False)
+        self.df.to_csv(self.CSV_PATH / "usa.csv", index=False)

View file

@@ -3,25 +3,72 @@ import geopandas as gpd
 from etl.base import ExtractTransformLoad
 from utils import get_module_logger
-import os
 logger = get_module_logger(__name__)
 class TreeEquityScoreETL(ExtractTransformLoad):
     def __init__(self):
-        self.TES_URL = "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/"
+        self.TES_URL = (
+            "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/"
+        )
         self.TES_CSV = self.TMP_PATH / "tes_2021_data.csv"
         self.CSV_PATH = self.DATA_PATH / "dataset" / "tree_equity_score"
         self.df: gpd.GeoDataFrame
-        self.states = ["al", "az", "ar", "ca", "co", "ct", "de", "dc", "fl",
-            "ga", "id", "il", "in", "ia", "ks", "ky", "la", "me",
-            "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh",
-            "nj", "nm", "ny", "nc", "nd", "oh", "ok", "or", "pa",
-            "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy"]
+        self.states = [
+            "al",
+            "az",
+            "ar",
+            "ca",
+            "co",
+            "ct",
+            "de",
+            "dc",
+            "fl",
+            "ga",
+            "id",
+            "il",
+            "in",
+            "ia",
+            "ks",
+            "ky",
+            "la",
+            "me",
+            "md",
+            "ma",
+            "mi",
+            "mn",
+            "ms",
+            "mo",
+            "mt",
+            "ne",
+            "nv",
+            "nh",
+            "nj",
+            "nm",
+            "ny",
+            "nc",
+            "nd",
+            "oh",
+            "ok",
+            "or",
+            "pa",
+            "ri",
+            "sc",
+            "sd",
+            "tn",
+            "tx",
+            "ut",
+            "vt",
+            "va",
+            "wa",
+            "wv",
+            "wi",
+            "wy",
+        ]
     def extract(self) -> None:
-        logger.info(f"Downloading Tree Equity Score Data")
+        logger.info("Downloading Tree Equity Score Data")
         for state in self.states:
             super().extract(
                 f"{self.TES_URL}{state}.zip.zip",
@@ -29,14 +76,14 @@ class TreeEquityScoreETL(ExtractTransformLoad):
             )
     def transform(self) -> None:
-        logger.info(f"Transforming Tree Equity Score Data")
+        logger.info("Transforming Tree Equity Score Data")
         tes_state_dfs = []
         for state in self.states:
             tes_state_dfs.append(gpd.read_file(f"{self.TMP_PATH}/{state}/{state}.shp"))
         self.df = gpd.GeoDataFrame(pd.concat(tes_state_dfs), crs=tes_state_dfs[0].crs)
     def load(self) -> None:
-        logger.info(f"Saving Tree Equity Score GeoJSON")
+        logger.info("Saving Tree Equity Score GeoJSON")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_file(self.CSV_PATH / "tes_conus.geojson", driver='GeoJSON')
+        self.df.to_file(self.CSV_PATH / "tes_conus.geojson", driver="GeoJSON")

View file

@@ -31,6 +31,20 @@ dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest", "sphinx", "wheel", "p
docs = ["sphinx"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest"]
[[package]]
name = "astroid"
version = "2.6.5"
description = "An abstract syntax tree for Python with inference support."
category = "dev"
optional = false
python-versions = "~=3.6"
[package.dependencies]
lazy-object-proxy = ">=1.4.0"
typed-ast = {version = ">=1.4.0,<1.5", markers = "implementation_name == \"cpython\" and python_version < \"3.8\""}
typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""}
wrapt = ">=1.11,<1.13"
[[package]]
name = "async-generator"
version = "1.10"
@@ -203,6 +217,18 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "configparser"
version = "5.0.2"
description = "Updated configparser from Python 3.8 for Python 2.6+."
category = "dev"
optional = false
python-versions = ">=3.6"
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-cov", "pytest-enabler", "pytest-black (>=0.3.7)", "pytest-mypy"]
[[package]]
name = "debugpy"
version = "1.4.0"
@@ -235,6 +261,22 @@ category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "dparse"
version = "0.5.1"
description = "A parser for Python dependency files"
category = "dev"
optional = false
python-versions = ">=3.5"
[package.dependencies]
packaging = "*"
pyyaml = "*"
toml = "*"
[package.extras]
pipenv = ["pipenv"]
[[package]]
name = "dynaconf"
version = "3.1.4"
@@ -291,6 +333,20 @@ calc = ["shapely"]
s3 = ["boto3 (>=1.2.4)"]
test = ["pytest (>=3)", "pytest-cov", "boto3 (>=1.2.4)", "mock"]
[[package]]
name = "flake8"
version = "3.9.2"
description = "the modular source code checker: pep8 pyflakes and co"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
[package.dependencies]
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
mccabe = ">=0.6.0,<0.7.0"
pycodestyle = ">=2.7.0,<2.8.0"
pyflakes = ">=2.3.0,<2.4.0"
[[package]]
name = "geopandas"
version = "0.9.0"
@@ -409,6 +465,20 @@ widgetsnbextension = ">=3.5.0,<3.6.0"
[package.extras]
test = ["pytest (>=3.6.0)", "pytest-cov", "mock"]
[[package]]
name = "isort"
version = "5.9.3"
description = "A Python utility / library to sort Python imports."
category = "dev"
optional = false
python-versions = ">=3.6.1,<4.0"
[package.extras]
pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
requirements_deprecated_finder = ["pipreqs", "pip-api"]
colors = ["colorama (>=0.4.3,<0.5.0)"]
plugins = ["setuptools"]
[[package]]
name = "jedi"
version = "0.18.0"
@@ -625,6 +695,27 @@ category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "lazy-object-proxy"
version = "1.6.0"
description = "A fast and thorough lazy object proxy."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[[package]]
name = "liccheck"
version = "0.6.2"
description = "Check python packages from requirement.txt and report issues"
category = "dev"
optional = false
python-versions = ">=2.7"
[package.dependencies]
configparser = {version = "*", markers = "python_version >= \"3.4\""}
semantic-version = ">=2.7.0"
toml = "*"
[[package]]
name = "lxml"
version = "4.6.3"
@@ -658,6 +749,14 @@ python-versions = ">=3.5"
[package.dependencies]
traitlets = "*"
[[package]]
name = "mccabe"
version = "0.6.1"
description = "McCabe checker, plugin for flake8"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "mistune"
version = "0.8.4"
@@ -954,6 +1053,14 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pycodestyle"
version = "2.7.0"
description = "Python style guide checker"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pycparser"
version = "2.20"
@@ -962,6 +1069,14 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pyflakes"
version = "2.3.1"
description = "passive checker of Python programs"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pygments"
version = "2.9.0"
@@ -970,6 +1085,21 @@ category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "pylint"
version = "2.9.6"
description = "python code static checker"
category = "dev"
optional = false
python-versions = "~=3.6"
[package.dependencies]
astroid = ">=2.6.5,<2.7"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
isort = ">=4.2.5,<6"
mccabe = ">=0.6,<0.7"
toml = ">=0.7.1"
[[package]] [[package]]
name = "pyparsing" name = "pyparsing"
version = "2.4.7" version = "2.4.7"
@ -1108,6 +1238,28 @@ urllib3 = ">=1.21.1,<1.27"
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "safety"
version = "1.10.3"
description = "Checks installed dependencies for known vulnerabilities."
category = "dev"
optional = false
python-versions = ">=3.5"
[package.dependencies]
Click = ">=6.0"
dparse = ">=0.5.1"
packaging = "*"
requests = "*"
[[package]]
name = "semantic-version"
version = "2.8.5"
description = "A library implementing the 'SemVer' scheme."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]] [[package]]
name = "send2trash" name = "send2trash"
version = "1.7.1" version = "1.7.1"
@ -1312,6 +1464,14 @@ python-versions = "*"
[package.dependencies] [package.dependencies]
notebook = ">=4.4.1" notebook = ">=4.4.1"
[[package]]
name = "wrapt"
version = "1.12.1"
description = "Module for decorators, wrappers and monkey patching."
category = "dev"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "zipp" name = "zipp"
version = "3.5.0" version = "3.5.0"
@ -1327,7 +1487,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.7.1" python-versions = "^3.7.1"
content-hash = "e6692af9b40f2508a858739de08cb9c1a2e86b54a219b8196ca736981a61ce4d" content-hash = "705b0cf25d9ecd3028ba5b71581b5139608cb3b0b4d13c4817b4f3a49643308c"
[metadata.files] [metadata.files]
appdirs = [ appdirs = [
@ -1362,6 +1522,10 @@ argon2-cffi = [
{file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"}, {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"},
{file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"}, {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"},
] ]
astroid = [
{file = "astroid-2.6.5-py3-none-any.whl", hash = "sha256:7b963d1c590d490f60d2973e57437115978d3a2529843f160b5003b721e1e925"},
{file = "astroid-2.6.5.tar.gz", hash = "sha256:83e494b02d75d07d4e347b27c066fd791c0c74fc96c613d1ea3de0c82c48168f"},
]
async-generator = [ async-generator = [
{file = "async_generator-1.10-py3-none-any.whl", hash = "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b"}, {file = "async_generator-1.10-py3-none-any.whl", hash = "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b"},
{file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"}, {file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"},
@ -1460,6 +1624,10 @@ colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
] ]
configparser = [
{file = "configparser-5.0.2-py3-none-any.whl", hash = "sha256:af59f2cdd7efbdd5d111c1976ecd0b82db9066653362f0962d7bf1d3ab89a1fa"},
{file = "configparser-5.0.2.tar.gz", hash = "sha256:85d5de102cfe6d14a5172676f09d19c465ce63d6019cf0a4ef13385fc535e828"},
]
debugpy = [ debugpy = [
{file = "debugpy-1.4.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:55d12ee03b3b705af5250b8344a87fbd9bb720d00bd9d281d2998dbf9f60c8d3"}, {file = "debugpy-1.4.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:55d12ee03b3b705af5250b8344a87fbd9bb720d00bd9d281d2998dbf9f60c8d3"},
{file = "debugpy-1.4.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:712ef6a4be1ee4b9a954c6f36788ac12686dc1d5eeef501e0b81e1c89c16484d"}, {file = "debugpy-1.4.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:712ef6a4be1ee4b9a954c6f36788ac12686dc1d5eeef501e0b81e1c89c16484d"},
@ -1530,6 +1698,10 @@ distlib = [
{file = "distlib-0.3.2-py2.py3-none-any.whl", hash = "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"}, {file = "distlib-0.3.2-py2.py3-none-any.whl", hash = "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"},
{file = "distlib-0.3.2.zip", hash = "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736"}, {file = "distlib-0.3.2.zip", hash = "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736"},
] ]
dparse = [
{file = "dparse-0.5.1-py3-none-any.whl", hash = "sha256:e953a25e44ebb60a5c6efc2add4420c177f1d8404509da88da9729202f306994"},
{file = "dparse-0.5.1.tar.gz", hash = "sha256:a1b5f169102e1c894f9a7d5ccf6f9402a836a5d24be80a986c7ce9eaed78f367"},
]
dynaconf = [ dynaconf = [
{file = "dynaconf-3.1.4-py2.py3-none-any.whl", hash = "sha256:e6f383b84150b70fc439c8b2757581a38a58d07962aa14517292dcce1a77e160"}, {file = "dynaconf-3.1.4-py2.py3-none-any.whl", hash = "sha256:e6f383b84150b70fc439c8b2757581a38a58d07962aa14517292dcce1a77e160"},
{file = "dynaconf-3.1.4.tar.gz", hash = "sha256:b2f472d83052f809c5925565b8a2ba76a103d5dc1dbb9748b693ed67212781b9"}, {file = "dynaconf-3.1.4.tar.gz", hash = "sha256:b2f472d83052f809c5925565b8a2ba76a103d5dc1dbb9748b693ed67212781b9"},
@ -1553,6 +1725,10 @@ fiona = [
{file = "Fiona-1.8.20-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e72e4a5b84ec410be531d4fe4c1a5c87c6c0e92d01116c145c0f1b33f81c8080"}, {file = "Fiona-1.8.20-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e72e4a5b84ec410be531d4fe4c1a5c87c6c0e92d01116c145c0f1b33f81c8080"},
{file = "Fiona-1.8.20.tar.gz", hash = "sha256:a70502d2857b82f749c09cb0dea3726787747933a2a1599b5ab787d74e3c143b"}, {file = "Fiona-1.8.20.tar.gz", hash = "sha256:a70502d2857b82f749c09cb0dea3726787747933a2a1599b5ab787d74e3c143b"},
] ]
flake8 = [
{file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"},
{file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"},
]
geopandas = [ geopandas = [
{file = "geopandas-0.9.0-py2.py3-none-any.whl", hash = "sha256:79f6e557ba0dba76eec44f8351b1c6b42a17c38f5f08fef347e98fe4dae563c7"}, {file = "geopandas-0.9.0-py2.py3-none-any.whl", hash = "sha256:79f6e557ba0dba76eec44f8351b1c6b42a17c38f5f08fef347e98fe4dae563c7"},
{file = "geopandas-0.9.0.tar.gz", hash = "sha256:63972ab4dc44c4029f340600dcb83264eb8132dd22b104da0b654bef7f42630a"}, {file = "geopandas-0.9.0.tar.gz", hash = "sha256:63972ab4dc44c4029f340600dcb83264eb8132dd22b104da0b654bef7f42630a"},
@ -1581,6 +1757,10 @@ ipywidgets = [
{file = "ipywidgets-7.6.3-py2.py3-none-any.whl", hash = "sha256:e6513cfdaf5878de30f32d57f6dc2474da395a2a2991b94d487406c0ab7f55ca"}, {file = "ipywidgets-7.6.3-py2.py3-none-any.whl", hash = "sha256:e6513cfdaf5878de30f32d57f6dc2474da395a2a2991b94d487406c0ab7f55ca"},
{file = "ipywidgets-7.6.3.tar.gz", hash = "sha256:9f1a43e620530f9e570e4a493677d25f08310118d315b00e25a18f12913c41f0"}, {file = "ipywidgets-7.6.3.tar.gz", hash = "sha256:9f1a43e620530f9e570e4a493677d25f08310118d315b00e25a18f12913c41f0"},
] ]
isort = [
{file = "isort-5.9.3-py3-none-any.whl", hash = "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2"},
{file = "isort-5.9.3.tar.gz", hash = "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899"},
]
jedi = [ jedi = [
{file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"}, {file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"},
{file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"}, {file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"},
@ -1636,6 +1816,34 @@ jupyterlab-widgets = [
{file = "jupyterlab_widgets-1.0.0-py3-none-any.whl", hash = "sha256:caeaf3e6103180e654e7d8d2b81b7d645e59e432487c1d35a41d6d3ee56b3fef"}, {file = "jupyterlab_widgets-1.0.0-py3-none-any.whl", hash = "sha256:caeaf3e6103180e654e7d8d2b81b7d645e59e432487c1d35a41d6d3ee56b3fef"},
{file = "jupyterlab_widgets-1.0.0.tar.gz", hash = "sha256:5c1a29a84d3069208cb506b10609175b249b6486d6b1cbae8fcde2a11584fb78"}, {file = "jupyterlab_widgets-1.0.0.tar.gz", hash = "sha256:5c1a29a84d3069208cb506b10609175b249b6486d6b1cbae8fcde2a11584fb78"},
] ]
lazy-object-proxy = [
{file = "lazy-object-proxy-1.6.0.tar.gz", hash = "sha256:489000d368377571c6f982fba6497f2aa13c6d1facc40660963da62f5c379726"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:c6938967f8528b3668622a9ed3b31d145fab161a32f5891ea7b84f6b790be05b"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27m-win32.whl", hash = "sha256:ebfd274dcd5133e0afae738e6d9da4323c3eb021b3e13052d8cbd0e457b1256e"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27m-win_amd64.whl", hash = "sha256:ed361bb83436f117f9917d282a456f9e5009ea12fd6de8742d1a4752c3017e93"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d900d949b707778696fdf01036f58c9876a0d8bfe116e8d220cfd4b15f14e741"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:5743a5ab42ae40caa8421b320ebf3a998f89c85cdc8376d6b2e00bd12bd1b587"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:bf34e368e8dd976423396555078def5cfc3039ebc6fc06d1ae2c5a65eebbcde4"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-win32.whl", hash = "sha256:b579f8acbf2bdd9ea200b1d5dea36abd93cabf56cf626ab9c744a432e15c815f"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:4f60460e9f1eb632584c9685bccea152f4ac2130e299784dbaf9fae9f49891b3"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:d7124f52f3bd259f510651450e18e0fd081ed82f3c08541dffc7b94b883aa981"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:22ddd618cefe54305df49e4c069fa65715be4ad0e78e8d252a33debf00f6ede2"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-win32.whl", hash = "sha256:9d397bf41caad3f489e10774667310d73cb9c4258e9aed94b9ec734b34b495fd"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a5045889cc2729033b3e604d496c2b6f588c754f7a62027ad4437a7ecc4837"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:17e0967ba374fc24141738c69736da90e94419338fd4c7c7bef01ee26b339653"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:410283732af311b51b837894fa2f24f2c0039aa7f220135192b38fcc42bd43d3"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-win32.whl", hash = "sha256:85fb7608121fd5621cc4377a8961d0b32ccf84a7285b4f1d21988b2eae2868e8"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:d1c2676e3d840852a2de7c7d5d76407c772927addff8d742b9808fe0afccebdf"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:b865b01a2e7f96db0c5d12cfea590f98d8c5ba64ad222300d93ce6ff9138bcad"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4732c765372bd78a2d6b2150a6e99d00a78ec963375f236979c0626b97ed8e43"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:9698110e36e2df951c7c36b6729e96429c9c32b3331989ef19976592c5f3c77a"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-win32.whl", hash = "sha256:1fee665d2638491f4d6e55bd483e15ef21f6c8c2095f235fef72601021e64f61"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:f5144c75445ae3ca2057faac03fda5a902eff196702b0a24daf1d6ce0650514b"},
]
liccheck = [
{file = "liccheck-0.6.2-py2.py3-none-any.whl", hash = "sha256:e6583fc327126695a31a7ed8941e784ecd5c84bb2aecbe2782d925cac5c3fe47"},
{file = "liccheck-0.6.2.tar.gz", hash = "sha256:5667be7c9ef6496bd381e709e938e9fe51c31d601afc44965615cdfbce375eab"},
]
lxml = [ lxml = [
{file = "lxml-4.6.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2"}, {file = "lxml-4.6.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2"},
{file = "lxml-4.6.3-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f"}, {file = "lxml-4.6.3-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f"},
@ -1724,6 +1932,10 @@ matplotlib-inline = [
{file = "matplotlib-inline-0.1.2.tar.gz", hash = "sha256:f41d5ff73c9f5385775d5c0bc13b424535c8402fe70ea8210f93e11f3683993e"}, {file = "matplotlib-inline-0.1.2.tar.gz", hash = "sha256:f41d5ff73c9f5385775d5c0bc13b424535c8402fe70ea8210f93e11f3683993e"},
{file = "matplotlib_inline-0.1.2-py3-none-any.whl", hash = "sha256:5cf1176f554abb4fa98cb362aa2b55c500147e4bdbb07e3fda359143e1da0811"}, {file = "matplotlib_inline-0.1.2-py3-none-any.whl", hash = "sha256:5cf1176f554abb4fa98cb362aa2b55c500147e4bdbb07e3fda359143e1da0811"},
] ]
mccabe = [
{file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
{file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
]
mistune = [ mistune = [
{file = "mistune-0.8.4-py2.py3-none-any.whl", hash = "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"}, {file = "mistune-0.8.4-py2.py3-none-any.whl", hash = "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"},
{file = "mistune-0.8.4.tar.gz", hash = "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e"}, {file = "mistune-0.8.4.tar.gz", hash = "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e"},
@ -1879,14 +2091,26 @@ py = [
{file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"}, {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"},
{file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"}, {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"},
] ]
pycodestyle = [
{file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
{file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
]
pycparser = [ pycparser = [
{file = "pycparser-2.20-py2.py3-none-any.whl", hash = "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"}, {file = "pycparser-2.20-py2.py3-none-any.whl", hash = "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"},
{file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"}, {file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"},
] ]
pyflakes = [
{file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"},
{file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"},
]
pygments = [ pygments = [
{file = "Pygments-2.9.0-py3-none-any.whl", hash = "sha256:d66e804411278594d764fc69ec36ec13d9ae9147193a1740cd34d272ca383b8e"}, {file = "Pygments-2.9.0-py3-none-any.whl", hash = "sha256:d66e804411278594d764fc69ec36ec13d9ae9147193a1740cd34d272ca383b8e"},
{file = "Pygments-2.9.0.tar.gz", hash = "sha256:a18f47b506a429f6f4b9df81bb02beab9ca21d0a5fee38ed15aef65f0545519f"}, {file = "Pygments-2.9.0.tar.gz", hash = "sha256:a18f47b506a429f6f4b9df81bb02beab9ca21d0a5fee38ed15aef65f0545519f"},
] ]
pylint = [
{file = "pylint-2.9.6-py3-none-any.whl", hash = "sha256:2e1a0eb2e8ab41d6b5dbada87f066492bb1557b12b76c47c2ee8aa8a11186594"},
{file = "pylint-2.9.6.tar.gz", hash = "sha256:8b838c8983ee1904b2de66cce9d0b96649a91901350e956d78f289c3bc87b48e"},
]
pyparsing = [ pyparsing = [
{file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
{file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"},
@ -2082,6 +2306,14 @@ requests = [
{file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
] ]
safety = [
{file = "safety-1.10.3-py2.py3-none-any.whl", hash = "sha256:5f802ad5df5614f9622d8d71fedec2757099705c2356f862847c58c6dfe13e84"},
{file = "safety-1.10.3.tar.gz", hash = "sha256:30e394d02a20ac49b7f65292d19d38fa927a8f9582cdfd3ad1adbbc66c641ad5"},
]
semantic-version = [
{file = "semantic_version-2.8.5-py2.py3-none-any.whl", hash = "sha256:45e4b32ee9d6d70ba5f440ec8cc5221074c7f4b0e8918bdab748cc37912440a9"},
{file = "semantic_version-2.8.5.tar.gz", hash = "sha256:d2cb2de0558762934679b9a104e82eca7af448c9f4974d1f3eeccff651df8a54"},
]
send2trash = [ send2trash = [
{file = "Send2Trash-1.7.1-py3-none-any.whl", hash = "sha256:c20fee8c09378231b3907df9c215ec9766a84ee20053d99fbad854fe8bd42159"}, {file = "Send2Trash-1.7.1-py3-none-any.whl", hash = "sha256:c20fee8c09378231b3907df9c215ec9766a84ee20053d99fbad854fe8bd42159"},
{file = "Send2Trash-1.7.1.tar.gz", hash = "sha256:17730aa0a33ab82ed6ca76be3bb25f0433d0014f1ccf63c979bab13a5b9db2b2"}, {file = "Send2Trash-1.7.1.tar.gz", hash = "sha256:17730aa0a33ab82ed6ca76be3bb25f0433d0014f1ccf63c979bab13a5b9db2b2"},
@ -2243,6 +2475,9 @@ widgetsnbextension = [
{file = "widgetsnbextension-3.5.1-py2.py3-none-any.whl", hash = "sha256:bd314f8ceb488571a5ffea6cc5b9fc6cba0adaf88a9d2386b93a489751938bcd"}, {file = "widgetsnbextension-3.5.1-py2.py3-none-any.whl", hash = "sha256:bd314f8ceb488571a5ffea6cc5b9fc6cba0adaf88a9d2386b93a489751938bcd"},
{file = "widgetsnbextension-3.5.1.tar.gz", hash = "sha256:079f87d87270bce047512400efd70238820751a11d2d8cb137a5a5bdbaf255c7"}, {file = "widgetsnbextension-3.5.1.tar.gz", hash = "sha256:079f87d87270bce047512400efd70238820751a11d2d8cb137a5a5bdbaf255c7"},
] ]
wrapt = [
{file = "wrapt-1.12.1.tar.gz", hash = "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"},
]
zipp = [ zipp = [
{file = "zipp-3.5.0-py3-none-any.whl", hash = "sha256:957cfda87797e389580cb8b9e3870841ca991e2125350677b2ca83a0e99390a3"}, {file = "zipp-3.5.0-py3-none-any.whl", hash = "sha256:957cfda87797e389580cb8b9e3870841ca991e2125350677b2ca83a0e99390a3"},
{file = "zipp-3.5.0.tar.gz", hash = "sha256:f5812b1e007e48cff63449a5e9f4e7ebea716b4111f9c4f9a645f91d579bf0c4"}, {file = "zipp-3.5.0.tar.gz", hash = "sha256:f5812b1e007e48cff63449a5e9f4e7ebea716b4111f9c4f9a645f91d579bf0c4"},

View file

@ -25,7 +25,79 @@ types-requests = "^2.25.0"
black = {version = "^21.6b0", allow-prereleases = true} black = {version = "^21.6b0", allow-prereleases = true}
mypy = "^0.910" mypy = "^0.910"
tox = "^3.24.0" tox = "^3.24.0"
flake8 = "^3.9.2"
pylint = "^2.9.6"
liccheck = "^0.6.2"
safety = "^1.10.3"
[build-system] [build-system]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
requires = ["poetry-core>=1.0.0"] requires = ["poetry-core>=1.0.0"]
[tool.pylint]
[tool.pylint."MESSAGE CONTROL"]
disable = [
"C0114", # Disables module docstrings
"R0201", # Disables method could have been a function
"R0903", # Disables too few public methods
"C0103", # Disables name case styling
"W0511", # Disables FIXME warning
"W1203", # Disables f-string interpolation for logging warning
# Errors temporarily ignored for further discussion
"W0107", # Disables unnecessary pass
"W0221", # Disables arguments differ
"R0902", # Disables too many instance attributes
"R0914", # Disables too many local variables
"W0621", # Disables redefined outer name
"C0302", # Disables too many lines in module
"R1732", # Disables consider using "with"
"R1720", # Disables unnecessary "else" after "raise"
"C0206", # Disables consider iteratig with ".items()"
"C0200", # Disables consider using "enumerate" instead of "range" + "len"
"W0612", # Disables unused variable
"W0613", # Disables unused argument
"C0116", # Disables missing function or method docstring
"C0115", # Disables missing class docstring
]
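W1203, disabled in the list above, is pylint's warning about f-string interpolation inside logging calls; with it suppressed, calls such as logger.info(f"...") are allowed even though pylint would normally suggest lazy %-style formatting, which defers building the string until the record is actually emitted. A minimal standalone illustration (not taken from this codebase):

    import logging

    logger = logging.getLogger(__name__)
    count = 42

    # W1203 would flag this: the f-string is evaluated even if INFO logging is off
    logger.info(f"Processed {count} records")

    # the form pylint prefers lets the logging framework do the formatting lazily
    logger.info("Processed %s records", count)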
[tool.pylint.FORMAT]
max-line-length = 150
[tool.pylint.SIMILARITIES]
# Configures how pylint detects repetitive code
min-similarity-lines = 4
ignore-comments = "yes"
ignore-docstrings = "yes"
ignore-imports = "yes"
[tool.liccheck]
# Authorized and unauthorized licenses in LOWER CASE
authorized_licenses = [
"bsd",
"new bsd",
"bsd license",
"bsd 3-clause",
"new bsd license",
"simplified bsd",
"apache",
"apache 2.0",
"apache license 2.0",
"apache software license",
"apache software",
"gnu lgpl",
"gnu lesser general public license v2 (lgplv2)",
"gnu general public license v2 (gplv2)",
"gnu library or lesser general public license (lgpl)",
"lgpl with exceptions or zpl",
"isc license",
"isc license (iscl)",
"mit",
"mit license",
"mozilla public license 2.0 (mpl 2.0)",
"public domain",
"python software foundation license",
"python software foundation",
"zpl 2.1",
"gpl v3"
]

View file

@ -1,30 +1,40 @@
appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7" appdirs==1.4.4; python_full_version >= "3.6.2"
appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7" and platform_system == "Darwin"
argon2-cffi==20.1.0; python_version >= "3.6" argon2-cffi==20.1.0; python_version >= "3.6"
astroid==2.6.5; python_version >= "3.6" and python_version < "4.0"
async-generator==1.10; python_full_version >= "3.6.1" and python_version >= "3.7" async-generator==1.10; python_full_version >= "3.6.1" and python_version >= "3.7"
attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
backcall==0.2.0; python_version >= "3.7" backcall==0.2.0; python_version >= "3.7"
bleach==3.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" backports.entry-points-selectable==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7"
black==21.7b0; python_full_version >= "3.6.2"
bleach==3.3.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
censusdata==1.13; python_version >= "2.7" censusdata==1.13; python_version >= "2.7"
certifi==2021.5.30; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" certifi==2021.5.30; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.7"
cffi==1.14.6; implementation_name == "pypy" and python_version >= "3.6" cffi==1.14.6; implementation_name == "pypy" and python_version >= "3.6"
chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" charset-normalizer==2.0.3; python_full_version >= "3.6.0" and python_version >= "3"
click-plugins==1.1.1; python_version >= "3.6" click-plugins==1.1.1; python_version >= "3.6"
click==8.0.1; python_version >= "3.6" click==8.0.1; python_version >= "3.6"
cligj==0.7.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.6" cligj==0.7.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.6"
colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" and sys_platform == "win32" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0" and sys_platform == "win32" colorama==0.4.4; platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.6.2" and sys_platform == "win32" and python_version < "4.0" and (python_version >= "3.7" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.7" and python_full_version >= "3.5.0")
debugpy==1.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" configparser==5.0.2; python_version >= "3.6"
debugpy==1.4.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
decorator==5.0.9; python_version >= "3.7" decorator==5.0.9; python_version >= "3.7"
defusedxml==0.7.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" defusedxml==0.7.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
distlib==0.3.2; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
dparse==0.5.1; python_version >= "3.5"
dynaconf==3.1.4 dynaconf==3.1.4
entrypoints==0.3; python_version >= "3.7" entrypoints==0.3; python_version >= "3.7"
filelock==3.0.12; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
fiona==1.8.20; python_version >= "3.6" fiona==1.8.20; python_version >= "3.6"
flake8==3.9.2; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
geopandas==0.9.0; python_version >= "3.6" geopandas==0.9.0; python_version >= "3.6"
idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" idna==3.2; python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.5"
importlib-metadata==3.10.1; python_version < "3.8" and python_version >= "3.7" importlib-metadata==3.10.1; python_version < "3.8" and python_version >= "3.7" and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.5.0" and python_version < "3.8" and python_version >= "3.6") and python_full_version >= "3.6.2"
ipykernel==6.0.1; python_version >= "3.7" ipykernel==6.0.3; python_version >= "3.7"
ipython-genutils==0.2.0; python_version >= "3.7" ipython-genutils==0.2.0; python_version >= "3.7"
ipython==7.25.0; python_version >= "3.7" ipython==7.25.0; python_version >= "3.7"
ipywidgets==7.6.3 ipywidgets==7.6.3
isort==5.9.3; python_full_version >= "3.6.1" and python_version < "4.0" and python_version >= "3.6"
jedi==0.18.0; python_version >= "3.7" jedi==0.18.0; python_version >= "3.7"
jinja2==3.0.1; python_version >= "3.7" jinja2==3.0.1; python_version >= "3.7"
jsonschema==3.2.0; python_version >= "3.5" jsonschema==3.2.0; python_version >= "3.5"
@ -39,52 +49,72 @@ jupyter-nbextensions-configurator==0.4.1
jupyter==1.0.0 jupyter==1.0.0
jupyterlab-pygments==0.1.2; python_version >= "3.7" jupyterlab-pygments==0.1.2; python_version >= "3.7"
jupyterlab-widgets==1.0.0; python_version >= "3.6" jupyterlab-widgets==1.0.0; python_version >= "3.6"
lazy-object-proxy==1.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.6" and python_version < "4.0" and python_full_version >= "3.6.0"
liccheck==0.6.2; python_version >= "2.7"
lxml==4.6.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" lxml==4.6.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
markupsafe==2.0.1; python_version >= "3.7" markupsafe==2.0.1; python_version >= "3.7"
matplotlib-inline==0.1.2; platform_system == "Darwin" and python_version >= "3.7" matplotlib-inline==0.1.2; python_version >= "3.7"
mccabe==0.6.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.6" and python_version < "4.0" and python_full_version >= "3.5.0"
mistune==0.8.4; python_version >= "3.7" mistune==0.8.4; python_version >= "3.7"
munch==2.5.0; python_version >= "3.6" munch==2.5.0; python_version >= "3.6"
mypy-extensions==0.4.3; python_full_version >= "3.6.2" and python_version >= "3.5"
mypy==0.910; python_version >= "3.5"
nbclient==0.5.3; python_full_version >= "3.6.1" and python_version >= "3.7" nbclient==0.5.3; python_full_version >= "3.6.1" and python_version >= "3.7"
nbconvert==6.1.0; python_version >= "3.7" nbconvert==6.1.0; python_version >= "3.7"
nbformat==5.1.3; python_full_version >= "3.6.1" and python_version >= "3.7" nbformat==5.1.3; python_full_version >= "3.6.1" and python_version >= "3.7"
nest-asyncio==1.5.1; python_full_version >= "3.6.1" and python_version >= "3.7" nest-asyncio==1.5.1; python_full_version >= "3.6.1" and python_version >= "3.7"
notebook==6.4.0; python_version >= "3.6" notebook==6.4.0; python_version >= "3.6"
numpy==1.21.0; python_version >= "3.7" numpy==1.21.1; python_version >= "3.7"
packaging==21.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" packaging==21.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
pandas==1.3.0; python_full_version >= "3.7.1" pandas==1.3.0; python_full_version >= "3.7.1"
pandocfilters==1.4.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.7" pandocfilters==1.4.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.7"
parso==0.8.2; python_version >= "3.7" parso==0.8.2; python_version >= "3.7"
pathspec==0.9.0; python_full_version >= "3.6.2"
pexpect==4.8.0; sys_platform != "win32" and python_version >= "3.7" pexpect==4.8.0; sys_platform != "win32" and python_version >= "3.7"
pickleshare==0.7.5; python_version >= "3.7" pickleshare==0.7.5; python_version >= "3.7"
platformdirs==2.0.2; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
pluggy==0.13.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
prometheus-client==0.11.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" prometheus-client==0.11.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
prompt-toolkit==3.0.19; python_full_version >= "3.6.1" and python_version >= "3.7" prompt-toolkit==3.0.19; python_full_version >= "3.6.1" and python_version >= "3.7"
ptyprocess==0.7.0; sys_platform != "win32" and python_version >= "3.7" and os_name != "nt" ptyprocess==0.7.0; sys_platform != "win32" and python_version >= "3.7" and os_name != "nt"
py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.6" and python_full_version >= "3.4.0" py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.6" and python_full_version >= "3.5.0"
pycodestyle==2.7.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pyflakes==2.3.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
pygments==2.9.0; python_version >= "3.7" pygments==2.9.0; python_version >= "3.7"
pyparsing==2.4.7; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" pylint==2.9.6; python_version >= "3.6" and python_version < "4.0"
pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
pyproj==3.1.0; python_version >= "3.7" pyproj==3.1.0; python_version >= "3.7"
pyrsistent==0.18.0; python_version >= "3.6" pyrsistent==0.18.0; python_version >= "3.6"
python-dateutil==2.8.1; python_full_version >= "3.7.1" and python_version >= "3.7" python-dateutil==2.8.2; python_full_version >= "3.7.1" and python_version >= "3.7"
pytz==2021.1; python_full_version >= "3.7.1" and python_version >= "2.7" pytz==2021.1; python_full_version >= "3.7.1" and python_version >= "2.7"
pywin32==301; sys_platform == "win32" and python_version >= "3.6" pywin32==301; sys_platform == "win32" and python_version >= "3.6"
pywinpty==1.1.3; os_name == "nt" and python_version >= "3.6" pywinpty==1.1.3; os_name == "nt" and python_version >= "3.6"
pyyaml==5.4.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" pyyaml==5.4.1; python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.5"
pyzmq==22.1.0; python_full_version >= "3.6.1" and python_version >= "3.7" pyzmq==22.1.0; python_full_version >= "3.6.1" and python_version >= "3.7"
qtconsole==5.1.1; python_version >= "3.6" qtconsole==5.1.1; python_version >= "3.6"
qtpy==1.9.0; python_version >= "3.6" qtpy==1.9.0; python_version >= "3.6"
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") regex==2021.7.6; python_full_version >= "3.6.2"
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
safety==1.10.3; python_version >= "3.5"
semantic-version==2.8.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "2.7"
send2trash==1.7.1; python_version >= "3.6" send2trash==1.7.1; python_version >= "3.6"
shapely==1.7.1; python_version >= "3.6" shapely==1.7.1; python_version >= "3.6"
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "2.7" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6") and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7") and (python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.5") six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0") and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6") and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7") and (python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.5")
terminado==0.10.1; python_version >= "3.6" terminado==0.10.1; python_version >= "3.6"
testpath==0.5.0; python_version >= "3.7" testpath==0.5.0; python_version >= "3.7"
toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_full_version >= "3.5.0" and python_version >= "3.6" and python_version < "4.0"
tomli==1.1.0; python_version >= "3.6" and python_full_version >= "3.6.2"
tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.7" tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.7"
tox==3.24.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
traitlets==5.0.5; python_full_version >= "3.6.1" and python_version >= "3.7" traitlets==5.0.5; python_full_version >= "3.6.1" and python_version >= "3.7"
typed-ast==1.4.3; python_version < "3.8" and python_full_version >= "3.6.2" and python_version >= "3.6" and implementation_name == "cpython"
types-requests==2.25.0 types-requests==2.25.0
typing-extensions==3.10.0.0; python_version < "3.8" and python_version >= "3.6" typing-extensions==3.10.0.0; python_version < "3.8" and python_full_version >= "3.6.2" and python_version >= "3.6"
urllib3==1.26.6; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" and python_version >= "2.7" urllib3==1.26.6; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "2.7"
virtualenv==20.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
wcwidth==0.2.5; python_full_version >= "3.6.1" and python_version >= "3.7" wcwidth==0.2.5; python_full_version >= "3.6.1" and python_version >= "3.7"
webencodings==0.5.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" webencodings==0.5.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
widgetsnbextension==3.5.1 widgetsnbextension==3.5.1
wrapt==1.12.1; python_version >= "3.6" and python_version < "4.0"
zipp==3.5.0; python_version < "3.8" and python_version >= "3.6" zipp==3.5.0; python_version < "3.8" and python_version >= "3.6"

View file

@ -1,6 +1,20 @@
[tox] [tox]
# required because we use pyproject.toml # required because we use pyproject.toml
isolated_build = true isolated_build = true
envlist = py37, py38, py39 envlist = py37, py38, py39, lint, checkdeps
# only checks python versions installed locally # only checks python versions installed locally
skip_missing_interpreters = true skip_missing_interpreters = true
[testenv:lint]
# lints python code in src and tests
basepython = python3.9
deps = -rrequirements.txt
commands = black etl application.py config.py utils.py
flake8 etl application.py config.py utils.py
# pylint etl application.py config.py utils.py
[testenv:checkdeps]
# checks the dependencies for security vulnerabilities and open source licenses
deps = -rrequirements.txt
commands = safety check
liccheck
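With lint and checkdeps added to the envlist, a plain tox run now exercises them alongside the py37-py39 test environments. They can also be run on their own: tox -e lint applies black and flake8 to etl, application.py, config.py and utils.py (the pylint command is still commented out), while tox -e checkdeps runs safety, which checks the installed dependencies for known vulnerabilities, and liccheck, which validates package licenses against the authorized list added to pyproject.toml above.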

View file

@ -2,10 +2,11 @@ from pathlib import Path
import os import os
import logging import logging
import shutil import shutil
import requests
import zipfile import zipfile
import urllib3 import urllib3
import requests
from config import settings from config import settings
@ -133,12 +134,13 @@ def unzip_file_from_url(
# cleanup temporary file # cleanup temporary file
os.remove(zip_file_path) os.remove(zip_file_path)
def data_folder_cleanup() -> None: def data_folder_cleanup() -> None:
"""Remove all files and directories from the local data/dataset path""" """Remove all files and directories from the local data/dataset path"""
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
logger.info(f"Initializing all dataset directoriees") logger.info("Initializing all dataset directoriees")
remove_all_from_dir(data_path / "dataset") remove_all_from_dir(data_path / "dataset")
@ -147,7 +149,7 @@ def score_folder_cleanup() -> None:
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
logger.info(f"Initializing all score data") logger.info("Initializing all score data")
remove_all_from_dir(data_path / "score" / "csv") remove_all_from_dir(data_path / "score" / "csv")
remove_all_from_dir(data_path / "score" / "geojson") remove_all_from_dir(data_path / "score" / "geojson")
@ -157,9 +159,10 @@ def temp_folder_cleanup() -> None:
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
logger.info(f"Initializing all temp directoriees") logger.info("Initializing all temp directoriees")
remove_all_from_dir(data_path / "tmp") remove_all_from_dir(data_path / "tmp")
def get_excel_column_name(index: int) -> str: def get_excel_column_name(index: int) -> str:
"""Map a numeric index to the appropriate column in Excel. E.g., column #95 is "CR". """Map a numeric index to the appropriate column in Excel. E.g., column #95 is "CR".
Only works for the first 1000 columns. Only works for the first 1000 columns.
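The body of get_excel_column_name is not shown above; as a rough sketch of the underlying conversion, assuming a 0-based index (which is what makes index 95 map to "CR"), the name is the base-26 expansion over the letters A-Z. The names below are illustrative, not the project's own:

    import string

    def excel_column_name_sketch(index: int) -> str:
        """Hypothetical helper: 0-based column index -> Excel column name (95 -> "CR")."""
        n = index + 1  # shift to 1-based for the base-26 arithmetic
        name = ""
        while n > 0:
            n, remainder = divmod(n - 1, 26)
            name = string.ascii_uppercase[remainder] + name
        return name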