Issue 308 python linting (#443)

* Adds flake8, pylint, liccheck, flake8 to dependencies for data-pipeline

* Sets up and runs black autoformatting

* Adds flake8 to tox linting

* Fixes flake8 error F541 f string missing placeholders

* Fixes flake8 E501 line too long

* Fixes flake8 F401 imported but not used

* Adds pylint to tox and disables the following pylint errors:
- C0114: module docstrings
- R0201: method could have been a function
- R0903: too few public methods
- C0103: name case styling
- W0511: fix me
- W1203: f-string interpolation in logging

* Adds utils.py to tox.ini linting, runs black on utils.py

* Fixes import related pylint errors: C0411 and C0412

* Fixes or ignores remaining pylint errors (for discussion later)

* Adds safety and liccheck to tox.ini
This commit is contained in:
Billy Daly 2021-08-02 12:16:38 -04:00 committed by GitHub
commit 5504528fdf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 709 additions and 228 deletions

View file

@ -9,16 +9,12 @@ logger = get_module_logger(__name__)
class CalEnviroScreenETL(ExtractTransformLoad):
def __init__(self):
self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
self.CALENVIROSCREEN_CSV = (
self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
)
self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
# Definining some variable names
self.CALENVIROSCREEN_SCORE_FIELD_NAME = "calenviroscreen_score"
self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = (
"calenviroscreen_percentile"
)
self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = "calenviroscreen_percentile"
self.CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = (
"calenviroscreen_priority_community"
)
@ -30,14 +26,14 @@ class CalEnviroScreenETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info(f"Downloading CalEnviroScreen Data")
logger.info("Downloading CalEnviroScreen Data")
super().extract(
self.CALENVIROSCREEN_FTP_URL,
self.TMP_PATH,
)
def transform(self) -> None:
logger.info(f"Transforming CalEnviroScreen Data")
logger.info("Transforming CalEnviroScreen Data")
# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:
# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip
@ -67,7 +63,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
)
def load(self) -> None:
logger.info(f"Saving CalEnviroScreen CSV")
logger.info("Saving CalEnviroScreen CSV")
# write nationwide csv
self.CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df.to_csv(self.CSV_PATH / f"data06.csv", index=False)
self.df.to_csv(self.CSV_PATH / "data06.csv", index=False)