Issue 308 python linting (#443)

* Adds flake8, pylint, liccheck, flake8 to dependencies for data-pipeline

* Sets up and runs black autoformatting

* Adds flake8 to tox linting

* Fixes flake8 error F541 f string missing placeholders

* Fixes flake8 E501 line too long

* Fixes flake8 F401 imported but not used

* Adds pylint to tox and disables the following pylint errors:
- C0114: module docstrings
- R0201: method could have been a function
- R0903: too few public methods
- C0103: name case styling
- W0511: fix me
- W1203: f-string interpolation in logging

* Adds utils.py to tox.ini linting, runs black on utils.py

* Fixes import related pylint errors: C0411 and C0412

* Fixes or ignores remaining pylint errors (for discussion later)

* Adds safety and liccheck to tox.ini
This commit is contained in:
Billy Daly 2021-08-02 12:16:38 -04:00 committed by GitHub
commit 5504528fdf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 709 additions and 228 deletions

View file

@ -35,9 +35,7 @@ class HousingTransportationETL(ExtractTransformLoad):
)
# New file name:
tmp_csv_file_path = (
zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
)
tmp_csv_file_path = zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
dfs.append(tmp_df)
@ -45,16 +43,16 @@ class HousingTransportationETL(ExtractTransformLoad):
self.df = pd.concat(dfs)
def transform(self) -> None:
logger.info(f"Transforming Housing and Transportation Data")
logger.info("Transforming Housing and Transportation Data")
# Rename and reformat block group ID
self.df.rename(columns={"blkgrp": self.GEOID_FIELD_NAME}, inplace=True)
self.df[self.GEOID_FIELD_NAME] = self.df[
self.GEOID_FIELD_NAME
].str.replace('"', "")
self.df[self.GEOID_FIELD_NAME] = self.df[self.GEOID_FIELD_NAME].str.replace(
'"', ""
)
def load(self) -> None:
logger.info(f"Saving Housing and Transportation Data")
logger.info("Saving Housing and Transportation Data")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)