Running Black

This commit is contained in:
Nat Hillard 2021-08-04 21:10:30 -04:00
parent dd8c37e06d
commit 508925618b
11 changed files with 30 additions and 87 deletions

View file

@ -22,9 +22,7 @@ def cli():
pass
@cli.command(
help="Clean up all census data folders",
)
@cli.command(help="Clean up all census data folders",)
def census_cleanup():
"""CLI command to clean up the census data folder"""
@ -37,9 +35,7 @@ def census_cleanup():
logger.info("Cleaned up all census data files")
@cli.command(
help="Clean up all data folders",
)
@cli.command(help="Clean up all data folders",)
def data_cleanup():
"""CLI command to clean up all the data folders"""
@ -50,9 +46,7 @@ def data_cleanup():
logger.info("Cleaned up all data folders")
@cli.command(
help="Census data download",
)
@cli.command(help="Census data download",)
def census_data_download():
"""CLI command to download all census shape files from the Census FTP and extract the geojson
to generate national and by state Census Block Group CSVs"""
@ -64,9 +58,7 @@ def census_data_download():
logger.info("Completed downloading census data")
@cli.command(
help="Run all ETL processes or a specific one",
)
@cli.command(help="Run all ETL processes or a specific one",)
@click.option("-d", "--dataset", required=False, type=str)
def etl_run(dataset: str):
"""Run a specific or all ETL processes
@ -81,27 +73,21 @@ def etl_run(dataset: str):
etl_runner(dataset)
@cli.command(
help="Generate Score",
)
@cli.command(help="Generate Score",)
def score_run():
"""CLI command to generate the score"""
score_generate()
@cli.command(
help="Generate Geojson files with scores baked in",
)
@cli.command(help="Generate Geojson files with scores baked in",)
def geo_score():
"""CLI command to generate GeoJSON files with scores baked in"""
score_geo()
@cli.command(
help="Generate map tiles",
)
@cli.command(help="Generate map tiles",)
def generate_map_tiles():
"""CLI command to generate the map tiles"""

View file

@ -27,11 +27,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
"module_dir": "census_acs",
"class_name": "CensusACSETL",
},
{
"name": "ejscreen",
"module_dir": "ejscreen",
"class_name": "EJScreenETL",
},
{"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL",},
{
"name": "housing_and_transportation",
"module_dir": "housing_and_transportation",
@ -47,17 +43,12 @@ def etl_runner(dataset_to_run: str = None) -> None:
"module_dir": "calenviroscreen",
"class_name": "CalEnviroScreenETL",
},
{
"name": "hud_recap",
"module_dir": "hud_recap",
"class_name": "HudRecapETL",
},
{"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL",},
]
if dataset_to_run:
dataset_element = next(
(item for item in dataset_list if item["name"] == dataset_to_run),
None,
(item for item in dataset_list if item["name"] == dataset_to_run), None,
)
if not dataset_list:
raise ValueError("Invalid dataset name")

View file

@ -59,9 +59,7 @@ class ScoreETL(ExtractTransformLoad):
# Load census data
census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
self.census_df = pd.read_csv(
census_csv,
dtype={self.GEOID_FIELD_NAME: "string"},
low_memory=False,
census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False,
)
# Load housing and transportation data
@ -123,8 +121,7 @@ class ScoreETL(ExtractTransformLoad):
# Define a named tuple that will be used for each data set input.
DataSet = collections.namedtuple(
typename="DataSet",
field_names=["input_field", "renamed_field", "bucket"],
typename="DataSet", field_names=["input_field", "renamed_field", "bucket"],
)
data_sets = [
@ -141,9 +138,7 @@ class ScoreETL(ExtractTransformLoad):
bucket=None,
),
DataSet(
input_field="ACSTOTPOP",
renamed_field="Total population",
bucket=None,
input_field="ACSTOTPOP", renamed_field="Total population", bucket=None,
),
# The following data sets have buckets, because they're used in the score
DataSet(
@ -249,9 +244,7 @@ class ScoreETL(ExtractTransformLoad):
}
self.df.rename(
columns=renaming_dict,
inplace=True,
errors="raise",
columns=renaming_dict, inplace=True, errors="raise",
)
columns_to_keep = [data_set.renamed_field for data_set in data_sets]

View file

@ -46,9 +46,7 @@ class GeoScoreETL(ExtractTransformLoad):
logger.info("Reading score CSV")
self.score_usa_df = pd.read_csv(
self.TILE_SCORE_CSV,
dtype={"GEOID10": "string"},
low_memory=False,
self.TILE_SCORE_CSV, dtype={"GEOID10": "string"}, low_memory=False,
)
def transform(self) -> None:
@ -70,8 +68,7 @@ class GeoScoreETL(ExtractTransformLoad):
].reset_index(drop=True)
usa_simplified.rename(
columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
inplace=True,
columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO}, inplace=True,
)
logger.info("Aggregating into tracts (~5 minutes)")
@ -156,4 +153,4 @@ class GeoScoreETL(ExtractTransformLoad):
logger.info("Writing usa-low (~9 minutes)")
self.geojson_score_usa_low.to_file(self.SCORE_LOW_GEOJSON, driver="GeoJSON")
logger.info("Completed writing usa-low")
logger.info("Completed writing usa-low")

View file

@ -43,8 +43,7 @@ class PostScoreETL(ExtractTransformLoad):
def extract(self) -> None:
super().extract(
self.CENSUS_COUNTIES_ZIP_URL,
self.TMP_PATH,
self.CENSUS_COUNTIES_ZIP_URL, self.TMP_PATH,
)
logger.info("Reading Counties CSV")
@ -68,8 +67,7 @@ class PostScoreETL(ExtractTransformLoad):
# rename some of the columns to prepare for merge
self.counties_df = self.counties_df[["USPS", "GEOID", "NAME"]]
self.counties_df.rename(
columns={"USPS": "State Abbreviation", "NAME": "County Name"},
inplace=True,
columns={"USPS": "State Abbreviation", "NAME": "County Name"}, inplace=True,
)
# remove unnecessary columns

View file

@ -28,8 +28,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
def extract(self) -> None:
logger.info("Downloading CalEnviroScreen Data")
super().extract(
self.CALENVIROSCREEN_FTP_URL,
self.TMP_PATH,
self.CALENVIROSCREEN_FTP_URL, self.TMP_PATH,
)
def transform(self) -> None:

View file

@ -39,9 +39,7 @@ def download_census_csvs(data_path: Path) -> None:
# But using 2010 for now
cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
unzip_file_from_url(
cbg_state_url,
data_path / "tmp",
data_path / "census" / "shp" / fips,
cbg_state_url, data_path / "tmp", data_path / "census" / "shp" / fips,
)
cmd = (
@ -80,32 +78,22 @@ def download_census_csvs(data_path: Path) -> None:
csv_dir_path / f"{state_id}.csv", mode="w", newline=""
) as cbg_csv_file:
cbg_csv_file_writer = csv.writer(
cbg_csv_file,
delimiter=",",
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
)
for geoid10 in geoid10_list:
cbg_csv_file_writer.writerow(
[
geoid10,
]
[geoid10,]
)
## write US csv
with open(csv_dir_path / "us.csv", mode="w", newline="") as cbg_csv_file:
cbg_csv_file_writer = csv.writer(
cbg_csv_file,
delimiter=",",
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
)
for geoid10 in cbg_national:
cbg_csv_file_writer.writerow(
[
geoid10,
]
[geoid10,]
)
## create national geojson

View file

@ -18,8 +18,7 @@ class EJScreenETL(ExtractTransformLoad):
def extract(self) -> None:
logger.info("Downloading EJScreen Data")
super().extract(
self.EJSCREEN_FTP_URL,
self.TMP_PATH,
self.EJSCREEN_FTP_URL, self.TMP_PATH,
)
def transform(self) -> None:

View file

@ -34,8 +34,7 @@ class HudHousingETL(ExtractTransformLoad):
def extract(self) -> None:
logger.info("Extracting HUD Housing Data")
super().extract(
self.HOUSING_FTP_URL,
self.HOUSING_ZIP_FILE_DIR,
self.HOUSING_FTP_URL, self.HOUSING_ZIP_FILE_DIR,
)
def transform(self) -> None:
@ -49,10 +48,7 @@ class HudHousingETL(ExtractTransformLoad):
/ "140"
/ "Table8.csv"
)
self.df = pd.read_csv(
filepath_or_buffer=tmp_csv_file_path,
encoding="latin-1",
)
self.df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path, encoding="latin-1",)
# Rename and reformat block group ID
self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)

View file

@ -70,8 +70,7 @@ class TreeEquityScoreETL(ExtractTransformLoad):
logger.info("Downloading Tree Equity Score Data")
for state in self.states:
super().extract(
f"{self.TES_URL}{state}.zip.zip",
f"{self.TMP_PATH}/{state}",
f"{self.TES_URL}{state}.zip.zip", f"{self.TMP_PATH}/{state}",
)
def transform(self) -> None:

View file

@ -97,10 +97,7 @@ def remove_all_dirs_from_dir(dir_path: Path) -> None:
def unzip_file_from_url(
file_url: str,
download_path: Path,
unzipped_file_path: Path,
verify: bool = False,
file_url: str, download_path: Path, unzipped_file_path: Path, verify: bool = False,
) -> None:
"""Downloads a zip file from a remote URL location and unzips it in a specific directory, removing the temporary file after