Running Black

Nat Hillard 2021-08-04 21:10:30 -04:00
parent dd8c37e06d
commit 508925618b
11 changed files with 30 additions and 87 deletions
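
This commit runs the Black formatter over the data pipeline: calls that fit within Black's line-length limit are collapsed from several lines onto one, which accounts for most of the deletions below. The page does not record the Black version or invocation used, so the settings in the following sketch are assumptions; it uses Black's documented Python API to reproduce the kind of collapse shown in the hunks. Note that current Black releases drop the trailing comma when collapsing a call, while the 2021 output below keeps it, which suggests an older release or a non-default setting.

```python
# A minimal sketch, assuming Black's documented Python API; the version
# and settings used for this commit are not recorded on this page.
import black

src = '''@cli.command(
    help="Generate map tiles",
)
def generate_map_tiles():
    """CLI command to generate the map tiles"""
'''

# magic_trailing_comma=False asks Black to collapse calls even when they
# end with a trailing comma (newer releases would otherwise keep them
# exploded); it corresponds to the `--skip-magic-trailing-comma` CLI flag.
mode = black.Mode(line_length=88, magic_trailing_comma=False)
print(black.format_str(src, mode=mode))
```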


@@ -22,9 +22,7 @@ def cli():
     pass


-@cli.command(
-    help="Clean up all census data folders",
-)
+@cli.command(help="Clean up all census data folders",)
 def census_cleanup():
     """CLI command to clean up the census data folder"""
@@ -37,9 +35,7 @@ def census_cleanup():
     logger.info("Cleaned up all census data files")


-@cli.command(
-    help="Clean up all data folders",
-)
+@cli.command(help="Clean up all data folders",)
 def data_cleanup():
     """CLI command to clean up the all the data folders"""
@@ -50,9 +46,7 @@ def data_cleanup():
     logger.info("Cleaned up all data folders")


-@cli.command(
-    help="Census data download",
-)
+@cli.command(help="Census data download",)
 def census_data_download():
     """CLI command to download all census shape files from the Census FTP and extract the geojson
     to generate national and by state Census Block Group CSVs"""
@@ -64,9 +58,7 @@ def census_data_download():
     logger.info("Completed downloading census data")


-@cli.command(
-    help="Run all ETL processes or a specific one",
-)
+@cli.command(help="Run all ETL processes or a specific one",)
 @click.option("-d", "--dataset", required=False, type=str)
 def etl_run(dataset: str):
     """Run a specific or all ETL processes
@@ -81,27 +73,21 @@ def etl_run(dataset: str):
     etl_runner(dataset)


-@cli.command(
-    help="Generate Score",
-)
+@cli.command(help="Generate Score",)
 def score_run():
     """CLI command to generate the score"""

     score_generate()


-@cli.command(
-    help="Generate Geojson files with scores baked in",
-)
+@cli.command(help="Generate Geojson files with scores baked in",)
 def geo_score():
     """CLI command to generate the score"""

     score_geo()


-@cli.command(
-    help="Generate map tiles",
-)
+@cli.command(help="Generate map tiles",)
 def generate_map_tiles():
     """CLI command to generate the map tiles"""


@@ -27,11 +27,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "census_acs",
             "class_name": "CensusACSETL",
         },
-        {
-            "name": "ejscreen",
-            "module_dir": "ejscreen",
-            "class_name": "EJScreenETL",
-        },
+        {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL",},
         {
             "name": "housing_and_transportation",
             "module_dir": "housing_and_transportation",
@@ -47,17 +43,12 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "calenviroscreen",
             "class_name": "CalEnviroScreenETL",
         },
-        {
-            "name": "hud_recap",
-            "module_dir": "hud_recap",
-            "class_name": "HudRecapETL",
-        },
+        {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL",},
     ]

     if dataset_to_run:
         dataset_element = next(
-            (item for item in dataset_list if item["name"] == dataset_to_run),
-            None,
+            (item for item in dataset_list if item["name"] == dataset_to_run), None,
         )
         if not dataset_list:
             raise ValueError("Invalid dataset name")
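
`etl_runner` above keeps its datasets in a plain list of dicts and looks one up with `next(..., None)`. Note that the guard in the hunk tests `dataset_list`, which is never empty, rather than the result of the lookup; the sketch below checks the looked-up element instead. The module path in the import step is a hypothetical stand-in, since the real package layout is not shown in this diff:

```python
# Sketch of the registry lookup in etl_runner; the module path used by
# importlib below is hypothetical, not this repository's actual layout.
import importlib

dataset_list = [
    {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL"},
    {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL"},
]


def run_one(dataset_to_run: str) -> None:
    # next() yields the first dict whose "name" matches, or None.
    dataset_element = next(
        (item for item in dataset_list if item["name"] == dataset_to_run), None
    )
    if dataset_element is None:
        raise ValueError("Invalid dataset name")

    # Hypothetical package layout: etl.sources.<module_dir>.etl
    module = importlib.import_module(
        f"etl.sources.{dataset_element['module_dir']}.etl"
    )
    etl_class = getattr(module, dataset_element["class_name"])
    etl_instance = etl_class()
    etl_instance.extract()
    etl_instance.transform()
    etl_instance.load()
```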


@@ -59,9 +59,7 @@ class ScoreETL(ExtractTransformLoad):
         # Load census data
         census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
         self.census_df = pd.read_csv(
-            census_csv,
-            dtype={self.GEOID_FIELD_NAME: "string"},
-            low_memory=False,
+            census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False,
         )

         # Load housing and transportation data
@@ -123,8 +121,7 @@ class ScoreETL(ExtractTransformLoad):
         # Define a named tuple that will be used for each data set input.
         DataSet = collections.namedtuple(
-            typename="DataSet",
-            field_names=["input_field", "renamed_field", "bucket"],
+            typename="DataSet", field_names=["input_field", "renamed_field", "bucket"],
         )

         data_sets = [
@@ -141,9 +138,7 @@ class ScoreETL(ExtractTransformLoad):
                 bucket=None,
             ),
             DataSet(
-                input_field="ACSTOTPOP",
-                renamed_field="Total population",
-                bucket=None,
+                input_field="ACSTOTPOP", renamed_field="Total population", bucket=None,
             ),
             # The following data sets have buckets, because they're used in the score
             DataSet(
@@ -249,9 +244,7 @@ class ScoreETL(ExtractTransformLoad):
         }

         self.df.rename(
-            columns=renaming_dict,
-            inplace=True,
-            errors="raise",
+            columns=renaming_dict, inplace=True, errors="raise",
         )

         columns_to_keep = [data_set.renamed_field for data_set in data_sets]
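
`ScoreETL` describes each input column with a `DataSet` named tuple; per the comment in the hunk above, `bucket` is `None` for columns that are carried through but not used in the score. A minimal sketch of how those tuples can drive the later rename (the dict-comprehension construction of `renaming_dict` is a plausible reading, not quoted from the file):

```python
# Minimal sketch of the DataSet named-tuple pattern from ScoreETL above;
# the single entry shown is taken from the diff, the rest is omitted.
import collections

DataSet = collections.namedtuple(
    typename="DataSet", field_names=["input_field", "renamed_field", "bucket"]
)

data_sets = [
    DataSet(input_field="ACSTOTPOP", renamed_field="Total population", bucket=None),
]

# The rename mapping and the keep-list both derive from the same registry,
# so each column only has to be declared once.
renaming_dict = {d.input_field: d.renamed_field for d in data_sets}
columns_to_keep = [d.renamed_field for d in data_sets]

print(renaming_dict)    # {'ACSTOTPOP': 'Total population'}
print(columns_to_keep)  # ['Total population']
```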


@@ -46,9 +46,7 @@ class GeoScoreETL(ExtractTransformLoad):
         logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
-            self.TILE_SCORE_CSV,
-            dtype={"GEOID10": "string"},
-            low_memory=False,
+            self.TILE_SCORE_CSV, dtype={"GEOID10": "string"}, low_memory=False,
         )

     def transform(self) -> None:
@@ -70,8 +68,7 @@ class GeoScoreETL(ExtractTransformLoad):
         ].reset_index(drop=True)

         usa_simplified.rename(
-            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
-            inplace=True,
+            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO}, inplace=True,
         )

         logger.info("Aggregating into tracts (~5 minutes)")


@@ -43,8 +43,7 @@ class PostScoreETL(ExtractTransformLoad):
     def extract(self) -> None:
         super().extract(
-            self.CENSUS_COUNTIES_ZIP_URL,
-            self.TMP_PATH,
+            self.CENSUS_COUNTIES_ZIP_URL, self.TMP_PATH,
         )

         logger.info("Reading Counties CSV")
@@ -68,8 +67,7 @@ class PostScoreETL(ExtractTransformLoad):
         # rename some of the columns to prepare for merge
         self.counties_df = self.counties_df[["USPS", "GEOID", "NAME"]]
         self.counties_df.rename(
-            columns={"USPS": "State Abbreviation", "NAME": "County Name"},
-            inplace=True,
+            columns={"USPS": "State Abbreviation", "NAME": "County Name"}, inplace=True,
         )

         # remove unnecessary columns


@@ -28,8 +28,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading CalEnviroScreen Data")
         super().extract(
-            self.CALENVIROSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.CALENVIROSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:


@@ -39,9 +39,7 @@ def download_census_csvs(data_path: Path) -> None:
         # But using 2010 for now
         cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
         unzip_file_from_url(
-            cbg_state_url,
-            data_path / "tmp",
-            data_path / "census" / "shp" / fips,
+            cbg_state_url, data_path / "tmp", data_path / "census" / "shp" / fips,
         )

         cmd = (
@@ -80,32 +78,22 @@ def download_census_csvs(data_path: Path) -> None:
             csv_dir_path / f"{state_id}.csv", mode="w", newline=""
         ) as cbg_csv_file:
             cbg_csv_file_writer = csv.writer(
-                cbg_csv_file,
-                delimiter=",",
-                quotechar='"',
-                quoting=csv.QUOTE_MINIMAL,
+                cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
             )

             for geoid10 in geoid10_list:
                 cbg_csv_file_writer.writerow(
-                    [
-                        geoid10,
-                    ]
+                    [geoid10,]
                 )

     ## write US csv
     with open(csv_dir_path / "us.csv", mode="w", newline="") as cbg_csv_file:
         cbg_csv_file_writer = csv.writer(
-            cbg_csv_file,
-            delimiter=",",
-            quotechar='"',
-            quoting=csv.QUOTE_MINIMAL,
+            cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
         )
         for geoid10 in cbg_national:
             cbg_csv_file_writer.writerow(
-                [
-                    geoid10,
-                ]
+                [geoid10,]
             )

     ## create national geojson
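
The hunks above write one Census block-group GEOID per CSV row, so each `writerow` call gets a single-element list. The same pattern in isolation (the directory and sample IDs are illustrative stand-ins):

```python
# Isolated sketch of the one-GEOID-per-row CSV writing above; the path
# and the sample IDs are illustrative, not from the pipeline.
import csv
from pathlib import Path

csv_dir_path = Path("/tmp/census_csvs")
csv_dir_path.mkdir(parents=True, exist_ok=True)
geoid10_list = ["010010201001", "010010201002"]  # sample GEOID10 values

with open(csv_dir_path / "01.csv", mode="w", newline="") as cbg_csv_file:
    cbg_csv_file_writer = csv.writer(
        cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
    )
    for geoid10 in geoid10_list:
        # One single-element row per block group ID.
        cbg_csv_file_writer.writerow([geoid10])
```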


@@ -18,8 +18,7 @@ class EJScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading EJScreen Data")
         super().extract(
-            self.EJSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.EJSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:


@@ -34,8 +34,7 @@ class HudHousingETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Extracting HUD Housing Data")
         super().extract(
-            self.HOUSING_FTP_URL,
-            self.HOUSING_ZIP_FILE_DIR,
+            self.HOUSING_FTP_URL, self.HOUSING_ZIP_FILE_DIR,
         )

     def transform(self) -> None:
@@ -49,10 +48,7 @@ class HudHousingETL(ExtractTransformLoad):
             / "140"
             / "Table8.csv"
         )
-        self.df = pd.read_csv(
-            filepath_or_buffer=tmp_csv_file_path,
-            encoding="latin-1",
-        )
+        self.df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path, encoding="latin-1",)

         # Rename and reformat block group ID
         self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)


@@ -70,8 +70,7 @@ class TreeEquityScoreETL(ExtractTransformLoad):
         logger.info("Downloading Tree Equity Score Data")
         for state in self.states:
             super().extract(
-                f"{self.TES_URL}{state}.zip.zip",
-                f"{self.TMP_PATH}/{state}",
+                f"{self.TES_URL}{state}.zip.zip", f"{self.TMP_PATH}/{state}",
             )

     def transform(self) -> None:


@@ -97,10 +97,7 @@ def remove_all_dirs_from_dir(dir_path: Path) -> None:
 def unzip_file_from_url(
-    file_url: str,
-    download_path: Path,
-    unzipped_file_path: Path,
-    verify: bool = False,
+    file_url: str, download_path: Path, unzipped_file_path: Path, verify: bool = False,
 ) -> None:
     """Downloads a zip file from a remote URL location and unzips it in a specific directory, removing the temporary file after