mirror of https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00

Running Black

This commit is contained in:
parent dd8c37e06d
commit 508925618b

11 changed files with 30 additions and 87 deletions
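This commit applies the Black autoformatter across the ETL pipeline. Every hunk below follows the same pattern: a call or literal that fits within Black's default 88-character line length is collapsed from several lines onto one. Notably, the collapsed one-liners keep their trailing commas (e.g. `@cli.command(help="...",)`), which points to an early Black release: since Black 20.8b0, the "magic trailing comma" makes Black keep such calls exploded instead. As a minimal sketch, assuming Black is installed (pip install black), this kind of reformatting can be reproduced through Black's public Python API:

# A minimal sketch, assuming Black is installed; format_str() and FileMode
# are part of Black's public Python API.
import black

source = '''@cli.command(
    help="Clean up all census data folders",
)
def census_cleanup():
    """CLI command to clean up the census data folder"""
'''

# Uses Black's default 88-character line length; whether the call collapses
# to one line depends on the installed version's handling of the magic
# trailing comma.
formatted = black.format_str(source, mode=black.FileMode())
print(formatted)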
@@ -22,9 +22,7 @@ def cli():
     pass


-@cli.command(
-    help="Clean up all census data folders",
-)
+@cli.command(help="Clean up all census data folders",)
 def census_cleanup():
     """CLI command to clean up the census data folder"""

@@ -37,9 +35,7 @@ def census_cleanup():
     logger.info("Cleaned up all census data files")


-@cli.command(
-    help="Clean up all data folders",
-)
+@cli.command(help="Clean up all data folders",)
 def data_cleanup():
     """CLI command to clean up the all the data folders"""

@@ -50,9 +46,7 @@ def data_cleanup():
     logger.info("Cleaned up all data folders")


-@cli.command(
-    help="Census data download",
-)
+@cli.command(help="Census data download",)
 def census_data_download():
     """CLI command to download all census shape files from the Census FTP and extract the geojson
     to generate national and by state Census Block Group CSVs"""
@@ -64,9 +58,7 @@ def census_data_download():
     logger.info("Completed downloading census data")


-@cli.command(
-    help="Run all ETL processes or a specific one",
-)
+@cli.command(help="Run all ETL processes or a specific one",)
 @click.option("-d", "--dataset", required=False, type=str)
 def etl_run(dataset: str):
     """Run a specific or all ETL processes
@@ -81,27 +73,21 @@ def etl_run(dataset: str):
     etl_runner(dataset)


-@cli.command(
-    help="Generate Score",
-)
+@cli.command(help="Generate Score",)
 def score_run():
     """CLI command to generate the score"""

     score_generate()


-@cli.command(
-    help="Generate Geojson files with scores baked in",
-)
+@cli.command(help="Generate Geojson files with scores baked in",)
 def geo_score():
     """CLI command to generate the score"""

     score_geo()


-@cli.command(
-    help="Generate map tiles",
-)
+@cli.command(help="Generate map tiles",)
 def generate_map_tiles():
     """CLI command to generate the map tiles"""

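For reference, here is a self-contained sketch of the click pattern this file uses, with the collapsed decorator style from the new side of the diff; the `click.echo` body is illustrative, not from the repository:

# A self-contained sketch of the click group/command pattern above, assuming
# click is installed. The echo body is illustrative, not from the repository.
import click


@click.group()
def cli():
    pass


@cli.command(help="Clean up all census data folders",)
def census_cleanup():
    """CLI command to clean up the census data folder"""
    click.echo("Cleaned up all census data files")


if __name__ == "__main__":
    cli()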
@@ -27,11 +27,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "census_acs",
             "class_name": "CensusACSETL",
         },
-        {
-            "name": "ejscreen",
-            "module_dir": "ejscreen",
-            "class_name": "EJScreenETL",
-        },
+        {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL",},
         {
             "name": "housing_and_transportation",
             "module_dir": "housing_and_transportation",
@@ -47,17 +43,12 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "calenviroscreen",
             "class_name": "CalEnviroScreenETL",
         },
-        {
-            "name": "hud_recap",
-            "module_dir": "hud_recap",
-            "class_name": "HudRecapETL",
-        },
+        {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL",},
     ]

     if dataset_to_run:
         dataset_element = next(
-            (item for item in dataset_list if item["name"] == dataset_to_run),
-            None,
+            (item for item in dataset_list if item["name"] == dataset_to_run), None,
         )
         if not dataset_list:
             raise ValueError("Invalid dataset name")
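One thing worth flagging in the hunk above: after looking up `dataset_element`, the source guards with `if not dataset_list`, which is always truthy once the registry is populated; presumably the intended check is on `dataset_element`. A minimal sketch of the lookup with that assumed fix (registry entries copied from the diff):

# Registry lookup as in etl_runner, with the guard moved to dataset_element
# (an assumption about the intended logic; the diff checks dataset_list,
# which is always truthy here). Entries are copied from the diff.
dataset_list = [
    {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL"},
    {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL"},
]


def find_dataset(dataset_to_run: str) -> dict:
    dataset_element = next(
        (item for item in dataset_list if item["name"] == dataset_to_run), None
    )
    if dataset_element is None:
        raise ValueError("Invalid dataset name")
    return dataset_element


print(find_dataset("ejscreen")["class_name"])  # EJScreenETL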
@@ -59,9 +59,7 @@ class ScoreETL(ExtractTransformLoad):
         # Load census data
         census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
         self.census_df = pd.read_csv(
-            census_csv,
-            dtype={self.GEOID_FIELD_NAME: "string"},
-            low_memory=False,
+            census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False,
         )

         # Load housing and transportation data
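The `dtype={...: "string"}` argument here (and in the GeoScoreETL hunk below) matters for census data: GEOIDs carry leading zeros that a default numeric parse would silently drop. A small self-contained illustration, with invented GEOID values:

# Why dtype={...: "string"} matters: census GEOIDs have leading zeros that a
# default integer parse would drop. The column name matches the diff; the
# GEOID values are invented for the example.
import io

import pandas as pd

csv_data = "GEOID10,score\n010010201001,0.5\n060750179021,0.7\n"

as_int = pd.read_csv(io.StringIO(csv_data))
as_str = pd.read_csv(io.StringIO(csv_data), dtype={"GEOID10": "string"})

print(as_int["GEOID10"].iloc[0])  # 10010201001  (leading zero lost)
print(as_str["GEOID10"].iloc[0])  # 010010201001 (preserved)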
@@ -123,8 +121,7 @@ class ScoreETL(ExtractTransformLoad):

         # Define a named tuple that will be used for each data set input.
         DataSet = collections.namedtuple(
-            typename="DataSet",
-            field_names=["input_field", "renamed_field", "bucket"],
+            typename="DataSet", field_names=["input_field", "renamed_field", "bucket"],
         )

         data_sets = [
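As a quick illustration of the named tuple defined above (field values taken from the data-set entries later in this diff):

# A quick illustration of the DataSet namedtuple above; the field values are
# copied from the ACSTOTPOP entry later in this diff.
import collections

DataSet = collections.namedtuple(
    typename="DataSet", field_names=["input_field", "renamed_field", "bucket"]
)

ds = DataSet(input_field="ACSTOTPOP", renamed_field="Total population", bucket=None)
print(ds.renamed_field)  # Total population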
@@ -141,9 +138,7 @@ class ScoreETL(ExtractTransformLoad):
                 bucket=None,
             ),
             DataSet(
-                input_field="ACSTOTPOP",
-                renamed_field="Total population",
-                bucket=None,
+                input_field="ACSTOTPOP", renamed_field="Total population", bucket=None,
             ),
             # The following data sets have buckets, because they're used in the score
             DataSet(
@@ -249,9 +244,7 @@ class ScoreETL(ExtractTransformLoad):
         }

         self.df.rename(
-            columns=renaming_dict,
-            inplace=True,
-            errors="raise",
+            columns=renaming_dict, inplace=True, errors="raise",
         )

         columns_to_keep = [data_set.renamed_field for data_set in data_sets]
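`errors="raise"` in `DataFrame.rename` makes a typo in `renaming_dict` fail fast with a `KeyError` instead of being silently ignored. A tiny demonstration, with column names borrowed from this diff:

# errors="raise" makes DataFrame.rename fail fast on a missing source column
# instead of skipping it silently. Column names are borrowed from this diff.
import pandas as pd

df = pd.DataFrame({"ACSTOTPOP": [100, 250]})
df.rename(columns={"ACSTOTPOP": "Total population"}, inplace=True, errors="raise")
print(df.columns.tolist())  # ['Total population']

try:
    df.rename(columns={"no_such_column": "x"}, errors="raise")
except KeyError as err:
    print("caught:", err)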
@@ -46,9 +46,7 @@ class GeoScoreETL(ExtractTransformLoad):

         logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
-            self.TILE_SCORE_CSV,
-            dtype={"GEOID10": "string"},
-            low_memory=False,
+            self.TILE_SCORE_CSV, dtype={"GEOID10": "string"}, low_memory=False,
         )

     def transform(self) -> None:
@@ -70,8 +68,7 @@ class GeoScoreETL(ExtractTransformLoad):
         ].reset_index(drop=True)

         usa_simplified.rename(
-            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
-            inplace=True,
+            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO}, inplace=True,
         )

         logger.info("Aggregating into tracts (~5 minutes)")
@@ -156,4 +153,4 @@ class GeoScoreETL(ExtractTransformLoad):

         logger.info("Writing usa-low (~9 minutes)")
         self.geojson_score_usa_low.to_file(self.SCORE_LOW_GEOJSON, driver="GeoJSON")
-        logger.info("Completed writing usa-low")
\ No newline at end of file
+        logger.info("Completed writing usa-low")
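For context on the write step above, a minimal sketch of writing a GeoDataFrame to GeoJSON, assuming geopandas and shapely are installed; the geometry and score values are invented:

# A minimal GeoJSON write mirroring the to_file call above. Assumes geopandas
# and shapely are installed; the geometry and score values are invented.
import geopandas as gpd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame(
    {"Score": [0.5]}, geometry=[Point(-122.4, 37.8)], crs="EPSG:4326"
)
gdf.to_file("usa-low.geojson", driver="GeoJSON")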
@@ -43,8 +43,7 @@ class PostScoreETL(ExtractTransformLoad):

     def extract(self) -> None:
         super().extract(
-            self.CENSUS_COUNTIES_ZIP_URL,
-            self.TMP_PATH,
+            self.CENSUS_COUNTIES_ZIP_URL, self.TMP_PATH,
         )

         logger.info("Reading Counties CSV")
@@ -68,8 +67,7 @@ class PostScoreETL(ExtractTransformLoad):
         # rename some of the columns to prepare for merge
         self.counties_df = self.counties_df[["USPS", "GEOID", "NAME"]]
         self.counties_df.rename(
-            columns={"USPS": "State Abbreviation", "NAME": "County Name"},
-            inplace=True,
+            columns={"USPS": "State Abbreviation", "NAME": "County Name"}, inplace=True,
         )

         # remove unnecessary columns
@@ -28,8 +28,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading CalEnviroScreen Data")
         super().extract(
-            self.CALENVIROSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.CALENVIROSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:
@@ -39,9 +39,7 @@ def download_census_csvs(data_path: Path) -> None:
         # But using 2010 for now
         cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
         unzip_file_from_url(
-            cbg_state_url,
-            data_path / "tmp",
-            data_path / "census" / "shp" / fips,
+            cbg_state_url, data_path / "tmp", data_path / "census" / "shp" / fips,
         )

         cmd = (
@@ -80,32 +78,22 @@ def download_census_csvs(data_path: Path) -> None:
             csv_dir_path / f"{state_id}.csv", mode="w", newline=""
         ) as cbg_csv_file:
             cbg_csv_file_writer = csv.writer(
-                cbg_csv_file,
-                delimiter=",",
-                quotechar='"',
-                quoting=csv.QUOTE_MINIMAL,
+                cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
             )

             for geoid10 in geoid10_list:
                 cbg_csv_file_writer.writerow(
-                    [
-                        geoid10,
-                    ]
+                    [geoid10,]
                 )

     ## write US csv
     with open(csv_dir_path / "us.csv", mode="w", newline="") as cbg_csv_file:
         cbg_csv_file_writer = csv.writer(
-            cbg_csv_file,
-            delimiter=",",
-            quotechar='"',
-            quoting=csv.QUOTE_MINIMAL,
+            cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
         )
         for geoid10 in cbg_national:
             cbg_csv_file_writer.writerow(
-                [
-                    geoid10,
-                ]
+                [geoid10,]
             )

     ## create national geojson
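The CSV-writing pattern above, extracted into a self-contained form (writing to an in-memory buffer; the GEOID values are invented):

# The csv.writer pattern from the hunk above, made self-contained by writing
# to an in-memory buffer. The GEOID values are invented for illustration.
import csv
import io

cbg_national = ["010010201001", "060750179021"]

buf = io.StringIO()
cbg_csv_file_writer = csv.writer(
    buf, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL
)
for geoid10 in cbg_national:
    cbg_csv_file_writer.writerow([geoid10])

print(buf.getvalue())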
@@ -18,8 +18,7 @@ class EJScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading EJScreen Data")
         super().extract(
-            self.EJSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.EJSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:
@@ -34,8 +34,7 @@ class HudHousingETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Extracting HUD Housing Data")
         super().extract(
-            self.HOUSING_FTP_URL,
-            self.HOUSING_ZIP_FILE_DIR,
+            self.HOUSING_FTP_URL, self.HOUSING_ZIP_FILE_DIR,
         )

     def transform(self) -> None:
@@ -49,10 +48,7 @@ class HudHousingETL(ExtractTransformLoad):
             / "140"
             / "Table8.csv"
         )
-        self.df = pd.read_csv(
-            filepath_or_buffer=tmp_csv_file_path,
-            encoding="latin-1",
-        )
+        self.df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path, encoding="latin-1",)

         # Rename and reformat block group ID
         self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
@@ -70,8 +70,7 @@ class TreeEquityScoreETL(ExtractTransformLoad):
         logger.info("Downloading Tree Equity Score Data")
         for state in self.states:
             super().extract(
-                f"{self.TES_URL}{state}.zip.zip",
-                f"{self.TMP_PATH}/{state}",
+                f"{self.TES_URL}{state}.zip.zip", f"{self.TMP_PATH}/{state}",
             )

     def transform(self) -> None:
@@ -97,10 +97,7 @@ def remove_all_dirs_from_dir(dir_path: Path) -> None:


 def unzip_file_from_url(
-    file_url: str,
-    download_path: Path,
-    unzipped_file_path: Path,
-    verify: bool = False,
+    file_url: str, download_path: Path, unzipped_file_path: Path, verify: bool = False,
 ) -> None:
     """Downloads a zip file from a remote URL location and unzips it in a specific directory, removing the temporary file after

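Finally, since only the signature and docstring of `unzip_file_from_url` appear in this diff, here is a hedged sketch of what a function with that signature plausibly does; the body is an assumption, not the repository's implementation (the `verify` flag likely controls SSL verification in the real code and is left unused here). The various `super().extract(url, path)` calls in the ETL classes above plausibly delegate to a helper like this one.

# A hedged sketch based only on the signature and docstring in the diff; this
# body is an assumption, not the repository's implementation.
import urllib.request
import zipfile
from pathlib import Path


def unzip_file_from_url(
    file_url: str, download_path: Path, unzipped_file_path: Path, verify: bool = False
) -> None:
    """Download a zip file, extract it, and remove the temporary file after."""
    download_path.mkdir(parents=True, exist_ok=True)
    zip_file_path = download_path / "downloaded.zip"
    # The verify flag is unused in this sketch; in the real code it likely
    # controls SSL certificate verification for the download.
    urllib.request.urlretrieve(file_url, zip_file_path)
    with zipfile.ZipFile(zip_file_path) as zf:
        zf.extractall(unzipped_file_path)
    zip_file_path.unlink()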