Mirror of https://github.com/DOI-DO/j40-cejst-2.git (synced 2025-02-23 10:04:18 -08:00)
Running Black
commit 508925618b (parent dd8c37e06d)
11 changed files with 30 additions and 87 deletions
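Every hunk below follows the same pattern: calls and literals that had been exploded one argument per line are joined onto a single line wherever they fit, with their trailing commas left in place. A command along these lines would reproduce the run; the version pin and working directory are assumptions, since the commit records neither (Black 20.8b0 and later treat a trailing comma as "magic" and would keep these calls exploded, so this run likely used an earlier release):

    pip install "black<20.8b0"   # assumed: a pre-magic-trailing-comma release
    black .                      # assumed to run from the pipeline package root; rewrites files in place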
@@ -22,9 +22,7 @@ def cli():
     pass


-@cli.command(
-    help="Clean up all census data folders",
-)
+@cli.command(help="Clean up all census data folders",)
 def census_cleanup():
     """CLI command to clean up the census data folder"""

@@ -37,9 +35,7 @@ def census_cleanup():
     logger.info("Cleaned up all census data files")


-@cli.command(
-    help="Clean up all data folders",
-)
+@cli.command(help="Clean up all data folders",)
 def data_cleanup():
     """CLI command to clean up the all the data folders"""

@@ -50,9 +46,7 @@ def data_cleanup():
     logger.info("Cleaned up all data folders")


-@cli.command(
-    help="Census data download",
-)
+@cli.command(help="Census data download",)
 def census_data_download():
     """CLI command to download all census shape files from the Census FTP and extract the geojson
     to generate national and by state Census Block Group CSVs"""
@@ -64,9 +58,7 @@ def census_data_download():
     logger.info("Completed downloading census data")


-@cli.command(
-    help="Run all ETL processes or a specific one",
-)
+@cli.command(help="Run all ETL processes or a specific one",)
 @click.option("-d", "--dataset", required=False, type=str)
 def etl_run(dataset: str):
     """Run a specific or all ETL processes
@@ -81,27 +73,21 @@ def etl_run(dataset: str):
     etl_runner(dataset)


-@cli.command(
-    help="Generate Score",
-)
+@cli.command(help="Generate Score",)
 def score_run():
     """CLI command to generate the score"""

     score_generate()


-@cli.command(
-    help="Generate Geojson files with scores baked in",
-)
+@cli.command(help="Generate Geojson files with scores baked in",)
 def geo_score():
     """CLI command to generate the score"""

     score_geo()


-@cli.command(
-    help="Generate map tiles",
-)
+@cli.command(help="Generate map tiles",)
 def generate_map_tiles():
     """CLI command to generate the map tiles"""

@@ -27,11 +27,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "census_acs",
             "class_name": "CensusACSETL",
         },
-        {
-            "name": "ejscreen",
-            "module_dir": "ejscreen",
-            "class_name": "EJScreenETL",
-        },
+        {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL",},
         {
             "name": "housing_and_transportation",
             "module_dir": "housing_and_transportation",
@@ -47,17 +43,12 @@ def etl_runner(dataset_to_run: str = None) -> None:
             "module_dir": "calenviroscreen",
             "class_name": "CalEnviroScreenETL",
         },
-        {
-            "name": "hud_recap",
-            "module_dir": "hud_recap",
-            "class_name": "HudRecapETL",
-        },
+        {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL",},
     ]

     if dataset_to_run:
         dataset_element = next(
-            (item for item in dataset_list if item["name"] == dataset_to_run),
-            None,
+            (item for item in dataset_list if item["name"] == dataset_to_run), None,
         )
         if not dataset_list:
             raise ValueError("Invalid dataset name")
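A side effect of this hunk is that the guard after the `next(...)` lookup is easier to spot as a likely bug: it tests `dataset_list`, the full hard-coded list, which is never empty, so the `ValueError` can never fire. A miss is signalled by `dataset_element` being `None`. A minimal sketch of the presumably intended guard (not part of this commit):

    dataset_element = next(
        (item for item in dataset_list if item["name"] == dataset_to_run), None,
    )
    if not dataset_element:  # assumed intent: reject unknown dataset names
        raise ValueError("Invalid dataset name")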
@@ -59,9 +59,7 @@ class ScoreETL(ExtractTransformLoad):
         # Load census data
         census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
         self.census_df = pd.read_csv(
-            census_csv,
-            dtype={self.GEOID_FIELD_NAME: "string"},
-            low_memory=False,
+            census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False,
         )

         # Load housing and transportation data
@@ -123,8 +121,7 @@ class ScoreETL(ExtractTransformLoad):

         # Define a named tuple that will be used for each data set input.
         DataSet = collections.namedtuple(
-            typename="DataSet",
-            field_names=["input_field", "renamed_field", "bucket"],
+            typename="DataSet", field_names=["input_field", "renamed_field", "bucket"],
         )

         data_sets = [
@@ -141,9 +138,7 @@ class ScoreETL(ExtractTransformLoad):
                 bucket=None,
             ),
             DataSet(
-                input_field="ACSTOTPOP",
-                renamed_field="Total population",
-                bucket=None,
+                input_field="ACSTOTPOP", renamed_field="Total population", bucket=None,
             ),
             # The following data sets have buckets, because they're used in the score
             DataSet(
@@ -249,9 +244,7 @@ class ScoreETL(ExtractTransformLoad):
         }

         self.df.rename(
-            columns=renaming_dict,
-            inplace=True,
-            errors="raise",
+            columns=renaming_dict, inplace=True, errors="raise",
         )

         columns_to_keep = [data_set.renamed_field for data_set in data_sets]
@@ -46,9 +46,7 @@ class GeoScoreETL(ExtractTransformLoad):

         logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
-            self.TILE_SCORE_CSV,
-            dtype={"GEOID10": "string"},
-            low_memory=False,
+            self.TILE_SCORE_CSV, dtype={"GEOID10": "string"}, low_memory=False,
         )

     def transform(self) -> None:
@@ -70,8 +68,7 @@ class GeoScoreETL(ExtractTransformLoad):
         ].reset_index(drop=True)

         usa_simplified.rename(
-            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
-            inplace=True,
+            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO}, inplace=True,
         )

         logger.info("Aggregating into tracts (~5 minutes)")
@@ -43,8 +43,7 @@ class PostScoreETL(ExtractTransformLoad):

     def extract(self) -> None:
         super().extract(
-            self.CENSUS_COUNTIES_ZIP_URL,
-            self.TMP_PATH,
+            self.CENSUS_COUNTIES_ZIP_URL, self.TMP_PATH,
         )

         logger.info("Reading Counties CSV")
@@ -68,8 +67,7 @@ class PostScoreETL(ExtractTransformLoad):
         # rename some of the columns to prepare for merge
         self.counties_df = self.counties_df[["USPS", "GEOID", "NAME"]]
         self.counties_df.rename(
-            columns={"USPS": "State Abbreviation", "NAME": "County Name"},
-            inplace=True,
+            columns={"USPS": "State Abbreviation", "NAME": "County Name"}, inplace=True,
         )

         # remove unnecessary columns
@@ -28,8 +28,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading CalEnviroScreen Data")
         super().extract(
-            self.CALENVIROSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.CALENVIROSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:
@@ -39,9 +39,7 @@ def download_census_csvs(data_path: Path) -> None:
         # But using 2010 for now
         cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip"
         unzip_file_from_url(
-            cbg_state_url,
-            data_path / "tmp",
-            data_path / "census" / "shp" / fips,
+            cbg_state_url, data_path / "tmp", data_path / "census" / "shp" / fips,
         )

         cmd = (
@@ -80,32 +78,22 @@ def download_census_csvs(data_path: Path) -> None:
             csv_dir_path / f"{state_id}.csv", mode="w", newline=""
         ) as cbg_csv_file:
             cbg_csv_file_writer = csv.writer(
-                cbg_csv_file,
-                delimiter=",",
-                quotechar='"',
-                quoting=csv.QUOTE_MINIMAL,
+                cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
             )

             for geoid10 in geoid10_list:
                 cbg_csv_file_writer.writerow(
-                    [
-                        geoid10,
-                    ]
+                    [geoid10,]
                 )

     ## write US csv
     with open(csv_dir_path / "us.csv", mode="w", newline="") as cbg_csv_file:
         cbg_csv_file_writer = csv.writer(
-            cbg_csv_file,
-            delimiter=",",
-            quotechar='"',
-            quoting=csv.QUOTE_MINIMAL,
+            cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL,
         )
         for geoid10 in cbg_national:
             cbg_csv_file_writer.writerow(
-                [
-                    geoid10,
-                ]
+                [geoid10,]
             )

     ## create national geojson
@@ -18,8 +18,7 @@ class EJScreenETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Downloading EJScreen Data")
         super().extract(
-            self.EJSCREEN_FTP_URL,
-            self.TMP_PATH,
+            self.EJSCREEN_FTP_URL, self.TMP_PATH,
         )

     def transform(self) -> None:
@@ -34,8 +34,7 @@ class HudHousingETL(ExtractTransformLoad):
     def extract(self) -> None:
         logger.info("Extracting HUD Housing Data")
         super().extract(
-            self.HOUSING_FTP_URL,
-            self.HOUSING_ZIP_FILE_DIR,
+            self.HOUSING_FTP_URL, self.HOUSING_ZIP_FILE_DIR,
         )

     def transform(self) -> None:
@@ -49,10 +48,7 @@ class HudHousingETL(ExtractTransformLoad):
             / "140"
             / "Table8.csv"
         )
-        self.df = pd.read_csv(
-            filepath_or_buffer=tmp_csv_file_path,
-            encoding="latin-1",
-        )
+        self.df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path, encoding="latin-1",)

         # Rename and reformat block group ID
         self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
@@ -70,8 +70,7 @@ class TreeEquityScoreETL(ExtractTransformLoad):
         logger.info("Downloading Tree Equity Score Data")
         for state in self.states:
             super().extract(
-                f"{self.TES_URL}{state}.zip.zip",
-                f"{self.TMP_PATH}/{state}",
+                f"{self.TES_URL}{state}.zip.zip", f"{self.TMP_PATH}/{state}",
             )

     def transform(self) -> None:
@@ -97,10 +97,7 @@ def remove_all_dirs_from_dir(dir_path: Path) -> None:


 def unzip_file_from_url(
-    file_url: str,
-    download_path: Path,
-    unzipped_file_path: Path,
-    verify: bool = False,
+    file_url: str, download_path: Path, unzipped_file_path: Path, verify: bool = False,
 ) -> None:
     """Downloads a zip file from a remote URL location and unzips it in a specific directory, removing the temporary file after

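One caution before re-running a newer formatter over this tree: the collapsed one-liners above keep their trailing commas (`[geoid10,]`, `pd.read_csv(..., encoding="latin-1",)`), and from Black 20.8b0 onward such a "magic" trailing comma forces the exploded form again, so a modern run would largely revert these 30 added lines back to the 87 deleted ones. A small illustration of that documented rule, using a call from this diff:

    # With the trailing comma, Black >= 20.8b0 keeps one argument per line:
    super().extract(
        self.EJSCREEN_FTP_URL,
        self.TMP_PATH,
    )
    # Dropping the trailing comma lets it collapse whenever the line fits:
    super().extract(self.EJSCREEN_FTP_URL, self.TMP_PATH)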