mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-26 18:21:17 -07:00
Run ETL processes in parallel (#1253)
* WIP on parallelizing * switching to get_tmp_path for nri * switching to get_tmp_path everywhere necessary * fixing linter errors * moving heavy ETLs to front of line * add hold * moving cdc places up * removing unnecessary print * moving h&t up * adding parallel to geo post * better census labels * switching to concurrent futures * fixing output
This commit is contained in:
parent
389eb59ac4
commit
a0d6e55f0a
30 changed files with 286 additions and 160 deletions
|
@ -16,7 +16,7 @@ class EJSCREENETL(ExtractTransformLoad):
|
|||
|
||||
def __init__(self):
|
||||
self.EJSCREEN_FTP_URL = "https://edap-arcgiscloud-data-commons.s3.amazonaws.com/EJSCREEN2020/EJSCREEN_Tract_2020_USPR.csv.zip"
|
||||
self.EJSCREEN_CSV = self.TMP_PATH / "EJSCREEN_Tract_2020_USPR.csv"
|
||||
self.EJSCREEN_CSV = self.get_tmp_path() / "EJSCREEN_Tract_2020_USPR.csv"
|
||||
self.CSV_PATH = self.DATA_PATH / "dataset" / "ejscreen_2019"
|
||||
self.df: pd.DataFrame
|
||||
|
||||
|
@ -45,7 +45,7 @@ class EJSCREENETL(ExtractTransformLoad):
|
|||
logger.info("Downloading EJScreen Data")
|
||||
super().extract(
|
||||
self.EJSCREEN_FTP_URL,
|
||||
self.TMP_PATH,
|
||||
self.get_tmp_path(),
|
||||
verify=False, # EPA EJScreen end point has certificate issues often
|
||||
)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue