mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 10:51:16 -07:00
Run ETL processes in parallel (#1253)
* WIP on parallelizing * switching to get_tmp_path for nri * switching to get_tmp_path everywhere necessary * fixing linter errors * moving heavy ETLs to front of line * add hold * moving cdc places up * removing unnecessary print * moving h&t up * adding parallel to geo post * better census labels * switching to concurrent futures * fixing output
This commit is contained in:
parent
389eb59ac4
commit
a0d6e55f0a
30 changed files with 286 additions and 160 deletions
|
@ -101,6 +101,7 @@ class CensusACS2010ETL(ExtractTransformLoad):
|
|||
self.df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Starting Census 2010 ACS Transform")
|
||||
# Define the variables to retrieve
|
||||
variables = (
|
||||
self.UNEMPLOYED_FIELDS
|
||||
|
@ -118,7 +119,7 @@ class CensusACS2010ETL(ExtractTransformLoad):
|
|||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
logger.info("Starting Census ACS Transform")
|
||||
logger.info("Starting Census 2010 ACS Transform")
|
||||
|
||||
df = self.df
|
||||
|
||||
|
@ -184,7 +185,7 @@ class CensusACS2010ETL(ExtractTransformLoad):
|
|||
self.df = output_df
|
||||
|
||||
def load(self) -> None:
|
||||
logger.info("Saving Census ACS Data")
|
||||
logger.info("Saving Census 2010 ACS Data")
|
||||
|
||||
# mkdir census
|
||||
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue