Run ETL processes in parallel (#1253)

* WIP on parallelizing
* switching to get_tmp_path for nri
* switching to get_tmp_path everywhere necessary
* fixing linter errors
* moving heavy ETLs to front of line
* add hold
* moving cdc places up
* removing unnecessary print
* moving h&t up
* adding parallel to geo post
* better census labels
* switching to concurrent futures
* fixing output
2025-07-27 22:31:16 -07:00 · 2022-02-11 14:04:53 -05:00 · 2022-02-11 14:04:53 -05:00 · a0d6e55f0a
commit a0d6e55f0a
parent 389eb59ac4
30 changed files with 286 additions and 160 deletions
--- a/data/data-pipeline/data_pipeline/etl/sources/cdc_life_expectancy/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/cdc_life_expectancy/etl.py
@@ -29,7 +29,9 @@ class CDCLifeExpectancy(ExtractTransformLoad):
    def extract(self) -> None:
        logger.info("Starting data download.")

-        download_file_name = self.TMP_PATH / "cdc_life_expectancy" / "usa.csv"
+        download_file_name = (
+            self.get_tmp_path() / "cdc_life_expectancy" / "usa.csv"
+        )
        download_file_from_url(
            file_url=self.FILE_URL,
            download_file_name=download_file_name,