Adding back census to list of potential datasets, but separating out from standard list

Error this addresses: File "/Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/etl/runner.py", line 71, in etl_runner f"data_pipeline.etl.sources.{dataset['module_dir']}.etl" TypeError: 'NoneType' object is not subscriptable
2025-02-23 01:54:18 -08:00 · 2021-08-06 18:21:37 -04:00 · 2021-08-06 18:21:37 -04:00 · 61d0624966
commit 61d0624966
parent f51b0d69d9
5 changed files with 80 additions and 51 deletions
--- a/data/data-pipeline/data_pipeline/etl/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/constants.py
@ -0,0 +1,42 @@
 # Standard datasets, in the order they are listed for an ETL run. Each entry
 # records the dataset name, the module directory and the ETL class name.
 # NOTE: the module directory currently equals the dataset name for every entry.
 DATASET_LIST = [
    {"name": dataset_name, "module_dir": dataset_name, "class_name": etl_class}
    for dataset_name, etl_class in (
        ("tree_equity_score", "TreeEquityScoreETL"),
        ("census_acs", "CensusACSETL"),
        ("ejscreen", "EJScreenETL"),
        ("housing_and_transportation", "HousingTransportationETL"),
        ("hud_housing", "HudHousingETL"),
        ("calenviroscreen", "CalEnviroScreenETL"),
        ("hud_recap", "HudRecapETL"),
    )
 ]
 # Census dataset definition, held separately from DATASET_LIST. It has the
 # same three keys as the DATASET_LIST entries.
 CENSUS_INFO = dict(
    name="census",
    module_dir="census",
    class_name="CensusETL",
 )
--- a/data/data-pipeline/data_pipeline/etl/runner.py
+++ b/data/data-pipeline/data_pipeline/etl/runner.py
@ -4,6 +4,33 @@ from data_pipeline.etl.score.etl_score import ScoreETL
 from data_pipeline.etl.score.etl_score_geo import GeoScoreETL
 from data_pipeline.etl.score.etl_score_post import PostScoreETL
 from . import constants
 def get_datasets_to_run(dataset_to_run: str = None) -> list:
    """Return the dataset definitions that an ETL run should process.

    Args:
        dataset_to_run (str): Name of a single dataset to run. If missing
            (None or empty), every dataset in constants.DATASET_LIST is
            selected (optional)

    Returns:
        list: Dataset definition dicts. With no name given this is a copy of
        constants.DATASET_LIST; otherwise it is a one-element list holding the
        definition whose "name" matches, searched across
        constants.DATASET_LIST plus constants.CENSUS_INFO.

    Raises:
        ValueError: If dataset_to_run does not match any known dataset name.
    """
    if not dataset_to_run:
        # Hand out a copy so callers cannot mutate the shared module constant.
        return list(constants.DATASET_LIST)

    # Census is not part of the default list but can still be requested by name.
    etls_to_search = constants.DATASET_LIST + [constants.CENSUS_INFO]
    dataset_element = next(
        (item for item in etls_to_search if item["name"] == dataset_to_run),
        None,
    )
    if dataset_element is None:
        raise ValueError("Invalid dataset name")

    # Narrow the run to just the requested dataset.
    return [dataset_element]
 def etl_runner(dataset_to_run: str = None) -> None:
    """Runs all etl processes or a specific one
@ -14,56 +41,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
    Returns:
        None
    """
-
+    dataset_list = get_datasets_to_run(dataset_to_run)
    # this list comes from YAMLs
    dataset_list = [
        {
            "name": "tree_equity_score",
            "module_dir": "tree_equity_score",
            "class_name": "TreeEquityScoreETL",
        },
        {
            "name": "census_acs",
            "module_dir": "census_acs",
            "class_name": "CensusACSETL",
        },
        {
            "name": "ejscreen",
            "module_dir": "ejscreen",
            "class_name": "EJScreenETL",
        },
        {
            "name": "housing_and_transportation",
            "module_dir": "housing_and_transportation",
            "class_name": "HousingTransportationETL",
        },
        {
            "name": "hud_housing",
            "module_dir": "hud_housing",
            "class_name": "HudHousingETL",
        },
        {
            "name": "calenviroscreen",
            "module_dir": "calenviroscreen",
            "class_name": "CalEnviroScreenETL",
        },
        {
            "name": "hud_recap",
            "module_dir": "hud_recap",
            "class_name": "HudRecapETL",
        },
    ]
    if dataset_to_run:
        dataset_element = next(
            (item for item in dataset_list if item["name"] == dataset_to_run),
            None,
        )
        if not dataset_list:
            raise ValueError("Invalid dataset name")
        else:
            # reset the list to just the dataset
            dataset_list = [dataset_element]
    # Run the ETLs for the dataset_list
    for dataset in dataset_list:
--- a/data/data-pipeline/data_pipeline/etl/sources/census/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census/etl.py
@ -43,7 +43,7 @@ class CensusETL(ExtractTransformLoad):
        Returns:
            Path on disk to the file_type file corresponding to this FIPS
        """
-        file_path : Path
+        file_path: Path
        if file_type == GeoFileType.SHP:
            file_path = Path(
                self.SHP_BASE_PATH / fips_code / f"tl_2010_{fips_code}_bg10.shp"
--- a/data/data-pipeline/data_pipeline/etl/tests/init.py
+++ b/data/data-pipeline/data_pipeline/etl/tests/init.py
--- a/data/data-pipeline/data_pipeline/etl/tests/test_etl.py
+++ b/data/data-pipeline/data_pipeline/etl/tests/test_etl.py
@ -0,0 +1,9 @@
 import pytest
 from data_pipeline.etl import constants, runner
 def test_get_datasets_to_run():
    """Check dataset selection for no name, the census name and a bad name."""
    # No dataset requested: the standard list comes back.
    every_dataset = runner.get_datasets_to_run(None)
    assert every_dataset == constants.DATASET_LIST

    # Census is selectable by name even though it is not in the standard list.
    census_only = runner.get_datasets_to_run("census")
    assert census_only == [constants.CENSUS_INFO]

    # An unknown name is rejected.
    with pytest.raises(ValueError):
        runner.get_datasets_to_run("doesnt_exist")