PyPi Packaging of Data Pipeline (#1464)

* PyPi Packaging of Data Pipeline
* package rename
* adding python version
* trigger data checks
* print env vars
* python version 2
* trigger data check
* python version 3
* update caching for other GHAs
2025-07-28 06:51:18 -07:00 · 2022-03-21 18:55:15 -04:00 · 2022-03-21 18:55:15 -04:00 · dd723b6c19
commit dd723b6c19
parent 53e35427f2
6 changed files with 22 additions and 9 deletions
--- a/data/data-pipeline/data_pipeline/etl/base.py
+++ b/data/data-pipeline/data_pipeline/etl/base.py
@@ -61,7 +61,7 @@ class ExtractTransformLoad:
    #  in the output file based on this geography level.
    GEO_LEVEL: ValidGeoLevel = None

-    # COLUMNS_TO_KEEP to used to identify which columns to keep in the output df.
+    # COLUMNS_TO_KEEP is used to identify which columns to keep in the output df.
    COLUMNS_TO_KEEP: typing.List[str] = None

    # Thirteen digits in a census block group ID.
@@ -70,7 +70,7 @@ class ExtractTransformLoad:
    #  be from CBGs at different time periods.
    EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000

-    # Eleven digits in a census tract ID.
+    # There should be eleven digits in a census tract ID.
    EXPECTED_CENSUS_TRACTS_CHARACTER_LENGTH: int = 11
    # TODO: investigate. Census says there are only 74,134 tracts in the United States,
    #  Puerto Rico, and island areas. This might be from tracts at different time