PyPI Packaging of Data Pipeline (#1464)

* PyPI Packaging of Data Pipeline
* package rename
* adding python version
* trigger data checks
* print env vars
* python version 2
* trigger data check
* python version 3
* update caching for other GHAs
2025-07-28 07:41:16 -07:00 · 2022-03-21 18:55:15 -04:00 · 2022-03-21 18:55:15 -04:00 · dd723b6c19
commit dd723b6c19
parent 53e35427f2
6 changed files with 22 additions and 9 deletions
--- a/.github/workflows/combine-tilefy.yml
+++ b/.github/workflows/combine-tilefy.yml
@@ -31,7 +31,7 @@ jobs:
        id: cached-poetry-dependencies
        with:
          path: ~/.cache/pypoetry/virtualenvs
-          key: env-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/combine-tilefy.yml') }}
+          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/combine-tilefy.yml') }}
      - name: Install poetry
        uses: snok/install-poetry@v1
      - name: Print Poetry settings
--- a/.github/workflows/data-checks.yml
+++ b/.github/workflows/data-checks.yml
@@ -23,12 +23,14 @@ jobs:
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
+      - name: Print variables to help debug
+        uses: hmarr/debug-action@v2
      - name: Load cached Poetry installation
        id: cached-poetry-dependencies
        uses: actions/cache@v2
        with:
          path: ~/.cache/pypoetry/virtualenvs
-          key: env-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/data-checks.yml') }}
+          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/data-checks.yml') }}
      - name: Install poetry
        uses: snok/install-poetry@v1
      - name: Print Poetry settings
--- a/.github/workflows/deploy_be_staging.yml
+++ b/.github/workflows/deploy_be_staging.yml
@@ -31,7 +31,7 @@ jobs:
        uses: actions/cache@v2
        with:
          path: ~/.cache/pypoetry/virtualenvs
-          key: env-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_be_staging.yml') }}
+          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_be_staging.yml') }}
      - name: Install poetry
        uses: snok/install-poetry@v1
      - name: Print Poetry settings
--- a/.github/workflows/generate-score.yml
+++ b/.github/workflows/generate-score.yml
@@ -30,7 +30,7 @@ jobs:
        uses: actions/cache@v2
        with:
          path: ~/.cache/pypoetry/virtualenvs
-          key: env-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/generate-score.yml') }}
+          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/generate-score.yml') }}
      - name: Install poetry
        uses: snok/install-poetry@v1
      - name: Print Poetry settings
--- a/data/data-pipeline/data_pipeline/etl/base.py
+++ b/data/data-pipeline/data_pipeline/etl/base.py
@@ -61,7 +61,7 @@ class ExtractTransformLoad:
    #  in the output file based on this geography level.
    GEO_LEVEL: ValidGeoLevel = None
-    # COLUMNS_TO_KEEP to used to identify which columns to keep in the output df.
+    # COLUMNS_TO_KEEP is used to identify which columns to keep in the output df.
    COLUMNS_TO_KEEP: typing.List[str] = None
    # Thirteen digits in a census block group ID.
@@ -70,7 +70,7 @@ class ExtractTransformLoad:
    #  be from CBGs at different time periods.
    EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000
-    # Eleven digits in a census tract ID.
+    # There should be eleven digits in a census tract ID.
    EXPECTED_CENSUS_TRACTS_CHARACTER_LENGTH: int = 11
    # TODO: investigate. Census says there are only 74,134 tracts in the United States,
    #  Puerto Rico, and island areas. This might be from tracts at different time
--- a/data/data-pipeline/pyproject.toml
+++ b/data/data-pipeline/pyproject.toml
@@ -1,8 +1,19 @@
 [tool.poetry]
-authors = ["Your Name <you@example.com>"]
+name = "justice40-data-pipeline"
-description = "ETL and Generation of Justice 40 Score"
-name = "data-pipeline"
 version = "0.1.0"
+description = "ETL, Score and Map Generation of Justice 40 Tool"
+authors = ["Justice40 Engineering <j40-engineering@lists.usds.gov>"]
+keywords = ["justice40", "environmental_justice", "python", "etl"]
+readme = "README.md"
+license = "MIT"
+homepage = "https://github.com/usds/justice40-tool/tree/main/data/data-pipeline"
+repository = "https://github.com/usds/justice40-tool"
+include = [
+   "LICENSE",
+]
+packages = [
+ {include = "data_pipeline"}
+]
 [tool.poetry.dependencies]
 CensusData = "^1.13"