PyPI Packaging of Data Pipeline (#1464)

* PyPI Packaging of Data Pipeline

* package rename

* adding python version

* trigger data checks

* print env vars

* python version 2

* trigger data check

* python version 3

* update caching for other GHAs
This commit is contained in:
Jorge Escobar 2022-03-21 18:55:15 -04:00 committed by GitHub
commit dd723b6c19
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 22 additions and 9 deletions

View file

@ -61,7 +61,7 @@ class ExtractTransformLoad:
# in the output file based on this geography level.
GEO_LEVEL: ValidGeoLevel = None
# COLUMNS_TO_KEEP to used to identify which columns to keep in the output df.
# COLUMNS_TO_KEEP is used to identify which columns to keep in the output df.
COLUMNS_TO_KEEP: typing.List[str] = None
# Thirteen digits in a census block group ID.
@ -70,7 +70,7 @@ class ExtractTransformLoad:
# be from CBGs at different time periods.
EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000
# Eleven digits in a census tract ID.
# There should be eleven digits in a census tract ID.
EXPECTED_CENSUS_TRACTS_CHARACTER_LENGTH: int = 11
# TODO: investigate. Census says there are only 74,134 tracts in the United States,
# Puerto Rico, and island areas. This might be from tracts at different time

View file

@ -1,8 +1,19 @@
[tool.poetry]
authors = ["Your Name <you@example.com>"]
description = "ETL and Generation of Justice 40 Score"
name = "data-pipeline"
name = "justice40-data-pipeline"
version = "0.1.0"
description = "ETL, Score and Map Generation of Justice 40 Tool"
authors = ["Justice40 Engineering <j40-engineering@lists.usds.gov>"]
keywords = ["justice40", "environmental_justice", "python", "etl"]
readme = "README.md"
license = "MIT"
homepage = "https://github.com/usds/justice40-tool/tree/main/data/data-pipeline"
repository = "https://github.com/usds/justice40-tool"
include = [
"LICENSE",
]
packages = [
{include = "data_pipeline"}
]
[tool.poetry.dependencies]
CensusData = "^1.13"