Mirror of https://github.com/DOI-DO/j40-cejst-2.git (synced 2025-07-28 07:51:16 -07:00)
S3 Parallel Upload and Deletions (#1410)
* installation step
* trigger action
* installing to home dir
* dry-run
* pyenv
* py 2.8
* trying s4cmd
* removing pyenv
* poetry s4cmd
* num-threads
* public read
* poetry cache
* s4cmd all around
* poetry cache
* poetry cache
* install poetry packages
* poetry echo
* let's do this
* s4cmd install on run
* s4cmd
* ad aws back
* add aws back
* testing census api key and poetry caching
* census api key
* census api
* census api key #3
* 250
* poetry update
* poetry change
* check census api key
* force flag
* update score gen and tilefy; remove cached fips
* small gdal update
* invalidation
* missing cache ids
This commit is contained in:
parent e31a4f3b94
commit 7b05ee9c76

8 changed files with 307 additions and 197 deletions
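The squashed commit message above points to swapping the upload step over to s4cmd for parallel S3 uploads (the "s4cmd", "num-threads", "public read", and "250" entries). As a rough illustration only — the bucket, local path, and exact flag values below are assumptions inferred from the message, not the GitHub Actions step changed in this commit — a parallel upload could look like this:

# Illustrative sketch only: bucket, prefix, and flag values are assumptions
# inferred from the commit message, not code from this diff.
import subprocess

def upload_tiles_in_parallel(local_dir: str, s3_prefix: str) -> None:
    """Upload a directory tree to S3 in parallel using s4cmd."""
    subprocess.run(
        [
            "poetry", "run", "s4cmd", "put", local_dir, s3_prefix,
            "--recursive",            # walk the whole directory tree
            "--force",                # overwrite objects that already exist
            "--num-threads=250",      # parallelism hinted at by the "250" entry
            "--API-ACL=public-read",  # s4cmd forwards --API-* options to boto3
        ],
        check=True,
    )

# Hypothetical invocation; the real bucket and paths live in the CI workflow.
upload_tiles_in_parallel("data/score/tiles", "s3://example-justice40-bucket/data-pipeline/")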
@@ -72,7 +72,7 @@ class ExtractTransformLoad:
 
     # Eleven digits in a census tract ID.
     EXPECTED_CENSUS_TRACTS_CHARACTER_LENGTH: int = 11
-    # TODO: investigate. Census says there are only 74,134 tracts in the US,
+    # TODO: investigate. Census says there are only 74,134 tracts in the United States,
     # Puerto Rico, and island areas. This might be from tracts at different time
     # periods. https://github.com/usds/justice40-tool/issues/964
     EXPECTED_MAX_CENSUS_TRACTS: int = 74160
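These class constants read like inputs to a sanity check on tract-level output. A minimal sketch of how such a check might use them — the helper, DataFrame, and column name are assumptions for illustration, not code from this commit:

# Minimal sketch, not from this commit: "df" and the tract ID column name are
# assumptions; only the two constants come from ExtractTransformLoad.
import pandas as pd

EXPECTED_CENSUS_TRACTS_CHARACTER_LENGTH = 11
EXPECTED_MAX_CENSUS_TRACTS = 74160

def validate_tracts(df: pd.DataFrame, tract_id_column: str = "GEOID10_TRACT") -> None:
    # Every census tract ID should be an eleven-character string.
    assert (
        df[tract_id_column].str.len() == EXPECTED_CENSUS_TRACTS_CHARACTER_LENGTH
    ).all(), "Found a tract ID that is not 11 characters long"
    # The number of distinct tracts should not exceed the expected national total.
    assert (
        df[tract_id_column].nunique() <= EXPECTED_MAX_CENSUS_TRACTS
    ), "More unique tracts than expected"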
@@ -41,14 +41,12 @@ def get_state_fips_codes(data_path: Path) -> list:
     """Returns a list with state data"""
     fips_csv_path = data_path / "census" / "csv" / "fips_states_2010.csv"
 
-    # check if file exists
-    if not os.path.isfile(fips_csv_path):
-        logger.info("Downloading fips from S3 repository")
-        unzip_file_from_url(
-            settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip",
-            data_path / "tmp",
-            data_path / "census" / "csv",
-        )
+    logger.info("Downloading fips from S3 repository")
+    unzip_file_from_url(
+        settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip",
+        data_path / "tmp",
+        data_path / "census" / "csv",
+    )
 
     fips_state_list = []
     with open(fips_csv_path, encoding="utf-8") as csv_file:
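The effect of this hunk is that get_state_fips_codes no longer checks for a cached fips_states_2010.csv and instead re-downloads it from S3 on every call ("remove cached fips" in the commit message). A hedged usage sketch — the module path and data directory below are assumptions:

# Illustrative only: the import path and DATA_PATH value are assumptions; the
# pipeline supplies its own settings. After this hunk, every call re-fetches
# fips_states_2010.csv from S3 instead of reusing a cached copy.
from pathlib import Path

from data_pipeline.utils import get_state_fips_codes  # assumed module path

DATA_PATH = Path("data_pipeline/data")  # hypothetical data directory

state_fips_codes = get_state_fips_codes(DATA_PATH)
# Likely a list of two-digit state/territory FIPS code strings, e.g. ["01", "02", ...]
print(len(state_fips_codes))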
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import List
 import censusdata
@@ -33,8 +34,11 @@ def retrieve_census_acs_data(
             f"Skipping download for state/territory with FIPS code {fips}"
         )
     else:
+        census_api_key = ""
+        if os.environ.get("CENSUS_API_KEY"):
+            census_api_key = "with API key"
         logger.info(
-            f"Downloading data for state/territory with FIPS code {fips}"
+            f"Downloading data for state/territory with FIPS code {fips} {census_api_key}"
         )
 
         try:
@@ -45,6 +49,7 @@ def retrieve_census_acs_data(
                     [("state", fips), ("county", "*"), ("tract", "*")]
                 ),
                 var=variables,
+                key=os.environ.get("CENSUS_API_KEY"),
             )
             dfs.append(response)
 
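The last three hunks thread an optional Census API key through the ACS download: the key is read from the CENSUS_API_KEY environment variable, reflected in the log line, and passed to censusdata.download. A minimal sketch of that environment contract, assuming the variable is set as a CI secret (the value below is a placeholder):

# Minimal sketch of the CENSUS_API_KEY contract added in this commit; the value
# below is a placeholder, not a real key.
import os

os.environ["CENSUS_API_KEY"] = "placeholder-census-api-key"

# Mirrors the new logging logic: annotate the log line when a key is present.
census_api_key = ""
if os.environ.get("CENSUS_API_KEY"):
    census_api_key = "with API key"
print(f"Downloading data for state/territory with FIPS code 01 {census_api_key}")

# When the variable is unset, os.environ.get returns None and the download
# request is simply made without a key.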