Mirror of https://github.com/DOI-DO/j40-cejst-2.git (synced 2025-02-22 09:41:26 -08:00)
Hotfix for fips zip download location + added full-score-run command (#465)
* Hotfix for S3 locations of data sources
* updated README
* lint failures

Co-authored-by: Nat Hillard <Nathaniel.K.Hillard@omb.eop.gov>
This commit is contained in:
parent 5cb00ef0ce
commit 4d7465c833
6 changed files with 26 additions and 74 deletions
@@ -123,9 +123,9 @@ Once completed, run `docker-compose up` and then open a new tab or terminal window

Here's a list of commands (a scripted example of the new full-score run follows the list):

- Get help: `docker exec j40_data_pipeline_1 python3 application.py --help`
- Clean up the census data directories: `docker exec j40_data_pipeline_1 python3 application.py census-cleanup`
- Clean up the data directories: `docker exec j40_data_pipeline_1 python3 application.py data-cleanup`
- Generate census data: `docker exec j40_data_pipeline_1 python3 application.py census-data-download`
- Run all ETL and generate the score: `docker exec j40_data_pipeline_1 python3 application.py score-full-run`
- Clean up the data directories: `docker exec j40_data_pipeline_1 python3 application.py data-cleanup`
- Run all ETL processes: `docker exec j40_data_pipeline_1 python3 application.py etl-run`
- Generate the score: `docker exec j40_data_pipeline_1 python3 application.py score-run`
- Generate GeoJSON files with scores baked in, in high and low versions: `docker exec j40_data_pipeline_1 python3 application.py geo-score`
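For scripting the new full pipeline run from outside the container, the same `score-full-run` invocation can be wrapped in a short Python helper. A minimal sketch, assuming the `j40_data_pipeline_1` container from the list above is already running; the wrapper function is illustrative, not part of the repository:

```python
import subprocess


def run_full_score_pipeline(container: str = "j40_data_pipeline_1") -> None:
    """Run all ETL steps and generate the score inside the running pipeline container."""
    subprocess.run(
        ["docker", "exec", container, "python3", "application.py", "score-full-run"],
        check=True,  # raise CalledProcessError if the pipeline command fails
    )


if __name__ == "__main__":
    run_full_score_pipeline()
```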
@@ -22,21 +22,6 @@ def cli():

    pass


@cli.command(
    help="Clean up all census data folders",
)
def census_cleanup():
    """CLI command to clean up the census data folder"""

    data_path = settings.APP_ROOT / "data"

    # census directories
    logger.info("Initializing all census data")
    census_reset(data_path)

    logger.info("Cleaned up all census data files")


@cli.command(
    help="Clean up all data folders",
)
@@ -57,8 +42,12 @@ def census_data_download():

    """CLI command to download all census shape files from the Census FTP and extract the geojson
    to generate national and by state Census Block Group CSVs"""

    logger.info("Downloading census data")
    data_path = settings.APP_ROOT / "data"

    logger.info("Initializing all census data")
    census_reset(data_path)

    logger.info("Downloading census data")
    download_census_csvs(data_path)

    logger.info("Completed downloading census data")
@@ -90,6 +79,19 @@ def score_run():

    score_generate()


@cli.command(
    help="Run ETL + Score Generation",
)
def score_full_run():
    """CLI command to run ETL and generate the score in one command"""

    data_folder_cleanup()
    score_folder_cleanup()
    temp_folder_cleanup()
    etl_runner()
    score_generate()


@cli.command(
    help="Generate Geojson files with scores baked in",
)
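The new `score-full-run` command chains the three cleanup helpers, the ETL runner, and score generation. A minimal sketch of invoking it directly through click's test runner, assuming `application.py` (and its `cli` group) is importable from the data pipeline directory; this bypasses Docker and is illustrative only:

```python
from click.testing import CliRunner

from application import cli  # the click group defined in application.py

runner = CliRunner()
# Runs cleanup, all ETL processes, and score generation in one pass.
result = runner.invoke(cli, ["score-full-run"])

print(result.exit_code)  # 0 on success
print(result.output)
```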
@@ -1,53 +0,0 @@
fips,state_name,state_abbreviation,region,division
01,Alabama,AL,South,East South Central
02,Alaska,AK,West,Pacific
04,Arizona,AZ,West,Mountain
05,Arkansas,AR,South,West South Central
06,California,CA,West,Pacific
08,Colorado,CO,West,Mountain
09,Connecticut,CT,Northeast,New England
10,Delaware,DE,South,South Atlantic
11,District of Columbia,DC,South,South Atlantic
12,Florida,FL,South,South Atlantic
13,Georgia,GA,South,South Atlantic
15,Hawaii,HI,West,Pacific
16,Idaho,ID,West,Mountain
17,Illinois,IL,Midwest,East North Central
18,Indiana,IN,Midwest,East North Central
19,Iowa,IA,Midwest,West North Central
20,Kansas,KS,Midwest,West North Central
21,Kentucky,KY,South,East South Central
22,Louisiana,LA,South,West South Central
23,Maine,ME,Northeast,New England
24,Maryland,MD,South,South Atlantic
25,Massachusetts,MA,Northeast,New England
26,Michigan,MI,Midwest,East North Central
27,Minnesota,MN,Midwest,West North Central
28,Mississippi,MS,South,East South Central
29,Missouri,MO,Midwest,West North Central
30,Montana,MT,West,Mountain
31,Nebraska,NE,Midwest,West North Central
32,Nevada,NV,West,Mountain
33,New Hampshire,NH,Northeast,New England
34,New Jersey,NJ,Northeast,Middle Atlantic
35,New Mexico,NM,West,Mountain
36,New York,NY,Northeast,Middle Atlantic
37,North Carolina,NC,South,South Atlantic
38,North Dakota,ND,Midwest,West North Central
39,Ohio,OH,Midwest,East North Central
40,Oklahoma,OK,South,West South Central
41,Oregon,OR,West,Pacific
42,Pennsylvania,PA,Northeast,Middle Atlantic
44,Rhode Island,RI,Northeast,New England
45,South Carolina,SC,South,South Atlantic
46,South Dakota,SD,Midwest,West North Central
47,Tennessee,TN,South,East South Central
48,Texas,TX,South,West South Central
49,Utah,UT,West,Mountain
50,Vermont,VT,Northeast,New England
51,Virginia,VA,South,South Atlantic
53,Washington,WA,West,Pacific
54,West Virginia,WV,South,South Atlantic
55,Wisconsin,WI,Midwest,East North Central
56,Wyoming,WY,West,Mountain
72,Puerto Rico,PR,Puerto Rico,Puerto Rico
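The deleted file above is the FIPS lookup table (state code, name, abbreviation, region, division) that the pipeline now downloads from S3 instead of keeping in the repository (see the `get_state_fips_codes` hunk below). As a rough illustration of how those codes can be read once the CSV is on disk, assuming pandas and a local `fips_states_2010.csv` path:

```python
import pandas as pd

# Column names match the deleted file's header row.
fips_df = pd.read_csv("fips_states_2010.csv", dtype={"fips": str})  # keep leading zeros such as "01"

state_fips_codes = fips_df["fips"].tolist()
print(state_fips_codes[:3])  # ['01', '02', '04']
```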
@@ -2,13 +2,16 @@ import pandas as pd

from etl.base import ExtractTransformLoad
from utils import get_module_logger
from config import settings

logger = get_module_logger(__name__)


class CalEnviroScreenETL(ExtractTransformLoad):
    def __init__(self):
        self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
        self.CALENVIROSCREEN_FTP_URL = (
            settings.AWS_JUSTICE40_DATASOURCES_URL + "/CalEnviroScreen_4.0_2021.zip"
        )
        self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
        self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
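The change above replaces the hardcoded CalEnviroScreen download URL with one composed from the `AWS_JUSTICE40_DATASOURCES_URL` setting. A small sketch showing that the composed value matches the old hardcoded string, using the base URL defined in the `settings.toml` hunk below:

```python
# Base value from settings.toml; in the ETL it is read as settings.AWS_JUSTICE40_DATASOURCES_URL.
AWS_JUSTICE40_DATASOURCES_URL = "https://justice40-data.s3.amazonaws.com/data-sources"

calenviroscreen_url = AWS_JUSTICE40_DATASOURCES_URL + "/CalEnviroScreen_4.0_2021.zip"
assert calenviroscreen_url == (
    "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
)
```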
@@ -38,7 +38,7 @@ def get_state_fips_codes(data_path: Path) -> list:

    if not os.path.isfile(fips_csv_path):
        logger.info("Downloading fips from S3 repository")
        unzip_file_from_url(
            settings.AWS_JUSTICE40_DATA_URL + "/Census/fips_states_2010.zip",
            settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip",
            data_path / "tmp",
            data_path / "census" / "csv",
        )
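Only the URL argument changes here; `unzip_file_from_url` is an existing repository helper whose implementation is not part of this diff. A rough sketch of the behavior its call site implies (fetch a zip into a temporary directory, then extract it to the target directory); the signature and internals below are assumptions for illustration:

```python
import urllib.request
import zipfile
from pathlib import Path


def unzip_file_from_url(url: str, download_path: Path, unzipped_path: Path) -> None:
    """Illustrative only: download a zip from `url` and extract it into `unzipped_path`."""
    download_path.mkdir(parents=True, exist_ok=True)
    unzipped_path.mkdir(parents=True, exist_ok=True)

    zip_file = download_path / "downloaded.zip"
    urllib.request.urlretrieve(url, zip_file)  # fetch the archive, e.g. fips_states_2010.zip

    with zipfile.ZipFile(zip_file) as archive:
        archive.extractall(unzipped_path)  # the extracted CSV lands under census/csv
```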
@@ -1,5 +1,5 @@
[default]
AWS_JUSTICE40_DATA_URL = "https://justice40-data.s3.amazonaws.com"
AWS_JUSTICE40_DATASOURCES_URL = "https://justice40-data.s3.amazonaws.com/data-sources"

[development]
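With the data-sources base URL defined under `[default]`, the ETL modules read it through the shared settings object, as the `settings.AWS_JUSTICE40_DATASOURCES_URL` references in the hunks above show. A minimal sketch, assuming the `config` module exposes the TOML keys as attributes:

```python
from config import settings  # settings object backed by settings.toml

# Same composition the fips hotfix uses in get_state_fips_codes():
fips_url = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip"
assert fips_url == "https://justice40-data.s3.amazonaws.com/data-sources/fips_states_2010.zip"
```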