mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00
Hotfix for fips zip download location + added full-score-run command (#465)
* Hotfix for S3 locations of data sources * updated README * lint failures Co-authored-by: Nat Hillard <Nathaniel.K.Hillard@omb.eop.gov>
This commit is contained in:
parent
5cb00ef0ce
commit
4d7465c833
6 changed files with 26 additions and 74 deletions
|
@ -123,9 +123,9 @@ Once completed, run `docker-compose up` and then open a new tab or terminal wind
|
||||||
Here's a list of commands:
|
Here's a list of commands:
|
||||||
|
|
||||||
- Get help: `docker exec j40_data_pipeline_1 python3 application.py --help`
|
- Get help: `docker exec j40_data_pipeline_1 python3 application.py --help`
|
||||||
- Clean up the census data directories: `docker exec j40_data_pipeline_1 python3 application.py census-cleanup`
|
|
||||||
- Clean up the data directories: `docker exec j40_data_pipeline_1 python3 application.py data-cleanup`
|
|
||||||
- Generate census data: `docker exec j40_data_pipeline_1 python3 application.py census-data-download`
|
- Generate census data: `docker exec j40_data_pipeline_1 python3 application.py census-data-download`
|
||||||
|
- Run all ETL and Generate score: `docker exec j40_data_pipeline_1 python3 application.py score-full-run`
|
||||||
|
- Clean up the data directories: `docker exec j40_data_pipeline_1 python3 application.py data-cleanup`
|
||||||
- Run all ETL processes: `docker exec j40_data_pipeline_1 python3 application.py etl-run`
|
- Run all ETL processes: `docker exec j40_data_pipeline_1 python3 application.py etl-run`
|
||||||
- Generate Score: `docker exec j40_data_pipeline_1 python3 application.py score-run`
|
- Generate Score: `docker exec j40_data_pipeline_1 python3 application.py score-run`
|
||||||
- Generate Score with Geojson and high and low versions: `docker exec j40_data_pipeline_1 python3 application.py geo-score`
|
- Generate Score with Geojson and high and low versions: `docker exec j40_data_pipeline_1 python3 application.py geo-score`
|
||||||
|
|
|
@ -22,21 +22,6 @@ def cli():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@cli.command(
|
|
||||||
help="Clean up all census data folders",
|
|
||||||
)
|
|
||||||
def census_cleanup():
|
|
||||||
"""CLI command to clean up the census data folder"""
|
|
||||||
|
|
||||||
data_path = settings.APP_ROOT / "data"
|
|
||||||
|
|
||||||
# census directories
|
|
||||||
logger.info("Initializing all census data")
|
|
||||||
census_reset(data_path)
|
|
||||||
|
|
||||||
logger.info("Cleaned up all census data files")
|
|
||||||
|
|
||||||
|
|
||||||
@cli.command(
|
@cli.command(
|
||||||
help="Clean up all data folders",
|
help="Clean up all data folders",
|
||||||
)
|
)
|
||||||
|
@ -57,8 +42,12 @@ def census_data_download():
|
||||||
"""CLI command to download all census shape files from the Census FTP and extract the geojson
|
"""CLI command to download all census shape files from the Census FTP and extract the geojson
|
||||||
to generate national and by state Census Block Group CSVs"""
|
to generate national and by state Census Block Group CSVs"""
|
||||||
|
|
||||||
logger.info("Downloading census data")
|
|
||||||
data_path = settings.APP_ROOT / "data"
|
data_path = settings.APP_ROOT / "data"
|
||||||
|
|
||||||
|
logger.info("Initializing all census data")
|
||||||
|
census_reset(data_path)
|
||||||
|
|
||||||
|
logger.info("Downloading census data")
|
||||||
download_census_csvs(data_path)
|
download_census_csvs(data_path)
|
||||||
|
|
||||||
logger.info("Completed downloading census data")
|
logger.info("Completed downloading census data")
|
||||||
|
@ -90,6 +79,19 @@ def score_run():
|
||||||
score_generate()
|
score_generate()
|
||||||
|
|
||||||
|
|
||||||
|
@cli.command(
|
||||||
|
help="Run ETL + Score Generation",
|
||||||
|
)
|
||||||
|
def score_full_run():
|
||||||
|
"""CLI command to run ETL and generate the score in one command"""
|
||||||
|
|
||||||
|
data_folder_cleanup()
|
||||||
|
score_folder_cleanup()
|
||||||
|
temp_folder_cleanup()
|
||||||
|
etl_runner()
|
||||||
|
score_generate()
|
||||||
|
|
||||||
|
|
||||||
@cli.command(
|
@cli.command(
|
||||||
help="Generate Geojson files with scores baked in",
|
help="Generate Geojson files with scores baked in",
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,53 +0,0 @@
|
||||||
fips,state_name,state_abbreviation,region,division
|
|
||||||
01,Alabama,AL,South,East South Central
|
|
||||||
02,Alaska,AK,West,Pacific
|
|
||||||
04,Arizona,AZ,West,Mountain
|
|
||||||
05,Arkansas,AR,South,West South Central
|
|
||||||
06,California,CA,West,Pacific
|
|
||||||
08,Colorado,CO,West,Mountain
|
|
||||||
09,Connecticut,CT,Northeast,New England
|
|
||||||
10,Delaware,DE,South,South Atlantic
|
|
||||||
11,District of Columbia,DC,South,South Atlantic
|
|
||||||
12,Florida,FL,South,South Atlantic
|
|
||||||
13,Georgia,GA,South,South Atlantic
|
|
||||||
15,Hawaii,HI,West,Pacific
|
|
||||||
16,Idaho,ID,West,Mountain
|
|
||||||
17,Illinois,IL,Midwest,East North Central
|
|
||||||
18,Indiana,IN,Midwest,East North Central
|
|
||||||
19,Iowa,IA,Midwest,West North Central
|
|
||||||
20,Kansas,KS,Midwest,West North Central
|
|
||||||
21,Kentucky,KY,South,East South Central
|
|
||||||
22,Louisiana,LA,South,West South Central
|
|
||||||
23,Maine,ME,Northeast,New England
|
|
||||||
24,Maryland,MD,South,South Atlantic
|
|
||||||
25,Massachusetts,MA,Northeast,New England
|
|
||||||
26,Michigan,MI,Midwest,East North Central
|
|
||||||
27,Minnesota,MN,Midwest,West North Central
|
|
||||||
28,Mississippi,MS,South,East South Central
|
|
||||||
29,Missouri,MO,Midwest,West North Central
|
|
||||||
30,Montana,MT,West,Mountain
|
|
||||||
31,Nebraska,NE,Midwest,West North Central
|
|
||||||
32,Nevada,NV,West,Mountain
|
|
||||||
33,New Hampshire,NH,Northeast,New England
|
|
||||||
34,New Jersey,NJ,Northeast,Middle Atlantic
|
|
||||||
35,New Mexico,NM,West,Mountain
|
|
||||||
36,New York,NY,Northeast,Middle Atlantic
|
|
||||||
37,North Carolina,NC,South,South Atlantic
|
|
||||||
38,North Dakota,ND,Midwest,West North Central
|
|
||||||
39,Ohio,OH,Midwest,East North Central
|
|
||||||
40,Oklahoma,OK,South,West South Central
|
|
||||||
41,Oregon,OR,West,Pacific
|
|
||||||
42,Pennsylvania,PA,Northeast,Middle Atlantic
|
|
||||||
44,Rhode Island,RI,Northeast,New England
|
|
||||||
45,South Carolina,SC,South,South Atlantic
|
|
||||||
46,South Dakota,SD,Midwest,West North Central
|
|
||||||
47,Tennessee,TN,South,East South Central
|
|
||||||
48,Texas,TX,South,West South Central
|
|
||||||
49,Utah,UT,West,Mountain
|
|
||||||
50,Vermont,VT,Northeast,New England
|
|
||||||
51,Virginia,VA,South,South Atlantic
|
|
||||||
53,Washington,WA,West,Pacific
|
|
||||||
54,West Virginia,WV,South,South Atlantic
|
|
||||||
55,Wisconsin,WI,Midwest,East North Central
|
|
||||||
56,Wyoming,WY,West,Mountain
|
|
||||||
72,Puerto Rico,PR,Puerto Rico,Puerto Rico
|
|
|
|
@ -2,13 +2,16 @@ import pandas as pd
|
||||||
|
|
||||||
from etl.base import ExtractTransformLoad
|
from etl.base import ExtractTransformLoad
|
||||||
from utils import get_module_logger
|
from utils import get_module_logger
|
||||||
|
from config import settings
|
||||||
|
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class CalEnviroScreenETL(ExtractTransformLoad):
|
class CalEnviroScreenETL(ExtractTransformLoad):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
|
self.CALENVIROSCREEN_FTP_URL = (
|
||||||
|
settings.AWS_JUSTICE40_DATASOURCES_URL + "/CalEnviroScreen_4.0_2021.zip"
|
||||||
|
)
|
||||||
self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
|
self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
|
||||||
self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
|
self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@ def get_state_fips_codes(data_path: Path) -> list:
|
||||||
if not os.path.isfile(fips_csv_path):
|
if not os.path.isfile(fips_csv_path):
|
||||||
logger.info("Downloading fips from S3 repository")
|
logger.info("Downloading fips from S3 repository")
|
||||||
unzip_file_from_url(
|
unzip_file_from_url(
|
||||||
settings.AWS_JUSTICE40_DATA_URL + "/Census/fips_states_2010.zip",
|
settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip",
|
||||||
data_path / "tmp",
|
data_path / "tmp",
|
||||||
data_path / "census" / "csv",
|
data_path / "census" / "csv",
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
[default]
|
[default]
|
||||||
AWS_JUSTICE40_DATA_URL = "https://justice40-data.s3.amazonaws.com"
|
AWS_JUSTICE40_DATASOURCES_URL = "https://justice40-data.s3.amazonaws.com/data-sources"
|
||||||
|
|
||||||
[development]
|
[development]
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue