Hotfix for fips zip download location + added full-score-run command (#465)

* Hotfix for S3 locations of data sources

* updated README

* lint failures

Co-authored-by: Nat Hillard <Nathaniel.K.Hillard@omb.eop.gov>
This commit is contained in:
Jorge Escobar 2021-08-05 12:55:21 -04:00 committed by GitHub
parent 5cb00ef0ce
commit 4d7465c833
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 26 additions and 74 deletions

View file

@ -123,9 +123,9 @@ Once completed, run `docker-compose up` and then open a new tab or terminal wind
Here's a list of commands: Here's a list of commands:
- Get help: `docker exec j40_data_pipeline_1 python3 application.py --help` - Get help: `docker exec j40_data_pipeline_1 python3 application.py --help`
- Clean up the census data directories: `docker exec j40_data_pipeline_1 python3 application.py census-cleanup`
- Clean up the data directories: `docker exec j40_data_pipeline_1 python3 application.py data-cleanup`
- Generate census data: `docker exec j40_data_pipeline_1 python3 application.py census-data-download` - Generate census data: `docker exec j40_data_pipeline_1 python3 application.py census-data-download`
- Run all ETL and Generate score: `docker exec j40_data_pipeline_1 python3 application.py score-full-run`
- Clean up the data directories: `docker exec j40_data_pipeline_1 python3 application.py data-cleanup`
- Run all ETL processes: `docker exec j40_data_pipeline_1 python3 application.py etl-run` - Run all ETL processes: `docker exec j40_data_pipeline_1 python3 application.py etl-run`
- Generate Score: `docker exec j40_data_pipeline_1 python3 application.py score-run` - Generate Score: `docker exec j40_data_pipeline_1 python3 application.py score-run`
- Generate Score with Geojson and high and low versions: `docker exec j40_data_pipeline_1 python3 application.py geo-score` - Generate Score with Geojson and high and low versions: `docker exec j40_data_pipeline_1 python3 application.py geo-score`

View file

@ -22,21 +22,6 @@ def cli():
pass pass
@cli.command(help="Clean up all census data folders")
def census_cleanup():
    """Reset the census data kept under the application's ``data`` directory.

    Resolves ``settings.APP_ROOT / "data"`` and hands it to ``census_reset``,
    logging a message before and after the reset.
    """
    # Resolve the target directory first so a bad setting fails before
    # anything is logged.
    census_root = settings.APP_ROOT / "data"

    logger.info("Initializing all census data")
    census_reset(census_root)
    logger.info("Cleaned up all census data files")
@cli.command( @cli.command(
help="Clean up all data folders", help="Clean up all data folders",
) )
@ -57,8 +42,12 @@ def census_data_download():
"""CLI command to download all census shape files from the Census FTP and extract the geojson """CLI command to download all census shape files from the Census FTP and extract the geojson
to generate national and by state Census Block Group CSVs""" to generate national and by state Census Block Group CSVs"""
logger.info("Downloading census data")
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
logger.info("Initializing all census data")
census_reset(data_path)
logger.info("Downloading census data")
download_census_csvs(data_path) download_census_csvs(data_path)
logger.info("Completed downloading census data") logger.info("Completed downloading census data")
@ -90,6 +79,19 @@ def score_run():
score_generate() score_generate()
@cli.command(help="Run ETL + Score Generation")
def score_full_run():
    """Run the cleanup helpers, the ETL runner and score generation in one go.

    Calls, in order: ``data_folder_cleanup``, ``score_folder_cleanup``,
    ``temp_folder_cleanup``, ``etl_runner`` and finally ``score_generate``.
    """
    # Cleanup steps run first, in the same order as the individual commands
    # would be invoked by hand.
    for cleanup_step in (
        data_folder_cleanup,
        score_folder_cleanup,
        temp_folder_cleanup,
    ):
        cleanup_step()

    # ETL is invoked before scoring.
    # NOTE(review): presumably the score reads the ETL outputs - confirm.
    etl_runner()
    score_generate()
@cli.command( @cli.command(
help="Generate Geojson files with scores baked in", help="Generate Geojson files with scores baked in",
) )

View file

@ -1,53 +0,0 @@
fips,state_name,state_abbreviation,region,division
01,Alabama,AL,South,East South Central
02,Alaska,AK,West,Pacific
04,Arizona,AZ,West,Mountain
05,Arkansas,AR,South,West South Central
06,California,CA,West,Pacific
08,Colorado,CO,West,Mountain
09,Connecticut,CT,Northeast,New England
10,Delaware,DE,South,South Atlantic
11,District of Columbia,DC,South,South Atlantic
12,Florida,FL,South,South Atlantic
13,Georgia,GA,South,South Atlantic
15,Hawaii,HI,West,Pacific
16,Idaho,ID,West,Mountain
17,Illinois,IL,Midwest,East North Central
18,Indiana,IN,Midwest,East North Central
19,Iowa,IA,Midwest,West North Central
20,Kansas,KS,Midwest,West North Central
21,Kentucky,KY,South,East South Central
22,Louisiana,LA,South,West South Central
23,Maine,ME,Northeast,New England
24,Maryland,MD,South,South Atlantic
25,Massachusetts,MA,Northeast,New England
26,Michigan,MI,Midwest,East North Central
27,Minnesota,MN,Midwest,West North Central
28,Mississippi,MS,South,East South Central
29,Missouri,MO,Midwest,West North Central
30,Montana,MT,West,Mountain
31,Nebraska,NE,Midwest,West North Central
32,Nevada,NV,West,Mountain
33,New Hampshire,NH,Northeast,New England
34,New Jersey,NJ,Northeast,Middle Atlantic
35,New Mexico,NM,West,Mountain
36,New York,NY,Northeast,Middle Atlantic
37,North Carolina,NC,South,South Atlantic
38,North Dakota,ND,Midwest,West North Central
39,Ohio,OH,Midwest,East North Central
40,Oklahoma,OK,South,West South Central
41,Oregon,OR,West,Pacific
42,Pennsylvania,PA,Northeast,Middle Atlantic
44,Rhode Island,RI,Northeast,New England
45,South Carolina,SC,South,South Atlantic
46,South Dakota,SD,Midwest,West North Central
47,Tennessee,TN,South,East South Central
48,Texas,TX,South,West South Central
49,Utah,UT,West,Mountain
50,Vermont,VT,Northeast,New England
51,Virginia,VA,South,South Atlantic
53,Washington,WA,West,Pacific
54,West Virginia,WV,South,South Atlantic
55,Wisconsin,WI,Midwest,East North Central
56,Wyoming,WY,West,Mountain
72,Puerto Rico,PR,Puerto Rico,Puerto Rico
1 fips state_name state_abbreviation region division
2 01 Alabama AL South East South Central
3 02 Alaska AK West Pacific
4 04 Arizona AZ West Mountain
5 05 Arkansas AR South West South Central
6 06 California CA West Pacific
7 08 Colorado CO West Mountain
8 09 Connecticut CT Northeast New England
9 10 Delaware DE South South Atlantic
10 11 District of Columbia DC South South Atlantic
11 12 Florida FL South South Atlantic
12 13 Georgia GA South South Atlantic
13 15 Hawaii HI West Pacific
14 16 Idaho ID West Mountain
15 17 Illinois IL Midwest East North Central
16 18 Indiana IN Midwest East North Central
17 19 Iowa IA Midwest West North Central
18 20 Kansas KS Midwest West North Central
19 21 Kentucky KY South East South Central
20 22 Louisiana LA South West South Central
21 23 Maine ME Northeast New England
22 24 Maryland MD South South Atlantic
23 25 Massachusetts MA Northeast New England
24 26 Michigan MI Midwest East North Central
25 27 Minnesota MN Midwest West North Central
26 28 Mississippi MS South East South Central
27 29 Missouri MO Midwest West North Central
28 30 Montana MT West Mountain
29 31 Nebraska NE Midwest West North Central
30 32 Nevada NV West Mountain
31 33 New Hampshire NH Northeast New England
32 34 New Jersey NJ Northeast Middle Atlantic
33 35 New Mexico NM West Mountain
34 36 New York NY Northeast Middle Atlantic
35 37 North Carolina NC South South Atlantic
36 38 North Dakota ND Midwest West North Central
37 39 Ohio OH Midwest East North Central
38 40 Oklahoma OK South West South Central
39 41 Oregon OR West Pacific
40 42 Pennsylvania PA Northeast Middle Atlantic
41 44 Rhode Island RI Northeast New England
42 45 South Carolina SC South South Atlantic
43 46 South Dakota SD Midwest West North Central
44 47 Tennessee TN South East South Central
45 48 Texas TX South West South Central
46 49 Utah UT West Mountain
47 50 Vermont VT Northeast New England
48 51 Virginia VA South South Atlantic
49 53 Washington WA West Pacific
50 54 West Virginia WV South South Atlantic
51 55 Wisconsin WI Midwest East North Central
52 56 Wyoming WY West Mountain
53 72 Puerto Rico PR Puerto Rico Puerto Rico

View file

@ -2,13 +2,16 @@ import pandas as pd
from etl.base import ExtractTransformLoad from etl.base import ExtractTransformLoad
from utils import get_module_logger from utils import get_module_logger
from config import settings
logger = get_module_logger(__name__) logger = get_module_logger(__name__)
class CalEnviroScreenETL(ExtractTransformLoad): class CalEnviroScreenETL(ExtractTransformLoad):
def __init__(self): def __init__(self):
self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip" self.CALENVIROSCREEN_FTP_URL = (
settings.AWS_JUSTICE40_DATASOURCES_URL + "/CalEnviroScreen_4.0_2021.zip"
)
self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv" self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4" self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"

View file

@ -38,7 +38,7 @@ def get_state_fips_codes(data_path: Path) -> list:
if not os.path.isfile(fips_csv_path): if not os.path.isfile(fips_csv_path):
logger.info("Downloading fips from S3 repository") logger.info("Downloading fips from S3 repository")
unzip_file_from_url( unzip_file_from_url(
settings.AWS_JUSTICE40_DATA_URL + "/Census/fips_states_2010.zip", settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip",
data_path / "tmp", data_path / "tmp",
data_path / "census" / "csv", data_path / "census" / "csv",
) )

View file

@ -1,5 +1,5 @@
[default] [default]
AWS_JUSTICE40_DATA_URL = "https://justice40-data.s3.amazonaws.com" AWS_JUSTICE40_DATASOURCES_URL = "https://justice40-data.s3.amazonaws.com/data-sources"
[development] [development]