AWS Sync Public Read (#508)

* adding layer to mvts

* small fix for GHA

* AWS Sync Public Read

* removed temp file

* updated state median income ftp
Jorge Escobar 2021-08-12 14:17:25 -04:00 committed by GitHub
commit 773c035493
5 changed files with 38 additions and 70 deletions


@@ -96,7 +96,7 @@ jobs:
           aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           aws-region: us-east-1
       - name: Deploy to Geoplatform AWS
-        run: aws s3 sync ./public/ s3://usds-geoplatform-justice40-website/justice40-tool/${{env.DESTINATION_FOLDER}} --delete
+        run: aws s3 sync ./public/ s3://usds-geoplatform-justice40-website/justice40-tool/${{env.DESTINATION_FOLDER}} --acl public-read --delete
       - name: Update PR with deployed URL
         uses: mshick/add-pr-comment@v1
         if: github.event_name == 'pull_request' && github.event.action == 'opened' || github.event_name == 'push' # Only comment if the PR has been opened or a push has updated it
@@ -111,4 +111,4 @@ jobs:
         run: |
           echo "Github pages: https://usds.github.io/justice40-tool/$DESTINATION_FOLDER/en"
           echo "Standard S3 bucket version (http only) : http://usds-geoplatform-justice40-website.s3-website-us-east-1.amazonaws.com/justice40-tool/$DESTINATION_FOLDER/en"
-          echo "Cloudfront https: https://d2zjid6n5ja2pt.cloudfront.net/justice40-tool/$DESTINATION_FOLDER/en"
+          echo "Cloudfront https: https://d2zjid6n5ja2pt.cloudfront.net/justice40-tool/$DESTINATION_FOLDER/en"


@@ -49,9 +49,9 @@ jobs:
           aws-region: us-east-1
       - name: Deploy to Geoplatform AWS
         run: |
-          aws s3 sync ./data_pipeline/data/dataset/ s3://justice40-data/data-pipeline/data/dataset --delete
-          aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --delete
-          aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --delete
+          aws s3 sync ./data_pipeline/data/dataset/ s3://justice40-data/data-pipeline/data/dataset --acl public-read --delete
+          aws s3 sync ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline/data/score/csv --acl public-read --delete
+          aws s3 sync ./data_pipeline/data/score/downloadable/ s3://justice40-data/data-pipeline/data/score/downloadable --acl public-read --delete
       - name: Update PR with Comment about deployment
         uses: mshick/add-pr-comment@v1
         with:
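For uploads done outside the workflow, the flag's per-object effect can be reproduced with boto3. A sketch under assumptions: the local path and destination key are hypothetical examples, and the credentials need `s3:PutObject` plus `s3:PutObjectAcl` on the bucket.

```python
import boto3

s3 = boto3.client("s3")

# Equivalent of one object's upload in `aws s3 sync ... --acl public-read`:
# the ACL is attached to the object at upload time, not to the bucket.
s3.upload_file(
    Filename="data_pipeline/data/score/csv/usa.csv",  # hypothetical local file
    Bucket="justice40-data",
    Key="data-pipeline/data/score/csv/usa.csv",  # hypothetical destination key
    ExtraArgs={"ACL": "public-read"},
)
```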


@@ -193,7 +193,7 @@ If you want to run tile generation, please install TippeCanoe [following these i
 - Start a terminal
 - Change to the package directory (i.e. `cd data/data-pipeline/data_pipeline`)
 - Then run `poetry run generate_tiles`
-- If you have S3 keys, you can sync to the dev repo by doing `aws s3 sync ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline/data/score/tiles --delete`
+- If you have S3 keys, you can sync to the dev repo by doing `aws s3 sync ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline/data/score/tiles --acl public-read --delete`
 ### Serve the map locally
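To confirm a synced tile actually carries the grant that `--acl public-read` is supposed to add, its object ACL can be inspected. A minimal sketch, where the tile key is a hypothetical example of the generated tileset layout:

```python
import boto3

s3 = boto3.client("s3")

# Hypothetical tile key; adjust to the generated tileset layout.
acl = s3.get_object_acl(
    Bucket="justice40-data",
    Key="data-pipeline/data/score/tiles/0/0/0.pbf",
)

# public-read shows up as a READ grant to the AllUsers group.
ALL_USERS_URI = "http://acs.amazonaws.com/groups/global/AllUsers"
is_public = any(
    grant["Grantee"].get("URI") == ALL_USERS_URI
    and grant["Permission"] == "READ"
    for grant in acl["Grants"]
)
print("publicly readable:", is_public)
```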


@@ -1,53 +0,0 @@
-GEOID2,Median household income (State)
-01,50536
-02,77640
-04,58945
-05,47597
-06,75235
-08,72331
-09,78444
-10,68287
-11,86420
-12,55660
-13,58700
-15,81275
-16,55785
-17,65886
-18,56303
-19,60523
-20,59597
-21,50589
-22,49469
-23,57918
-24,84805
-25,81215
-26,57144
-27,71306
-28,45081
-29,55461
-30,54970
-31,61439
-32,60365
-33,76768
-34,82545
-35,49754
-36,68486
-37,54602
-38,64894
-39,56602
-40,52919
-41,62818
-42,61744
-44,67167
-45,53199
-46,58275
-47,53320
-48,61874
-49,71621
-50,61973
-51,74222
-53,73775
-54,46711
-55,61747
-56,64049
-72,20539
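The deleted checked-in copy above is the same table the ETL now downloads from S3 (inside `2014_to_2019_state_median_income.zip`). One caveat when reading it back: `GEOID2` is a state FIPS code with leading zeros, so it must be parsed as a string. A minimal pandas sketch, assuming the CSV sits in the working directory:

```python
import pandas as pd

# Parse GEOID2 as a string: FIPS codes like "01" (Alabama) lose their
# leading zero if pandas infers an integer dtype.
state_median_income = pd.read_csv(
    "2014_to_2019_state_median_income.csv",
    dtype={"GEOID2": str},
)
print(state_median_income.head())
```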


@@ -4,6 +4,7 @@ import censusdata
 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
 from data_pipeline.utils import get_module_logger
+from data_pipeline.config import settings

 logger = get_module_logger(__name__)
@@ -11,10 +12,14 @@ logger = get_module_logger(__name__)
 class CensusACSETL(ExtractTransformLoad):
     def __init__(self):
         self.ACS_YEAR = 2019
-        self.OUTPUT_PATH = self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}"
+        self.OUTPUT_PATH = (
+            self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}"
+        )
         self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)"
         self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
-        self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = "Linguistic isolation (total)"
+        self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = (
+            "Linguistic isolation (total)"
+        )
         self.LINGUISTIC_ISOLATION_FIELDS = [
             "C16002_001E",
             "C16002_004E",
@@ -23,7 +28,9 @@ class CensusACSETL(ExtractTransformLoad):
             "C16002_013E",
         ]
         self.MEDIAN_INCOME_FIELD = "B19013_001E"
-        self.MEDIAN_INCOME_FIELD_NAME = "Median household income in the past 12 months"
+        self.MEDIAN_INCOME_FIELD_NAME = (
+            "Median household income in the past 12 months"
+        )
         self.MEDIAN_INCOME_STATE_FIELD_NAME = "Median household income (State)"
         self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME = (
             "Median household income (% of state median household income)"
@@ -32,22 +39,32 @@ class CensusACSETL(ExtractTransformLoad):
         self.df: pd.DataFrame
         self.state_median_income_df: pd.DataFrame

         # TODO: refactor this to put this file on s3 and download it from there
+        self.STATE_MEDIAN_INCOME_FTP_URL = (
+            settings.AWS_JUSTICE40_DATASOURCES_URL
+            + "/2014_to_2019_state_median_income.zip"
+        )
         self.STATE_MEDIAN_INCOME_FILE_PATH = (
-            self.DATA_PATH
-            / "needs_to_be_moved_to_s3"
-            / "2014_to_2019_state_median_income.csv"
+            self.TMP_PATH / "2014_to_2019_state_median_income.csv"
         )

-    def _fips_from_censusdata_censusgeo(self, censusgeo: censusdata.censusgeo) -> str:
+    def _fips_from_censusdata_censusgeo(
+        self, censusgeo: censusdata.censusgeo
+    ) -> str:
         """Create a FIPS code from the proprietary censusgeo index."""
         fips = "".join([value for (key, value) in censusgeo.params()])
         return fips

     def extract(self) -> None:
+        # Extract state median income
+        super().extract(
+            self.STATE_MEDIAN_INCOME_FTP_URL,
+            self.TMP_PATH,
+        )
+
         dfs = []
         for fips in get_state_fips_codes(self.DATA_PATH):
-            logger.info(f"Downloading data for state/territory with FIPS code {fips}")
+            logger.info(
+                f"Downloading data for state/territory with FIPS code {fips}"
+            )

             dfs.append(
                 censusdata.download(
@@ -82,7 +99,9 @@ class CensusACSETL(ExtractTransformLoad):
         logger.info("Starting Census ACS Transform")

         # Rename median income
-        self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[self.MEDIAN_INCOME_FIELD]
+        self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
+            self.MEDIAN_INCOME_FIELD
+        ]

         # TODO: handle null values for CBG median income, which are `-666666666`.
@@ -104,7 +123,9 @@
         # Calculate percent unemployment.
         # TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
-        self.df[self.UNEMPLOYED_FIELD_NAME] = self.df.B23025_005E / self.df.B23025_003E
+        self.df[self.UNEMPLOYED_FIELD_NAME] = (
+            self.df.B23025_005E / self.df.B23025_003E
+        )

         # Calculate linguistic isolation.
         individual_limited_english_fields = [
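The diff ends before the state median income table is actually joined, but the field names above imply the transform: derive the state FIPS from each geography's ID, merge the state table, and divide. A pandas sketch under those assumptions; the `GEOID10` column name and the two-character prefix rule are assumptions, not shown in this diff.

```python
import pandas as pd

def add_percent_of_state_median_income(
    df: pd.DataFrame, state_median_income_df: pd.DataFrame
) -> pd.DataFrame:
    # Assumed: the block-group ID (GEOID10) begins with the 2-digit state FIPS.
    df = df.copy()
    df["GEOID2"] = df["GEOID10"].str[:2]
    df = df.merge(state_median_income_df, on="GEOID2", how="left")
    df["Median household income (% of state median household income)"] = (
        df["Median household income in the past 12 months"]
        / df["Median household income (State)"]
    )
    return df
```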