Mirror of https://github.com/DOI-DO/j40-cejst-2.git, synced 2025-09-29 18:33:17 -07:00

Merge pull request #31 from agilesix/cfelix/merge-v2-20241204

CEQ-J40 merge v2 code - 20241204

Commit 8dbe96fa90
84 changed files with 85579 additions and 5114 deletions
1  .github/CODEOWNERS  (vendored)

@@ -1 +0,0 @@
-* @vim-usds @travis-newby @sampowers-usds @mattbowen-usds
6  .github/workflows/codeql-analysis.yml  (vendored)

@@ -45,7 +45,7 @@ jobs:
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v1
+      uses: github/codeql-action/init@v2
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.

@@ -56,7 +56,7 @@ jobs:
     # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
     # If this step fails, then you should remove it and run the build manually (see below)
     - name: Autobuild
-      uses: github/codeql-action/autobuild@v1
+      uses: github/codeql-action/autobuild@v2

     # ℹ️ Command-line programs to run using the OS shell.
     # 📚 https://git.io/JvXDl

@@ -70,4 +70,4 @@ jobs:
     #   make release

     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v1
+      uses: github/codeql-action/analyze@v2
9  .github/workflows/data-checks.yml  (vendored)

@@ -2,9 +2,6 @@
 name: Data Checks
 on:
   pull_request:
-    branches:
-      - main
-      - "**/release/**"
     paths:
       - "data/**"
 jobs:

@@ -16,11 +13,11 @@ jobs:
     strategy:
       matrix:
         # checks all of the versions allowed in pyproject.toml
-        python-version: [3.8, 3.9]
+        python-version: [3.10.15]
     steps:
       # installs Python
       # one execution of the tests per version listed above
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:

@@ -31,7 +28,7 @@ jobs:
         uses: hmarr/debug-action@v2
       - name: Load cached Poetry installation
         id: cached-poetry-dependencies
-        uses: actions/cache@v2
+        uses: actions/cache@v4
         with:
           path: ~/.cache/pypoetry/virtualenvs
           key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/data-checks.yml') }}
132  .github/workflows/deploy_backend_main.yml  (vendored, new file)

@@ -0,0 +1,132 @@
+name: Deploy Backend Main
+on:
+  push:
+    branches: [main]
+    paths:
+      - "data/**"
+env:
+  CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }}
+  J40_VERSION_LABEL_STRING: ${{ vars.SCORE_VERSION }}
+
+jobs:
+  generate-score-tiles:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: data/data-pipeline
+    strategy:
+      matrix:
+        python-version: ['3.10']
+    environment: Staging
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v4
+      - name: Print variables to help debug
+        uses: hmarr/debug-action@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Load cached Poetry installation
+        id: cached-poetry-dependencies
+        uses: actions/cache@v4
+        with:
+          path: ~/.cache/pypoetry/virtualenvs
+          key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
+      - name: Install poetry
+        uses: snok/install-poetry@v1
+      - name: Print Poetry settings
+        run: poetry show -v
+      - name: Install dependencies
+        run: poetry add s4cmd && poetry install
+        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
+          aws-region: us-east-1
+      - name: Install GDAL/ogr2ogr
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install gdal-bin
+          ogrinfo --version
+      - name: Get Census Data
+        run: |
+          poetry run python3 data_pipeline/application.py pull-census-data -s aws
+      - name: Generate Score
+        run: |
+          poetry run python3 data_pipeline/application.py score-full-run
+      - name: Generate Score Post
+        run: |
+          poetry run python3 data_pipeline/application.py generate-score-post -s aws
+      - name: Confirm we generated the version of the score we think we did
+        if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' || env.J40_VERSION_LABEL_STRING == 'test' }}
+        run: |
+          grep "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null
+      - name: Confirm we generated the version of the score we think we did
+        if: ${{ env.J40_VERSION_LABEL_STRING == '2.0' || env.J40_VERSION_LABEL_STRING == 'beta' }}
+        run: |
+          grep -v "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null
+      - name: Generate Score Geo
+        run: |
+          poetry run python3 data_pipeline/application.py geo-score
+      - name: Run smoketest for 1.0
+        if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }}
+        run: |
+          poetry run pytest data_pipeline/ -m smoketest
+      - name: Deploy Score to Geoplatform AWS
+        run: |
+          poetry run s4cmd put ./data_pipeline/data/score/csv/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/csv --sync-check --recursive --force
+          poetry run s4cmd put ./data_pipeline/files/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/downloadable --sync-check --recursive --force
+          poetry run s4cmd put ./data_pipeline/data/score/downloadable/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/downloadable --sync-check --recursive --force
+      - name: Deploy 1.0 score post
+        if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }}
+        run: |
+          curl "${{secrets.DATA_URL}}/data-versions/1.0/data/score/downloadable/1.0-shapefile-codebook.zip" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/1.0/data/score/downloadable/1.0-communities.xlsx" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/1.0/data/score/downloadable/1.0-communities.csv" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/1.0/data/score/downloadable/cejst-technical-support-document.pdf" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/1.0/data/score/downloadable/draft-communities-list.pdf" -s -f -I -o /dev/null
+      - name: Deploy 2.0 score post
+        if: ${{ env.J40_VERSION_LABEL_STRING == '2.0' }}
+        run: |
+          curl "${{secrets.DATA_URL}}/data-versions/2.0/data/score/downloadable/2.0-data-documentation.zip" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/2.0/data/score/downloadable/2.0-shapefile-codebook.zip" -s -f -I -o /dev/null
+          curl "${{secrets.DATA_URL}}/data-versions/2.0/data/score/downloadable/2.0-communities.xlsx" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/2.0/data/score/downloadable/2.0-communities.csv" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/2.0/data/score/downloadable/cejst-technical-support-document.pdf" -s -f -I -o /dev/null && \
+          curl "${{secrets.DATA_URL}}/data-versions/2.0/data/score/downloadable/draft-communities-list.pdf" -s -f -I -o /dev/null
+      - name: Set timezone for tippecanoe
+        uses: szenius/set-timezone@v2.0
+        with:
+          timezoneLinux: "America/Los_Angeles"
+      - name: Get tippecanoe
+        run: |
+          sudo apt-get install -y software-properties-common libsqlite3-dev zlib1g-dev
+          sudo apt-add-repository -y ppa:git-core/ppa
+          sudo mkdir -p /tmp/tippecanoe-src
+          sudo git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src
+      - name: Make tippecanoe
+        working-directory: /tmp/tippecanoe-src
+        run: |
+          sudo /usr/bin/bash -c make
+          mkdir -p /usr/local/bin
+          cp tippecanoe /usr/local/bin/tippecanoe
+          tippecanoe -v
+      - name: Generate Tiles
+        run: |
+          poetry run python3 data_pipeline/application.py generate-map-tiles
+      - name: Deploy Map to Geoplatform AWS
+        run: |
+          poetry run s4cmd put ./data_pipeline/data/score/geojson/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/geojson --sync-check --recursive --force --delete-removed --num-threads=250
+          poetry run s4cmd put ./data_pipeline/data/score/shapefile/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/shapefile --sync-check --recursive --force --delete-removed
+          poetry run s4cmd put ./data_pipeline/data/score/tiles/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/tiles --sync-check --recursive --force --delete-removed --num-threads=250
+      - name: Invalidate cache on AWS CDN
+        uses: chetan/invalidate-cloudfront-action@master
+        env:
+          DISTRIBUTION: ${{secrets.DATA_CDN_ID}}
+          PATHS: "/*"
+          AWS_REGION: "us-east-1"
+          AWS_ACCESS_KEY_ID: ${{ secrets.CLIENT_DEV_AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CLIENT_DEV_AWS_SECRET_ACCESS_KEY }}
85  .github/workflows/deploy_frontend_main.yml  (vendored, new file)

@@ -0,0 +1,85 @@
+name: Deploy Frontend Main
+on:
+  push:
+    branches: [main]
+    paths:
+      - "client/**/*"
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    environment: Staging
+    defaults:
+      run:
+        working-directory: client
+    strategy:
+      matrix:
+        node-version: [14.x]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Use Node.js ${{ matrix.node-version }}
+        uses: actions/setup-node@v2
+        with:
+          node-version: ${{ matrix.node-version }}
+      - name: Set DESTINATION_FOLDER for main
+        run: |
+          echo "DESTINATION_FOLDER=main" >> $GITHUB_ENV
+      - name: Install
+        run: npm ci
+      - name: Build
+        run: npm run build --if-present
+        env:
+          # See the client readme for more info on environment variables:
+          # https://github.com/usds/justice40-tool/blob/main/client/README.md
+          DATA_SOURCE: cdn
+          # TODO: Update main URL when either is back up
+          SITE_URL: "${{ secrets.SITE_URL }}"
+          MAPBOX_STYLES_READ_TOKEN: "${{ secrets.MAPBOX_STYLES_READ_TOKEN }}"
+      - name: Get directory contents
+        run: ls -la public
+      - name: Lint
+        run: npm run lint
+      # Disabling for now due to jsonlint - TODO: put this back
+      # - name: License Check
+      #   run: npm run licenses
+      - name: Test
+        run: npm test
+      # - name: Check for security vulnerabilities
+      #   run: npm audit --production
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: J40Static
+          # Upload-artifact does not support the default working directory
+          # See more: https://github.com/actions/upload-artifact/issues/87
+          path: ./client/public
+  deploy:
+    runs-on: ubuntu-latest
+    needs: build
+    environment: Staging
+    steps:
+      - name: Checkout source
+        uses: actions/checkout@v4
+      - name: Download Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: J40Static
+          path: ./public
+      - name: Set DESTINATION_FOLDER for main
+        run: |
+          echo "DESTINATION_FOLDER=main" >> $GITHUB_ENV
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.CLIENT_DEV_AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.CLIENT_DEV_AWS_SECRET_ACCESS_KEY }}
+          aws-region: us-east-1
+      - name: Deploy to Geoplatform AWS
+        run: aws s3 sync ./public/ s3://${{secrets.S3_WEBSITE_BUCKET}}/justice40-tool/${{env.DESTINATION_FOLDER}} --delete
+      - name: Invalidate cache on AWS CDNs
+        uses: chetan/invalidate-cloudfront-action@master
+        env:
+          DISTRIBUTION: ${{secrets.WEB_CDN_ID}}
+          PATHS: "/*"
+          AWS_REGION: "us-east-1"
+          AWS_ACCESS_KEY_ID: ${{ secrets.CLIENT_DEV_AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.CLIENT_DEV_AWS_SECRET_ACCESS_KEY }}
4  .github/workflows/e2e.yml  (vendored)

@@ -17,9 +17,9 @@ jobs:
       working-directory: ./client
     steps:
       - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
       - name: Cypress nightly tests 🌃
-        uses: cypress-io/github-action@v2
+        uses: cypress-io/github-action@v6
        with:
          working-directory: ${{env.working-directory}}
          start: npm start
17  .github/workflows/markdown-link-check.yml  (vendored)

@@ -1,15 +1,16 @@
-name: Check Markdown links
+name: Check Markdown Links
 on:
   pull_request:
     # The branches below must be a subset of the branches above
     branches: [ main ]
     # Run only if the pull request changes a markdown file:
     # https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions#onpushpull_requestpaths
     paths:
       - '**.md'
 jobs:
-  markdown-link-check:
-    runs-on: ubuntu-latest
+  check-links:
+    name: runner / linkspector
+    runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v2
-      - uses: gaurav-nelson/github-action-markdown-link-check@v1
+      - uses: actions/checkout@v4
+      - name: Run linkspector
+        uses: umbrelladocs/action-linkspector@v1
+        with:
+          fail_on_error: true
3  .gitignore  (vendored)

@@ -150,3 +150,6 @@ node_modules
 .python-version
 .DS_Store
 temp_dir
+
+# asdf tooling
+.tool-versions
@@ -3,14 +3,14 @@

 # Feature Tiles env variables:
 # The TILES_BASE_URL will be determined by the DATA_SOURCE env variable
-GATSBY_CDN_TILES_BASE_URL=https://static-data-screeningtool.geoplatform.gov
+GATSBY_CDN_TILES_BASE_URL=https://dig0wsohit6js.cloudfront.net
 GATSBY_LOCAL_TILES_BASE_URL=http://localhost:5000/data/data-pipeline

 GATSBY_DATA_PIPELINE_SCORE_PATH_LOCAL=data_pipeline/data/score
 GATSBY_DATA_PIPELINE_TRIBAL_PATH=data-pipeline/data/tribal

 GATSBY_BETA_SCORE_PATH = data-versions/beta/data/score
 GATSBY_1_0_SCORE_PATH = data-versions/1.0/data/score
+GATSBY_2_0_SCORE_PATH = data-versions/2.0/data/score

 GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_XLS=downloadable/beta-communities.xlsx
 GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_CSV=downloadable/beta-communities.csv

@@ -19,17 +19,17 @@ GATSBY_FILE_DL_PATH_BETA_DATA_DOC=downloadable/beta-data-documentation.zip
 GATSBY_FILE_DL_PATH_BETA_TRAINING_SLIDES_PPT=downloadable/technical-training-slides.pptx


-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_XLS=downloadable/1.0-communities.xlsx
-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_CSV=downloadable/1.0-communities.csv
-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_PDF=downloadable/1.0-communities-list.pdf
-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_ES_PDF=downloadable/1.0-communities-list-es.pdf
-GATSBY_FILE_DL_PATH_1_0_SHAPE_FILE_ZIP=downloadable/1.0-shapefile-codebook.zip
-GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF=downloadable/CEQ-CEJST-Instructions.pdf
-GATSBY_FILE_DL_PATH_1_0_INSTRUCT_ES_PDF=downloadable/CEQ-CEJST-Instructions-es.pdf
-GATSBY_FILE_DL_PATH_1_0_COMP_CHART_PDF=downloadable/total-comparison-chart.pdf
-GATSBY_FILE_DL_PATH_1_0_TSD_PDF=downloadable/1.0-cejst-technical-support-document.pdf
-GATSBY_FILE_DL_PATH_1_0_TOOL_COMP_PDF=downloadable/1.0-climate-and-environmental-burden-tool-comparison.pdf
-GATSBY_FILE_DL_PATH_1_0_M_23_09_SIGNED_PDF=downloadable/M-23-09_Signed_CEQ_CPO_es.pdf
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_XLS=downloadable/2.0-communities.xlsx
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_CSV=downloadable/2.0-communities.csv
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_PDF=downloadable/2.0-communities-list.pdf
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_ES_PDF=downloadable/2.0-communities-list-es.pdf
+GATSBY_FILE_DL_PATH_2_0_SHAPE_FILE_ZIP=downloadable/2.0-shapefile-codebook.zip
+GATSBY_FILE_DL_PATH_2_0_INSTRUCT_PDF=downloadable/CEQ-CEJST-Instructions.pdf
+GATSBY_FILE_DL_PATH_2_0_INSTRUCT_ES_PDF=downloadable/CEQ-CEJST-Instructions-es.pdf
+GATSBY_FILE_DL_PATH_2_0_COMP_CHART_PDF=downloadable/total-comparison-chart.pdf
+GATSBY_FILE_DL_PATH_2_0_TSD_PDF=downloadable/2.0-cejst-technical-support-document.pdf
+GATSBY_FILE_DL_PATH_2_0_TOOL_COMP_PDF=downloadable/2.0-climate-and-environmental-burden-tool-comparison.pdf
+GATSBY_FILE_DL_PATH_2_0_M_23_09_SIGNED_PDF=downloadable/M-23-09_Signed_CEQ_CPO_es.pdf

 GATSBY_FILE_DL_PATH_TSD_ES_PDF=downloadable/cejst-technical-support-document-es.pdf
 GATSBY_FILE_DL_PATH_HOW_TO_COMMUNITIES_PDF=downloadable/draft-communities-list.pdf
@@ -3,12 +3,12 @@

 # Feature Tiles env variables:
 # The TILES_BASE_URL will always point to the CDN
-GATSBY_CDN_TILES_BASE_URL=https://static-data-screeningtool.geoplatform.gov
+GATSBY_CDN_TILES_BASE_URL=https://dig0wsohit6js.cloudfront.net

 GATSBY_DATA_PIPELINE_TRIBAL_PATH=data-pipeline/data/tribal

 GATSBY_BETA_SCORE_PATH = data-versions/beta/data/score
 GATSBY_1_0_SCORE_PATH = data-versions/1.0/data/score
+GATSBY_2_0_SCORE_PATH = data-versions/2.0/data/score

 GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_XLS=downloadable/beta-communities.xlsx
 GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_CSV=downloadable/beta-communities.csv

@@ -16,17 +16,17 @@ GATSBY_FILE_DL_PATH_BETA_SHAPE_FILE_ZIP=downloadable/beta-shapefile-codebook.zip
 GATSBY_FILE_DL_PATH_BETA_DATA_DOC=downloadable/beta-data-documentation.zip
 GATSBY_FILE_DL_PATH_BETA_TRAINING_SLIDES_PPT=downloadable/technical-training-slides.pptx

-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_XLS=downloadable/1.0-communities.xlsx
-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_CSV=downloadable/1.0-communities.csv
-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_PDF=downloadable/1.0-communities-list.pdf
-GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_ES_PDF=downloadable/1.0-communities-list-es.pdf
-GATSBY_FILE_DL_PATH_1_0_SHAPE_FILE_ZIP=downloadable/1.0-shapefile-codebook.zip
-GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF=downloadable/CEQ-CEJST-Instructions.pdf
-GATSBY_FILE_DL_PATH_1_0_INSTRUCT_ES_PDF=downloadable/CEQ-CEJST-Instructions-es.pdf
-GATSBY_FILE_DL_PATH_1_0_COMP_CHART_PDF=downloadable/total-comparison-chart.pdf
-GATSBY_FILE_DL_PATH_1_0_TSD_PDF=downloadable/1.0-cejst-technical-support-document.pdf
-GATSBY_FILE_DL_PATH_1_0_TOOL_COMP_PDF=downloadable/1.0-climate-and-environmental-burden-tool-comparison.pdf
-GATSBY_FILE_DL_PATH_1_0_M_23_09_SIGNED_PDF=downloadable/M-23-09_Signed_CEQ_CPO_es.pdf
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_XLS=downloadable/2.0-communities.xlsx
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_CSV=downloadable/2.0-communities.csv
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_PDF=downloadable/2.0-communities-list.pdf
+GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_ES_PDF=downloadable/2.0-communities-list-es.pdf
+GATSBY_FILE_DL_PATH_2_0_SHAPE_FILE_ZIP=downloadable/2.0-shapefile-codebook.zip
+GATSBY_FILE_DL_PATH_2_0_INSTRUCT_PDF=downloadable/CEQ-CEJST-Instructions.pdf
+GATSBY_FILE_DL_PATH_2_0_INSTRUCT_ES_PDF=downloadable/CEQ-CEJST-Instructions-es.pdf
+GATSBY_FILE_DL_PATH_2_0_COMP_CHART_PDF=downloadable/total-comparison-chart.pdf
+GATSBY_FILE_DL_PATH_2_0_TSD_PDF=downloadable/2.0-cejst-technical-support-document.pdf
+GATSBY_FILE_DL_PATH_2_0_TOOL_COMP_PDF=downloadable/2.0-climate-and-environmental-burden-tool-comparison.pdf
+GATSBY_FILE_DL_PATH_2_0_M_23_09_SIGNED_PDF=downloadable/M-23-09_Signed_CEQ_CPO_es.pdf

 GATSBY_FILE_DL_PATH_TSD_ES_PDF=downloadable/cejst-technical-support-document-es.pdf
 GATSBY_FILE_DL_PATH_HOW_TO_COMMUNITIES_PDF=downloadable/draft-communities-list.pdf
@@ -214,6 +214,7 @@ When developing, to use a flag:
 6. Set breakpoints in VS code!
+


 ## Package Versions

 The following attempts to explain why certain package versions have been chosen and what their current limitations are
261  client/package-lock.json  (generated)
File diff suppressed because it is too large
@@ -68,6 +68,7 @@
   "husky": "^7.0.4",
   "identity-obj-proxy": "^3.0.0",
   "jest": "^27.5.1",
+  "jest-environment-jsdom": "^27.5.1",
   "license-checker": "^25.0.1",
   "prettier": "^2.6.0",
   "react-test-renderer": "^17.0.2",

@@ -84,6 +85,7 @@
   "gatsby-plugin-env-variables": "^2.2.0",
   "gatsby-plugin-robots-txt": "^1.7.0",
   "gatsby-plugin-sitemap": "^4.10.0",
+  "js-search": "^2.0.1",
   "mapbox-gl": "^1.13.2",
   "maplibre-gl": "^1.14.0",
   "query-string": "^7.1.3",
@@ -1146,6 +1146,12 @@ const AreaDetail = ({properties}: IAreaDetailProps) => {
       isAdjacencyLowIncome={
         properties[constants.ADJACENCY_LOW_INCOME_EXCEEDS_THRESH]
       }
+      isIslandLowIncome={
+        properties[constants.IS_FEDERAL_POVERTY_LEVEL_200] &&
+        constants.TILES_ISLAND_AREA_FIPS_CODES.some((code) => {
+          return properties[constants.GEOID_PROPERTY].startsWith(code);
+        })
+      }
       tribalCountAK={
         properties[constants.TRIBAL_AREAS_COUNT_AK] >= 1 ?
           properties[constants.TRIBAL_AREAS_COUNT_AK] :
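The new isIslandLowIncome prop above hinges on a GEOID prefix test. A minimal self-contained sketch of that check, assuming the constants mirror the diff; the sample GEOIDs are hypothetical:

```ts
// Island-area FIPS prefixes, as defined in data/constants in this PR:
// 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
const TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"];

// A tract counts as island low income when it meets the 200% federal
// poverty threshold AND its GEOID begins with an island-area FIPS code.
function isIslandLowIncome(geoid: string, meetsPovertyThresh: boolean): boolean {
  return (
    meetsPovertyThresh &&
    TILES_ISLAND_AREA_FIPS_CODES.some((code) => geoid.startsWith(code))
  );
}

console.log(isIslandLowIncome("66010950100", true)); // true  (hypothetical Guam tract)
console.log(isIslandLowIncome("06037206031", true)); // false (mainland prefix "06")
```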
@@ -130,7 +130,7 @@ const J40Map = ({location}: IJ40Interface) => {
   const onClick = (event: MapEvent | React.MouseEvent<HTMLButtonElement>) => {
     // Stop all propagation / bubbling / capturing
     event.preventDefault();
-    event.stopPropagation();
+    (event as React.MouseEvent<HTMLButtonElement>).stopPropagation?.();

     // Check if the click is for territories. Given the territories component's design, it can be
     // guaranteed that each territory control will have an id. We use this ID to determine

@@ -167,8 +167,9 @@ const J40Map = ({location}: IJ40Interface) => {
       default:
         break;
     }
-  } else {
-    // This else clause will fire when the ID is null or empty. This is the case where the map is clicked
+  } else if (event.target && (event.target as HTMLElement).nodeName == 'DIV' ) {
+    // This else clause will fire when the user clicks on the map and will ignore other controls
+    // such as the search box and buttons.

     // @ts-ignore
     const feature = event.features && event.features[0];
@@ -4,6 +4,8 @@ import {LngLatBoundsLike} from 'maplibre-gl';
 import {useIntl} from 'gatsby-plugin-intl';
 import {Search} from '@trussworks/react-uswds';
 import {useWindowSize} from 'react-use';
+import * as JsSearch from 'js-search';
+import * as constants from '../../data/constants';

 import MapSearchMessage from '../MapSearchMessage';

@@ -14,6 +16,16 @@ interface IMapSearch {
   goToPlace(bounds: LngLatBoundsLike):void;
 }

+interface ISearchResult {
+  addresstype: string;
+  lat: string;
+  lon: string;
+  boundingbox: string[];
+  type: string;
+  // eslint-disable-next-line camelcase
+  place_rank: number;
+}
+
 const MapSearch = ({goToPlace}:IMapSearch) => {
   // State to hold if the search results are empty or not:
   const [isSearchResultsNull, setIsSearchResultsNull] = useState(false);

@@ -30,44 +42,118 @@ const MapSearch = ({goToPlace}:IMapSearch) => {
   */
   const {width, height} = useWindowSize();
   const [placeholderText, setPlaceholderText]= useState(EXPLORE_COPY.MAP.SEARCH_PLACEHOLDER);
+  const [tractSearch, setTractSearch] = useState<JsSearch | null>(null);
+
+  /**
+   * Gets the tract search data and loads in the state.
+   */
+  const getTractSearchData = async () => {
+    const searchDataUrl = `${constants.TILE_BASE_URL}/${constants.MAP_TRACT_SEARCH_PATH}`;
+    fetch(searchDataUrl)
+        .then((response) => {
+          if (response.ok) {
+            return response.json();
+          } else {
+            throw new Error(`${response.statusText} error with status code of ${response.status}`);
+          }
+        })
+        .then((data) => {
+          // We use JsSearch to make it easy to load and quick to search.
+          const search = new JsSearch.Search('GEOID10');
+          search.indexStrategy = new JsSearch.ExactWordIndexStrategy();
+          search.addIndex('GEOID10');
+          search.addDocuments(data);
+          setTractSearch(search);
+        })
+        .catch((error) =>
          console.error('Unable to read search tract table:', error));
+  };

   useEffect( () => {
     width > height ? setPlaceholderText(EXPLORE_COPY.MAP.SEARCH_PLACEHOLDER): setPlaceholderText(EXPLORE_COPY.MAP.SEARCH_PLACEHOLDER_MOBILE);
   }, [width]);

+  useEffect(()=>{
+    getTractSearchData();
+  }, []);
+
+  /**
+   * Searchs for a given Census tract ID.
+   * @param {string} tract the 11 digit tract ID as a string
+   * @return {Array} an array of one search result, or null if no result found
+   */
+  const searchForTract = (tract: string): [ISearchResult] | [] => {
+    // We create a bounding box just to get the tract in the view box.
+    // The size is not important.
+    const BOUNDING_BOX_SIZE_DD = 0.2;
+    if (tractSearch) {
+      // Convert 10 digit tracts to 11.
+      const searchTerm = tract.length == 10 ? '0' + tract : tract;
+      const result = tractSearch.search(searchTerm);
+      if (result.length > 0) {
+        const lat = Number(result[0].INTPTLAT10);
+        const lon = Number(result[0].INTPTLON10);
+        return [{
+          addresstype: 'tract',
+          boundingbox: [
+            (lat - (BOUNDING_BOX_SIZE_DD / 2)).toString(),
+            (lat + (BOUNDING_BOX_SIZE_DD / 2)).toString(),
+            (lon - (BOUNDING_BOX_SIZE_DD / 2)).toString(),
+            (lon + (BOUNDING_BOX_SIZE_DD / 2)).toString(),
+          ],
+          lat: result[0].INTPTLAT10,
+          lon: result[0].INTPTLON10,
+          type: 'tract',
+          place_rank: 1,
+        }];
+      }
+    }
+    return [];
+  };
+
   /*
     onSearchHandler will
     1. extract the search term from the input field
-    2. fetch data from the API and return the results as JSON and results to US only
-    3. if data is valid, destructure the boundingBox values from the search results
-    4. pan the map to that location
+    2. Determine if the search term is a Census Tract or not.
+    3. If it is a Census Tract, it will search the tract table for a bounding box.
+    4. If it is NOT a Census Tract, it will fetch data from the API and return the
+       results as JSON and results to US only. If data is valid, destructure the
+       boundingBox values from the search results.
+    4. Pan the map to that location
   */
   const onSearchHandler = async (event: React.FormEvent<HTMLFormElement>) => {
     event.preventDefault();
     event.stopPropagation();

     const searchTerm = (event.currentTarget.elements.namedItem('search') as HTMLInputElement).value;
+    let searchResults = null;

-    const searchResults = await fetch(
-      `https://nominatim.openstreetmap.org/search?q=${searchTerm}&format=json&countrycodes=us`,
-      {
-        mode: 'cors',
-      })
-        .then((response) => {
-          if (!response.ok) {
-            throw new Error('Network response was not OK');
-          }
-          return response.json();
-        })
-        .catch((error) => {
-          console.error('There has been a problem with your fetch operation:', error);
-        });
+    // If the search term a Census tract
+    const isTract = /^\d{10,11}$/.test(searchTerm);
+    if (isTract) {
+      setIsSearchResultsNull(false);
+      searchResults = searchForTract(searchTerm);
+    } else {
+      searchResults = await fetch(
+          `https://nominatim.openstreetmap.org/search?q=${searchTerm}&format=json&countrycodes=us`,
+          {
+            mode: 'cors',
+          })
+          .then((response) => {
+            if (!response.ok) {
+              throw new Error('Network response was not OK');
+            }
+            return response.json();
+          })
+          .catch((error) => {
+            console.error('There has been a problem with your fetch operation:', error);
+          });
+      console.log('Nominatum search results: ', searchResults);
+    }

     // If results are valid, set isSearchResultsNull to false and pan map to location:
     if (searchResults && searchResults.length > 0) {
       setIsSearchResultsNull(false);
-      console.log('Nominatum search results: ', searchResults);

       const [latMin, latMax, longMin, longMax] = searchResults[0].boundingbox;
       goToPlace([[Number(longMin), Number(latMin)], [Number(longMax), Number(latMax)]]);
     } else {
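A quick worked example of the tract-search path added above; the padding and bounding-box arithmetic mirror searchForTract, and the sample values are hypothetical:

```ts
// 10- and 11-digit numeric strings are treated as Census tract IDs.
const isTract = (term: string): boolean => /^\d{10,11}$/.test(term);

// A 10-digit ID is a tract whose leading zero was dropped; pad it back to 11.
const padTract = (tract: string): string =>
  (tract.length === 10 ? '0' + tract : tract);

// Build the 0.2-degree box around the tract's internal point, in the
// [latMin, latMax, lonMin, lonMax] order that onSearchHandler destructures.
const boundingBox = (lat: number, lon: number, sizeDD = 0.2): string[] => [
  (lat - sizeDD / 2).toString(),
  (lat + sizeDD / 2).toString(),
  (lon - sizeDD / 2).toString(),
  (lon + sizeDD / 2).toString(),
];

console.log(isTract('6037206031'));  // true
console.log(padTract('6037206031')); // "06037206031"
console.log(boundingBox(34, -118));  // ["33.9", "34.1", "-118.1", "-117.9"]
```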
@@ -23,7 +23,7 @@ exports[`rendering of the MapSearch checks if component renders 1`] = `
   data-testid="textInput"
   id="search-field"
   name="search"
-  placeholder="Search for an address, city, state or ZIP"
+  placeholder="Search for an address, city, state, ZIP or Census Tract"
   type="search"
 />
 <button
@@ -24,7 +24,7 @@ interface IMapTractLayers {
 export const featureURLForTilesetName = (tilesetName: string): string => {
   const flags = useFlags();

-  const pipelineStagingBaseURL = `https://justice40-data.s3.amazonaws.com/data-pipeline-staging`;
+  const pipelineStagingBaseURL = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-pipeline-staging`;
   const XYZ_SUFFIX = '{z}/{x}/{y}.pbf';

   if ('stage_hash' in flags) {

@@ -38,13 +38,8 @@ export const featureURLForTilesetName = (tilesetName: string): string => {
   } else {
     // The feature tile base URL and path can either point locally or the CDN.
     // This is selected based on the DATA_SOURCE env variable.
-    const featureTileBaseURL = process.env.DATA_SOURCE === 'local' ?
-      process.env.GATSBY_LOCAL_TILES_BASE_URL :
-      process.env.GATSBY_CDN_TILES_BASE_URL;
-
-    const featureTilePath = process.env.DATA_SOURCE === 'local' ?
-      process.env.GATSBY_DATA_PIPELINE_SCORE_PATH_LOCAL :
-      process.env.GATSBY_1_0_SCORE_PATH;
+    const featureTileBaseURL = constants.TILE_BASE_URL;
+    const featureTilePath = constants.TILE_PATH;

     return [
       featureTileBaseURL,
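For orientation, the constants swapped in above compose into the XYZ tile URL the map requests. A sketch under the assumption that the parts are slash-joined as in the surrounding return statement (values illustrative):

```ts
const XYZ_SUFFIX = '{z}/{x}/{y}.pbf';

// TILE_BASE_URL and TILE_PATH now come from data/constants, which pick the
// local server or the CDN based on the DATA_SOURCE env variable.
function featureURL(tileBaseURL: string, tilePath: string, tilesetName: string): string {
  return [tileBaseURL, tilePath, tilesetName, XYZ_SUFFIX].join('/');
}

console.log(featureURL(
    'https://dig0wsohit6js.cloudfront.net', // CDN base from the .env change
    'data-versions/1.0/data/score',         // GATSBY_1_0_SCORE_PATH
    'high',                                 // hypothetical tileset name
));
// -> https://dig0wsohit6js.cloudfront.net/data-versions/1.0/data/score/high/{z}/{x}/{y}.pbf
```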
@@ -9,6 +9,7 @@ interface IPrioritizationCopy {
   totalBurdensPrioritized: number
   isAdjacencyThreshMet: boolean,
   isAdjacencyLowIncome: boolean,
+  isIslandLowIncome: boolean,
   tribalCountAK: number | null,
   tribalCountUS: null, // when this signal is supported add number type
   percentTractTribal: number | null

@@ -26,6 +27,7 @@ interface IPrioritizationCopy {
 * @param {number} totalBurdensPrioritized
 * @param {boolean} isAdjacencyThreshMet
 * @param {boolean} isAdjacencyLowIncome
+ * @param {boolean} isIslandLowIncome
 * @param {number | null} tribalCountAK
 * @param {number | null} tribalCountUS
 * @param {number | null} percentTractTribal

@@ -36,6 +38,7 @@ const PrioritizationCopy =
   totalBurdensPrioritized,
   isAdjacencyThreshMet,
   isAdjacencyLowIncome,
+  isIslandLowIncome,
   tribalCountAK,
   tribalCountUS,
   percentTractTribal,

@@ -48,6 +51,9 @@ const PrioritizationCopy =
   if (isAdjacencyThreshMet && isAdjacencyLowIncome) {
     prioCopyRendered = EXPLORE_COPY.PRIORITIZATION_COPY.PRIO_SURR_LI;
   // if 1-2
+  } else if (isIslandLowIncome) {
+    prioCopyRendered = EXPLORE_COPY.PRIORITIZATION_COPY.PRIO_ISLAND_LI;
+  // if 1-3
   } else if (isAdjacencyThreshMet && !isAdjacencyLowIncome) {
     // if 1-2-1
     if ( tribalCountAK === null && tribalCountUS === null) {
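A condensed sketch of the branch order after this change; the real component fans out much further, so the last two keys here are hypothetical stand-ins:

```ts
function pickPrioCopy(
    isAdjacencyThreshMet: boolean,
    isAdjacencyLowIncome: boolean,
    isIslandLowIncome: boolean,
): string {
  if (isAdjacencyThreshMet && isAdjacencyLowIncome) {
    return 'PRIO_SURR_LI';   // if 1-2: surrounded + adjusted low income
  } else if (isIslandLowIncome) {
    return 'PRIO_ISLAND_LI'; // if 1-3: the new island low-income branch
  } else if (isAdjacencyThreshMet && !isAdjacencyLowIncome) {
    return 'ADJACENCY_NO_LI'; // hypothetical stand-in for the 1-2-1 subtree
  }
  return 'DEFAULT_COPY';      // hypothetical fallback
}
```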
@@ -77,195 +77,195 @@ const ReleaseUpdate = ({}: IReleaseUpdateProps) => {
     <div>

       <div className={styles.releaseHeader}>
-        {DOWNLOAD_COPY.RELEASE_1_0.UPDATE_1}
+        {DOWNLOAD_COPY.RELEASE_2_0.UPDATE_1}
       </div>

       <div className={styles.releaseSectionTitle}>
-        {DOWNLOAD_COPY.RELEASE_1_0.SECTION1}
+        {DOWNLOAD_COPY.RELEASE_2_0.SECTION1}
       </div>

       <div>
         <ul>
-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B1}</li>
-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B1}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2}</li>

           <ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_1}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_1}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_1_1}</li>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_1_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_1_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_1_2}</li>
             </ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_2}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_2}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_1_3}</li>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_2_2}</li>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_2_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_1_3}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_2_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_2_1}</li>
             </ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_3}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_3}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_3_1}</li>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_3_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_3_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_3_2}</li>
             </ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_4}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_4}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_4_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_4_1}</li>
             </ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_5}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_5}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_5_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B2_5_1}</li>
             </ul>
           </ul>
-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B3}</li>
-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B4}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B3}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B4}</li>
           <ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B4_1}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B4_2}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B4_3}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B4_1}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B4_2}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B4_3}</li>
           </ul>
-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B5}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B5}</li>
           <ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B5_1}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B5_2}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B5_3}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B5_1}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B5_2}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B5_3}</li>
           </ul>

-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6}</li>
           <ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_1}</li>
-              {/* <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_1}</li>
+              {/* <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2}</li>
              <ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1_1}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1_1}</li>
                </ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_1}</li>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_2}</li>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_3}</li>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_4}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_1}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_2}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_3}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_4}</li>
                </ul>
              </ul> */}
             </ul>

-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_2}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_2}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_2_1}</li>
-              {/* <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_2_1}</li>
+              {/* <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2}</li>
              <ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1_1}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1_1}</li>
                </ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_1}</li>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_2}</li>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_3}</li>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_4}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_1}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_2}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_3}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_4}</li>
                </ul>
              </ul> */}
             </ul>

-            {/* <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_3}</li>
+            {/* <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_3}</li>
            <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2}</li>
              <ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1_1}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1_1}</li>
                </ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_2}</li>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_4}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_2}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_4}</li>
                </ul>
              </ul>
            </ul>

-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_4}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_4}</li>
            <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2}</li>
              <ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1_1}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1_1}</li>
                </ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2}</li>
                <ul>
-                  <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_4}</li>
+                  <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_4}</li>
                </ul>
              </ul>
            </ul> */}

-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_5}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_5}</li>
             <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_5_1}</li>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_5_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_5_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_5_2}</li>
               <ul>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_5_2_1}</li>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_5_2_2}</li>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_5_2_3}</li>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_5_2_4}</li>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_1}</li>
-                <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2_2}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_5_2_1}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_5_2_2}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_5_2_3}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_5_2_4}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_1}</li>
+                <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2_2}</li>
               </ul>
             </ul>
           </ul>

-          {/* <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B7}</li>
+          {/* <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B7}</li>
          <ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_1}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_1}</li>
            <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B7_1_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B7_1_1}</li>
            </ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B6_1_2_2}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B6_1_2_2}</li>
            <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B7_2_1}</li>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B7_2_2}</li>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B7_2_3}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B7_2_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B7_2_2}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B7_2_3}</li>
            </ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B7_3}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B7_3}</li>
            <ul>
-              <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B7_3_1}</li>
+              <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B7_3_1}</li>
            </ul>
          </ul> */}

-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B8}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B8}</li>
           <ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B8_1}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B8_2}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B8_3}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B8_4}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B8_1}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B8_2}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B8_3}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B8_4}</li>
           </ul>

-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B9}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B9}</li>
           <ul>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B9_1}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B9_2}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B9_3}</li>
-            <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B9_4}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B9_1}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B9_2}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B9_3}</li>
+            <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B9_4}</li>
           </ul>

-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B10}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION1_B10}</li>
         </ul>
       </div>

       <div className={styles.releaseSectionTitle}>
-        {DOWNLOAD_COPY.RELEASE_1_0.SECTION2}
+        {DOWNLOAD_COPY.RELEASE_2_0.SECTION2}
       </div>

       <div>
         <ul>
-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION2_P1}</li>
-          <li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION2_P2}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION2_P1}</li>
+          <li>{DOWNLOAD_COPY.RELEASE_2_0.SECTION2_P2}</li>
         </ul>
       </div>

       <div>
-        {DOWNLOAD_COPY.RELEASE_1_0.FOOTER}
+        {DOWNLOAD_COPY.RELEASE_2_0.FOOTER}
       </div>

     </div>
@@ -1,5 +1,7 @@
-import { LngLatBoundsLike } from "maplibre-gl";
-import { isMobile as isMobileReactDeviceDetect } from "react-device-detect";
+/* eslint quotes: [2, "double"] */
+
+import {LngLatBoundsLike} from "maplibre-gl";
+import {isMobile as isMobileReactDeviceDetect} from "react-device-detect";

 export const isMobile = isMobileReactDeviceDetect;

@@ -77,6 +79,13 @@ export const SIDE_PANEL_STATE_VALUES = {
   ISLAND_AREAS: "Island Areas",
 };

+/**
+ * Note that the FIPS code is a string
+ * The FIPS codes listed are:
+ * 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
+ */
+export const TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"];
+
 // Climate category
 export const IS_CLIMATE_FACTOR_DISADVANTAGED = "N_CLT";
 export const IS_CLIMATE_EXCEED_ONE_OR_MORE_INDICATORS = "N_CLT_EOMI";

@@ -221,7 +230,7 @@ export const IS_EXCEEDS_THRESH_FOR_ISLAND_AREA_BELOW_100_POVERTY = "IA_POV_ET";

 export const IS_WORKFORCE_EXCEED_BOTH_SOCIO_INDICATORS = "N_WKFC_EBSI";

-export const HIGH_SCHOOL_PROPERTY_PERCENTILE = `HSEF`;
+export const HIGH_SCHOOL_PROPERTY_PERCENTILE = "HSEF";
 export const IS_LOW_HS_EDUCATION_LOW_HIGHER_ED_PRIORITIZED = "LHE";
 export const ISLAND_AREAS_HS_EDU_PERCENTAGE_FIELD = "IAHSEF";
 export const ISLAND_AREA_LOW_HS_EDU = "IALHE";

@@ -371,3 +380,13 @@ export const CENSUS_TRACT_SURVEY_LINKS = {
   EN: "https://eop.gov1.qualtrics.com/jfe/form/SV_8J5wGa8Ya4dMP9c",
   ES: "https://eop.gov1.qualtrics.com/jfe/form/SV_eJXos5X4yekq6cC",
 };
+
+export const TILE_BASE_URL = process.env.DATA_SOURCE === "local" ?
+  process.env.GATSBY_LOCAL_TILES_BASE_URL :
+  process.env.GATSBY_CDN_TILES_BASE_URL;
+
+export const TILE_PATH = process.env.DATA_SOURCE === "local" ?
+  process.env.GATSBY_DATA_PIPELINE_SCORE_PATH_LOCAL :
+  process.env.GATSBY_1_0_SCORE_PATH;
+
+export const MAP_TRACT_SEARCH_PATH = "data_pipeline/data/score/search/tracts.json";
@@ -12,16 +12,16 @@ export const FAQS_LINK = 'https://www.whitehouse.gov/wp-content/uploads/2022/02/
 export const FED_RECOGNIZED_INDIAN_ENTITIES = `https://www.federalregister.gov/documents/2022/01/28/2022-01789/indian-entities-recognized-by-and-eligible-to-receive-services-from-the-united-states-bureau-of`;
 export const EJSCREEN = 'https://www.epa.gov/ejscreen/how-does-epa-use-ejscreen';

-export const CEJST_INSTRUCT = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf`;
-export const CEJST_INSTRUCT_ES = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/CEQ-CEJST-Instructions-es.pdf`;
+export const CEJST_INSTRUCT = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-versions/2.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf`;
+export const CEJST_INSTRUCT_ES = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-versions/2.0/data/score/downloadable/CEQ-CEJST-Instructions-es.pdf`;

 export const CEJST_MEMO = `https://www.whitehouse.gov/wp-content/uploads/2023/01/M-23-09_Signed_CEQ_CPO.pdf`;
-export const CEJST_MEMO_ES = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/M-23-09_Signed_CEQ_CPO_es.pdf`;
+export const CEJST_MEMO_ES = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-versions/2.0/data/score/downloadable/M-23-09_Signed_CEQ_CPO_es.pdf`;

-export const USE_MAP_TUTORIAL_LINK = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/Using-the-CEJST-Tutorial.pdf`;
-export const USE_MAP_TUTORIAL_LINK_ES = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/Using-the-CEJST-Tutorial-es.pdf`;
-export const USE_DATA_TUTORIAL_LINK = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/Using-the-CEJST-Spreadsheet-Tutorial.pdf`;
-export const USE_DATA_TUTORIAL_LINK_ES = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/Using-the-CEJST-Spreadsheet-Tutorial-es.pdf`;
+export const USE_MAP_TUTORIAL_LINK = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-versions/2.0/data/score/downloadable/Using-the-CEJST-Tutorial.pdf`;
+export const USE_MAP_TUTORIAL_LINK_ES = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-versions/2.0/data/score/downloadable/Using-the-CEJST-Tutorial-es.pdf`;
+export const USE_DATA_TUTORIAL_LINK = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-versions/2.0/data/score/downloadable/Using-the-CEJST-Spreadsheet-Tutorial.pdf`;
+export const USE_DATA_TUTORIAL_LINK_ES = process.env.GATSBY_CDN_TILES_BASE_URL +`/data-versions/2.0/data/score/downloadable/Using-the-CEJST-Spreadsheet-Tutorial-es.pdf`;

 export const GITHUB_LINK = 'https://github.com/usds/justice40-tool';
 export const GITHUB_LINK_ES = 'https://github.com/usds/justice40-tool/blob/main/README-es.md';
@@ -28,7 +28,7 @@ export const linkFn = (to:string | IDefineMessage, isInternal:boolean, isOpenNew

 export const FEEDBACK_EMAIL = 'Screeningtool-Support@omb.eop.gov';

-export const METH_1_0_RELEASE_DATE = new Date(2022, 10, 22, 11, 59, 59); // Nov 22 2022
+export const METH_2_0_RELEASE_DATE = new Date(2022, 10, 22, 11, 59, 59); // Nov 22 2022
 export const METH_BETA_RELEASE_DATE = new Date(2022, 1, 18, 11, 59, 59); // Feb 18 2022

@@ -40,7 +40,7 @@ export const BETA_BANNER_CONTENT = <FormattedMessage
   values={{
     bold1: boldFn,
     relDate: <FormattedDate
-      value={METH_1_0_RELEASE_DATE}
+      value={METH_2_0_RELEASE_DATE}
       year="numeric"
       month="short"
       day="numeric"

@@ -75,7 +75,7 @@ export const ALERTS = {
   values={{
     link1: linkFn('https://www.whitehouse.gov/ceq/news-updates/2022/11/22/biden-harris-administration-launches-version-1-0-of-climate-and-economic-justice-screening-tool-key-step-in-implementing-president-bidens-justice40-initiative/', false, true),
     ver1RelDate: <FormattedDate
-      value={METH_1_0_RELEASE_DATE}
+      value={METH_2_0_RELEASE_DATE}
       year="numeric"
       month="numeric"
       day="numeric"
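One reading aid for the date constants above: the JavaScript Date constructor takes a zero-indexed month, so 10 is November and 1 is February. A quick check:

```ts
// Month is 0-indexed in the Date constructor: 10 = November, 1 = February.
const METH_2_0_RELEASE_DATE = new Date(2022, 10, 22, 11, 59, 59);
const METH_BETA_RELEASE_DATE = new Date(2022, 1, 18, 11, 59, 59);

console.log(METH_2_0_RELEASE_DATE.toDateString());  // "Tue Nov 22 2022"
console.log(METH_BETA_RELEASE_DATE.toDateString()); // "Fri Feb 18 2022"
```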
@@ -31,7 +31,7 @@ export const PAGE_INTRO = defineMessages({
 export const getDownloadFileUrl = (filePath:string | undefined, isBeta: boolean) => {
   return [
     process.env.GATSBY_CDN_TILES_BASE_URL,
-    (isBeta ? process.env.GATSBY_BETA_SCORE_PATH : process.env.GATSBY_1_0_SCORE_PATH),
+    (isBeta ? process.env.GATSBY_BETA_SCORE_PATH : process.env.GATSBY_2_0_SCORE_PATH),
     filePath,
   ].join('/');
 };
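A usage sketch of getDownloadFileUrl after the path swap, with the env values inlined from the .env changes earlier in this diff (illustrative only):

```ts
// Joins CDN base + score path + file path with '/'.
const getDownloadFileUrl = (filePath: string | undefined, isBeta: boolean) => {
  return [
    'https://dig0wsohit6js.cloudfront.net', // GATSBY_CDN_TILES_BASE_URL
    (isBeta ?
      'data-versions/beta/data/score' :     // GATSBY_BETA_SCORE_PATH
      'data-versions/2.0/data/score'),      // GATSBY_2_0_SCORE_PATH
    filePath,
  ].join('/');
};

console.log(getDownloadFileUrl('downloadable/2.0-communities.csv', false));
// -> https://dig0wsohit6js.cloudfront.net/data-versions/2.0/data/score/downloadable/2.0-communities.csv
```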
@@ -41,58 +41,58 @@ export const DOWNLOAD_FILES = {
   NARWAL: {
     COMMUNITIES_LIST_XLS: {
       SIZE: 35.6, // MB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_XLS, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_XLS, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     COMMUNITIES_LIST_CSV: {
       SIZE: 42, // MB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_CSV, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_CSV, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     SHAPE_FILE: {
       SIZE: 356.8, // MB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_SHAPE_FILE_ZIP, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_SHAPE_FILE_ZIP, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     TSD: {
       SIZE: 4.4, // MB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_TSD_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_TSD_PDF, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     TSD_ES: {
       SIZE: 4.8, // MB
       URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_TSD_ES_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     HOW_TO_COMMUNITIES: {
       SIZE: 687.9, // KB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_PDF, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     HOW_TO_COMMUNITIES_ES: {
       SIZE: 761.8, // KB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_ES_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_COMMUNITIES_LIST_ES_PDF, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     INSTRUCTIONS: {
       SIZE: 228.4, // KB // Todo: Update when actual file is uploaded
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_INSTRUCT_PDF, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     INSTRUCTIONS_ES: {
       SIZE: 173.6, // KB // Todo: Update when actual file is uploaded
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_INSTRUCT_ES_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_INSTRUCT_ES_PDF, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     COMP_CHART: {
       SIZE: 33.1, // KB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_TOOL_COMP_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_TOOL_COMP_PDF, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
     M_23_09_ES: {
       SIZE: 120.5, // KB
-      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_M_23_09_SIGNED_PDF, false),
-      LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
+      URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_2_0_M_23_09_SIGNED_PDF, false),
+      LAST_UPDATED: COMMON_COPY.METH_2_0_RELEASE_DATE,
     },
   },
   BETA: {
@@ -138,7 +138,7 @@ export const getDownloadIconAltTag = () => defineMessages({
},
});

export const RELEASE_1_0 = {
export const RELEASE_2_0 = {
UPDATE_1: <FormattedMessage
id={'download.page.release.update.title.1'}
defaultMessage={`Version {release} Release update - {date}`}
@@ -146,7 +146,7 @@ export const RELEASE_1_0 = {
values={{
release: VERSION_NUMBER,
date: <FormattedDate
value={COMMON_COPY.METH_1_0_RELEASE_DATE}
value={COMMON_COPY.METH_2_0_RELEASE_DATE}
year="numeric"
month="short"
day="numeric"
@@ -92,7 +92,7 @@ export const MAP = defineMessages({
},
SEARCH_PLACEHOLDER: {
id: 'explore.map.page.map.search.placeholder.text',
defaultMessage: 'Search for an address, city, state or ZIP',
defaultMessage: 'Search for an address, city, state, ZIP or Census Tract',
description: 'On the explore the map page, on the map, the placeholder text for search',
},
SEARCH_PLACEHOLDER_MOBILE: {
@@ -644,6 +644,14 @@ export const PRIORITIZATION_COPY = {
bold: boldFn,
}}
/>,
PRIO_ISLAND_LI: <FormattedMessage
id={'explore.map.page.side.panel.prio.copy.prio.island.li'}
defaultMessage={'This tract is considered disadvantaged because it meets the low income threshold <bold>AND</bold> is located in a U.S. Territory.'}
description={`Navigate to the explore the map page. Click on tract, The side panel will show This tract is considered disadvantaged. It is an island territory that meets an adjusted low income threshold.`}
values={{
bold: boldFn,
}}
/>,
PRIO_SURR_LI: <FormattedMessage
id={'explore.map.page.side.panel.prio.copy.prio.donut'}
defaultMessage={'This tract is considered disadvantaged. It is completely surrounded by tracts that are disadvantaged <bold>AND</bold> meets an adjusted low income threshold. The adjustment does not apply to any of the categories.'}
@@ -1064,7 +1072,7 @@ export const SIDE_PANEL_INDICATORS = defineMessages({
POVERTY: {
id: 'explore.map.page.side.panel.indicator.poverty',
defaultMessage: 'Poverty',
description: `Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show Unemployment`,
description: `Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show Poverty`,
},
UNEMPLOY: {
id: 'explore.map.page.side.panel.indicator.unemploy',
@@ -1,6 +1,6 @@
import React from 'react';
import {FormattedDate, FormattedMessage, defineMessages} from 'gatsby-plugin-intl';
import {METH_1_0_RELEASE_DATE, METH_BETA_RELEASE_DATE} from './common';
import {METH_2_0_RELEASE_DATE, METH_BETA_RELEASE_DATE} from './common';

export const PAGE = defineMessages({
TITLE: {
@@ -29,7 +29,7 @@ export const CARD = {
day="numeric"
/>,
releaseDate: <FormattedDate
value={METH_1_0_RELEASE_DATE}
value={METH_2_0_RELEASE_DATE}
year="numeric"
month="short"
day="numeric"
@@ -688,7 +688,7 @@
"description": "On the explore the map page, on the map, the placeholder text for search"
},
"explore.map.page.map.search.placeholder.text": {
"defaultMessage": "Search for an address, city, state or ZIP",
"defaultMessage": "Search for an address, city, state, ZIP or Census Tract",
"description": "On the explore the map page, on the map, the placeholder text for search"
},
"explore.map.page.map.search.results.empty.text": {
@@ -741,7 +741,7 @@
},
"explore.map.page.map.territoryFocus.lower48.short": {
"defaultMessage": "48",
"description": "On the explore the map page, on the map, the abbreviated name indicating the bounds of \n the Lower 48 states\n "
"description": "On the explore the map page, on the map, the abbreviated name indicating the bounds of\n the Lower 48 states\n "
},
"explore.map.page.map.territoryFocus.puerto_rico.long": {
"defaultMessage": "Puerto Rico",
@@ -933,11 +933,11 @@
},
"explore.map.page.side.panel.indicator.description.exp.bld.loss": {
"defaultMessage": "Economic loss to building value resulting from natural hazards each year",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side \n panel will show an indicator description of Economic loss rate to buildings resulting from natural hazards"
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side\n panel will show an indicator description of Economic loss rate to buildings resulting from natural hazards"
},
"explore.map.page.side.panel.indicator.description.exp.pop.loss": {
"defaultMessage": "Fatalities and injuries resulting from natural hazards each year",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show an indicator description of Economic loss rate to the population in fatalities and \n injuries resulting from natural hazards"
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show an indicator description of Economic loss rate to the population in fatalities and\n injuries resulting from natural hazards"
},
"explore.map.page.side.panel.indicator.description.flooding": {
"defaultMessage": "Projected risk to properties from projected floods, from tides, rain, riverine and storm surges within 30 years",
@@ -953,7 +953,7 @@
},
"explore.map.page.side.panel.indicator.description.high.ed": {
"defaultMessage": "Percent of the census tract's population 15 or older not enrolled in college, university, or graduate school",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show an indicator description of Percent of the census tract's population 15 or older not \n enrolled in college, university, or graduate school"
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show an indicator description of Percent of the census tract's population 15 or older not\n enrolled in college, university, or graduate school"
},
"explore.map.page.side.panel.indicator.description.high.school": {
"defaultMessage": "Percent of people ages 25 years or older whose high school education is less than a high school diploma",
@@ -977,7 +977,7 @@
},
"explore.map.page.side.panel.indicator.description.leadPaint": {
"defaultMessage": "Share of homes that are likely to have lead paint",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show an indicator description of Share of homes that are likely to have lead paint \n "
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show an indicator description of Share of homes that are likely to have lead paint\n "
},
"explore.map.page.side.panel.indicator.description.leaky.tanks": {
"defaultMessage": "Formula of the density of leaking underground storage tanks and number of all active underground storage tanks within 1500 feet of the census tract boundaries",
@@ -1025,7 +1025,7 @@
},
"explore.map.page.side.panel.indicator.description.unemploy": {
"defaultMessage": "Number of unemployed people as a part of the labor force",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side \n panel will show an indicator description of Number of unemployed people as a part of the labor force"
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side\n panel will show an indicator description of Number of unemployed people as a part of the labor force"
},
"explore.map.page.side.panel.indicator.description.wasteWater": {
"defaultMessage": "Modeled toxic concentrations at parts of streams within 500 meters",
@@ -1145,7 +1145,7 @@
},
"explore.map.page.side.panel.indicator.poverty": {
"defaultMessage": "Poverty",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show Unemployment"
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show Poverty"
},
"explore.map.page.side.panel.indicator.prox.haz": {
"defaultMessage": "Proximity to hazardous waste facilities",
@@ -1427,6 +1427,10 @@
"defaultMessage": "The {numPoints} that are Federally Recognized Tribes in this tract are are {also} considered disadvantaged.",
"description": "Navigate to the explore the map page. Click on tract, The {numPoints} that are Federally Recognized Tribes in this tract ares are {also} considered disadvantaged."
},
"explore.map.page.side.panel.prio.copy.prio.island.li": {
"defaultMessage": "This tract is considered disadvantaged because it meets the low income threshold <bold>AND</bold> is located in a U.S. Territory.",
"description": "Navigate to the explore the map page. Click on tract, The side panel will show This tract is considered disadvantaged. It is an island territory that meets an adjusted low income threshold."
},
"explore.map.page.side.panel.prio.copy.prio.n.burden": {
"defaultMessage": "This tract is considered disadvantaged because it meets {burdens} burden threshold <bold>AND</bold> the associated socioeconomic threshold.",
"description": "Navigate to the explore the map page. Click on tract, This tract is considered disadvantaged because it meets {burdens} burden thresholds <bold>AND</bold> the associated socioeconomic threshold."
@@ -1453,7 +1457,7 @@
},
"explore.map.page.side.panel.tribalInfo.landAreaName": {
"defaultMessage": "Land Area Name:",
"description": "Navigate to the explore the map page. Click on Tribal Lands, when the map is in view, \n click on the map. The side panel will show the land area name of the feature selected"
"description": "Navigate to the explore the map page. Click on Tribal Lands, when the map is in view,\n click on the map. The side panel will show the land area name of the feature selected"
},
"explore.map.page.side.panel.version.title": {
"defaultMessage": "Methodology version {version}",
@@ -1465,7 +1469,7 @@
},
"explore.map.page.under.map.download.draft.ptag.1": {
"defaultMessage": "<link1>Download the data sources</link1> used in the CEJST (.csv, .xlxs, .pdf that describes how to use the list, and a codebook, {downloadFileSize} unzipped). Last updated: {dateUpdated}.",
"description": "\n Navigate to the explore the map page. Under the map, you will see a link that is placed below the \n map that will download the data packet\n "
"description": "\n Navigate to the explore the map page. Under the map, you will see a link that is placed below the\n map that will download the data packet\n "
},
"explore.map.page.under.map.note.on.territories.intro": {
"defaultMessage": "U.S. territories note",

@@ -26,10 +26,10 @@
"about.page.title.text": "Información básica",
"about.page.use.data.heading": "Cómo utilizar los datos ",
"about.page.use.data.paragraph": "Los datos de la herramienta están disponibles para <link1>descargar</link1>. Estos datos se pueden usar para filtrar por estado o condado.",
"about.page.use.data.tutorial":"Descarga el tutorial de la hoja de cálculo (.pdf 5,4 MB)",
"about.page.use.data.tutorial": "Descarga el tutorial de la hoja de cálculo (.pdf 5,4 MB)",
"about.page.use.map.heading": "Utilización del mapa",
"about.page.use.map.para": "Amplíe y seleccione cualquier distrito censal para ver si se considera como desfavorecido.",
"about.page.use.map.tutorial":"Descarga el tutorial CEJST (.pdf 9.6 MB)",
"about.page.use.map.tutorial": "Descarga el tutorial CEJST (.pdf 9.6 MB)",
"common.pages.alerts.additional_docs_available.description": "Descargue un nuevo <link1>documento de apoyo técnico</link1> y otra documentación, y <link2>envíe sus comentarios</link2>.",
"common.pages.alerts.banner.beta.content": "<bold1>Herramienta actualizada.</bold1> La versión 1.0 de la herramienta fue publicada el {relDate}.",
"common.pages.alerts.census.tract.title": "Ya hay más documentación disponible",
@@ -172,7 +172,7 @@
"explore.map.page.map.layer.selector.tribal.long": "Tierras tribales",
"explore.map.page.map.layer.selector.tribal.short": "Tribal",
"explore.map.page.map.search.placeholder.mobile.text": "Búsqueda de ubicaciones ",
"explore.map.page.map.search.placeholder.text": "Busque una dirección, ciudad, estado o código postal.",
"explore.map.page.map.search.placeholder.text": "Busque una dirección, ciudad, estado, código postal o distrito censal.",
"explore.map.page.map.search.results.empty.text": "No se encontró la ubicación o ubicación desconocida. Intente una búsqueda distinta.",
"explore.map.page.map.territoryFocus.alaska.long": "Alaska",
"explore.map.page.map.territoryFocus.alaska.short": "AK",
@@ -348,6 +348,7 @@
"explore.map.page.side.panel.num.categories.exceeded": "en {numberOfDisCategories, plural, one {# categoría} otras {# categorías}}",
"explore.map.page.side.panel.partial.community.of.focus": "PARCIALMENTE",
"explore.map.page.side.panel.prio.copy.not.prio": "Este distrito censal no se considera desfavorecido. No cumple con ninguno de los umbrales <bold>O</bold> con al menos un umbral socioeconómico relacionado.",
"explore.map.page.side.panel.prio.copy.prio.island.li": "Este distrito censal se considera desfavorecido porque cumple con el umbral de bajos ingresos <bold>Y</bold> está ubicado en un territorio de los EE. UU.",
"explore.map.page.side.panel.prio.copy.not.prio.n.burden": "Este distrito censal no se considera desfavorecido. Cumple con más de 1 umbral de carga <bold>PERO</bold> con ningún umbral socioeconómico asociado.",
"explore.map.page.side.panel.prio.copy.not.prio.one.burden": "Este distrito censal no se considera desfavorecido. Cumple con 1 umbral de carga <bold>PERO</bold> con ningún umbral socioeconómico asociado.",
"explore.map.page.side.panel.prio.copy.not.prio.surr.li": "Este distrito censal no se considera desfavorecido. Está rodeado de distritos censales desfavorecidos <bold>PERO</bold> no cumple con el umbral ajustado de bajos ingresos. El ajuste no corresponde a ninguna de las categorías.",
@@ -490,7 +490,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<a
class="usa-link usa-link--external"
data-cy=""
href="https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf"
href="undefined/data-versions/2.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf"
rel="noreferrer"
target="_blank"
>
@@ -614,7 +614,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<a
class="usa-link usa-link--external"
data-cy=""
href="https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/Using-the-CEJST-Tutorial.pdf"
href="undefined/data-versions/2.0/data/score/downloadable/Using-the-CEJST-Tutorial.pdf"
rel="noreferrer"
target="_blank"
>
@@ -681,7 +681,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<a
class="usa-link usa-link--external"
data-cy=""
href="https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/Using-the-CEJST-Spreadsheet-Tutorial.pdf"
href="undefined/data-versions/2.0/data/score/downloadable/Using-the-CEJST-Spreadsheet-Tutorial.pdf"
rel="noreferrer"
target="_blank"
>

@@ -490,7 +490,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<a
class="usa-link usa-link--external"
data-cy=""
href="https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf"
href="undefined/data-versions/2.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf"
rel="noreferrer"
target="_blank"
>
@@ -1,9 +1,11 @@
[flake8]
# E266, to many leading '#' for block comment
# W503, line break before binary operator
# TODO: Uncomment and fix the issues here that we care about, see
# https://github.com/usds/justice40-tool/issues/1123
ignore =
E266, # to many leading '#' for block comment
W503, # line break before binary operator
# TODO: Uncomment and fix the issues here that we care about, see
# https://github.com/usds/justice40-tool/issues/1123
E266,
W503,
PD002,
PD003,
PD004,
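A note on the .flake8 hunk above: flake8 reads the whole multi-line value of "ignore =" as one comma-separated list of error codes, so inline "#" comments inside that list (as in the old version) risk being parsed as part of the codes. Hoisting the comments above the option, as the new version does, leaves the list as bare codes. This rationale is an inference from the shape of the diff, not something stated in the commit.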
@@ -33,6 +33,31 @@ dataset_cli_help = "Grab the data from either 'local' for local access or 'aws'

LOG_LINE_WIDTH = 60

use_cache_option = click.option(
"-u",
"--use-cache",
is_flag=True,
default=False,
help="Check if data source has been downloaded already, and if it has, use the cached version of the data source.",
)

dataset_option = click.option(
"-d",
"--dataset",
required=False,
type=str,
help=dataset_cli_help,
)

data_source_option = click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)


@click.group()
def cli():
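For context on the hunk above: click.option() returns a decorator, so a flag that used to be repeated on every command can be defined once at module level and stacked onto any number of commands. A minimal runnable sketch of the pattern, assuming only the click package (the command name and echo body are illustrative, not from the pipeline):

import click

# Shared flag, defined once and reused across commands (mirrors use_cache_option above).
use_cache_option = click.option(
    "-u",
    "--use-cache",
    is_flag=True,
    default=False,
    help="Use the cached copy of the data source if one exists.",
)


@click.group()
def cli():
    """Toy CLI group."""


@cli.command(help="Example command reusing the shared option")
@use_cache_option
def etl(use_cache):
    # Click passes the flag value as a keyword argument named after the option.
    click.echo(f"use_cache={use_cache}")


if __name__ == "__main__":
    cli()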
@@ -51,7 +76,6 @@ def census_cleanup():
census_reset(data_path)

log_goodbye()
sys.exit()


@cli.command(help="Clean up all data folders")
@@ -70,7 +94,6 @@ def data_cleanup():
geo_score_folder_cleanup()

log_goodbye()
sys.exit()


@cli.command(
@@ -82,13 +105,7 @@ def data_cleanup():
is_flag=True,
help="Upload to AWS S3 a zipped archive of the census data.",
)
@click.option(
"-u",
"--use-cache",
is_flag=True,
default=False,
help="Check if data source has been downloaded already, and if it has, use the cached version of the data source.",
)
@use_cache_option
def census_data_download(zip_compress, use_cache):
"""CLI command to download all census shape files from the Census FTP and extract the geojson
to generate national and by state Census Block Group CSVs"""
@@ -105,18 +122,10 @@ def census_data_download(zip_compress, use_cache):
zip_census_data()

log_goodbye()
sys.exit()


@cli.command(help="Retrieve census data from source")
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
@data_source_option
def pull_census_data(data_source: str):

log_title("Pull Census Data")
@@ -126,26 +135,13 @@ def pull_census_data(data_source: str):
check_census_data_source(data_path, data_source)

log_goodbye()
sys.exit()


@cli.command(
help="Run all ETL processes or a specific one",
)
@click.option(
"-d",
"--dataset",
required=False,
type=str,
help=dataset_cli_help,
)
@click.option(
"-u",
"--use-cache",
is_flag=True,
default=False,
help="Check if data source has been downloaded already, and if it has, use the cached version of the data source.",
)
@dataset_option
@use_cache_option
def etl_run(dataset: str, use_cache: bool):
"""Run a specific or all ETL processes

@@ -161,7 +157,6 @@ def etl_run(dataset: str, use_cache: bool):
etl_runner(dataset, use_cache)

log_goodbye()
sys.exit()


@cli.command(
|
|||
score_generate()
|
||||
|
||||
log_goodbye()
|
||||
sys.exit()
|
||||
|
||||
|
||||
@cli.command(
|
||||
help="Run ETL + Score Generation",
|
||||
)
|
||||
@click.option(
|
||||
"-u",
|
||||
"--use-cache",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Check if data source has been downloaded already, and if it has, use the cached version of the data source.",
|
||||
)
|
||||
@use_cache_option
|
||||
def score_full_run(use_cache: bool):
|
||||
"""CLI command to run ETL and generate the score in one command"""
|
||||
log_title("Score Full Run", "Run ETL and Generate Score (no tiles)")
|
||||
|
@@ -207,20 +195,12 @@ def score_full_run(use_cache: bool):
score_generate()

log_goodbye()
sys.exit()


@cli.command(
help="Run etl_score_post to create score csv, tile csv, and downloadable zip"
)
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
@data_source_option
def generate_score_post(data_source: str):
"""CLI command to generate score, tile, and downloadable files

@@ -244,18 +224,10 @@ def generate_score_post(data_source: str):
score_post(data_source)

log_goodbye()
sys.exit()


@cli.command(help="Generate GeoJSON files with scores baked in")
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
@data_source_option
def geo_score(data_source: str):
"""CLI command to combine score with GeoJSON data and generate low and high files

@@ -280,7 +252,6 @@ def geo_score(data_source: str):
score_geo(data_source=data_source)

log_goodbye()
sys.exit()


@cli.command(
@@ -304,7 +275,6 @@ def generate_map_tiles(generate_tribal_layer):
generate_tiles(data_path, generate_tribal_layer)

log_goodbye()
sys.exit()


@cli.command(
@@ -316,21 +286,8 @@ def generate_map_tiles(generate_tribal_layer):
is_flag=True,
help="Check if data run has been run before, and don't run it if so.",
)
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
@click.option(
"-u",
"--use-cache",
is_flag=True,
default=False,
help="Check if data source has been downloaded already, and if it has, use the cached version of the data source.",
)
@data_source_option
@use_cache_option
def data_full_run(check: bool, data_source: str, use_cache: bool):
"""CLI command to run ETL, score, JSON combine and generate tiles in one command
@@ -388,19 +345,12 @@ def data_full_run(check: bool, data_source: str, use_cache: bool):
call(cmd, shell=True)

log_goodbye()
sys.exit()


@cli.command(
help="Print data sources for all ETL processes (or a specific one)",
)
@click.option(
"-d",
"--dataset",
required=False,
type=str,
help=dataset_cli_help,
)
@dataset_option
def print_data_sources(dataset: str):
"""Print data sources for all ETL processes (or a specific one)

@@ -421,26 +371,13 @@ def print_data_sources(dataset: str):
log_info(s)

log_goodbye()
sys.exit()


@cli.command(
help="Fetch data sources for all ETL processes (or a specific one)",
)
@click.option(
"-d",
"--dataset",
required=False,
type=str,
help=dataset_cli_help,
)
@click.option(
"-u",
"--use-cache",
is_flag=True,
default=False,
help="Check if data source has been downloaded already, and if it has, use the cached version of the data source.",
)
@dataset_option
@use_cache_option
def extract_data_sources(dataset: str, use_cache: bool):
"""Extract and cache data source(s) for all ETL processes (or a specific one)
@@ -457,19 +394,12 @@ def extract_data_sources(dataset: str, use_cache: bool):
extract_ds(dataset, use_cache)

log_goodbye()
sys.exit()


@cli.command(
help="Clear data source cache for all ETL processes (or a specific one)",
)
@click.option(
"-d",
"--dataset",
required=False,
type=str,
help=dataset_cli_help,
)
@dataset_option
def clear_data_source_cache(dataset: str):
"""Clear data source(s) cache for all ETL processes (or a specific one)

@@ -485,7 +415,33 @@ def clear_data_source_cache(dataset: str):
clear_ds_cache(dataset)

log_goodbye()
sys.exit()


@cli.command(
help="Generate scoring and tiles",
)
@click.pass_context
def full_post_etl(ctx):
"""Generate scoring and tiles"""
ctx.invoke(score_run)
ctx.invoke(generate_score_post, data_source=None)
ctx.invoke(geo_score, data_source=None)
ctx.invoke(generate_map_tiles, generate_tribal_layer=False)


@cli.command(
help="Run all downloads, extracts, and generate scores and tiles",
)
@use_cache_option
@click.pass_context
def full_run(ctx, use_cache):
"""Run all downloads, ETLs, and generate scores and tiles"""
if not use_cache:
ctx.invoke(data_cleanup)
ctx.invoke(census_data_download, zip_compress=False, use_cache=use_cache)
ctx.invoke(extract_data_sources, dataset=None, use_cache=use_cache)
ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
ctx.invoke(full_post_etl)


def log_title(title: str, subtitle: str = None):
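For context on the new full_post_etl and full_run commands above: @click.pass_context hands the callback a Context object, and ctx.invoke() calls another command's callback with explicitly supplied parameters, which is how one command chains several others here. A self-contained sketch of the same pattern, assuming only the click package (step names are illustrative):

import click


@click.group()
def cli():
    """Toy CLI group."""


@cli.command()
@click.option("--use-cache", is_flag=True, default=False)
def download(use_cache):
    click.echo(f"download (use_cache={use_cache})")


@cli.command()
def score():
    click.echo("score")


@cli.command()
@click.option("--use-cache", is_flag=True, default=False)
@click.pass_context
def full_run(ctx, use_cache):
    # ctx.invoke() runs another command's callback; parameters must be passed
    # explicitly, just as the diff does with dataset=None, data_source=None, etc.
    ctx.invoke(download, use_cache=use_cache)
    ctx.invoke(score)


if __name__ == "__main__":
    cli()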
@@ -1,11 +1,12 @@
import sys
import click
import difflib
import pandas as pd
from pathlib import Path

from data_pipeline.etl.score import constants
from data_pipeline.utils import get_module_logger, download_file_from_url
from data_pipeline.application import log_title, log_info, log_goodbye
from data_pipeline.score import field_names

logger = get_module_logger(__name__)

@@ -15,6 +16,42 @@ pd.set_option("display.max_rows", None)
pd.set_option("display.width", 10000)
pd.set_option("display.colheader_justify", "left")

result_text = []


def _add_text(text: str):
"""Add a line to the output result.

Args:
line (str): a line to add
"""
result_text.append(text)


def _get_result_doc() -> str:
"""Gets the document with results.

Returns:
str: the results document as text
"""
return "".join(result_text)


def _read_from_file(file_path: Path):
"""Read a CSV file into a Dataframe."""
if not file_path.is_file():
logger.error(
f"- No score file exists at {file_path}. "
"Please generate the score and try again."
)
sys.exit(1)
return pd.read_csv(
file_path,
index_col="GEOID10_TRACT",
dtype={"GEOID10_TRACT": str},
low_memory=False,
).sort_index()


@click.group()
def cli():
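The _read_from_file helper above encodes two conventions worth noting: tract GEOIDs are read as strings, so their leading zeros survive, and they become a sorted index, so the production and local frames align row-by-row for comparison. A minimal usage sketch of the same call (the file path is illustrative):

import pandas as pd

score_df = pd.read_csv(
    "usa.csv",  # illustrative path; the CLI passes real score files
    index_col="GEOID10_TRACT",
    dtype={"GEOID10_TRACT": str},  # keep leading zeros in tract IDs
    low_memory=False,
).sort_index()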
@@ -33,8 +70,24 @@ def cli():
default="1.0",
required=False,
type=str,
help="Set the production score version to compare to",
)
def compare_score(compare_to_version: str):
@click.option(
"-f",
"--compare_to_file",
type=click.Path(exists=True, dir_okay=False, path_type=Path),
help="Compare to the specified score CSV file instead of downloading from production",
)
@click.option(
"-l",
"--local_score_file",
type=click.Path(exists=True, dir_okay=False, path_type=Path),
default=constants.DATA_SCORE_CSV_FULL_FILE_PATH,
help="Compare to the specified score CSV file instead of downloading from production",
)
def compare_score(
compare_to_version: str, compare_to_file: str, local_score_file: str
):
"""Compares the score in the production environment to the locally generated score. The
algorithm is pretty simple:
@@ -50,43 +103,27 @@ def compare_score(compare_to_version: str):
FLOAT_ROUNDING_PLACES = 2
WORKING_PATH = constants.TMP_PATH / "Comparator" / "Score"

summary = "# Score Comparison Summary\n"
summary += f"Hi! I'm the Score Comparator. I compared the score in production (version {compare_to_version}) to the"
summary += " locally calculated score. Here are the results.\n"

log_title("Compare Score", "Compare production score to local score")

locally_generated_score_path = constants.DATA_SCORE_CSV_FULL_FILE_PATH
if not locally_generated_score_path.is_file():
logger.error(
f"- No score file exists at {locally_generated_score_path}. Please generate the score and try again."
if compare_to_file:
log_info(f"Comparing to file {compare_to_file}...")
production_score_path = compare_to_file
else:
# TODO: transition to downloader code when it's available
production_score_url = f"https://justice40-data.s3.amazonaws.com/data-versions/{compare_to_version}/data/score/csv/full/usa.csv"
production_score_path = WORKING_PATH / "usa.csv"

log_info(f"Fetching score version {compare_to_version} from AWS")
production_score_path.parent.mkdir(parents=True, exist_ok=True)
download_file_from_url(
file_url=production_score_url,
download_file_name=production_score_path,
)
sys.exit(1)

# TODO: transition to downloader code when it's available
production_score_url = f"https://justice40-data.s3.amazonaws.com/data-versions/{compare_to_version}/data/score/csv/full/usa.csv"
production_score_path = WORKING_PATH / "usa.csv"

log_info(f"Fetching score version {compare_to_version} from AWS")
production_score_path.parent.mkdir(parents=True, exist_ok=True)
download_file_from_url(
file_url=production_score_url, download_file_name=production_score_path
)

log_info("Loading files into pandas for comparisons")

local_score_df = pd.read_csv(
locally_generated_score_path,
index_col="GEOID10_TRACT",
dtype={"GEOID10_TRACT": str},
low_memory=False,
).sort_index()
production_score_df = pd.read_csv(
production_score_path,
index_col="GEOID10_TRACT",
dtype={"GEOID10_TRACT": str},
low_memory=False,
).sort_index()
log_info(f"Loading local score from {local_score_file}")
local_score_df = _read_from_file(local_score_file)
log_info(f"Loading production score from {production_score_path}")
production_score_df = _read_from_file(production_score_path)

# Because of variations in Python versions and machine-level calculations, some of
# our numbers can be really close but not the same. That throws off our comparisons.
@@ -99,67 +136,82 @@ def compare_score(compare_to_version: str):
production_score_df_columns = sorted(
production_score_df.columns.array.tolist()
)

log_info("Comparing columns (production vs local). Differences are: ")
summary += "\n## Columns\n"
summary += "I compared the columns. Here's what I found.\n"

col_diff = difflib.unified_diff(
production_score_df_columns, local_score_df_columns
extra_cols_in_local = set(local_score_df_columns) - set(
production_score_df_columns
)
extra_cols_in_prod = set(production_score_df_columns) - set(
local_score_df_columns
)
col_diff_res = ""
for d in col_diff:
col_diff_res += str(d) + "\n"

if len(col_diff_res) == 0:
log_info("None. Columns are the same")
summary += "* There are no differences in the column names.\n"
_add_text("# Score Comparison Summary\n")
_add_text(
f"Hi! I'm the Score Comparator. I compared the score in production (version {compare_to_version}) to the"
" locally calculated score. Here are the results:\n\n"
)

#####################
# Compare the columns
#####################
log_info("Comparing columns (production vs local)")
_add_text("## Columns\n")
if len(extra_cols_in_local) == 0 and len(extra_cols_in_prod) == 0:
_add_text("* There are no differences in the column names.\n")
else:
log_info("There are differences. The diff is:")
log_info(col_diff_res)
summary += f"* There are differences in the column names. Here's a diff:\n{col_diff_res}\n"
_add_text(
f"* There are {len(extra_cols_in_local)} columns that were added as compared to the production score."
)
if len(extra_cols_in_local) > 0:
_add_text(f" Those colums are:\n{extra_cols_in_local}")
_add_text(
f"\n* There are {len(extra_cols_in_prod)} columns that were removed as compared to the production score."
)
if len(extra_cols_in_prod) > 0:
_add_text(f" Those colums are:\n{extra_cols_in_prod}")

####################
# Compare the scores
####################
log_info("Comparing dataframe contents (production vs local)")
summary += "\n## Scores\n"
summary += "I compared the scores, too. Here's what I found.\n"
_add_text("\n\n## Scores\n")

production_row_count = len(production_score_df.index)
local_row_count = len(local_score_df.index)

summary += f"* The production score has {production_row_count:,} census tracts, and the freshly calculated score has {local_row_count:,}."
summary += (
" They match!\n"
if production_row_count == local_row_count
else " They don't match.\n"
# Tract comparison
_add_text(
f"* The production score has {production_row_count:,} census tracts, and the freshly calculated score has {local_row_count:,}."
)
if production_row_count == local_row_count:
_add_text(" They match!\n")
else:
_add_text(" They don't match. The differences are:\n")
_add_text(
" * New tracts added to the local score are:\n"
f"{local_score_df.index.difference(production_score_df.index).to_list()}"
"\n * Tracts removed from the local score are:\n"
f"{production_score_df.index.difference(local_score_df.index).to_list()}"
"\n"
)

production_total_population = production_score_df["Total population"].sum()
local_total_population = local_score_df["Total population"].sum()
# Population comparison
production_total_population = production_score_df[
field_names.TOTAL_POP_FIELD
].sum()
local_total_population = local_score_df[field_names.TOTAL_POP_FIELD].sum()

log_info(
f"The total population in all census tracts in production is {production_total_population:,}."
_add_text(
f"* The total population in all census tracts in the production score is {production_total_population:,}. "
f"The total population in all census tracts locally is {local_total_population:,}. "
)
log_info(
f"The total population in all census tracts locally is {local_total_population:,}."
)
log_info(
f"The difference in population is {abs(production_total_population - local_total_population):,}."
)

summary += f"* The total population in all census tracts in the production score is {production_total_population:,}."
summary += f" The total population in all census tracts locally is {local_total_population:,}."
summary += (
" They match!\n"
_add_text(
"They match!\n"
if production_total_population == local_total_population
else f" The difference is {abs(production_total_population - local_total_population):,}.\n"
else f"The difference is {abs(production_total_population - local_total_population):,}.\n"
)

production_disadvantaged_tracts_df = production_score_df.query(
"`Definition N community, including adjacency index tracts` == True"
)
local_disadvantaged_tracts_df = local_score_df.query(
"`Definition N community, including adjacency index tracts` == True"
)
dacs_query = f"`{field_names.FINAL_SCORE_N_BOOLEAN}` == True"
production_disadvantaged_tracts_df = production_score_df.query(dacs_query)
local_disadvantaged_tracts_df = local_score_df.query(dacs_query)

production_disadvantaged_tracts_set = set(
production_disadvantaged_tracts_df.index.array
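The refactor above replaces the difflib-based column diff with plain set differences, which directly yield the added and removed column names instead of a textual diff. A toy illustration of the same operations:

import pandas as pd

prod = pd.DataFrame({"tract": ["a"], "score": [1.0]})
local = pd.DataFrame({"tract": ["a"], "score_v2": [2.0]})

# Columns present locally but not in production (i.e. added)...
extra_cols_in_local = set(local.columns) - set(prod.columns)  # {'score_v2'}
# ...and columns present in production but not locally (i.e. removed).
extra_cols_in_prod = set(prod.columns) - set(local.columns)   # {'score'}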
@@ -169,38 +221,27 @@ def compare_score(compare_to_version: str):
)

production_pct_of_population_represented = (
production_disadvantaged_tracts_df["Total population"].sum()
production_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
/ production_total_population
)
local_pct_of_population_represented = (
local_disadvantaged_tracts_df["Total population"].sum()
local_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
/ local_total_population
)

log_info(
f"There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score."
# DACS comparison
_add_text(
f"* There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score representing"
f" {production_pct_of_population_represented:.1%} of the total population, and {len(local_disadvantaged_tracts_set):,}"
)
log_info(
f"This represents {production_pct_of_population_represented:.1%} of the total population."
_add_text(
f" in the locally generated score representing {local_pct_of_population_represented:.1%} of the total population."
)
log_info(
f"There are {len(local_disadvantaged_tracts_set):,} in the locally generated score."
)
log_info(
f"This represents {local_pct_of_population_represented:.1%} of the total population."
)
log_info(
f"The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set)):,} tract(s)."
)

summary += f"* There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score representing"
summary += f" {production_pct_of_population_represented:.1%} of the total population, and {len(local_disadvantaged_tracts_set):,}"
summary += f" in the locally generated score representing {local_pct_of_population_represented:.1%} of the total population."
summary += (
" The number of tracts match!\n"
_add_text(
" The number of tracts match!\n "
if len(production_disadvantaged_tracts_set)
== len(local_disadvantaged_tracts_set)
else f" The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set)):,} tract(s).\n"
else f" The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set))} tract(s).\n "
)

removed_tracts = production_disadvantaged_tracts_set.difference(
@@ -209,53 +250,72 @@ def compare_score(compare_to_version: str):
added_tracts = local_disadvantaged_tracts_set.difference(
production_disadvantaged_tracts_set
)

log_info(
f"There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the prod score that are not disadvantaged in the local score."
)
log_info(
f"There are {len(added_tracts):,} tract(s) marked as disadvantaged in the local score that are not disadvantaged in the prod score."
)

summary += (
_add_text(
f"* There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the production score that are not disadvantaged in the locally"
" generated score (i.e. disadvantaged tracts that were removed by the new score)."
f" There are {len(added_tracts):,} tract(s) marked as disadvantaged in the locally generated score that are not disadvantaged in the"
" production score (i.e. disadvantaged tracts that were added by the new score).\n"
f" generated score (i.e. disadvantaged tracts that were removed by the new score). "
)
if len(removed_tracts) > 0:
_add_text(f"Those tracts are:\n{removed_tracts}")

_add_text(
f"\n* There are {len(added_tracts):,} tract(s) marked as disadvantaged in the locally generated score that are not disadvantaged in the"
f" production score (i.e. disadvantaged tracts that were added by the new score). "
)
if len(added_tracts) > 0:
_add_text(f"Those tracts are:\n{added_tracts}\n")

# Grandfathered tracts from v1.0
grandfathered_tracts = local_score_df.loc[
local_score_df[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0]
].index
if len(grandfathered_tracts) > 0:
_add_text(
f"* This includes {len(grandfathered_tracts)} grandfathered tract(s) from v1.0 scoring. They are:\n"
f"{grandfathered_tracts.to_list()}\n"
)
else:
_add_text("* There are NO grandfathered tracts from v1.0 scoring.\n")

################
# Create a delta
################
_add_text("\n## Delta\n")
# First we make the columns on two dataframes to be the same to be able to compare
trimmed_prod_df = production_score_df.drop(extra_cols_in_prod, axis=1)
trimmed_local_df = local_score_df.drop(extra_cols_in_local, axis=1)
try:

comparison_results_df = production_score_df.compare(
local_score_df, align_axis=1, keep_shape=False, keep_equal=False
comparison_results_df = trimmed_prod_df.compare(
trimmed_local_df, align_axis=1, keep_shape=False, keep_equal=False
).rename({"self": "Production", "other": "Local"}, axis=1, level=1)

summary += "* I compared all values across all census tracts."
summary += f" There are {len(comparison_results_df.index):,} tracts with at least one difference."
summary += " Please examine the logs or run the score comparison locally to view them all.\n"
log_info(
f"There are {len(comparison_results_df.index)} rows with any differences."
_add_text(
"* I compared all values across all census tracts. Note this ignores any columns that have been added or removed."
f" There are {len(comparison_results_df.index):,} tracts with at least one difference.\n"
)

log_info("Those differences are:")
log_info("\n" + str(comparison_results_df))

comparison_path = WORKING_PATH / "deltas.csv"
comparison_results_df.to_csv(path_or_buf=comparison_path)

log_info(f"Wrote comparison results to {comparison_path}")
_add_text(f"* Wrote comparison results to {comparison_path}")

except ValueError as e:
summary += "* I could not run a full comparison. This is likely because there are column or index (census tract) differences."
summary += " Please examine the logs or run the score comparison locally to find out more.\n"
log_info(
f"Encountered an exception while performing the comparison: {repr(e)}"
_add_text(
"* I could not run a full comparison. This is likely because there are column or index (census tract) differences."
" Please examine the logs or run the score comparison locally to find out more.\n"
)
_add_text(
f"Encountered an exception while performing the comparison: {repr(e)}\n"
)

result_doc = _get_result_doc()
print(result_doc)

# Write the report
summary_path = WORKING_PATH / "comparison-summary.md"

with open(summary_path, "w", encoding="utf-8") as f:
f.write(summary)
f.write(result_doc)
log_info(f"Wrote comparison summary to {summary_path}")

log_goodbye()
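On the delta step above: pandas.DataFrame.compare() requires identically labelled frames, which is why the extra columns are dropped first; align_axis=1 places the "self"/"other" values side by side, and the rename() relabels those levels to Production/Local. A toy example of the same call:

import pandas as pd

prod = pd.DataFrame({"x": [1, 2, 3]}, index=["t1", "t2", "t3"])
local = pd.DataFrame({"x": [1, 9, 3]}, index=["t1", "t2", "t3"])

# Only differing cells survive; columns become a MultiIndex of
# (column, Production/Local) pairs, one row per differing tract.
delta = prod.compare(local, align_axis=1, keep_shape=False, keep_equal=False).rename(
    {"self": "Production", "other": "Local"}, axis=1, level=1
)
print(delta)  # row "t2": Production=2, Local=9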
7 data/data-pipeline/data_pipeline/constants.py Normal file

@@ -0,0 +1,7 @@
import logging

LOG_LEVEL = logging.DEBUG
"""Log level for all loggers."""

NO_SSL_VERIFY = True
"""Set to true to skip SSL verification when downloading files. Useful for local development."""

@@ -0,0 +1,5 @@
Static data is used in the computation of the score as stated.

# v1.0-score-results-usa.csv
This is the v1.0 score results used for grandfathering computations. It is a
stripped down version of the v1.0 score to include only the columns needed.

File diff suppressed because it is too large
@@ -22,6 +22,7 @@ from pathlib import Path
from typing import List
from dataclasses import dataclass
from abc import ABC, abstractmethod
from data_pipeline.constants import NO_SSL_VERIFY

from data_pipeline.etl.downloader import Downloader
from data_pipeline.etl.sources.census_acs.etl_utils import (
@@ -65,7 +66,7 @@ class FileDataSource(DataSource):
Downloader.download_file_from_url(
file_url=self.source,
download_file_name=self.destination,
verify=True,
verify=not NO_SSL_VERIFY,
)

def __str__(self):
@@ -85,7 +86,7 @@ class ZIPDataSource(DataSource):
Downloader.download_zip_file_from_url(
file_url=self.source,
unzipped_file_path=self.destination,
verify=True,
verify=not NO_SSL_VERIFY,
)

def __str__(self):
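For context on verify=not NO_SSL_VERIFY above: the flag ultimately reaches requests.get(), where verify=False disables TLS certificate validation (handy behind intercepting proxies in local development, unsafe for production). A minimal sketch of the effect, assuming the requests and urllib3 packages:

import requests
import urllib3

# Silence the InsecureRequestWarning that requests emits when verify=False,
# as this pipeline's Downloader does.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

resp = requests.get("https://example.com", verify=False, timeout=30)
print(resp.status_code)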
@@ -6,6 +6,9 @@ import shutil

from pathlib import Path
from data_pipeline.config import settings
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class Downloader:
@@ -34,13 +37,15 @@ class Downloader:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

download_file_name.parent.mkdir(parents=True, exist_ok=True)

logger.debug(f"Downloading {file_url}")
response = requests.get(
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
)
if response.status_code == 200:
file_contents = response.content
logger.debug("Downloaded.")
else:
# pylint: disable-next=broad-exception-raised
raise Exception(
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
)
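The Downloader hunk above adds two behaviors: the destination's parent directories are created before writing, and a non-200 response now raises immediately instead of failing later. A condensed sketch of the same flow under those assumptions (the helper name is illustrative, not the pipeline's API):

import requests
from pathlib import Path


def download_file(url: str, dest: Path, verify: bool = True, timeout: int = 60) -> None:
    # Create intermediate directories so the write below cannot fail on a
    # missing folder (the mkdir added in the hunk above).
    dest.parent.mkdir(parents=True, exist_ok=True)
    response = requests.get(url, verify=verify, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"HTTP response {response.status_code} from url {url}")
    dest.write_bytes(response.content)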
@@ -9,6 +9,7 @@ from data_pipeline.score import field_names

# Base Paths
DATA_PATH = Path(settings.APP_ROOT) / "data"
STATIC_DATA_PATH = Path(settings.APP_ROOT) / "content" / "static_data"
TMP_PATH = DATA_PATH / "tmp"
FILES_PATH = Path(settings.APP_ROOT) / "files"

@@ -23,6 +24,7 @@ DATA_CENSUS_DIR = DATA_PATH / "census"
DATA_CENSUS_CSV_DIR = DATA_CENSUS_DIR / "csv"
DATA_CENSUS_CSV_FILE_PATH = DATA_CENSUS_CSV_DIR / "us.csv"
DATA_CENSUS_CSV_STATE_FILE_PATH = DATA_CENSUS_CSV_DIR / "fips_states_2010.csv"
DATA_CENSUS_GEOJSON_FILE_PATH = DATA_CENSUS_DIR / "geojson" / "us.json"

# Score paths
DATA_SCORE_DIR = DATA_PATH / "score"
@@ -45,6 +47,9 @@ DATA_SCORE_JSON_INDEX_FILE_PATH = (
## Tile path
DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"

## Tiles search
DATA_TILES_SEARCH_DIR = DATA_SCORE_DIR / "search"

# Downloadable paths
if not os.environ.get("J40_VERSION_LABEL_STRING"):
version_str = "beta"
@@ -81,6 +86,7 @@ SCORE_VERSIONING_README_FILE_NAME = f"readme-version-{version_str}.md"
SCORE_VERSIONING_README_FILE_PATH = (
FILES_PATH / SCORE_VERSIONING_README_FILE_NAME
)
SCORE_TRACT_SEARCH_FILE_PATH = DATA_TILES_SEARCH_DIR / "tracts.json"

# For the codebook
CEJST_SCORE_COLUMN_NAME = "score_name"
@@ -275,6 +281,7 @@ TILES_SCORE_COLUMNS = {
# temporarily update this so that it's the Narwhal score that gets visualized on the map
# The NEW final score value INCLUDES the adjacency index.
field_names.FINAL_SCORE_N_BOOLEAN: "SN_C",
field_names.FINAL_SCORE_N_BOOLEAN_V1_0: "SN_C_V10",
field_names.IS_TRIBAL_DAC: "SN_T",
field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
@@ -315,18 +322,18 @@ TILES_SCORE_COLUMNS = {
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "IAPLHSE",
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
# Percentiles for Island areas' workforce columns
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019
+ field_names.PERCENTILE_FIELD_SUFFIX: "IALMILHSE_PFS",
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAPLHSE_PFS",
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "IAULHSE_PFS",
field_names.LOW_HS_EDUCATION_FIELD: "LHE",
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD: "IALHE",
# Percentage of HS Degree completion for Islands
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF",
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019: "IAHSEF",
# Booleans for the front end about the types of thresholds exceeded
field_names.CLIMATE_THRESHOLD_EXCEEDED: "N_CLT_EOMI",
field_names.ENERGY_THRESHOLD_EXCEEDED: "N_ENY_EOMI",
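TILES_SCORE_COLUMNS above maps verbose score column names to the short codes baked into the map tiles (note the new SN_C_V10 code for the v1.0 boolean). A sketch of how such a mapping is typically applied, using an illustrative two-entry subset rather than the real dictionary:

import pandas as pd

tiles_score_columns = {
    "Total population": "TPF",              # illustrative subset of the real map
    "Definition N (communities)": "SN_C",
}
score_df = pd.DataFrame(
    {"Total population": [1000], "Definition N (communities)": [True]}
)
# Select the long-named columns, then rename them to the short tile codes.
tiles_df = score_df[list(tiles_score_columns)].rename(columns=tiles_score_columns)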
@@ -54,6 +54,7 @@ class ScoreETL(ExtractTransformLoad):
self.eamlis_df: pd.DataFrame
self.fuds_df: pd.DataFrame
self.tribal_overlap_df: pd.DataFrame
self.v1_0_score_results_df: pd.DataFrame

self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS: List[str] = []

@@ -166,7 +167,7 @@ class ScoreETL(ExtractTransformLoad):
census_decennial_csv = (
constants.DATA_PATH
/ "dataset"
/ "census_decennial_2010"
/ "census_decennial_2020"
/ "usa.csv"
)
self.census_decennial_df = pd.read_csv(
@@ -205,6 +206,25 @@ class ScoreETL(ExtractTransformLoad):
header=None,
)

# Load v1.0 score results for grandfathering purposes
score_v1_0_csv = (
constants.STATIC_DATA_PATH / "v1.0-score-results-usa.csv"
)
self.v1_0_score_results_df = pd.read_csv(
score_v1_0_csv,
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
low_memory=False,
)
# Only keep the columns we need and rename them as they will clash
# with the new score DF.
self.v1_0_score_results_df = self.v1_0_score_results_df[
[field_names.GEOID_TRACT_FIELD, field_names.FINAL_SCORE_N_BOOLEAN]
].rename(
columns={
field_names.FINAL_SCORE_N_BOOLEAN: field_names.FINAL_SCORE_N_BOOLEAN_V1_0,
}
)

def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
logger.debug("Joining Census Tract dataframes")
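The grandfathering load above boils down to: read the frozen v1.0 score from static data, keep only the tract ID and the final boolean, and rename the boolean so it cannot clash with the same column in the newly generated score. A toy sketch of that select-and-rename step (the column names here are illustrative stand-ins for the field_names constants):

import pandas as pd

v1_df = pd.DataFrame(
    {
        "GEOID10_TRACT": ["01001020100"],
        "Definition N (communities)": [True],  # stand-in for FINAL_SCORE_N_BOOLEAN
        "Total population": [1000],            # extra column, dropped below
    }
)
# Keep only what is needed, and rename to avoid clashing with the new score.
v1_df = v1_df[["GEOID10_TRACT", "Definition N (communities)"]].rename(
    columns={"Definition N (communities)": "Definition N (communities) v1.0"}
)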
@@ -364,6 +384,7 @@ class ScoreETL(ExtractTransformLoad):
self.eamlis_df,
self.fuds_df,
self.tribal_overlap_df,
self.v1_0_score_results_df,
]

# Sanity check each data frame before merging.
@@ -470,12 +491,14 @@ class ScoreETL(ExtractTransformLoad):
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019,
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019,
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019,
field_names.CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019,
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019,
field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019,
field_names.UST_FIELD,
field_names.DOT_TRAVEL_BURDEN_FIELD,
field_names.FUTURE_FLOOD_RISK_FIELD,
@@ -512,6 +535,7 @@ class ScoreETL(ExtractTransformLoad):
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
field_names.IS_TRIBAL_DAC,
field_names.FINAL_SCORE_N_BOOLEAN_V1_0,
]

# For some columns, high values are "good", so we want to reverse the percentile
@@ -541,8 +565,8 @@ class ScoreETL(ExtractTransformLoad):
low_field_name=field_names.LOW_LIFE_EXPECTANCY_FIELD,
),
ReversePercentile(
field_name=field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
low_field_name=field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
field_name=field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019,
low_field_name=field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019,
),
]

@@ -657,7 +681,7 @@ class ScoreETL(ExtractTransformLoad):
df_copy[field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010] = df_copy[
[
field_names.TOTAL_POP_FIELD,
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019,
]
].mean(axis=1, skipna=True)
@@ -4,6 +4,7 @@ from pathlib import Path
import numpy as np
from numpy import float64
import pandas as pd
import geopandas as gpd

from data_pipeline.content.schemas.download_schemas import CodebookConfig
from data_pipeline.content.schemas.download_schemas import CSVConfig

@@ -42,10 +43,12 @@ class PostScoreETL(ExtractTransformLoad):
self.input_counties_df: pd.DataFrame
self.input_states_df: pd.DataFrame
self.input_score_df: pd.DataFrame
self.input_census_geo_df: gpd.GeoDataFrame

self.output_score_county_state_merged_df: pd.DataFrame
self.output_score_tiles_df: pd.DataFrame
self.output_downloadable_df: pd.DataFrame
self.output_tract_search_df: pd.DataFrame

# Define some constants for the YAML file
# TODO: Implement this as a marshmallow schema.

@@ -105,6 +108,18 @@ class PostScoreETL(ExtractTransformLoad):

return df

def _extract_census_geojson(self, geo_path: Path) -> gpd.GeoDataFrame:
"""
Read in the Census Geo JSON data.

Returns:
gpd.GeoDataFrame: the census geo json data
"""
logger.debug("Reading Census GeoJSON")
with open(geo_path, "r", encoding="utf-8") as file:
data = gpd.read_file(file)
return data

def extract(self, use_cached_data_sources: bool = False) -> None:

super().extract(

@@ -131,6 +146,9 @@ class PostScoreETL(ExtractTransformLoad):
self.input_score_df = self._extract_score(
constants.DATA_SCORE_CSV_FULL_FILE_PATH
)
self.input_census_geo_df = self._extract_census_geojson(
constants.DATA_CENSUS_GEOJSON_FILE_PATH
)

def _transform_counties(
self, initial_counties_df: pd.DataFrame

@@ -392,7 +410,23 @@ class PostScoreETL(ExtractTransformLoad):

return final_df

def _create_tract_search_data(
self, census_geojson: gpd.GeoDataFrame
) -> pd.DataFrame:
"""
Generate a dataframe with only the tract IDs and the center lat/lon of each tract.

Returns:
pd.DataFrame: a dataframe with the tract search data
"""
logger.debug("Creating Census tract search data")
columns_to_extract = ["GEOID10", "INTPTLAT10", "INTPTLON10"]
return pd.DataFrame(census_geojson[columns_to_extract])

def transform(self) -> None:
self.output_tract_search_df = self._create_tract_search_data(
self.input_census_geo_df
)
transformed_counties = self._transform_counties(self.input_counties_df)
transformed_states = self._transform_states(self.input_states_df)
transformed_score = self._transform_score(self.input_score_df)

@@ -409,6 +443,9 @@ class PostScoreETL(ExtractTransformLoad):
self.output_score_county_state_merged_df = (
output_score_county_state_merged_df
)
self.output_tract_search_df = self._create_tract_search_data(
self.input_census_geo_df
)

def _load_score_csv_full(
self, score_county_state_merged: pd.DataFrame, score_csv_path: Path

@@ -592,6 +629,13 @@ class PostScoreETL(ExtractTransformLoad):
]
zip_files(version_data_documentation_zip_path, files_to_compress)

def _load_search_tract_data(self, output_path: Path):
"""Write the Census tract search data."""
logger.debug("Writing Census tract search data")
output_path.parent.mkdir(parents=True, exist_ok=True)
# We use the records orientation to easily import the JSON in JS.
self.output_tract_search_df.to_json(output_path, orient="records")

def load(self) -> None:
self._load_score_csv_full(
self.output_score_county_state_merged_df,

@@ -600,4 +644,5 @@ class PostScoreETL(ExtractTransformLoad):
self._load_tile_csv(
self.output_score_tiles_df, constants.DATA_SCORE_CSV_TILES_FILE_PATH
)
self._load_search_tract_data(constants.SCORE_TRACT_SEARCH_FILE_PATH)
self._load_downloadable_zip(constants.SCORE_DOWNLOADABLE_DIR)
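The `orient="records"` option used by `_load_search_tract_data` serializes one JSON object per dataframe row, so the frontend can consume the file as a plain array. A toy example of the resulting shape (made-up values):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "GEOID10": ["60010950100"],
        "INTPTLAT10": ["-14.2638"],
        "INTPTLON10": ["-170.6620"],
    }
)
# -> '[{"GEOID10":"60010950100","INTPTLAT10":"-14.2638","INTPTLON10":"-170.6620"}]'
print(df.to_json(orient="records"))
```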
@@ -3,6 +3,7 @@ from importlib import reload
from pathlib import Path

import pandas as pd
import geopandas as gpd
import pytest
from data_pipeline import config
from data_pipeline.etl.score import etl_score_post

@@ -129,8 +130,28 @@ def tile_data_expected():
return pd.read_pickle(pytest.SNAPSHOT_DIR / "tile_data_expected.pkl")


@pytest.fixture()
def create_tile_score_data_input():
return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl")


@pytest.fixture()
def create_tile_data_expected():
return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_data_expected.pkl")


@pytest.fixture()
def downloadable_data_expected():
return pd.read_pickle(
pytest.SNAPSHOT_DIR / "downloadable_data_expected.pkl"
)


@pytest.fixture()
def census_geojson_sample_data(sample_data_dir) -> gpd.GeoDataFrame:
with open(
sample_data_dir / "census_60.geojson", "r", encoding="utf-8"
) as file:
data = gpd.read_file(file)
return data
return None
File diff suppressed because one or more lines are too long
@@ -0,0 +1,23 @@
These files are used as inputs to unit tests. Some notes on their creation are below.

### create_tile_data_expected.pkl
1. Set a breakpoint in the `test_create_tile_data` method in `data_pipeline/etl/score/tests/test_score_post.py`
after the call to `_create_tile_data` and debug the test.
2. Extract a subset of the `output_tiles_df_actual` dataframe. Do not extract the whole score, as the file
will be too big and the test will run slowly. Also, you need to extract the same tracts that are in
the `create_tile_score_data_input.pkl` input data. For example, use the following command once the breakpoint is reached
to extract a few rows at the top and bottom of the score. This will capture some states and territories.
```python
import pandas as pd
pd.concat([output_tiles_df_actual.head(3), output_tiles_df_actual.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl')
```

### create_tile_score_data_input.pkl
1. Set a breakpoint in the transform method in `data_pipeline/etl/score/etl_score_post.py` before the call to
`_create_tile_data` and run the post scoring.
2. Extract a subset of the `output_score_county_state_merged_df` dataframe. Do not extract the whole score, as the file
will be too big and the test will run slowly. For example, use the following command once the breakpoint is reached
to extract a few rows at the top and bottom of the score. This will capture some states and territories.
```python
pd.concat([output_score_county_state_merged_df.head(3), output_score_county_state_merged_df.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl')
```
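The two snapshots must stay paired, since the expected tiles were generated from exactly those input rows. A quick, hypothetical sanity check (not part of the test suite) after regenerating either file:

```python
import pandas as pd

snapshots = "data_pipeline/etl/score/tests/snapshots"
input_df = pd.read_pickle(f"{snapshots}/create_tile_score_data_input.pkl")
expected_df = pd.read_pickle(f"{snapshots}/create_tile_data_expected.pkl")

# Assuming both files were cut with head(3)/tail(3) as described above, the
# row counts should match; a mismatch means one snapshot was regenerated alone.
assert len(input_df) == len(expected_df)
```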
Binary file not shown.
Binary file not shown.
@@ -5,9 +5,12 @@ from pathlib import Path

import pandas.api.types as ptypes
import pandas.testing as pdt
import pandas as pd
import geopandas as gpd
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.etl.score import constants
from data_pipeline.utils import load_yaml_dict_from_file
from data_pipeline.etl.score.etl_score_post import PostScoreETL

# See conftest.py for all fixtures used in these tests

@@ -80,11 +83,11 @@ def test_create_score_data(
)


def test_create_tile_data(etl, score_data_expected, tile_data_expected):
output_tiles_df_actual = etl._create_tile_data(score_data_expected)
def test_create_tile_data(etl, create_tile_score_data_input, create_tile_data_expected):
output_tiles_df_actual = etl._create_tile_data(create_tile_score_data_input)
pdt.assert_frame_equal(
output_tiles_df_actual,
tile_data_expected,
create_tile_data_expected,
)


@@ -150,3 +153,16 @@ def test_load_downloadable_zip(etl, monkeypatch, score_data_expected):
assert constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH.is_file()
assert constants.SCORE_DOWNLOADABLE_CSV_ZIP_FILE_PATH.is_file()
assert constants.SCORE_DOWNLOADABLE_XLS_ZIP_FILE_PATH.is_file()


def test_create_tract_search_data(census_geojson_sample_data: gpd.GeoDataFrame):
# Sanity check
assert len(census_geojson_sample_data) > 0

result = PostScoreETL()._create_tract_search_data(census_geojson_sample_data)
assert isinstance(result, pd.DataFrame)
assert not result.columns.empty
columns = ["GEOID10", "INTPTLAT10", "INTPTLON10"]
for col in columns:
assert col in result.columns
assert len(census_geojson_sample_data) == len(result)
@@ -215,6 +215,7 @@ class CensusETL(ExtractTransformLoad):
state_gdf = gpd.read_file(file_name)
usa_df = usa_df.append(state_gdf)

logger.debug("Converting to CRS")
usa_df = usa_df.to_crs(
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
)
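As an aside (not part of this diff): `DataFrame.append` was removed in pandas 2.0, so if the pipeline's pandas is ever upgraded this loop would need the `pd.concat` form. A sketch of the equivalent, with a hypothetical file list standing in for the real per-state shapefiles:

```python
import geopandas as gpd
import pandas as pd

# Hypothetical inputs; the real loop iterates over downloaded state shapefiles.
state_files = ["st01.shp", "st02.shp"]
state_gdfs = [gpd.read_file(f) for f in state_files]
# pd.concat preserves geometry columns and replaces the removed .append().
usa_df = gpd.GeoDataFrame(pd.concat(state_gdfs, ignore_index=True))
```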
@@ -25,6 +25,9 @@ class CensusACSETL(ExtractTransformLoad):
NAME = "census_acs"
ACS_YEAR = 2019
MINIMUM_POPULATION_REQUIRED_FOR_IMPUTATION = 1
ImputeVariables = namedtuple(
"ImputeVariables", ["raw_field_name", "imputed_field_name"]
)

def __init__(self):

@@ -284,7 +287,7 @@ class CensusACSETL(ExtractTransformLoad):

self.COLUMNS_TO_KEEP = (
[
self.GEOID_TRACT_FIELD_NAME,
field_names.GEOID_TRACT_FIELD,
field_names.TOTAL_POP_FIELD,
self.UNEMPLOYED_FIELD_NAME,
self.LINGUISTIC_ISOLATION_FIELD_NAME,

@@ -335,15 +338,15 @@ class CensusACSETL(ExtractTransformLoad):
destination=self.census_acs_source,
acs_year=self.ACS_YEAR,
variables=variables,
tract_output_field_name=self.GEOID_TRACT_FIELD_NAME,
tract_output_field_name=field_names.GEOID_TRACT_FIELD,
data_path_for_fips_codes=self.DATA_PATH,
acs_type="acs5",
)
]

# pylint: disable=too-many-arguments
def _merge_geojson(
self,
@staticmethod
def merge_geojson(
df: pd.DataFrame,
usa_geo_df: gpd.GeoDataFrame,
geoid_field: str = "GEOID10",

@@ -364,7 +367,7 @@ class CensusACSETL(ExtractTransformLoad):
county_code_field,
]
],
left_on=[self.GEOID_TRACT_FIELD_NAME],
left_on=[field_names.GEOID_TRACT_FIELD],
right_on=[geoid_field],
)
)

@@ -377,7 +380,7 @@ class CensusACSETL(ExtractTransformLoad):

self.df = pd.read_csv(
self.census_acs_source,
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
dtype={field_names.GEOID_TRACT_FIELD: "string"},
)

def transform(self) -> None:

@@ -401,7 +404,7 @@ class CensusACSETL(ExtractTransformLoad):
self.DATA_PATH / "census" / "geojson" / "us.json",
)

df = self._merge_geojson(
df = CensusACSETL.merge_geojson(
df=df,
usa_geo_df=geo_df,
)

@@ -608,23 +611,19 @@ class CensusACSETL(ExtractTransformLoad):
# we impute income for both income measures
## TODO: Convert to pydantic for clarity
logger.debug("Imputing income information")
ImputeVariables = namedtuple(
"ImputeVariables", ["raw_field_name", "imputed_field_name"]
)

df = calculate_income_measures(
impute_var_named_tup_list=[
ImputeVariables(
CensusACSETL.ImputeVariables(
raw_field_name=self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
),
ImputeVariables(
CensusACSETL.ImputeVariables(
raw_field_name=self.COLLEGE_ATTENDANCE_FIELD,
imputed_field_name=self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
),
],
geo_df=df,
geoid_field=self.GEOID_TRACT_FIELD_NAME,
geoid_field=field_names.GEOID_TRACT_FIELD,
minimum_population_required_for_imputation=self.MINIMUM_POPULATION_REQUIRED_FOR_IMPUTATION,
)
@@ -39,6 +39,7 @@ def _choose_best_mask(
for mask in masks_in_priority_order:
if any(geo_df[mask][column_to_impute].notna()):
return mask
# pylint: disable-next=broad-exception-raised
raise Exception("No mask found")
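`_choose_best_mask` walks a list of boolean masks from most to least local and returns the first one that exposes any non-null donor values for the column being imputed. A toy sketch of the same pattern (standalone, with made-up masks and data, not the pipeline's geography logic):

```python
import pandas as pd

geo_df = pd.DataFrame({"income": [None, None, 52000.0]})
# Masks ordered from most specific (e.g. nearest neighbors) to broadest.
masks_in_priority_order = [
    pd.Series([True, False, False]),  # selects only null donors -> skipped
    pd.Series([False, True, True]),   # exposes a non-null donor -> chosen
]
for mask in masks_in_priority_order:
    if any(geo_df[mask]["income"].notna()):
        best_mask = mask
        break
else:
    raise Exception("No mask found")
```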
@@ -1,10 +1,10 @@
# Decennial Census of Island Areas

Adding a new ETL folder for Decennial Census of Island Areas since the Island areas aren't included in ACS.
Decennial Census of Island Areas since the Island areas aren't included in ACS.

There's the American Samoa Summary File, the Guam Summary File, the Commonwealth of the Northern Mariana Islands Summary File, and the US Virgin Islands Summary File:

1. https://api.census.gov/data/2010/dec/as.html
1. https://api.census.gov/data/2010/dec/gu.html
1. https://api.census.gov/data/2010/dec/mp.html
1. https://api.census.gov/data/2010/dec/vi.html
1. https://api.census.gov/data/2020/dec/dhcas.html
1. https://api.census.gov/data/2020/dec/dhcgu.html
1. https://api.census.gov/data/2020/dec/dhcmp.html
1. https://api.census.gov/data/2020/dec/dhcvi.html
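A minimal sketch of querying one of the 2020 endpoints listed above by hand (standalone, not pipeline code; append `&key=...` if you have a `CENSUS_API_KEY`). `P1_001N` (total population) is one of the variables the ETL actually requests, and the URL shape mirrors the ETL's `__get_api_url`:

```python
import json
from urllib.request import urlopen

# Pull total population for every tract in Guam (state FIPS 66, county 010).
url = (
    "https://api.census.gov/data/2020/dec/dhcgu"
    "?get=NAME,P1_001N&for=tract:*&in=state:66%20county:010"
)
with urlopen(url) as response:
    rows = json.load(response)
# The response is a JSON array whose first row is the header.
header, data = rows[0], rows[1:]
print(header)   # ['NAME', 'P1_001N', 'state', 'county', 'tract']
print(data[0])
```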
@@ -0,0 +1,211 @@
from enum import Enum
from types import MappingProxyType
from data_pipeline.score import field_names


class DEC_FIELD_NAMES(str, Enum):
"""Field/column names for the decennial data"""

MALE_HIGH_SCHOOL_ED = "Total male high school graduates 25 and over"
FEMALE_HIGH_SCHOOL_ED = "Total female high school graduates 25 and over"
IMPUTED_COLLEGE_ATTENDANCE = "Percent enrollment in college, graduate or professional school, imputed"
TOTAL_RACE_POPULATION = "Total population surveyed on racial data"
BLACK = "Black or African American"
AMERICAN_INDIAN = "American Indian / Alaska Native"
ASIAN = "Asian"
HAWAIIAN = "Native Hawaiian or Pacific"
TWO_OR_MORE_RACES = "two or more races"
NON_HISPANIC_WHITE = "White"
HISPANIC = "Hispanic or Latino"
OTHER_RACE = "other races"
HOUSEHOLD_POVERTY_LEVEL_UNDER_0_5 = (
"Household poverty level Under 0.50 IN 2019"
)
HOUSEHOLD_POVERTY_LEVEL_UNDER_0_74 = (
"Household poverty level Under 0.74 IN 2019"
)
HOUSEHOLD_POVERTY_LEVEL_UNDER_0_99 = (
"Household poverty level Under 0.99 IN 2019"
)
HOUSEHOLD_POVERTY_LEVEL_OVER_2_0 = (
"Household poverty level Over 2.0 IN 2019"
)
IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL = f"{field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019}, imputed"
TOTAL_HOUSEHOLD_POVERTY_LEVEL = "Total Household poverty level IN 2019"
TERRITORY_MEDIAN_INCOME = "Territory Median Income"
EMPLOYMENT_MALE_UNEMPLOYED = "Total males not in labor force"
EMPLOYMENT_FEMALE_UNEMPLOYED = "Total females not in labor force"
EMPLOYMENT_MALE_IN_LABOR_FORCE = "Total males in labor force"
EMPLOYMENT_FEMALE_IN_LABOR_FORCE = "Total females in labor force"
COLLEGE_ATTENDANCE_TOTAL_ENROLLED = "Total asked enrolled in college or graduate school (excludes military housing)"
COLLEGE_NON_ATTENDANCE = "Percent of population not currently enrolled in college, graduate or professional school"
COLLEGE_ATTENDANCE_MALE_ENROLLED = "Males enrolled in college or graduate school (excludes military housing)"
COLLEGE_ATTENDANCE_FEMALE_ENROLLED = "Females enrolled in college or graduate school (excludes military housing)"
COLLEGE_ATTENDANCE_POPULATION = (
"Population enrolled in college, graduate or professional school"
)
COLLEGE_ATTENDANCE_PERCENT = (
"Percent enrollment in college, graduate or professional school"
)
IMPUTED_COLLEGE_ATTENDANCE_PERCENT = (
f"{COLLEGE_ATTENDANCE_PERCENT}, imputed"
)
COLLEGE_NON_ATTENDANCE_PERCENT = "Percent of population not currently enrolled in college, graduate or professional school"

def __str__(self) -> str:
"""This method removes the need to use the value attribute from the Enums"""
return str.__str__(self)


__FIELD_NAME_COMMON_XWALK = {
"P1_001N": field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019,
"PBG19_005N": DEC_FIELD_NAMES.MALE_HIGH_SCHOOL_ED,
"PBG19_012N": DEC_FIELD_NAMES.FEMALE_HIGH_SCHOOL_ED,
"PCT31_001N": DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_TOTAL_ENROLLED,
"PBG32_003N": DEC_FIELD_NAMES.EMPLOYMENT_MALE_IN_LABOR_FORCE,
"PBG32_007N": DEC_FIELD_NAMES.EMPLOYMENT_MALE_UNEMPLOYED,
"PBG32_010N": DEC_FIELD_NAMES.EMPLOYMENT_FEMALE_IN_LABOR_FORCE,
"PBG32_014N": DEC_FIELD_NAMES.EMPLOYMENT_FEMALE_UNEMPLOYED,
"PCT34_003N": DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_MALE_ENROLLED,
"PCT34_016N": DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_FEMALE_ENROLLED,
"PBG43_001N": field_names.CENSUS_DECENNIAL_MEDIAN_INCOME_2019,
"PBG74_001N": DEC_FIELD_NAMES.TOTAL_HOUSEHOLD_POVERTY_LEVEL,
"PBG74_002N": DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_UNDER_0_5,
"PBG74_003N": DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_UNDER_0_74,
"PBG74_004N": DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_UNDER_0_99,
"PBG74_010N": DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_OVER_2_0,
}
"""
Census variable to text column name mapping. For details on Census variables see:
https://api.census.gov/data/2020/dec/dhcas/variables.html
https://api.census.gov/data/2020/dec/dhcgu/variables.html
https://api.census.gov/data/2020/dec/dhcmp/variables.html
https://api.census.gov/data/2020/dec/dhcvi/variables.html
"""

# Note that the 2010 census for island areas does not break out
# hispanic and non-hispanic white, so this is slightly different from
# our other demographic data
__FIELD_NAME_AS_XWALK = {
"PCT9_001N": DEC_FIELD_NAMES.TOTAL_RACE_POPULATION,
"PCT9_003N": DEC_FIELD_NAMES.HAWAIIAN,
"PCT9_079N": DEC_FIELD_NAMES.ASIAN,
"PCT9_130N": DEC_FIELD_NAMES.NON_HISPANIC_WHITE,
"PCT9_155N": DEC_FIELD_NAMES.BLACK,
"PCT9_180N": DEC_FIELD_NAMES.AMERICAN_INDIAN,
"PCT9_205N": DEC_FIELD_NAMES.OTHER_RACE,
"PCT9_230N": DEC_FIELD_NAMES.TWO_OR_MORE_RACES,
"P5_002N": DEC_FIELD_NAMES.HISPANIC,
}
"""American Samoa specific race fields."""

__FIELD_NAME_VI_XWALK = {
"PCT7_001N": DEC_FIELD_NAMES.TOTAL_RACE_POPULATION,
"PCT7_003N": DEC_FIELD_NAMES.BLACK,
"PCT7_205N": DEC_FIELD_NAMES.ASIAN,
"PCT7_230N": DEC_FIELD_NAMES.AMERICAN_INDIAN,
"PCT7_255N": DEC_FIELD_NAMES.HAWAIIAN,
"PCT7_280N": DEC_FIELD_NAMES.OTHER_RACE,
"PCT7_305N": DEC_FIELD_NAMES.TWO_OR_MORE_RACES,
"P5_021N": DEC_FIELD_NAMES.NON_HISPANIC_WHITE,
"PCT6_003N": DEC_FIELD_NAMES.HISPANIC,
}
"""US Virgin Islands specific race fields."""

__FIELD_NAME_GU_XWALK = {
"PCT10_001N": DEC_FIELD_NAMES.TOTAL_RACE_POPULATION,
"PCT10_003N": DEC_FIELD_NAMES.HAWAIIAN,
"PCT10_204N": DEC_FIELD_NAMES.ASIAN,
"PCT10_330N": DEC_FIELD_NAMES.BLACK,
"PCT10_355N": DEC_FIELD_NAMES.AMERICAN_INDIAN,
"PCT10_380N": DEC_FIELD_NAMES.OTHER_RACE,
"PCT10_405N": DEC_FIELD_NAMES.TWO_OR_MORE_RACES,
"P5_026N": DEC_FIELD_NAMES.NON_HISPANIC_WHITE,
"PCT9_003N": DEC_FIELD_NAMES.HISPANIC,
}
"""Guam specific race fields."""

__FIELD_NAME_MP_XWALK = {
"PCT9_001N": DEC_FIELD_NAMES.TOTAL_RACE_POPULATION,
"PCT9_003N": DEC_FIELD_NAMES.ASIAN,
"PCT9_129N": DEC_FIELD_NAMES.HAWAIIAN,
"PCT9_330N": DEC_FIELD_NAMES.BLACK,
"PCT9_355N": DEC_FIELD_NAMES.AMERICAN_INDIAN,
"PCT9_380N": DEC_FIELD_NAMES.OTHER_RACE,
"PCT9_405N": DEC_FIELD_NAMES.TWO_OR_MORE_RACES,
"P5_002N": DEC_FIELD_NAMES.HISPANIC,
"P5_024N": DEC_FIELD_NAMES.NON_HISPANIC_WHITE,
}
"""Northern Mariana Islands specific race fields."""

OUTPUT_RACE_FIELDS = [
DEC_FIELD_NAMES.BLACK,
DEC_FIELD_NAMES.AMERICAN_INDIAN,
DEC_FIELD_NAMES.ASIAN,
DEC_FIELD_NAMES.HAWAIIAN,
DEC_FIELD_NAMES.TWO_OR_MORE_RACES,
DEC_FIELD_NAMES.NON_HISPANIC_WHITE,
DEC_FIELD_NAMES.HISPANIC,
DEC_FIELD_NAMES.OTHER_RACE,
]
"""Race fields to output in the results."""

DEC_TERRITORY_PARAMS = [
MappingProxyType(
{
"state_abbreviation": "as",
"fips": "60",
# https://www2.census.gov/geo/docs/reference/codes2020/cou/st60_as_cou2020.txt
"county_fips": ("010", "020", "030", "040", "050"),
"xwalk": MappingProxyType(
__FIELD_NAME_COMMON_XWALK | __FIELD_NAME_AS_XWALK
),
# Note: we hardcode the median income for each territory in this dict,
# because that data is hard to programmatically access.
# https://www.ruralhealthinfo.org/states/american-samoa
"median_income": 26352,
}
),
MappingProxyType(
{
"state_abbreviation": "gu",
"fips": "66",
# https://www2.census.gov/geo/docs/reference/codes2020/cou/st66_gu_cou2020.txt
"county_fips": ("010",),
"xwalk": MappingProxyType(
__FIELD_NAME_COMMON_XWALK | __FIELD_NAME_GU_XWALK
),
# https://www.ruralhealthinfo.org/states/guam
# https://data.census.gov/table/DECENNIALDPGU2020.DP3?g=040XX00US66&d=DECIA%20Guam%20Demographic%20Profile
"median_income": 58289,
}
),
MappingProxyType(
{
"state_abbreviation": "mp",
"fips": "69",
# https://www2.census.gov/geo/docs/reference/codes2020/cou/st69_mp_cou2020.txt
"county_fips": ("085", "100", "110", "120"),
"xwalk": MappingProxyType(
__FIELD_NAME_COMMON_XWALK | __FIELD_NAME_MP_XWALK
),
# https://www.ruralhealthinfo.org/states/northern-mariana
# https://data.census.gov/table/DECENNIALDPMP2020.DP3?d=DECIA%20Commonwealth%20of%20the%20Northern%20Mariana%20Islands%20Demographic%20Profile
"median_income": 31362,
}
),
MappingProxyType(
{
"state_abbreviation": "vi",
"fips": "78",
# https://www2.census.gov/geo/docs/reference/codes2020/cou/st78_vi_cou2020.txt
"county_fips": ("010", "020", "030"),
"xwalk": MappingProxyType(
__FIELD_NAME_COMMON_XWALK | __FIELD_NAME_VI_XWALK
),
# https://www.ruralhealthinfo.org/states/us-virgin-islands
"median_income": 40408,
}
),
]
"""Read-only list of territories to process."""
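Two idioms in this new constants file are easy to miss: because `DEC_FIELD_NAMES` subclasses `str` and overrides `__str__`, its members can be used directly as dataframe column names without `.value`; and each territory's `xwalk` is built with the `|` dict-union operator (Python 3.9+) and then frozen behind `MappingProxyType`. A small standalone illustration (simplified names, not the module itself):

```python
from enum import Enum
from types import MappingProxyType


class DEC_FIELD_NAMES(str, Enum):
    ASIAN = "Asian"

    def __str__(self) -> str:
        return str.__str__(self)


common = {"P1_001N": "Total population"}
as_only = {"PCT9_079N": DEC_FIELD_NAMES.ASIAN}
xwalk = MappingProxyType(common | as_only)  # dict union, then a read-only view

assert str(DEC_FIELD_NAMES.ASIAN) == "Asian"   # no .value needed
assert f"{DEC_FIELD_NAMES.ASIAN}" == "Asian"   # works in f-strings too
assert list(xwalk) == ["P1_001N", "PCT9_079N"]
```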
@@ -1,14 +1,24 @@
import json
from typing import List
import os

import numpy as np
import pandas as pd
import geopandas as gpd
import json
from typing import List
from pathlib import Path
from data_pipeline.etl.sources.census_decennial.constants import (
DEC_TERRITORY_PARAMS,
DEC_FIELD_NAMES,
OUTPUT_RACE_FIELDS,
)
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
from data_pipeline.etl.datasource import DataSource
from data_pipeline.etl.datasource import FileDataSource
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
from data_pipeline.etl.sources.census_acs.etl import CensusACSETL
from data_pipeline.etl.sources.census_acs.etl_imputations import (
calculate_income_measures,
)

pd.options.mode.chained_assignment = "raise"

@@ -16,514 +26,278 @@ logger = get_module_logger(__name__)


class CensusDecennialETL(ExtractTransformLoad):
def __init__(self):
self.DECENNIAL_YEAR = 2010
self.OUTPUT_PATH = (
self.DATA_PATH
/ "dataset"
/ f"census_decennial_{self.DECENNIAL_YEAR}"
)
DECENNIAL_YEAR = 2020
OUTPUT_PATH = (
ExtractTransformLoad.DATA_PATH
/ "dataset"
/ f"census_decennial_{DECENNIAL_YEAR}"
)
CENSUS_GEOJSON_PATH = (
ExtractTransformLoad.DATA_PATH / "census" / "geojson" / "us.json"
)

# Income Fields
# AS, GU, and MP all share the same variable names, but VI is different
# https://api.census.gov/data/2010/dec/as.html
# https://api.census.gov/data/2010/dec/gu/variables.html
# https://api.census.gov/data/2010/dec/mp/variables.html
# https://api.census.gov/data/2010/dec/vi/variables.html

# Total population field is the same in all island areas
self.TOTAL_POP_FIELD = self.TOTAL_POP_VI_FIELD = "P001001"
self.TOTAL_POP_FIELD_NAME = "Total population in 2009"

self.MEDIAN_INCOME_FIELD = "PBG049001"
self.MEDIAN_INCOME_VI_FIELD = "PBG047001"
self.MEDIAN_INCOME_FIELD_NAME = "Median household income in 2009 ($)"
self.AREA_MEDIAN_INCOME_FIELD_NAME = (
"Median household income as a percent of "
"territory median income in 2009"
def __get_api_url(
self,
state_abbreviation: str,
name_list: List[str],
fips: str,
county: str,
) -> str:
url = (
f"https://api.census.gov/data/{self.DECENNIAL_YEAR}/dec/dhc{state_abbreviation}?get=NAME,{name_list}"
+ f"&for=tract:*&in=state:{fips}%20county:{county}"
)

self.TERRITORY_MEDIAN_INCOME_FIELD = "Territory Median Income"

self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD = "PBG083001"
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_VI_FIELD = (
"PBG077001"
)
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD_NAME = (
"TOTAL; RATIO OF INCOME TO POVERTY LEVEL IN 2009"
)

self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD = "PBG083010"
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_VI_FIELD = "PBG077010"
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD_NAME = (
"Total!!2.00 and over; RATIO OF INCOME TO POVERTY LEVEL IN 2009"
)

self.PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL_FIELD_NAME = (
"Percentage households below 200% of federal poverty line in 2009"
)

# We will combine three fields to get households < 100% FPL.
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_ONE = (
"PBG083002"  # Total!!Under .50
)
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_TWO = (
"PBG083003"  # Total!!.50 to .74
)
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_THREE = (
"PBG083004"  # Total!!.75 to .99
)

# Same fields, for Virgin Islands.
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_ONE = (
"PBG077002"  # Total!!Under .50
)
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_TWO = (
"PBG077003"  # Total!!.50 to .74
)
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_THREE = (
"PBG077004"  # Total!!.75 to .99
)

self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD = "PBG083010"
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_VI_FIELD = "PBG077010"
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD_NAME = (
"Total!!2.00 and over; RATIO OF INCOME TO POVERTY LEVEL IN 2009"
)

self.PERCENTAGE_HOUSEHOLDS_BELOW_100_PERC_POVERTY_LEVEL_FIELD_NAME = (
"Percentage households below 100% of federal poverty line in 2009"
)

# High School Education Fields
self.TOTAL_POPULATION_FIELD = "PBG026001"
self.TOTAL_POPULATION_VI_FIELD = "PCT032001"
self.TOTAL_POPULATION_FIELD_NAME = "Total; SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"

self.MALE_HIGH_SCHOOL_ED_FIELD = "PBG026005"
self.MALE_HIGH_SCHOOL_ED_VI_FIELD = "PCT032011"
self.MALE_HIGH_SCHOOL_ED_FIELD_NAME = (
"Total!!Male!!High school graduate, GED, or alternative; "
"SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"
)

self.FEMALE_HIGH_SCHOOL_ED_FIELD = "PBG026012"
self.FEMALE_HIGH_SCHOOL_ED_VI_FIELD = "PCT032028"
self.FEMALE_HIGH_SCHOOL_ED_FIELD_NAME = (
"Total!!Female!!High school graduate, GED, or alternative; "
"SEX BY EDUCATIONAL ATTAINMENT FOR THE POPULATION 25 YEARS AND OVER"
)

self.PERCENTAGE_HIGH_SCHOOL_ED_FIELD_NAME = "Percent individuals age 25 or over with less than high school degree in 2009"

# Employment fields
self.EMPLOYMENT_MALE_IN_LABOR_FORCE_FIELD = (
"PBG038003"  # Total!!Male!!In labor force
)
self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD = (
"PBG038007"  # Total!!Male!!In labor force!!Civilian!!Unemployed
)
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD = (
"PBG038010"  # Total!!Female!!In labor force
)
self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD = (
"PBG038014"  # Total!!Female!!In labor force!!Civilian!!Unemployed
)

# Same fields, Virgin Islands.
self.EMPLOYMENT_MALE_IN_LABOR_FORCE_VI_FIELD = (
"PBG036003"  # Total!!Male!!In labor force
)
self.EMPLOYMENT_MALE_UNEMPLOYED_VI_FIELD = (
"PBG036007"  # Total!!Male!!In labor force!!Civilian!!Unemployed
)
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_VI_FIELD = (
"PBG036010"  # Total!!Female!!In labor force
)
self.EMPLOYMENT_FEMALE_UNEMPLOYED_VI_FIELD = (
"PBG036014"  # Total!!Female!!In labor force!!Civilian!!Unemployed
)

self.UNEMPLOYMENT_FIELD_NAME = (
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
)

# Race/Ethnicity fields
self.TOTAL_RACE_POPULATION_FIELD = "PCT086001"  # Total
self.ASIAN_FIELD = "PCT086002"  # Total!!Asian
self.BLACK_FIELD = "PCT086003"  # Total!!Black or African American
self.HAWAIIAN_FIELD = (
"PCT086004"  # Total!!Native Hawaiian and Other Pacific Islander
)
# Note that the 2010 census for island areas does not break out
# hispanic and non-hispanic white, so this is slightly different from
# our other demographic data
self.NON_HISPANIC_WHITE_FIELD = "PCT086005"  # Total!!White
self.HISPANIC_FIELD = "PCT086006"  # Total!!Hispanic or Latino
self.OTHER_RACE_FIELD = "PCT086007"  # Total!!Other Ethnic Origin or Ra

self.TOTAL_RACE_POPULATION_VI_FIELD = "P003001"  # Total
self.BLACK_VI_FIELD = (
"P003003"  # Total!!One race!!Black or African American alone
)
self.AMERICAN_INDIAN_VI_FIELD = "P003005"  # Total!!One race!!American Indian and Alaska Native alone
self.ASIAN_VI_FIELD = "P003006"  # Total!!One race!!Asian alone
self.HAWAIIAN_VI_FIELD = "P003007"  # Total!!One race!!Native Hawaiian and Other Pacific Islander alone
self.TWO_OR_MORE_RACES_VI_FIELD = "P003009"  # Total!!Two or More Races
self.NON_HISPANIC_WHITE_VI_FIELD = (
"P005006"  # Total!!Not Hispanic or Latino!!One race!!White alone
)
self.HISPANIC_VI_FIELD = "P005002"  # Total!!Hispanic or Latino
self.OTHER_RACE_VI_FIELD = (
"P003008"  # Total!!One race!!Some Other Race alone
)
self.TOTAL_RACE_POPULATION_VI_FIELD = "P003001"  # Total

self.TOTAL_RACE_POPULATION_FIELD_NAME = (
"Total population surveyed on racial data"
)
self.BLACK_FIELD_NAME = "Black or African American"
self.AMERICAN_INDIAN_FIELD_NAME = "American Indian / Alaska Native"
self.ASIAN_FIELD_NAME = "Asian"
self.HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific"
self.TWO_OR_MORE_RACES_FIELD_NAME = "two or more races"
self.NON_HISPANIC_WHITE_FIELD_NAME = "White"
self.HISPANIC_FIELD_NAME = "Hispanic or Latino"
# Note that `other` is lowercase because the whole field will show up in the download
# file as "Percent other races"
self.OTHER_RACE_FIELD_NAME = "other races"

# Name output demographics fields.
self.RE_OUTPUT_FIELDS = [
self.BLACK_FIELD_NAME,
self.AMERICAN_INDIAN_FIELD_NAME,
self.ASIAN_FIELD_NAME,
self.HAWAIIAN_FIELD_NAME,
self.TWO_OR_MORE_RACES_FIELD_NAME,
self.NON_HISPANIC_WHITE_FIELD_NAME,
self.HISPANIC_FIELD_NAME,
self.OTHER_RACE_FIELD_NAME,
]

var_list = [
self.MEDIAN_INCOME_FIELD,
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD,
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD,
self.TOTAL_POPULATION_FIELD,
self.MALE_HIGH_SCHOOL_ED_FIELD,
self.FEMALE_HIGH_SCHOOL_ED_FIELD,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_ONE,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_TWO,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_THREE,
self.EMPLOYMENT_MALE_IN_LABOR_FORCE_FIELD,
self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD,
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD,
self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD,
self.TOTAL_POP_FIELD,
self.TOTAL_RACE_POPULATION_FIELD,
self.ASIAN_FIELD,
self.BLACK_FIELD,
self.HAWAIIAN_FIELD,
self.NON_HISPANIC_WHITE_FIELD,
self.HISPANIC_FIELD,
self.OTHER_RACE_FIELD,
]
var_list = ",".join(var_list)

var_list_vi = [
self.MEDIAN_INCOME_VI_FIELD,
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_VI_FIELD,
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_VI_FIELD,
self.TOTAL_POPULATION_VI_FIELD,
self.MALE_HIGH_SCHOOL_ED_VI_FIELD,
self.FEMALE_HIGH_SCHOOL_ED_VI_FIELD,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_ONE,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_TWO,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_THREE,
self.EMPLOYMENT_MALE_IN_LABOR_FORCE_VI_FIELD,
self.EMPLOYMENT_MALE_UNEMPLOYED_VI_FIELD,
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_VI_FIELD,
self.EMPLOYMENT_FEMALE_UNEMPLOYED_VI_FIELD,
self.TOTAL_POP_VI_FIELD,
self.BLACK_VI_FIELD,
self.AMERICAN_INDIAN_VI_FIELD,
self.ASIAN_VI_FIELD,
self.HAWAIIAN_VI_FIELD,
self.TWO_OR_MORE_RACES_VI_FIELD,
self.NON_HISPANIC_WHITE_VI_FIELD,
self.HISPANIC_VI_FIELD,
self.OTHER_RACE_VI_FIELD,
self.TOTAL_RACE_POPULATION_VI_FIELD,
]
var_list_vi = ",".join(var_list_vi)

self.FIELD_NAME_XWALK = {
self.MEDIAN_INCOME_FIELD: self.MEDIAN_INCOME_FIELD_NAME,
self.MEDIAN_INCOME_VI_FIELD: self.MEDIAN_INCOME_FIELD_NAME,
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD: self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD_NAME,
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_VI_FIELD: self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD_NAME,
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD: self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD_NAME,
self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_VI_FIELD: self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD_NAME,
self.TOTAL_POPULATION_FIELD: self.TOTAL_POPULATION_FIELD_NAME,
self.TOTAL_POPULATION_VI_FIELD: self.TOTAL_POPULATION_FIELD_NAME,
self.MALE_HIGH_SCHOOL_ED_FIELD: self.MALE_HIGH_SCHOOL_ED_FIELD_NAME,
self.MALE_HIGH_SCHOOL_ED_VI_FIELD: self.MALE_HIGH_SCHOOL_ED_FIELD_NAME,
self.FEMALE_HIGH_SCHOOL_ED_FIELD: self.FEMALE_HIGH_SCHOOL_ED_FIELD_NAME,
self.FEMALE_HIGH_SCHOOL_ED_VI_FIELD: self.FEMALE_HIGH_SCHOOL_ED_FIELD_NAME,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_ONE: self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_ONE,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_ONE: self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_ONE,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_TWO: self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_TWO,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_TWO: self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_TWO,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_THREE: self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_THREE,
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_VI_PART_THREE: self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_THREE,
self.EMPLOYMENT_MALE_IN_LABOR_FORCE_VI_FIELD: self.EMPLOYMENT_MALE_IN_LABOR_FORCE_FIELD,
self.EMPLOYMENT_MALE_UNEMPLOYED_VI_FIELD: self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD,
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_VI_FIELD: self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD,
self.EMPLOYMENT_FEMALE_UNEMPLOYED_VI_FIELD: self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD,
self.EMPLOYMENT_MALE_IN_LABOR_FORCE_FIELD: self.EMPLOYMENT_MALE_IN_LABOR_FORCE_FIELD,
self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD: self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD,
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD: self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD,
self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD: self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD,
self.TOTAL_RACE_POPULATION_FIELD: self.TOTAL_RACE_POPULATION_FIELD_NAME,
self.TOTAL_RACE_POPULATION_VI_FIELD: self.TOTAL_RACE_POPULATION_FIELD_NAME,
# Note there is no American Indian data for AS/GU/MI
self.AMERICAN_INDIAN_VI_FIELD: self.AMERICAN_INDIAN_FIELD_NAME,
self.ASIAN_FIELD: self.ASIAN_FIELD_NAME,
self.ASIAN_VI_FIELD: self.ASIAN_FIELD_NAME,
self.BLACK_FIELD: self.BLACK_FIELD_NAME,
self.BLACK_VI_FIELD: self.BLACK_FIELD_NAME,
self.HAWAIIAN_FIELD: self.HAWAIIAN_FIELD_NAME,
self.HAWAIIAN_VI_FIELD: self.HAWAIIAN_FIELD_NAME,
self.TWO_OR_MORE_RACES_VI_FIELD: self.TWO_OR_MORE_RACES_FIELD_NAME,
self.NON_HISPANIC_WHITE_FIELD: self.NON_HISPANIC_WHITE_FIELD_NAME,
self.NON_HISPANIC_WHITE_VI_FIELD: self.NON_HISPANIC_WHITE_FIELD_NAME,
self.HISPANIC_FIELD: self.HISPANIC_FIELD_NAME,
self.HISPANIC_VI_FIELD: self.HISPANIC_FIELD_NAME,
self.OTHER_RACE_FIELD: self.OTHER_RACE_FIELD_NAME,
self.OTHER_RACE_VI_FIELD: self.OTHER_RACE_FIELD_NAME,
}

# To do: Ask Census Slack Group about whether you need to hardcode the county fips
# https://uscensusbureau.slack.com/archives/C6DGLC05B/p1635218909012600
self.ISLAND_TERRITORIES = [
{
"state_abbreviation": "as",
"fips": "60",
"county_fips": ["010", "020", "030", "040", "050"],
"var_list": var_list,
# Note: we hardcode the median income for each territory in this dict,
# because that data is hard to programmatically access.
self.TERRITORY_MEDIAN_INCOME_FIELD: 23892,
},
{
"state_abbreviation": "gu",
"fips": "66",
"county_fips": ["010"],
"var_list": var_list,
self.TERRITORY_MEDIAN_INCOME_FIELD: 48274,
},
{
"state_abbreviation": "mp",
"fips": "69",
"county_fips": ["085", "100", "110", "120"],
"var_list": var_list,
self.TERRITORY_MEDIAN_INCOME_FIELD: 19958,
},
{
"state_abbreviation": "vi",
"fips": "78",
"county_fips": ["010", "020", "030"],
"var_list": var_list_vi,
self.TERRITORY_MEDIAN_INCOME_FIELD: 37254,
},
]

self.API_URL = (
"https://api.census.gov/data/{}/dec/{}?get=NAME,{}"
+ "&for=tract:*&in=state:{}%20county:{}"
)

census_api_key = os.environ.get("CENSUS_API_KEY")
if census_api_key:
self.API_URL = self.API_URL + f"&key={census_api_key}"
url += f"&key={census_api_key}"
return url

self.final_race_fields: List[str] = []
def __get_destination_path(
self,
state_abbreviation: str,
fips: str,
county: str,
test_path: Path = None,
) -> str:
root_path = test_path or self.get_sources_path()
return (
root_path
/ str(self.DECENNIAL_YEAR)
/ state_abbreviation
/ fips
/ county
/ "census.json"
)

self.df: pd.DataFrame
self.df_vi: pd.DataFrame
self.df_all: pd.DataFrame

def get_data_sources(self) -> [DataSource]:
def __init__(self):
self.df_all = pd.DataFrame()
self.final_race_fields = []

def get_data_sources(self) -> List[DataSource]:
sources = []

for island in self.ISLAND_TERRITORIES:
for island in DEC_TERRITORY_PARAMS:
for county in island["county_fips"]:

api_url = self.API_URL.format(
self.DECENNIAL_YEAR,
api_url = self.__get_api_url(
island["state_abbreviation"],
island["var_list"],
",".join(island["xwalk"].keys()),
island["fips"],
county,
)

sources.append(
FileDataSource(
source=api_url,
destination=self.get_sources_path()
/ str(self.DECENNIAL_YEAR)
/ island["state_abbreviation"]
/ island["fips"]
/ county
/ "census.json",
api_url,
self.__get_destination_path(
island["state_abbreviation"], island["fips"], county
),
)
)

return sources

def extract(self, use_cached_data_sources: bool = False) -> None:

super().extract(
use_cached_data_sources
)  # download and extract data sources

dfs = []
dfs_vi = []
for island in self.ISLAND_TERRITORIES:
logger.debug(
f"Downloading data for state/territory {island['state_abbreviation']}"
)
for county in island["county_fips"]:

def extract(
self,
use_cached_data_sources: bool = False,
test_territory_params=None,
test_path: Path = None,
) -> None:
super().extract(use_cached_data_sources)
for territory in test_territory_params or DEC_TERRITORY_PARAMS:
for county in territory["county_fips"]:
abbr = territory["state_abbreviation"]
file_path = self.__get_destination_path(
abbr, territory["fips"], county, test_path=test_path
)
try:
filepath = (
self.get_sources_path()
/ str(self.DECENNIAL_YEAR)
/ island["state_abbreviation"]
/ island["fips"]
/ county
/ "census.json"
)
df = json.load(filepath.open())
except ValueError as e:
json_data = json.load(file_path.open())
except (FileNotFoundError, ValueError) as e:
logger.error(
f"Could not load content in census decennial ETL because {e}."
)
raise
df = pd.DataFrame(json_data[1:], columns=json_data[0])
# Rename the columns to their common names
df.rename(columns=territory["xwalk"], inplace=True)

# First row is the header
df = pd.DataFrame(df[1:], columns=df[0])
# Convert columns to numeric where applicable
for column in df.columns:
if column not in ["state", "county", "NAME", "tract"]:
df[column] = pd.to_numeric(df[column], errors="ignore")

for col in island["var_list"].split(","):
# Converting appropriate variables to numeric.
# Also replacing 0s with NaNs
df[col] = pd.to_numeric(df[col])
# Add the territory median income
df.loc[
df[field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019]
> 0,
DEC_FIELD_NAMES.TERRITORY_MEDIAN_INCOME,
] = territory["median_income"]
self.df_all = pd.concat([self.df_all, df], ignore_index=True)

# TO-DO: CHECK THIS. I think it makes sense to replace 0 with NaN
# because for our variables of interest (e.g. Median Household Income),
# it doesn't make sense for that to be 0.
# Likely, it's actually missing but can't find a cite for that in the docs
df[col] = df[col].replace(0, np.nan)
def _merge_tracts_2010_compatibility(self):
"""Merges tract 69120950200 to match 2010 tracts"""
# MP 69/120 69120950200 = 69120950201, 69120950202
# Tract has been split, but 69120950202 has no data, so we just make 69120950200 = 69120950201
self.df_all = self.df_all.drop(
self.df_all[
self.df_all[field_names.GEOID_TRACT_FIELD] == "69120950202"
].index
)
self.df_all.loc[
self.df_all[field_names.GEOID_TRACT_FIELD] == "69120950201",
field_names.GEOID_TRACT_FIELD,
] = "69120950200"

if island["state_abbreviation"] == "vi":
dfs_vi.append(df)
else:
dfs.append(df)
def _impute_income(self, geojson_path: Path):
"""Impute income for both income measures."""
# Merges Census geojson to impute values from.
logger.debug(f"Reading GeoJSON from {geojson_path}")
geo_df = gpd.read_file(geojson_path)
self.df_all = CensusACSETL.merge_geojson(
df=self.df_all,
usa_geo_df=geo_df,
)

self.df = pd.concat(dfs)
self.df_vi = pd.concat(dfs_vi)
logger.debug("Imputing income information")
impute_var_named_tup_list = [
CensusACSETL.ImputeVariables(
raw_field_name=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019,
imputed_field_name=DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL,
),
]
self.df_all = calculate_income_measures(
impute_var_named_tup_list=impute_var_named_tup_list,
geo_df=self.df_all,
geoid_field=self.GEOID_TRACT_FIELD_NAME,
population_field=field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019,
)

def transform(self) -> None:
# Rename All Fields
self.df.rename(columns=self.FIELD_NAME_XWALK, inplace=True)
self.df_vi.rename(columns=self.FIELD_NAME_XWALK, inplace=True)
logger.debug("Calculating with imputed values")
self.df_all[
field_names.CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019
] = (
self.df_all[
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019
].fillna(
self.df_all[
DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL
]
)
# Use clip to ensure that the values are not negative
).clip(
lower=0
)

# Combine the dfs after renaming
self.df_all = pd.concat([self.df, self.df_vi])
# All values should have a value at this point for tracts with >0 population
assert (
self.df_all[
self.df_all[
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019
]
>= 1
][
field_names.CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019
]
.isna()
.sum()
== 0
), "Error: not all values were filled with imputations..."

# Rename total population:
self.df_all[self.TOTAL_POP_FIELD_NAME] = self.df_all[
self.TOTAL_POP_FIELD
# We generate a boolean that is TRUE when there is an imputed income but not a baseline income, and FALSE otherwise.
# This allows us to see which tracts have an imputed income.
self.df_all[field_names.ISLAND_AREAS_IMPUTED_INCOME_FLAG_FIELD] = (
self.df_all[
field_names.CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019
].notna()
& self.df_all[
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019
].isna()
)

def transform(self, geojson_path: Path = CENSUS_GEOJSON_PATH) -> None:
# Creating Geo ID (Census Block Group) Field Name
self.df_all[field_names.GEOID_TRACT_FIELD] = (
self.df_all["state"] + self.df_all["county"] + self.df_all["tract"]
)

# Combine the two MP 2020 tracts that were split from one 2010 tract
self._merge_tracts_2010_compatibility()

# Replace invalid numeric values with NaN
numeric_columns = self.df_all.select_dtypes(include="number").columns
for num_column in numeric_columns:
self.df_all.loc[self.df_all[num_column] < -999, num_column] = np.nan

# Percentage of households below 100% FPL
self.df_all[
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019
] = (
self.df_all[DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_UNDER_0_5]
+ self.df_all[DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_UNDER_0_74]
+ self.df_all[DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_UNDER_0_99]
) / self.df_all[
DEC_FIELD_NAMES.TOTAL_HOUSEHOLD_POVERTY_LEVEL
]

# Percentage of households below 200% which is
# [PBG083001 (total) - PBG083010 (num households over 200%)] / PBG083001 (total)
self.df_all[
self.PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL_FIELD_NAME
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019
] = (
self.df_all[
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD_NAME
]
- self.df_all[self.HOUSEHOLD_OVER_200_PERC_POVERTY_LEVEL_FIELD_NAME]
self.df_all[DEC_FIELD_NAMES.TOTAL_HOUSEHOLD_POVERTY_LEVEL]
- self.df_all[DEC_FIELD_NAMES.HOUSEHOLD_POVERTY_LEVEL_OVER_2_0]
) / self.df_all[
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD_NAME
]

# Percentage of households below 100% FPL
# which we get by adding `Total!!Under .50`, `Total!!.50 to .74`, ` Total!!.75 to .99`,
# and then dividing by PBG083001 (total)
self.df_all[
self.PERCENTAGE_HOUSEHOLDS_BELOW_100_PERC_POVERTY_LEVEL_FIELD_NAME
] = (
self.df_all[
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_ONE
]
+ self.df_all[
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_TWO
]
+ self.df_all[
self.HOUSEHOLD_UNDER_100_PERC_POVERTY_LEVEL_FIELD_PART_THREE
]
) / self.df_all[
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD_NAME
DEC_FIELD_NAMES.TOTAL_HOUSEHOLD_POVERTY_LEVEL
]

# Percentage High School Achievement is
# Percentage = (Male + Female) / (Total)
self.df_all[self.PERCENTAGE_HIGH_SCHOOL_ED_FIELD_NAME] = (
self.df_all[self.MALE_HIGH_SCHOOL_ED_FIELD_NAME]
+ self.df_all[self.FEMALE_HIGH_SCHOOL_ED_FIELD_NAME]
) / self.df_all[self.TOTAL_POPULATION_FIELD_NAME]
self.df_all[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019] = (
self.df_all[DEC_FIELD_NAMES.MALE_HIGH_SCHOOL_ED]
+ self.df_all[DEC_FIELD_NAMES.FEMALE_HIGH_SCHOOL_ED]
) / self.df_all[
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019
]

# Calculate employment.
self.df_all[self.UNEMPLOYMENT_FIELD_NAME] = (
self.df_all[self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD]
+ self.df_all[self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD]
self.df_all[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019] = (
self.df_all[DEC_FIELD_NAMES.EMPLOYMENT_MALE_UNEMPLOYED]
+ self.df_all[DEC_FIELD_NAMES.EMPLOYMENT_FEMALE_UNEMPLOYED]
) / (
self.df_all[self.EMPLOYMENT_MALE_IN_LABOR_FORCE_FIELD]
+ self.df_all[self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD]
self.df_all[DEC_FIELD_NAMES.EMPLOYMENT_MALE_IN_LABOR_FORCE]
+ self.df_all[DEC_FIELD_NAMES.EMPLOYMENT_FEMALE_IN_LABOR_FORCE]
)

# Calculate area median income
median_income_df = pd.DataFrame(self.ISLAND_TERRITORIES)
median_income_df = median_income_df[
["fips", self.TERRITORY_MEDIAN_INCOME_FIELD]
]
self.df_all = self.df_all.merge(
right=median_income_df, left_on="state", right_on="fips", how="left"
)
self.df_all[self.AREA_MEDIAN_INCOME_FIELD_NAME] = (
self.df_all[self.MEDIAN_INCOME_FIELD_NAME]
/ self.df_all[self.TERRITORY_MEDIAN_INCOME_FIELD]
self.df_all[
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019
] = (
self.df_all[field_names.CENSUS_DECENNIAL_MEDIAN_INCOME_2019]
/ self.df_all[DEC_FIELD_NAMES.TERRITORY_MEDIAN_INCOME]
)

# Creating Geo ID (Census Block Group) Field Name
self.df_all[self.GEOID_TRACT_FIELD_NAME] = (
self.df_all["state"] + self.df_all["county"] + self.df_all["tract"]
# Calculate college attendance
self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_POPULATION] = (
self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_MALE_ENROLLED]
+ self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_FEMALE_ENROLLED]
)
self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_PERCENT] = (
self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_MALE_ENROLLED]
+ self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_FEMALE_ENROLLED]
) / self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_TOTAL_ENROLLED]
self.df_all[DEC_FIELD_NAMES.COLLEGE_NON_ATTENDANCE_PERCENT] = (
1 - self.df_all[DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_PERCENT]
)

# Calculate stats by race
for race_field_name in self.RE_OUTPUT_FIELDS:
for race_field_name in OUTPUT_RACE_FIELDS:
output_field_name = (
field_names.PERCENT_PREFIX
+ race_field_name
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX
# 2010 vs 2020 WARNING
# We must keep the old 2009 date to make it compatible with all the other 2010 data
+ f" in {field_names.DEC_DATA_YEAR}"
)
self.final_race_fields.append(output_field_name)
self.df_all[output_field_name] = (
self.df_all[race_field_name]
/ self.df_all[self.TOTAL_RACE_POPULATION_FIELD_NAME]
/ self.df_all[DEC_FIELD_NAMES.TOTAL_RACE_POPULATION]
)
self.final_race_fields.append(output_field_name)

# Reporting Missing Values
for col in self.df_all.columns:

@@ -532,22 +306,27 @@ class CensusDecennialETL(ExtractTransformLoad):
f"There are {missing_value_count} missing values in the field {col} out of a total of {self.df_all.shape[0]} rows"
)

self._impute_income(geojson_path)

def load(self) -> None:
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

columns_to_include = [
self.GEOID_TRACT_FIELD_NAME,
self.TOTAL_POP_FIELD_NAME,
self.MEDIAN_INCOME_FIELD_NAME,
self.TERRITORY_MEDIAN_INCOME_FIELD,
self.AREA_MEDIAN_INCOME_FIELD_NAME,
self.PERCENTAGE_HOUSEHOLDS_BELOW_100_PERC_POVERTY_LEVEL_FIELD_NAME,
self.PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL_FIELD_NAME,
self.PERCENTAGE_HIGH_SCHOOL_ED_FIELD_NAME,
self.UNEMPLOYMENT_FIELD_NAME,
field_names.GEOID_TRACT_FIELD,
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019,
field_names.CENSUS_DECENNIAL_MEDIAN_INCOME_2019,
DEC_FIELD_NAMES.TERRITORY_MEDIAN_INCOME,
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019,
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019,
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019,
DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL,
field_names.CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019,
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019,
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019,
DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_PERCENT,
DEC_FIELD_NAMES.COLLEGE_NON_ATTENDANCE,
DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_POPULATION,
field_names.ISLAND_AREAS_IMPUTED_INCOME_FLAG_FIELD,
] + self.final_race_fields

self.df_all[columns_to_include].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
)
File diff suppressed because it is too large
@@ -4,12 +4,16 @@ ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
ADJACENT_MEAN_SUFFIX = " (based on adjacency index and low income alone)"
ADJACENCY_INDEX_SUFFIX = " (average of neighbors)"
ISLAND_AREA_BACKFILL_SUFFIX = " in 2009"
V1_0_RESULTS_SUFFIX = " v1.0"

# Geographic field names
GEOID_TRACT_FIELD = "GEOID10_TRACT"
STATE_FIELD = "State/Territory"
COUNTY_FIELD = "County Name"

# Census per-tract land area field name
LAND_AREA_FIELD = "ALAND10"

# Definition Narwhal fields
SCORE_N_COMMUNITIES = "Definition N (communities)"
N_CLIMATE = "Climate Factor (Definition N)"
@@ -24,6 +28,10 @@ N_NON_WORKFORCE = "Any Non-Workforce Factor (Definition N)"
FINAL_SCORE_N_BOOLEAN = (
    "Definition N community, including adjacency index tracts"
)
FINAL_SCORE_N_BOOLEAN_V1_0 = f"{FINAL_SCORE_N_BOOLEAN}{V1_0_RESULTS_SUFFIX}"
GRANDFATHERED_N_COMMUNITIES_V1_0 = (
    f"Grandfathered {SCORE_N_COMMUNITIES} from v1.0"
)

PERCENTILE = 90
MEDIAN_HOUSE_VALUE_PERCENTILE = 90
@@ -182,19 +190,27 @@ AGGREGATION_POPULATION_FIELD = "Population Characteristics"
UNDER_5_FIELD = "Individuals under 5 years old"
OVER_64_FIELD = "Individuals over 64 years old"

# Fields from 2010 decennial census (generally only loaded for the territories)
CENSUS_DECENNIAL_MEDIAN_INCOME_2009 = "Median household income in 2009 ($)"
CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 = (
    "Percentage households below 100% of federal poverty line in 2009"
# Fields from 2020 decennial census (generally only loaded for the territories)
# 2010 vs 2020 WARNING
# We must keep the old 2009 date to make it compatible with all the other 2010 data
DEC_DATA_YEAR = "2009"
CENSUS_DECENNIAL_MEDIAN_INCOME_2019 = (
    f"Median household income in {DEC_DATA_YEAR} ($)"
)
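# Given DEC_DATA_YEAR = "2009" above, CENSUS_DECENNIAL_MEDIAN_INCOME_2019
# evaluates to "Median household income in 2009 ($)": the constant is named
# for the newer island-area decennial data but deliberately keeps the 2009
# label for compatibility, per the warning above.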
CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009 = "Percent individuals age 25 or over with less than high school degree in 2009"
CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 = "Unemployment (percent) in 2009"
CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009"
CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
    "Median household income as a percent of territory median income in 2009"
CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019 = f"Percentage households below 100% of federal poverty line in {DEC_DATA_YEAR}"
CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = f"Percentage households below 200% of federal poverty line in {DEC_DATA_YEAR}"
CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = f"{CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019}, adjusted and imputed"
CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019 = f"Percent individuals age 25 or over with less than high school degree in {DEC_DATA_YEAR}"
CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019 = (
    f"Unemployment (percent) in {DEC_DATA_YEAR}"
)
LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = "Low median household income as a percent of territory median income in 2009"
# Fields from 2010 ACS (loaded for comparison with the territories)
CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019 = (
    f"Total population in {DEC_DATA_YEAR}"
)
CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019 = f"Median household income as a percent of territory median income in {DEC_DATA_YEAR}"
LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019 = f"Low median household income as a percent of territory median income in {DEC_DATA_YEAR}"

# # Fields from 2010 ACS (loaded for comparison with the territories)
CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployment (percent) in 2010"
CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
    "Percent of individuals less than 100% Federal Poverty Line in 2010"
@@ -226,6 +242,10 @@ COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
    "Percentage households below 100% of federal poverty line in 2009 (island areas) "
    "and 2010 (states and PR)"
)
COMBINED_POVERTY_LESS_THAN_200_FPL_FIELD_2010 = (
    "Percentage households below 200% of federal poverty line in 2009 (island areas) "
    "and 2010 (states and PR)"
)

# Urban Rural Map
URBAN_HEURISTIC_FIELD = "Urban Heuristic Flag"
@@ -691,11 +711,13 @@ LOW_MEDIAN_INCOME_PCTILE_THRESHOLD = (
    f"percent of area median income"
)
ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD = (
    f"{LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
    f"{LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019} exceeds "
    f"{PERCENTILE}th percentile"
)
ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD = f"{CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009} exceeds {PERCENTILE}th percentile"
ISLAND_POVERTY_PCTILE_THRESHOLD = f"{CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009} exceeds {PERCENTILE}th percentile"
ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD = f"{CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019} exceeds {PERCENTILE}th percentile"
ISLAND_POVERTY_PCTILE_THRESHOLD = f"{CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019} exceeds {PERCENTILE}th percentile"
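# With DEC_DATA_YEAR = "2009" and PERCENTILE = 90, the new
# ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD expands to
# "Unemployment (percent) in 2009 exceeds 90th percentile" -- the same
# string as the old 2009-named constant it replaces, which presumably keeps
# downstream column lookups stable.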
# Low Income Island Areas
ISLAND_AREAS_IMPUTED_INCOME_FLAG_FIELD = f"Income data has been estimated based on neighbor income{ISLAND_AREAS_SUFFIX}"

# Not currently used in a factor
EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
@@ -1,19 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreA(Score):
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score A")
        self.df[field_names.SCORE_A] = self.df[
            [
                field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.HIGH_SCHOOL_ED_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
            ]
        ].mean(axis=1)
        return self.df
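    # Worked example, assuming percentile columns scaled 0-1: a tract at the
    # 0.80 poverty percentile and the 0.60 high-school percentile gets
    # Score A = (0.80 + 0.60) / 2 = 0.70.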
@@ -1,21 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreB(Score):
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score B")
        self.df[field_names.SCORE_B] = (
            self.df[
                field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            * self.df[
                field_names.HIGH_SCHOOL_ED_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
        )
        return self.df
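    # Worked example: the same tract (0.80 poverty percentile, 0.60
    # high-school percentile) gets Score B = 0.80 * 0.60 = 0.48; the product
    # rewards tracts that are high on both measures at once.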
@@ -1,102 +0,0 @@
from collections import namedtuple

import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreC(Score):
    def __init__(self, df: pd.DataFrame) -> None:
        Bucket = namedtuple(typename="Bucket", field_names=["name", "fields"])

        # Note: we use percentiles for every field below.
        # To do so, we add the percentile suffix to all the field names.
        self.BUCKET_SOCIOECONOMIC = Bucket(
            field_names.C_SOCIOECONOMIC,
            [
                field_names.HOUSEHOLDS_LINGUISTIC_ISO_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.HIGH_SCHOOL_ED_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.UNEMPLOYMENT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.HOUSING_BURDEN_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
            ],
        )
        self.BUCKET_SENSITIVE = Bucket(
            field_names.C_SENSITIVE,
            [
                field_names.UNDER_5_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.OVER_64_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.LINGUISTIC_ISO_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
            ],
        )
        self.BUCKET_ENVIRONMENTAL = Bucket(
            field_names.C_ENVIRONMENTAL,
            [
                field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.WASTEWATER_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.LEAD_PAINT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
            ],
        )
        self.BUCKET_EXPOSURES = Bucket(
            field_names.C_EXPOSURES,
            [
                field_names.AIR_TOXICS_CANCER_RISK_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.RESPIRATORY_HAZARD_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.OZONE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
                field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
            ],
        )
        self.BUCKETS = [
            self.BUCKET_SOCIOECONOMIC,
            self.BUCKET_SENSITIVE,
            self.BUCKET_ENVIRONMENTAL,
            self.BUCKET_EXPOSURES,
        ]
        super().__init__(df)

    # "CalEnviroScreen for the US" score
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score C")
        # Average all the percentile values in each bucket into a single score for each of the four buckets.
        for bucket in self.BUCKETS:
            self.df[bucket.name] = self.df[bucket.fields].mean(axis=1)

        # Combine the score from the two Exposures and Environmental Effects buckets
        # into a single score called "Pollution Burden".
        # The math for this score is:
        # (1.0 * Exposures Score + 0.5 * Environment Effects score) / 1.5.
        self.df[field_names.AGGREGATION_POLLUTION_FIELD] = (
            1.0 * self.df[self.BUCKET_EXPOSURES.name]
            + 0.5 * self.df[self.BUCKET_ENVIRONMENTAL.name]
        ) / 1.5
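        # Worked example: an Exposures score of 0.90 and an Environmental
        # Effects score of 0.60 give a Pollution Burden of
        # (1.0 * 0.90 + 0.5 * 0.60) / 1.5 = 1.20 / 1.5 = 0.80.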

        # Average the score from the two Sensitive populations and
        # Socioeconomic factors buckets into a single score called
        # "Population Characteristics".
        self.df[field_names.AGGREGATION_POPULATION_FIELD] = self.df[
            [self.BUCKET_SENSITIVE.name, self.BUCKET_SOCIOECONOMIC.name]
        ].mean(axis=1)

        # Multiply the "Pollution Burden" score and the "Population Characteristics"
        # score together to produce the cumulative impact score.
        self.df[field_names.SCORE_C] = (
            self.df[field_names.AGGREGATION_POLLUTION_FIELD]
            * self.df[field_names.AGGREGATION_POPULATION_FIELD]
        )
        return self.df
@@ -1,34 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreD(Score):
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Scores D and E")
        fields_to_use_in_score = [
            field_names.UNEMPLOYMENT_FIELD,
            field_names.LINGUISTIC_ISO_FIELD,
            field_names.HOUSING_BURDEN_FIELD,
            field_names.POVERTY_FIELD,
            field_names.HIGH_SCHOOL_ED_FIELD,
        ]

        fields_min_max = [
            f"{field}{field_names.MIN_MAX_FIELD_SUFFIX}"
            for field in fields_to_use_in_score
        ]
        fields_percentile = [
            f"{field}{field_names.PERCENTILE_FIELD_SUFFIX}"
            for field in fields_to_use_in_score
        ]

        # Calculate "Score D", which uses min-max normalization,
        # and "Score E", which uses percentile normalization for the same fields
        self.df[field_names.SCORE_D] = self.df[fields_min_max].mean(axis=1)
        self.df[field_names.SCORE_E] = self.df[fields_percentile].mean(axis=1)

        return self.df
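    # A note on the two normalizations above: min-max rescales a raw value x
    # to (x - min) / (max - min) over all tracts, so one extreme tract can
    # compress everyone else toward 0; a percentile is the tract's rank
    # share, which is robust to outliers. E.g., incomes [10, 20, 100] min-max
    # to [0.00, 0.11, 1.00] but rank to roughly [0.0, 0.5, 1.0].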
@@ -1,97 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreF(Score):
    # TODO Make variables and constants clearer (meaning and type)

    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score F")
        ami_and_high_school_field = "Low AMI, Low HS graduation"
        meets_socio_field = "Meets socioeconomic criteria"
        meets_burden_field = "Meets burden criteria"

        self.df[ami_and_high_school_field] = (
            self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD] < 0.80
        ) & (self.df[field_names.HIGH_SCHOOL_ED_FIELD] > 0.2)

        self.df[meets_socio_field] = (
            self.df[ami_and_high_school_field]
            | (self.df[field_names.POVERTY_FIELD] > 0.40)
            | (self.df[field_names.LINGUISTIC_ISO_FIELD] > 0.10)
            | (self.df[field_names.HIGH_SCHOOL_ED_FIELD] > 0.4)
        )

        self.df[meets_burden_field] = (
            (
                self.df[
                    field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.RESPIRATORY_HAZARD_FIELD
                    + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.TRAFFIC_FIELD
                    + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.LEAD_PAINT_FIELD
                    + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.ASTHMA_FIELD
                    + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.HEART_DISEASE_FIELD
                    + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.CANCER_FIELD
                    + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
            | (
                self.df[
                    field_names.DIABETES_FIELD
                    + field_names.PERCENTILE_FIELD_SUFFIX
                ]
                > 0.9
            )
        )

        self.df[field_names.SCORE_F_COMMUNITIES] = (
            self.df[meets_socio_field] & self.df[meets_burden_field]
        )

        return self.df
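    # Qualification sketch: a tract with 45% poverty (socio criterion met via
    # the > 0.40 poverty test) and PM2.5 at the 0.95 percentile (burden
    # criterion met) is flagged as a Score F community; either condition
    # alone is not enough, since the final flag is an AND of the two.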
@@ -1,34 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreG(Score):
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score G")

        high_school_cutoff_threshold = 0.05

        # Score G is now modified NMTC
        self.df[field_names.SCORE_G_COMMUNITIES] = (
            (self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.8)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        ) | (
            (self.df[field_names.POVERTY_LESS_THAN_100_FPL_FIELD] > 0.20)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        )
        self.df[field_names.SCORE_G] = self.df[
            field_names.SCORE_G_COMMUNITIES
        ].astype(int)
        self.df["Score G (percentile)"] = self.df[field_names.SCORE_G]

        return self.df
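    # Example: a tract with median income at 75% of AMI and a 6% share of
    # adults without a high-school degree passes the first (modified NMTC)
    # branch (0.75 < 0.8 and 0.06 > 0.05) and is flagged, regardless of its
    # poverty rate.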
@@ -1,32 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreH(Score):
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score H")

        high_school_cutoff_threshold = 0.06

        self.df[field_names.SCORE_H_COMMUNITIES] = (
            (self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.8)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        ) | (
            (self.df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD] > 0.40)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        )
        self.df[field_names.SCORE_H] = self.df[
            field_names.SCORE_H_COMMUNITIES
        ].astype(int)

        return self.df
@@ -1,33 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreI(Score):
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score I")

        high_school_cutoff_threshold = 0.05

        self.df[field_names.SCORE_I_COMMUNITIES] = (
            (self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.7)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        ) | (
            (self.df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD] > 0.50)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        )
        self.df[field_names.SCORE_I] = self.df[
            field_names.SCORE_I_COMMUNITIES
        ].astype(int)
        self.df["Score I (percentile)"] = self.df[field_names.SCORE_I]

        return self.df
@@ -1,33 +0,0 @@
import data_pipeline.score.field_names as field_names
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreK(Score):
    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score K")

        high_school_cutoff_threshold = 0.06

        self.df[field_names.SCORE_K] = (
            (self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.8)
        ) | (self.df[field_names.POVERTY_LESS_THAN_100_FPL_FIELD] > 0.20)
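        # Note the asymmetry: SCORE_K above uses income or poverty alone,
        # while SCORE_K_COMMUNITIES below additionally requires the
        # high-school screen (> 6% of adults without a diploma) on both
        # branches.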

        self.df[field_names.SCORE_K_COMMUNITIES] = (
            (self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.8)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        ) | (
            (self.df[field_names.POVERTY_LESS_THAN_100_FPL_FIELD] > 0.20)
            & (
                self.df[field_names.HIGH_SCHOOL_ED_FIELD]
                > high_school_cutoff_threshold
            )
        )

        return self.df
@@ -1,690 +0,0 @@
import data_pipeline.score.field_names as field_names
import numpy as np
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreL(Score):
    def __init__(self, df: pd.DataFrame) -> None:
        self.LOW_INCOME_THRESHOLD: float = 0.65
        self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
        self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90
        self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10

        super().__init__(df)

    def _combine_island_areas_with_states_and_set_thresholds(
        self,
        df: pd.DataFrame,
        column_from_island_areas: str,
        column_from_decennial_census: str,
        combined_column_name: str,
        threshold_cutoff_for_island_areas: float,
    ) -> (pd.DataFrame, str):
        """Steps to set thresholds for island areas.

        This function is fairly logically complicated. It takes the following steps:

        1. Combine the two different fields into a single field.
        2. Calculate the 90th percentile cutoff raw value for the combined field.
        3. Create a boolean series that is true for any census tract in the island
           areas (and only the island areas) that exceeds this cutoff.

        For step one, it combines data that is either the island area's Decennial Census
        value in 2009 or the state's value in 5-year ACS ending in 2010.

        This will be used to generate the percentile cutoff for the 90th percentile.

        The stateside decennial census stopped asking economic comparisons,
        so this is as close to apples-to-apples as we get. We use 5-year ACS for data
        robustness over 1-year ACS.
        """
        # Create the combined field.
        # TODO: move this combined field percentile calculation to `etl_score`,
        # since most other percentile logic is there.
        # There should only be one entry in either 2009 or 2019 fields, not one in both.
        # But just to be safe, we take the mean and ignore null values so if there
        # *were* entries in both, this result would make sense.
        df[combined_column_name] = df[
            [column_from_island_areas, column_from_decennial_census]
        ].mean(axis=1, skipna=True)

        logger.debug(
            f"Combined field `{combined_column_name}` has "
            f"{df[combined_column_name].isnull().sum()} "
            f"({df[combined_column_name].isnull().sum() * 100 / len(df):.2f}%) "
            f"missing values for census tracts. "
        )

        # Calculate the percentile threshold raw value.
        raw_threshold = np.nanquantile(
            a=df[combined_column_name], q=threshold_cutoff_for_island_areas
        )

        logger.debug(
            f"For combined field `{combined_column_name}`, "
            f"the {threshold_cutoff_for_island_areas*100:.0f} percentile cutoff is a "
            f"raw value of {raw_threshold:.3f}."
        )

        threshold_column_name = (
            f"{column_from_island_areas} exceeds "
            f"{threshold_cutoff_for_island_areas*100:.0f}th percentile"
        )

        df[threshold_column_name] = (
            df[column_from_island_areas] >= raw_threshold
        )

        percent_of_tracts_highlighted = (
            100
            * df[threshold_column_name].sum()
            / df[column_from_island_areas].notnull().sum()
        )

        logger.info(
            f"For `{threshold_column_name}`, "
            f"{df[threshold_column_name].sum()} ("
            f"{percent_of_tracts_highlighted:.2f}% of tracts that have non-null data "
            f"in the column) have a value of TRUE."
        )

        return df, threshold_column_name
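    # Worked example of the cutoff: with q=0.90 and 100 combined values
    # 1..100, np.nanquantile interpolates to a raw_threshold of 90.1, so
    # island tracts with a value of 90.1 or higher get True in the
    # threshold column.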

    def _create_low_income_threshold(self, df: pd.DataFrame) -> pd.Series:
        """
        Returns a pandas series (really a numpy array) of booleans that is
        true where the tract's percentile for the share of households at or
        below 200% of the federal poverty level meets or exceeds the
        established threshold
        """
        return (
            df[
                field_names.POVERTY_LESS_THAN_200_FPL_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.LOW_INCOME_THRESHOLD
        )
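    # E.g., with LOW_INCOME_THRESHOLD = 0.65, a tract at the 0.70 percentile
    # for share of households under 200% of the federal poverty line counts
    # as low income; one at 0.60 does not.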

    def _increment_total_eligibility_exceeded(
        self, columns_for_subset: list
    ) -> None:
        """
        Increments the total eligible factors for a given tract
        """

        self.df[field_names.THRESHOLD_COUNT] += self.df[columns_for_subset].sum(
            axis=1, skipna=True
        )
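        # E.g., a tract whose row in columns_for_subset reads
        # [True, False, True] adds 2 to its running THRESHOLD_COUNT;
        # booleans sum as 1/0 and NaNs are skipped.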

    def _climate_factor(self) -> bool:
        # In Xth percentile or above for FEMA’s Risk Index (Source: FEMA)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. (Source: Census's American Community Survey)

        climate_eligibility_columns = [
            field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
            field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
            field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
        ]

        expected_population_loss_threshold = (
            self.df[
                field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        expected_agriculture_loss_threshold = (
            self.df[
                field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        expected_building_loss_threshold = (
            self.df[
                field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
            expected_population_loss_threshold
            & self.df[field_names.FPL_200_SERIES]
        )

        self.df[field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD] = (
            expected_agriculture_loss_threshold
            & self.df[field_names.FPL_200_SERIES]
        )

        self.df[field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD] = (
            expected_building_loss_threshold
            & self.df[field_names.FPL_200_SERIES]
        )

        self._increment_total_eligibility_exceeded(climate_eligibility_columns)

        return self.df[climate_eligibility_columns].any(axis="columns")
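    # Each factor follows the same pattern: a tract qualifies if any burden
    # indicator is at or above the 90th percentile AND the tract is low
    # income. E.g., building loss at the 0.92 percentile in a low-income
    # tract sets that indicator True, and the factor returns True for the
    # tract.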

    def _energy_factor(self) -> bool:
        # In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. (Source: Census's American Community Survey)

        energy_eligibility_columns = [
            field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
            field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
        ]

        energy_burden_threshold = (
            self.df[
                field_names.ENERGY_BURDEN_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        pm25_threshold = (
            self.df[
                field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.PM25_EXPOSURE_LOW_INCOME_FIELD] = (
            pm25_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self.df[field_names.ENERGY_BURDEN_LOW_INCOME_FIELD] = (
            energy_burden_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self._increment_total_eligibility_exceeded(energy_eligibility_columns)

        return self.df[energy_eligibility_columns].any(axis="columns")

    def _transportation_factor(self) -> bool:
        # In Xth percentile or above for diesel particulate matter (Source: EPA National Air Toxics Assessment (NATA))
        # or
        # In Xth percentile or above for PM 2.5 (Source: EPA, Office of Air and Radiation (OAR) fusion of model and monitor data)
        # or
        # In Xth percentile or above for traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. (Source: Census's American Community Survey)

        transportion_eligibility_columns = [
            field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
            field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
        ]

        diesel_threshold = (
            self.df[
                field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        traffic_threshold = (
            self.df[
                field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD] = (
            diesel_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self.df[field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD] = (
            traffic_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            transportion_eligibility_columns
        )

        return self.df[transportion_eligibility_columns].any(axis="columns")

    def _housing_factor(self) -> bool:
        # (
        # In Xth percentile or above for lead paint (Source: Census's American Community Survey’s
        # percent of housing units built pre-1960, used as an indicator of potential lead paint exposure in homes)
        # AND
        # In Yth percentile or below for Median House Value (Source: Census's American Community Survey)
        # )
        # or
        # In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. (Source: Census's American Community Survey)

        housing_eligibility_columns = [
            field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
            field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
        ]

        lead_paint_median_home_value_threshold = (
            self.df[
                field_names.LEAD_PAINT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        ) & (
            self.df[
                field_names.MEDIAN_HOUSE_VALUE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            <= self.MEDIAN_HOUSE_VALUE_THRESHOLD
        )

        housing_burden_threshold = (
            self.df[
                field_names.HOUSING_BURDEN_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        # series by series indicators
        self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = (
            lead_paint_median_home_value_threshold
            & self.df[field_names.FPL_200_SERIES]
        )

        self.df[field_names.HOUSING_BURDEN_LOW_INCOME_FIELD] = (
            housing_burden_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self._increment_total_eligibility_exceeded(housing_eligibility_columns)

        return self.df[housing_eligibility_columns].any(axis="columns")

    def _pollution_factor(self) -> bool:
        # Proximity to Risk Management Plan sites is > X
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. (Source: Census's American Community Survey)

        pollution_eligibility_columns = [
            field_names.RMP_LOW_INCOME_FIELD,
            field_names.SUPERFUND_LOW_INCOME_FIELD,
            field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
        ]

        rmp_sites_threshold = (
            self.df[field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        npl_sites_threshold = (
            self.df[field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        tsdf_sites_threshold = (
            self.df[
                field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        # individual series-by-series
        self.df[field_names.RMP_LOW_INCOME_FIELD] = (
            rmp_sites_threshold & self.df[field_names.FPL_200_SERIES]
        )
        self.df[field_names.SUPERFUND_LOW_INCOME_FIELD] = (
            npl_sites_threshold & self.df[field_names.FPL_200_SERIES]
        )
        self.df[field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD] = (
            tsdf_sites_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            pollution_eligibility_columns
        )

        return self.df[pollution_eligibility_columns].any(axis="columns")

    def _water_factor(self) -> bool:
        # In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. (Source: Census's American Community Survey)

        wastewater_threshold = (
            self.df[
                field_names.WASTEWATER_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] = (
            wastewater_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            [field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]
        )

        return self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]

    def _health_factor(self) -> bool:
        # In Xth percentile or above for diabetes (Source: CDC Places)
        # or
        # In Xth percentile or above for asthma (Source: CDC Places)
        # or
        # In Xth percentile or above for heart disease
        # or
        # In Xth percentile or above for low life expectancy (Source: CDC Places)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level. (Source: Census's American Community Survey)

        health_eligibility_columns = [
            field_names.DIABETES_LOW_INCOME_FIELD,
            field_names.ASTHMA_LOW_INCOME_FIELD,
            field_names.HEART_DISEASE_LOW_INCOME_FIELD,
            field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
        ]

        diabetes_threshold = (
            self.df[
                field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        asthma_threshold = (
            self.df[
                field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        heart_disease_threshold = (
            self.df[
                field_names.HEART_DISEASE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        low_life_expectancy_threshold = (
            self.df[
                field_names.LOW_LIFE_EXPECTANCY_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
            diabetes_threshold & self.df[field_names.FPL_200_SERIES]
        )
        self.df[field_names.ASTHMA_LOW_INCOME_FIELD] = (
            asthma_threshold & self.df[field_names.FPL_200_SERIES]
        )
        self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
            heart_disease_threshold & self.df[field_names.FPL_200_SERIES]
        )
        self.df[field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
            low_life_expectancy_threshold & self.df[field_names.FPL_200_SERIES]
        )

        self._increment_total_eligibility_exceeded(health_eligibility_columns)

        return self.df[health_eligibility_columns].any(axis="columns")

    def _workforce_factor(self) -> bool:
        # Where unemployment is above Xth percentile
        # or
        # Where median income as a percent of area median income is above Xth percentile
        # or
        # Where the percent of households at or below 100% of the federal poverty level
        # is above Xth percentile
        # or
        # Where linguistic isolation is above Xth percentile
        # AND
        # Where the high school degree achievement rate for adults 25 years and older
        # is less than Y%
        # (necessary to screen out university tracts)

        # Workforce criteria for states fields.
        workforce_eligibility_columns = [
            field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
            field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
            field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
        ]

        self.df[field_names.LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD]
            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        )

        unemployment_threshold = (
            self.df[
                field_names.UNEMPLOYMENT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        low_median_income_threshold = (
            self.df[
                field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        linguistic_isolation_threshold = (
            self.df[
                field_names.LINGUISTIC_ISO_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        poverty_threshold = (
            self.df[
                field_names.POVERTY_LESS_THAN_100_FPL_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
            linguistic_isolation_threshold
            & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.POVERTY_LOW_HS_EDUCATION_FIELD] = (
            poverty_threshold & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
            low_median_income_threshold
            & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
            unemployment_threshold & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        workforce_combined_criteria_for_states = self.df[
            workforce_eligibility_columns
        ].any(axis="columns")

        self._increment_total_eligibility_exceeded(
            workforce_eligibility_columns
        )

        # Now, calculate workforce criteria for island territories.
        island_areas_workforce_eligibility_columns = [
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
        ]

        # First, combine unemployment.
        (
            self.df,
            island_areas_unemployment_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
            column_from_decennial_census=field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
            combined_column_name=field_names.COMBINED_UNEMPLOYMENT_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )

        # Next, combine poverty.
        (
            self.df,
            island_areas_poverty_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
            column_from_decennial_census=field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )

        # Also check whether low area median income is 90th percentile or higher
        # within the islands.
        island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = (
            f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
            f"{field_names.PERCENTILE}th percentile"
        )
        self.df[
            island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name
        ] = (
            self.df[
                field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        )

        self.df[
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD
        ] = (
            self.df[island_areas_unemployment_criteria_field_name]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD] = (
            self.df[island_areas_poverty_criteria_field_name]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        self.df[
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD
        ] = (
            self.df[
                island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name
            ]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        workforce_combined_criteria_for_island_areas = self.df[
            island_areas_workforce_eligibility_columns
        ].any(axis="columns")

        self._increment_total_eligibility_exceeded(
            island_areas_workforce_eligibility_columns
        )

        percent_of_island_tracts_highlighted = (
            100
            * workforce_combined_criteria_for_island_areas.sum()
            # Choosing a random column from island areas to calculate the denominator.
            / self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009]
            .notnull()
            .sum()
        )

        logger.debug(
            f"For workforce criteria in island areas, "
            f"{workforce_combined_criteria_for_island_areas.sum()} ("
            f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
            f"in the column) have a value of TRUE."
        )

        # A tract is included if it meets either the states tract criteria or the
        # island areas tract criteria.
        return (
            workforce_combined_criteria_for_states
            | workforce_combined_criteria_for_island_areas
        )

    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score L")

        self.df[field_names.THRESHOLD_COUNT] = 0
        self.df[field_names.FPL_200_SERIES] = self._create_low_income_threshold(
            self.df
        )
        self.df[field_names.L_CLIMATE] = self._climate_factor()
        self.df[field_names.L_ENERGY] = self._energy_factor()
        self.df[field_names.L_TRANSPORTATION] = self._transportation_factor()
        self.df[field_names.L_HOUSING] = self._housing_factor()
        self.df[field_names.L_POLLUTION] = self._pollution_factor()
        self.df[field_names.L_WATER] = self._water_factor()
        self.df[field_names.L_HEALTH] = self._health_factor()
        self.df[field_names.L_WORKFORCE] = self._workforce_factor()

        factors = [
            field_names.L_CLIMATE,
            field_names.L_ENERGY,
            field_names.L_TRANSPORTATION,
            field_names.L_HOUSING,
            field_names.L_POLLUTION,
            field_names.L_WATER,
            field_names.L_HEALTH,
            field_names.L_WORKFORCE,
        ]
        self.df[field_names.SCORE_L_COMMUNITIES] = self.df[factors].any(axis=1)

        # Note: this is purely used for comparison tool analysis, and can be removed at a later date. - LMB.
        non_workforce_factors = [
            field_names.L_CLIMATE,
            field_names.L_ENERGY,
            field_names.L_TRANSPORTATION,
            field_names.L_HOUSING,
            field_names.L_POLLUTION,
            field_names.L_WATER,
            field_names.L_HEALTH,
        ]
        self.df[field_names.L_NON_WORKFORCE] = self.df[
            non_workforce_factors
        ].any(axis=1)

        self.df[
            field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
        ] = self.df[field_names.SCORE_L_COMMUNITIES].astype(int)

        return self.df
@@ -1,888 +0,0 @@
from typing import Tuple

import data_pipeline.etl.score.constants as constants
import data_pipeline.score.field_names as field_names
import numpy as np
import pandas as pd
from data_pipeline.score.score import Score
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class ScoreM(Score):
    """Very similar to Score L, with a few minor modifications."""

    def __init__(self, df: pd.DataFrame) -> None:
        self.LOW_INCOME_THRESHOLD: float = 0.65
        self.MAX_COLLEGE_ATTENDANCE_THRESHOLD: float = 0.20
        self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
        self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90
        self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10

        super().__init__(df)

    def _combine_island_areas_with_states_and_set_thresholds(
        self,
        df: pd.DataFrame,
        column_from_island_areas: str,
        column_from_decennial_census: str,
        combined_column_name: str,
        threshold_cutoff_for_island_areas: float,
    ) -> Tuple[pd.DataFrame, str]:
        """Steps to set thresholds for island areas.

        This function is fairly logically complicated. It takes the following steps:

        1. Combine the two different fields into a single field.
        2. Calculate the 90th percentile for the combined field.
        3. Create a boolean series that is true for any census tract in the island
           areas (and only the island areas) that exceeds this percentile.

        For step one, it combines data that is either the island area's Decennial Census
        value in 2009 or the state's value in 5-year ACS ending in 2010.

        This will be used to generate the percentile cutoff for the 90th percentile.

        The stateside decennial census stopped asking economic comparisons,
        so this is as close to apples-to-apples as we get. We use 5-year ACS for data
        robustness over 1-year ACS.
        """
        # Create the combined field.
        # TODO: move this combined field percentile calculation to `etl_score`,
        # since most other percentile logic is there.
        # There should only be one entry in either 2009 or 2019 fields, not one in both.
        # But just to be safe, we take the mean and ignore null values so if there
        # *were* entries in both, this result would make sense.
        df[combined_column_name] = df[
            [column_from_island_areas, column_from_decennial_census]
        ].mean(axis=1, skipna=True)

        # Create a percentile field for use in the Islands / PR visualization
        # TODO: move this code
        # In the code below, percentiles are constructed based on the combined column
        # of census and island data, but only reported for the island areas (where there
        # is no other comprehensive percentile information)
        return_series_name = (
            column_from_island_areas
            + field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
            + field_names.PERCENTILE_FIELD_SUFFIX
        )
        df[return_series_name] = np.where(
            df[column_from_decennial_census].isna(),
            df[combined_column_name].rank(pct=True),
            np.nan,
        )
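        # How the np.where above behaves: rows where the stateside census
        # column is NaN (i.e., island-area tracts) get their percentile rank
        # within the combined column; all other rows get NaN. So a combined
        # series of [2.0 (island), 5.0 (state), 8.0 (island)] yields
        # approximately [0.33, NaN, 1.00].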

        threshold_column_name = (
            f"{column_from_island_areas} exceeds "
            f"{threshold_cutoff_for_island_areas*100:.0f}th percentile"
        )

        df[threshold_column_name] = (
            df[return_series_name] >= threshold_cutoff_for_island_areas
        )

        return df, threshold_column_name

    def _create_low_income_and_low_college_attendance_threshold(
        self, df: pd.DataFrame
    ) -> pd.Series:
        """
        Returns a pandas series (really a numpy array) of booleans that is
        true where the tract is low income (at or above the established
        threshold for the share of households at or below 200% of the
        federal poverty level) and either has low college attendance or is
        missing college attendance data
        """

        return (df[field_names.LOW_INCOME_THRESHOLD]) & (
            df[field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD]
            | (
                # If college attendance data is null for this tract, just rely on the
                # poverty data
                df[field_names.COLLEGE_ATTENDANCE_FIELD].isna()
            )
        )
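    # Truth sketch: low income AND (low college attendance OR attendance
    # unknown). A low-income tract with 15% college attendance qualifies; a
    # low-income tract with NaN attendance also qualifies; a tract that is
    # not low income never does.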

    def _increment_total_eligibility_exceeded(
        self, columns_for_subset: list, skip_fips: tuple = ()
    ) -> None:
        """
        Increments the total eligible factors for a given tract

        The new skip_fips argument specifies which (if any) fips codes to
        skip over for incrementing.
        This allows us to essentially skip data we think is of limited veracity,
        without overriding any values in the data.
        THIS IS A TEMPORARY FIX.
        """
        if skip_fips:
            self.df[field_names.THRESHOLD_COUNT] += np.where(
                self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
                    skip_fips
                ),
                0,
                self.df[columns_for_subset].sum(axis=1, skipna=True),
            )
        else:
            self.df[field_names.THRESHOLD_COUNT] += self.df[
                columns_for_subset
            ].sum(axis=1, skipna=True)
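        # Illustration (hypothetical argument choice): with skip_fips=("72",),
        # tracts whose GEOID starts with "72" (Puerto Rico) keep their
        # THRESHOLD_COUNT unchanged while all other tracts are incremented as
        # usual.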
|
||||
|
||||
def _climate_factor(self) -> bool:
|
||||
# In Xth percentile or above for FEMA’s Risk Index (Source: FEMA
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level and there is low higher ed attendance
|
||||
# Source: Census's American Community Survey
|
||||
|
||||
climate_eligibility_columns = [
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
||||
]
|
||||
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
] = (
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
] = (
|
||||
self.df[
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD] = (
|
||||
self.df[
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.CLIMATE_THRESHOLD_EXCEEDED] = (
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
]
|
||||
| self.df[
|
||||
field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
]
|
||||
| self.df[
|
||||
field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
]
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD
|
||||
] = (
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
]
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD
|
||||
] = (
|
||||
self.df[
|
||||
field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
]
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD
|
||||
] = (
|
||||
self.df[field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD]
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
climate_eligibility_columns,
|
||||
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
||||
)
|
||||
|
||||
return self.df[climate_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _energy_factor(self) -> bool:
|
||||
# In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score)
|
||||
# AND
|
||||
# Low income: In Nth percentile or above for percent of block group population
|
||||
# of households where household income is less than or equal to twice the federal
|
||||
# poverty level and has low higher ed attendance.
|
||||
# Source: Census's American Community Survey
|
||||
|
||||
energy_eligibility_columns = [
|
||||
field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
||||
field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
||||
]
|
||||
|
||||
self.df[field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD] = (
|
||||
self.df[
|
||||
field_names.ENERGY_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.PM25_EXCEEDS_PCTILE_THRESHOLD] = (
|
||||
self.df[
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.ENERGY_THRESHOLD_EXCEEDED] = (
|
||||
self.df[field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD]
|
||||
| self.df[field_names.PM25_EXCEEDS_PCTILE_THRESHOLD]
|
||||
)
|
||||
|
||||
self.df[field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
|
||||
self.df[field_names.PM25_EXCEEDS_PCTILE_THRESHOLD]
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self.df[field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
|
||||
self.df[field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD]
|
||||
& self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
energy_eligibility_columns,
|
||||
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
||||
)
|
||||
|
||||
return self.df[energy_eligibility_columns].any(axis="columns")

    def _transportation_factor(self) -> bool:
        # In Xth percentile or above for diesel particulate matter (Source: EPA National Air Toxics Assessment (NATA))
        # or
        # In Xth percentile or above for PM 2.5 (Source: EPA, Office of Air and Radiation (OAR) fusion of model and monitor data)
        # or
        # In Xth percentile or above for traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students.
        # Source: Census's American Community Survey

        transportation_eligibility_columns = [
            field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD,
            field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ]

        self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD] = (
            self.df[
                field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.TRAFFIC_THRESHOLD_EXCEEDED] = (
            self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD]
            | self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD]
        )

        self.df[
            field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD
        ] = (
            self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

        self.df[
            field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD
        ] = (
            self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            transportation_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[transportation_eligibility_columns].any(axis="columns")

    def _housing_factor(self) -> bool:
        # (
        # In Xth percentile or above for lead paint (Source: Census's American Community Survey's
        # percent of housing units built pre-1960, used as an indicator of potential lead paint exposure in homes)
        # AND
        # In Yth percentile or below for Median House Value (Source: Census's American Community Survey)
        # )
        # or
        # In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students.
        # Source: Census's American Community Survey

        housing_eligibility_columns = [
            field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
            field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ]

        self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LEAD_PAINT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        ) & (
            self.df[
                field_names.MEDIAN_HOUSE_VALUE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            <= self.MEDIAN_HOUSE_VALUE_THRESHOLD
        )

        self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD] = (
            self.df[
                field_names.HOUSING_BURDEN_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] = (
            self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD]
            | self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD]
        )

        # series by series indicators
        self.df[
            field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD
        ] = (
            self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

        self.df[field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            housing_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[housing_eligibility_columns].any(axis="columns")

    def _pollution_factor(self) -> bool:
        # Proximity to Risk Management Plan sites is > X
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students.
        # Source: Census's American Community Survey

        pollution_eligibility_columns = [
            field_names.RMP_LOW_INCOME_LOW_HIGHER_ED_FIELD,
            field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD,
            field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ]

        self.df[field_names.RMP_PCTILE_THRESHOLD] = (
            self.df[field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.NPL_PCTILE_THRESHOLD] = (
            self.df[field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.TSDF_PCTILE_THRESHOLD] = (
            self.df[
                field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = (
            self.df[field_names.RMP_PCTILE_THRESHOLD]
            | self.df[field_names.NPL_PCTILE_THRESHOLD]
        ) | self.df[field_names.TSDF_PCTILE_THRESHOLD]

        # individual series-by-series
        self.df[field_names.RMP_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.RMP_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )
        self.df[field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.NPL_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )
        self.df[field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.TSDF_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            pollution_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[pollution_eligibility_columns].any(axis="columns")

    def _water_factor(self) -> bool:
        # In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students
        # Source: Census's American Community Survey

        self.df[field_names.WASTEWATER_PCTILE_THRESHOLD] = (
            self.df[
                field_names.WASTEWATER_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        # Straight copy here in case we add additional water fields.
        self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
            field_names.WASTEWATER_PCTILE_THRESHOLD
        ].copy()

        self.df[
            field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD
        ] = (
            self.df[field_names.WASTEWATER_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            [field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD],
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[
            field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD
        ]

    def _health_factor(self) -> bool:
        # In Xth percentile or above for diabetes (Source: CDC Places)
        # or
        # In Xth percentile or above for asthma (Source: CDC Places)
        # or
        # In Xth percentile or above for heart disease
        # or
        # In Xth percentile or above for low life expectancy (Source: CDC Places)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students
        # Source: Census's American Community Survey

        health_eligibility_columns = [
            field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD,
            field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD,
            field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
            field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ]

        self.df[field_names.DIABETES_PCTILE_THRESHOLD] = (
            self.df[
                field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.ASTHMA_PCTILE_THRESHOLD] = (
            self.df[
                field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.HEART_DISEASE_PCTILE_THRESHOLD] = (
            self.df[
                field_names.HEART_DISEASE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LOW_LIFE_EXPECTANCY_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.HEALTH_THRESHOLD_EXCEEDED] = (
            (
                self.df[field_names.DIABETES_PCTILE_THRESHOLD]
                | self.df[field_names.ASTHMA_PCTILE_THRESHOLD]
            )
            | self.df[field_names.HEART_DISEASE_PCTILE_THRESHOLD]
        ) | self.df[field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD]

        self.df[field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.DIABETES_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )
        self.df[field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.ASTHMA_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )
        self.df[field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.HEART_DISEASE_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )
        self.df[
            field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD
        ] = (
            self.df[field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

        self._increment_total_eligibility_exceeded(
            health_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[health_eligibility_columns].any(axis="columns")

    def _workforce_factor(self) -> bool:
        # Where unemployment is above Xth percentile
        # or
        # Where median income as a percent of area median income is above Xth percentile
        # or
        # Where the percent of households at or below 100% of the federal poverty level
        # is above Xth percentile
        # or
        # Where linguistic isolation is above Xth percentile
        # AND
        # Where the high school degree achievement rates for adults 25 years and older
        # are less than Y%
        # AND the higher ed attendance rates are under Z%
        # (necessary to screen out university tracts)

        # Workforce criteria for states fields.
        workforce_eligibility_columns = [
            field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD,
            field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD,
            field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD,
            field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD,
        ]

        self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD]
            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        ) & (
            (
                self.df[field_names.COLLEGE_ATTENDANCE_FIELD]
                <= self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
            )
            | (
                # If college attendance data is null for this tract, just rely on the
                # poverty/AMI data
                self.df[field_names.COLLEGE_ATTENDANCE_FIELD].isna()
            )
        )
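One idiom above is worth a note: pandas comparisons against NaN evaluate to False, so the "| .isna()" branch keeps tracts with missing college-attendance data from failing the education screen outright, as the inline comment says. A tiny self-contained illustration with toy values (not pipeline fields):

import numpy as np
import pandas as pd

MAX_COLLEGE_ATTENDANCE_THRESHOLD = 0.20  # assumed value for illustration

college_attendance = pd.Series([0.10, 0.95, np.nan])

below_threshold = college_attendance <= MAX_COLLEGE_ATTENDANCE_THRESHOLD  # NaN compares False
passes_screen = below_threshold | college_attendance.isna()
print(passes_screen.tolist())  # [True, False, True]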

        self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD] = (
            self.df[
                field_names.UNEMPLOYMENT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LINGUISTIC_ISO_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.POVERTY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.POVERTY_LESS_THAN_100_FPL_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD]
        )

        self.df[field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.POVERTY_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD]
        )

        self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD]
        )

        self.df[field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD] = (
            self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD]
        )

        workforce_combined_criteria_for_states = self.df[
            workforce_eligibility_columns
        ].any(axis="columns")

        self._increment_total_eligibility_exceeded(
            workforce_eligibility_columns
        )

        # Now, calculate workforce criteria for island territories.
        island_areas_workforce_eligibility_columns = [
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
        ]

        # First, combine unemployment.
        # This will include an adjusted percentile column for the island areas
        # to be used by the front end.
        (
            self.df,
            island_areas_unemployment_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
            column_from_decennial_census=field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
            combined_column_name=field_names.COMBINED_UNEMPLOYMENT_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )

        # TODO: Remove this, it's for checking only
        assert (
            island_areas_unemployment_criteria_field_name
            == field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD
        ), "Error combining island columns"

        # Next, combine poverty.
        # This will include an adjusted percentile column for the island areas
        # to be used by the front end.
        (
            self.df,
            island_areas_poverty_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
            column_from_decennial_census=field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )

        # TODO: Remove this, it's for checking only
        assert (
            island_areas_poverty_criteria_field_name
            == field_names.ISLAND_POVERTY_PCTILE_THRESHOLD
        ), "Error combining island columns"

        # Also check whether low area median income is 90th percentile or higher
        # within the islands.

        # Note that because the field for low median income does not have to be combined,
        # unlike the other fields, we do not need to create a new percentile
        # column. This code should probably be refactored when (TODO) we do the big
        # refactor.
        self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        )

        self.df[
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD
        ] = (
            self.df[island_areas_unemployment_criteria_field_name]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD] = (
            self.df[island_areas_poverty_criteria_field_name]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        self.df[
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD
        ] = (
            self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        workforce_combined_criteria_for_island_areas = self.df[
            island_areas_workforce_eligibility_columns
        ].any(axis="columns")

        self._increment_total_eligibility_exceeded(
            island_areas_workforce_eligibility_columns
        )

        percent_of_island_tracts_highlighted = (
            100
            * workforce_combined_criteria_for_island_areas.sum()
            # Choosing a random column from island areas to calculate the denominator.
            / self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009]
            .notnull()
            .sum()
        )

        logger.debug(
            f"For workforce criteria in island areas, "
            f"{workforce_combined_criteria_for_island_areas.sum()} ("
            f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
            f"in the column) have a value of TRUE."
        )

        # Because these criteria are calculated differently for the islands, we also
        # calculate the thresholds to pass to the front end slightly differently.

        self.df[field_names.WORKFORCE_THRESHOLD_EXCEEDED] = (
            ## First we calculate for the non-island areas
            (
                (
                    self.df[field_names.POVERTY_PCTILE_THRESHOLD]
                    | self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD]
                )
                | self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
            )
            | self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD]
        ) | (
            ## then we calculate just for the island areas
            (
                self.df[field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD]
                | self.df[field_names.ISLAND_POVERTY_PCTILE_THRESHOLD]
            )
            | self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
        )

        # Because of the island complications, we also have to separately calculate
        # the threshold for the socioeconomic indicators.
        self.df[field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED] = (
            self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
            | self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD]
        )

        # A tract is included if it meets either the states tract criteria or the
        # island areas tract criteria.
        return (
            workforce_combined_criteria_for_states
            | workforce_combined_criteria_for_island_areas
        )

    def add_columns(self) -> pd.DataFrame:
        logger.debug("Adding Score M")

        self.df[field_names.THRESHOLD_COUNT] = 0

        # TODO: move this inside of
        # `_create_low_income_and_low_college_attendance_threshold`
        # and change the return signature of that method.
        # Create a standalone field that captures the college attendance boolean
        # threshold.
        self.df[field_names.LOW_INCOME_THRESHOLD] = (
            self.df[
                field_names.POVERTY_LESS_THAN_200_FPL_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.LOW_INCOME_THRESHOLD
        )

        # Because we are moving this variable to be in the same direction as all
        # other variables, we change this to be < rather than <=. This translates
        # to "80% or more of residents are not college students", rather than
        # "Strictly greater than 80% of residents are not college students."
        # There are two tracts that are impacted by this (that is, they have
        # exactly 20% college students) -- neither of these has been a DAC under
        # any score.
        self.df[field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD] = (
            self.df[field_names.COLLEGE_ATTENDANCE_FIELD]
            < self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
        )

        self.df[
            field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES
        ] = self._create_low_income_and_low_college_attendance_threshold(
            self.df
        )
        self.df[field_names.M_CLIMATE] = self._climate_factor()
        self.df[field_names.M_ENERGY] = self._energy_factor()
        self.df[field_names.M_TRANSPORTATION] = self._transportation_factor()
        self.df[field_names.M_HOUSING] = self._housing_factor()
        self.df[field_names.M_POLLUTION] = self._pollution_factor()
        self.df[field_names.M_WATER] = self._water_factor()
        self.df[field_names.M_HEALTH] = self._health_factor()
        self.df[field_names.M_WORKFORCE] = self._workforce_factor()

        factors = [
            field_names.M_CLIMATE,
            field_names.M_ENERGY,
            field_names.M_TRANSPORTATION,
            field_names.M_HOUSING,
            field_names.M_POLLUTION,
            field_names.M_WATER,
            field_names.M_HEALTH,
            field_names.M_WORKFORCE,
        ]
        self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
        self.df[field_names.SCORE_M_COMMUNITIES] = self.df[factors].any(axis=1)

        # Note: this is purely used for comparison tool analysis, and can be removed at a later date. - LMB.
        non_workforce_factors = [
            field_names.M_CLIMATE,
            field_names.M_ENERGY,
            field_names.M_TRANSPORTATION,
            field_names.M_HOUSING,
            field_names.M_POLLUTION,
            field_names.M_WATER,
            field_names.M_HEALTH,
        ]
        self.df[field_names.M_NON_WORKFORCE] = self.df[
            non_workforce_factors
        ].any(axis=1)

        self.df[
            field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX
        ] = self.df[field_names.SCORE_M_COMMUNITIES].astype(int)

        return self.df
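The two summary columns at the heart of add_columns rely on a pandas idiom: summing a boolean frame counts True values per row (CATEGORY_COUNT), while any() flags rows with at least one True (SCORE_M_COMMUNITIES). A small illustration with made-up factor names:

import pandas as pd

factors = pd.DataFrame(
    {"climate": [True, False], "energy": [True, False], "water": [False, False]}
)
category_count = factors.sum(axis=1)  # counts per row: 2, 0
is_community = factors.any(axis=1)    # at least one factor: True, False
print(category_count.tolist(), is_community.tolist())  # [2, 0] [True, False]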

@@ -807,7 +807,7 @@ class ScoreNarwhal(Score):
            island_areas_unemployment_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019,
            column_from_decennial_census=field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
            combined_column_name=field_names.COMBINED_UNEMPLOYMENT_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,

@@ -827,7 +827,7 @@ class ScoreNarwhal(Score):
            island_areas_poverty_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019,
            column_from_decennial_census=field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,

@@ -848,14 +848,14 @@ class ScoreNarwhal(Score):
        # refactor.
        self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
                field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
            self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019]
            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        )

@@ -890,7 +890,7 @@ class ScoreNarwhal(Score):
            100
            * workforce_combined_criteria_for_island_areas.sum()
            # Choosing a random column from island areas to calculate the denominator.
            / self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009]
            / self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019]
            .notnull()
            .sum()
        )

@@ -1013,6 +1013,66 @@ class ScoreNarwhal(Score):
            self.df[field_names.SCORE_N_COMMUNITIES],
        )

    def _mark_territory_dacs(self) -> None:
        """Territory tracts that are flagged as low income are Score N communities."""
        self.df[field_names.SCORE_N_COMMUNITIES] = np.where(
            self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
                tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
            )
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED],
            True,
            self.df[field_names.SCORE_N_COMMUNITIES],
        )

    def _mark_grandfathered_dacs(self) -> None:
        """Tracts that are flagged as DACs in the v1.0 score are also marked."""
        self.df[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0] = np.where(
            self.df[field_names.FINAL_SCORE_N_BOOLEAN_V1_0]
            & ~self.df[field_names.FINAL_SCORE_N_BOOLEAN],
            True,
            False,
        )
        self.df[field_names.FINAL_SCORE_N_BOOLEAN] = np.where(
            self.df[field_names.FINAL_SCORE_N_BOOLEAN_V1_0],
            True,
            self.df[field_names.FINAL_SCORE_N_BOOLEAN],
        )
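A worked illustration of the grandfathering logic just defined, using the same four cases as the unit test near the end of this change (toy arrays, not real tracts):

import numpy as np

v1_0 = np.array([False, False, True, True])      # DAC under the v1.0 score
current = np.array([False, True, False, True])   # DAC under the current score

grandfathered = np.where(v1_0 & ~current, True, False)  # v1.0-only DACs
final = np.where(v1_0, True, current)                   # v1.0 DACs stay DACs

print(grandfathered.tolist())  # [False, False, True, False]
print(final.tolist())          # [False, True, True, True]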

    def _mark_poverty_flag(self) -> None:
        """Combine poverty less than 200% for territories and update the income flag."""
        # First we set the low income flag for non-territories by themselves, this
        # way we don't change the original outcome if we include territories.
        self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (
            self.df[
                # UPDATE: Pull the imputed poverty statistic
                field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.LOW_INCOME_THRESHOLD
        )

        # Now we set the low income flag only for territories, but we need to rank them
        # with all other tracts.
        (
            self.df,
            island_areas_poverty_200_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019,
            column_from_decennial_census=field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
            combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_200_FPL_FIELD_2010,
            threshold_cutoff_for_island_areas=self.LOW_INCOME_THRESHOLD,
        )
        self.df.loc[
            self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
                tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
            ),
            field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED,
        ] = (
            self.df[island_areas_poverty_200_criteria_field_name]
            >= self.LOW_INCOME_THRESHOLD
        )
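The territory override above uses a masked .loc assignment: a row mask plus a single column replaces the flag only for island-area tracts and leaves every other row untouched. A small sketch of the same move (the FIPS prefixes match the territory codes that appear in the test fixtures below, but the frame itself is toy data):

import pandas as pd

df = pd.DataFrame(
    {"GEOID10_TRACT": ["36087011302", "66010951100"], "flag": [True, False]}
)
island_fips = ("60", "66", "69", "78")  # AS, GU, MP, VI
mask = df["GEOID10_TRACT"].str.startswith(island_fips)
df.loc[mask, "flag"] = True  # only the Guam tract is rewritten
print(df["flag"].tolist())  # [True, True]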

    def _get_percent_of_tract_that_is_dac(self) -> float:
        """Per the October 7th compromise (#1988),
        tracts can be partially DACs if some portion of the tract is tribal land.

@@ -1034,14 +1094,7 @@ class ScoreNarwhal(Score):
        logger.debug("Adding Score Narwhal")
        self.df[field_names.THRESHOLD_COUNT] = 0

        self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (
            self.df[
                # UPDATE: Pull the imputed poverty statistic
                field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.LOW_INCOME_THRESHOLD
        )
        self._mark_poverty_flag()

        self.df[field_names.N_CLIMATE] = self._climate_factor()
        self.df[field_names.N_ENERGY] = self._energy_factor()

@@ -1065,12 +1118,14 @@ class ScoreNarwhal(Score):
        self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
        self.df[field_names.SCORE_N_COMMUNITIES] = self.df[factors].any(axis=1)
        self._mark_tribal_dacs()
        self._mark_territory_dacs()
        self.df[
            field_names.SCORE_N_COMMUNITIES
            + field_names.PERCENTILE_FIELD_SUFFIX
        ] = self.df[field_names.SCORE_N_COMMUNITIES].astype(int)

        self._mark_donut_hole_tracts()
        self._mark_grandfathered_dacs()
        self.df[
            field_names.PERCENT_OF_TRACT_IS_DAC
        ] = self._get_percent_of_tract_that_is_dac()

@@ -11,6 +11,42 @@ from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


def in_water_range(x: int):
    """Check if a tract ID is within the water area ID range.

    Input must be integer representation of the 6-char census tract code
    (equivalent to last 6 characters of full geo ID).

    Returns TRUE if ID is in the water area range.
    Returns FALSE if ID is NOT in the water area range.

    NB: Not currently in use; artifact of attempt to filter
    water areas using Census tract ID ranges.
    """
    if x >= 990000 and x <= 990099:
        return True
    return False


def full_geo_id_to_water_range_bool(x: str):
    """Check if a geo ID is within the water area ID range.

    First, convert full ID string to int of last six digits,
    then check if this int is in the correct range.

    Input should be the 11 character Census tract identifier,
    the last 6 characters of which are the census tract code.

    Returns TRUE if ID is in the water area range.
    Returns FALSE if ID is NOT in the water area range.

    NB: Not currently in use; artifact of attempt to filter
    water areas using Census tract ID ranges.
    """
    num_x = int(x[-6:])
    return in_water_range(num_x)
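A quick usage check of the two helpers just defined, using a water tract code that also appears in the test fixtures below (tract code 990000 falls inside the 990000-990099 water range):

print(full_geo_id_to_water_range_bool("78010990000"))  # True
print(full_geo_id_to_water_range_bool("36087011302"))  # False
print(in_water_range(990099))                          # True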


def calculate_tract_adjacency_scores(
    df: pd.DataFrame, score_column: str
) -> pd.DataFrame:

@@ -33,11 +69,15 @@ def calculate_tract_adjacency_scores(
    ORIGINAL_TRACT = "ORIGINAL_TRACT"
    logger.debug("Calculating tract adjacency scores")
    tract_data = get_tract_geojson()

    df: gpd.GeoDataFrame = tract_data.merge(
        df, on=field_names.GEOID_TRACT_FIELD
    )
    df = df.rename(columns={field_names.GEOID_TRACT_FIELD: ORIGINAL_TRACT})

    # remove water areas from input frame
    df = df[df[field_names.LAND_AREA_FIELD] > 0]

    logger.debug("Performing spatial join to find all adjacent tracts")
    adjacent_tracts: gpd.GeoDataFrame = df.sjoin(
        tract_data, predicate="touches"
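The spatial join above pairs each tract with its edge-adjacent neighbors. A self-contained sketch of how GeoPandas' sjoin behaves with predicate="touches" (two toy squares sharing an edge; under the DE-9IM definition a geometry does not touch itself, so no self-pairs appear):

import geopandas as gpd
from shapely.geometry import box

left = gpd.GeoDataFrame(
    {"name": ["a", "b"]},
    geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)],
)
joined = left.sjoin(left, predicate="touches")
print(joined[["name_left", "name_right"]])  # a and b pair with each other only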

@@ -4,20 +4,19 @@ import pytest
from data_pipeline.config import settings
from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.score import field_names
from data_pipeline.score.score_narwhal import ScoreNarwhal
from data_pipeline.utils import get_module_logger


logger = get_module_logger(__name__)

TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"


@pytest.fixture(scope="module")
def toy_score_df():
    return pd.read_csv(
        settings.APP_ROOT
        / "tests"
        / "score"
        / "test_utils"
        / "data"
        / "test_drop_tracts_from_percentile.csv",
        TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )

@@ -83,3 +82,76 @@ def test_drop_all_tracts(toy_score_df):
        toy_score_df,
        drop_tracts=toy_score_df[field_names.GEOID_TRACT_FIELD].to_list(),
    ), "Percentile in score fails when we drop all tracts"


def test_mark_territory_dacs():
    test_data = pd.read_csv(
        TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
    # Sanity check on the input data
    assert not test_data[field_names.SCORE_N_COMMUNITIES].all()

    scorer = ScoreNarwhal(test_data)
    scorer._mark_territory_dacs()
    # Check territories are set to true
    expected_new_dacs_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
        ["60050951100", "66010951100", "69110001101", "78010990000"]
    )
    assert test_data.loc[
        expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
    ].all()
    # Non-territories are still false
    assert not test_data.loc[
        ~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
    ].all()


def test_mark_poverty_flag():
    test_data = pd.read_csv(
        TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
    # Sanity check on the input data
    assert not test_data[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()

    scorer = ScoreNarwhal(test_data)
    scorer._mark_poverty_flag()
    expected_low_income_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
        ["36087011302", "66010951100", "78010990000"]
    )
    # Three tracts are set to true
    assert test_data[expected_low_income_filter][
        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
    ].all()
    # Everything else is false
    assert not test_data[~expected_low_income_filter][
        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
    ].all()


def test_mark_grandfathered_dacs():
    data = {
        field_names.GEOID_TRACT_FIELD: [
            "78010971500",
            "78010970500",
            "66010954400",
            "66010953400",
        ],
        field_names.FINAL_SCORE_N_BOOLEAN_V1_0: [False, False, True, True],
        field_names.FINAL_SCORE_N_BOOLEAN: [False, True, False, True],
    }
    test_df = pd.DataFrame(data)
    scorer = ScoreNarwhal(test_df)
    scorer._mark_grandfathered_dacs()
    result = scorer.df
    assert field_names.GRANDFATHERED_N_COMMUNITIES_V1_0 in result.columns
    assert not result[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0][0]
    assert not result[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0][1]
    assert result[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0][2]
    assert not result[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0][3]

    assert not result[field_names.FINAL_SCORE_N_BOOLEAN][0]
    assert result[field_names.FINAL_SCORE_N_BOOLEAN][1]
    assert result[field_names.FINAL_SCORE_N_BOOLEAN][2]
    assert result[field_names.FINAL_SCORE_N_BOOLEAN][3]

@@ -0,0 +1,8 @@
GEOID10_TRACT,"Percentage households below 200% of federal poverty line in 2009, adjusted and imputed","Percent of individuals below 200% Federal Poverty Line, imputed and adjusted","Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Is low income (imputed and adjusted)?
01071950300,,0.1,0.1,False
36087011302,,0.7,0.7,False
72119130701,,0.5,0.5,False
60050951100,0.1,,,False
66010951100,0.7,,,False
69110001100,0.5,,,False
78010990000,0.9,,,False

@@ -0,0 +1,9 @@
GEOID10_TRACT,Is low income (imputed and adjusted)?,Definition N (communities)
01071950300,True,False
36087011302,False,False
72119130701,True,False
60050951100,True,False
66010951100,True,False
69110001100,False,False
69110001101,True,False
78010990000,True,False

@@ -0,0 +1,8 @@
[["NAME","P1_001N","PBG19_005N","PBG19_012N","PCT31_001N","PBG32_003N","PBG32_007N","PBG32_010N","PBG32_014N","PCT34_003N","PCT34_016N","PBG43_001N","PBG74_001N","PBG74_002N","PBG74_003N","PBG74_004N","PBG74_010N","PCT9_001N","PCT9_003N","PCT9_079N","PCT9_130N","PCT9_155N","PCT9_180N","PCT9_205N","PCT9_230N","P5_002N","state","county","tract"],
["Census Tract 9505, Eastern District, American Samoa","2623","305","308","2044","443","42","369","44","175","164","26000","2151","538","338","329","352","2623","2021","342","77","0","0","99","84","107","60","010","950500"],
["Census Tract 9503, Eastern District, American Samoa","2415","325","345","2293","418","35","379","53","173","174","33631","2414","448","332","320","565","2415","2259","39","6","0","0","1","110","14","60","010","950300"],
["Census Tract 9501, Eastern District, American Samoa","1487","248","214","1404","269","38","254","39","103","106","24219","1485","448","243","146","200","1487","1449","3","1","1","0","1","32","6","60","010","950100"],
["Census Tract 9502, Eastern District, American Samoa","1158","164","143","1096","221","23","163","18","84","75","28333","1148","236","172","175","335","1158","1092","11","7","0","0","1","47","5","60","010","950200"],
["Census Tract 9506, Eastern District, American Samoa","3218","452","458","3031","645","57","573","56","216","242","26970","3188","774","462","439","666","3218","2781","167","9","1","0","5","255","40","60","010","950600"],
["Census Tract 9507, Eastern District, American Samoa","2727","381","364","2489","569","60","456","48","208","183","29083","2612","593","458","315","494","2727","2366","217","29","8","2","10","95","38","60","010","950700"],
["Census Tract 9509, Eastern District, American Samoa","3431","512","476","3239","718","76","682","89","238","244","30643","3412","724","510","573","623","3431","2988","332","28","0","0","2","81","27","60","010","950900"]]

@@ -0,0 +1,58 @@
[["NAME","P1_001N","PBG19_005N","PBG19_012N","PCT31_001N","PBG32_003N","PBG32_007N","PBG32_010N","PBG32_014N","PCT34_003N","PCT34_016N","PBG43_001N","PBG74_001N","PBG74_002N","PBG74_003N","PBG74_004N","PBG74_010N","PCT10_001N","PCT10_003N","PCT10_204N","PCT10_330N","PCT10_355N","PCT10_380N","PCT10_405N","P5_026N","PCT9_003N","state","county","tract"],
["Census Tract 9501, Guam, Guam","1347","7","6","77","25","1","14","0","2","29","40000","80","6","5","12","32","1347","13","128","159","11","63","111","764","221","66","010","950100"],
["Census Tract 9502, Guam, Guam","626","9","9","87","23","2","19","2","3","30","43750","102","6","0","10","44","626","29","91","56","3","32","86","296","81","66","010","950200"],
["Census Tract 9503, Guam, Guam","629","4","1","6","1","0","1","0","0","1","53750","6","0","0","0","0","629","3","620","0","0","0","1","5","0","66","010","950300"],
["Census Tract 9504.01, Guam, Guam","5809","669","581","5273","1447","150","1191","141","151","2096","53309","5688","455","322","376","2836","5809","1592","3775","14","9","3","359","54","70","66","010","950401"],
["Census Tract 9504.02, Guam, Guam","6606","735","668","5972","1581","168","1234","157","145","2198","50980","6579","834","472","531","2759","6606","2722","3135","32","0","13","574","122","101","66","010","950402"],
["Census Tract 9505.01, Guam, Guam","1834","211","188","1638","398","45","295","48","35","588","42054","1803","309","166","171","646","1834","1060","560","2","0","1","186","23","39","66","010","950501"],
["Census Tract 9505.02, Guam, Guam","5343","660","577","4952","1387","115","1052","86","160","1932","55833","5285","532","206","297","2911","5343","1833","2813","46","2","36","421","183","110","66","010","950502"],
["Census Tract 9507.01, Guam, Guam","5213","549","535","4849","1327","75","1088","86","162","1927","66023","5187","316","190","224","3283","5213","1279","2776","64","12","50","556","448","134","66","010","950701"],
["Census Tract 9507.02, Guam, Guam","4020","485","449","3640","930","89","742","61","102","1338","56406","3978","513","294","235","1894","4020","2093","1426","17","3","2","408","61","44","66","010","950702"],
["Census Tract 9508.01, Guam, Guam","3921","503","470","3607","932","127","836","124","66","1365","51528","3898","545","217","210","1804","3921","2058","1539","8","2","4","273","36","36","66","010","950801"],
["Census Tract 9508.02, Guam, Guam","4110","483","550","3846","894","102","746","111","97","1580","35372","4099","691","292","375","1531","4110","2055","1629","3","2","14","352","51","56","66","010","950802"],
["Census Tract 9509, Guam, Guam","4653","531","506","4404","1149","94","1014","94","98","1837","53221","4638","386","196","284","2672","4653","1048","3251","2","5","16","291","40","57","66","010","950900"],
["Census Tract 9510, Guam, Guam","3449","441","400","3240","821","74","739","57","105","1277","51806","3400","325","175","219","1743","3449","1137","1993","4","3","3","271","37","50","66","010","951000"],
["Census Tract 9511, Guam, Guam","6498","775","751","6108","1745","98","1498","134","216","2433","55673","6472","517","279","356","3630","6498","1974","3791","26","9","23","511","155","118","66","010","951100"],
["Census Tract 9516, Guam, Guam","142","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","142","111","16","4","0","1","7","3","3","66","010","951600"],
["Census Tract 9517, Guam, Guam","2239","193","198","1910","523","24","442","40","57","756","78333","2014","138","46","97","1387","2239","779","993","21","4","22","269","149","57","66","010","951700"],
["Census Tract 9518, Guam, Guam","99","18","13","83","27","1","18","3","1","33","50000","92","2","5","3","36","99","80","14","0","0","0","5","0","0","66","010","951800"],
["Census Tract 9519.01, Guam, Guam","4081","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","4081","797","2733","34","5","24","287","197","47","66","010","951901"],
["Census Tract 9519.02, Guam, Guam","3484","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","3484","575","2057","52","6","48","262","456","108","66","010","951902"],
["Census Tract 9522, Guam, Guam","3273","322","333","3102","900","84","749","105","58","1282","49355","3253","267","161","185","1805","3273","912","1845","26","1","13","349","125","47","66","010","952200"],
["Census Tract 9523, Guam, Guam","2829","250","289","2639","776","67","684","71","54","1080","59083","2811","233","141","128","1763","2829","848","1355","29","11","27","307","242","68","66","010","952300"],
["Census Tract 9524, Guam, Guam","1637","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","1637","614","737","16","0","18","176","72","46","66","010","952400"],
["Census Tract 9527, Guam, Guam","4468","537","542","3973","1016","78","874","68","117","1550","67578","4302","367","183","208","2623","4468","2929","823","19","0","11","423","253","136","66","010","952700"],
["Census Tract 9528, Guam, Guam",null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"66","010","952800"],
["Census Tract 9529, Guam, Guam","5145","599","604","4121","1096","96","971","99","132","1631","62031","4430","349","125","215","2754","5145","3508","779","27","9","41","540","234","133","66","010","952900"],
["Census Tract 9530, Guam, Guam","3388","359","419","3023","775","85","742","100","112","1222","49605","3251","360","178","189","1777","3388","1990","747","16","0","22","453","157","98","66","010","953000"],
["Census Tract 9531.01, Guam, Guam","3966","530","500","3720","1029","110","889","75","111","1435","76413","3947","250","133","202","2659","3966","2585","551","15","1","20","544","236","171","66","010","953101"],
["Census Tract 9531.02, Guam, Guam","3098","362","380","2885","776","50","637","80","107","1078","67841","3074","196","114","161","1948","3098","1891","609","16","1","13","445","122","116","66","010","953102"],
["Census Tract 9532, Guam, Guam","2611","351","356","2410","614","72","577","77","48","960","67813","2597","286","80","137","1612","2611","1725","343","28","3","5","385","118","105","66","010","953200"],
["Census Tract 9533, Guam, Guam","3808","461","449","3478","902","85","769","94","78","1352","47419","3764","499","256","243","1889","3808","2268","910","18","0","11","452","137","99","66","010","953300"],
["Census Tract 9534, Guam, Guam","943","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","943","609","120","10","0","9","129","61","38","66","010","953400"],
["Census Tract 9535, Guam, Guam",null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,"66","010","953500"],
["Census Tract 9536, Guam, Guam","3616","418","408","3349","925","87","773","78","103","1295","71711","3574","278","171","104","2372","3616","2403","427","16","5","16","596","144","278","66","010","953600"],
["Census Tract 9539, Guam, Guam","4192","558","522","3831","946","97","789","74","85","1456","62386","4174","560","256","232","2215","4192","3101","328","3","5","39","532","176","137","66","010","953900"],
["Census Tract 9540, Guam, Guam","2106","259","234","1961","527","29","446","39","61","732","83889","2102","111","58","88","1447","2106","1283","277","11","1","16","308","207","82","66","010","954000"],
["Census Tract 9543, Guam, Guam","1532","153","172","1405","426","41","325","39","42","534","77125","1526","131","63","58","993","1532","932","160","10","3","14","237","160","61","66","010","954300"],
["Census Tract 9544, Guam, Guam","53","7","4","48","14","4","10","2","0","19","92500","50","8","0","1","32","53","39","4","0","0","0","9","1","0","66","010","954400"],
["Census Tract 9545, Guam, Guam","2483","9","7","69","20","1","14","3","3","21","40000","83","12","15","3","28","2483","71","203","257","35","98","219","1444","324","66","010","954500"],
["Census Tract 9547, Guam, Guam","2089","287","255","1910","497","71","407","48","52","729","54931","2061","238","108","95","1129","2089","1259","485","30","1","3","197","111","53","66","010","954700"],
["Census Tract 9548, Guam, Guam","2426","288","273","2183","472","99","444","83","31","833","47969","2416","504","186","173","979","2426","1638","489","5","0","2","217","69","68","66","010","954800"],
["Census Tract 9551, Guam, Guam","3550","514","473","3285","873","78","677","80","69","1200","80909","3518","265","165","162","2364","3550","2554","168","28","5","25","409","340","175","66","010","955100"],
["Census Tract 9552, Guam, Guam","2317","367","322","2116","529","56","416","39","31","799","67813","2295","252","105","149","1358","2317","2016","68","7","0","3","163","59","90","66","010","955200"],
["Census Tract 9553, Guam, Guam","1604","230","235","1481","305","50","264","28","25","582","51667","1600","267","82","84","726","1604","1362","43","4","0","1","129","65","50","66","010","955300"],
["Census Tract 9554, Guam, Guam","647","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","-999999999","647","565","10","0","2","3","51","16","24","66","010","955400"],
["Census Tract 9556, Guam, Guam","1008","125","118","940","267","11","209","11","22","355","81250","1001","63","36","48","640","1008","683","168","8","1","4","106","37","24","66","010","955600"],
["Census Tract 9557, Guam, Guam","4940","625","550","4642","1263","132","1059","105","139","1803","63352","4901","456","235","198","2988","4940","2251","2010","22","2","8","497","146","95","66","010","955700"],
["Census Tract 9558, Guam, Guam","7804","920","801","7224","1979","153","1695","167","267","2723","66349","7738","475","326","410","4635","7804","2631","3870","71","16","48","759","393","179","66","010","955800"],
["Census Tract 9559, Guam, Guam","3185","322","324","3023","1011","65","740","66","99","1230","64833","3150","237","90","119","2201","3185","650","1461","50","28","39","389","562","139","66","010","955900"],
["Census Tract 9560, Guam, Guam","2572","260","285","2256","536","46","481","77","42","855","59375","2524","337","208","179","1227","2572","1693","419","5","3","20","259","159","60","66","010","956000"],
["Census Tract 9561, Guam, Guam","2011","218","236","1846","528","50","425","36","53","721","77500","1981","150","98","84","1301","2011","1306","239","12","1","16","263","169","69","66","010","956100"],
["Census Tract 9562, Guam, Guam","3945","469","420","3270","797","69","747","68","66","1255","77455","3488","220","71","122","2406","3945","1911","833","20","2","35","471","643","158","66","010","956200"],
["Census Tract 9563, Guam, Guam","2385","278","250","2173","592","54","429","64","58","822","57031","2346","242","140","121","1133","2385","1316","748","7","2","14","181","112","51","66","010","956300"],
["Census Tract 9804, Guam, Guam","0","0","0","0","0","0","0","0","0","0","-666666666","0","0","0","0","0","0","0","0","0","0","0","0","0","0","66","010","980400"],
["Census Tract 9801, Guam, Guam","42","7","9","41","12","0","11","2","1","17","60000","42","6","0","3","28","42","26","7","0","0","0","9","0","0","66","010","980100"],
["Census Tract 9802, Guam, Guam","0","0","0","0","0","0","0","0","0","0","-666666666","0","0","0","0","0","0","0","0","0","0","0","0","0","0","66","010","980200"],
["Census Tract 9803, Guam, Guam","0","0","0","0","0","0","0","0","0","0","-666666666","0","0","0","0","0","0","0","0","0","0","0","0","0","0","66","010","980300"],
["Census Tract 9900, Guam, Guam","0","0","0","0","0","0","0","0","0","0","-666666666","0","0","0","0","0","0","0","0","0","0","0","0","0","0","66","010","990000"]]

File diff suppressed because one or more lines are too long

@@ -0,0 +1,134 @@
,NAME,Total population in 2009,Total male high school graduates 25 and over,Total female high school graduates 25 and over,Total asked enrolled in college or graduate school (excludes military housing),Total males in labor force,Total males not in labor force,Total females in labor force,Total females not in labor force,Males enrolled in college or graduate school (excludes military housing),Females enrolled in college or graduate school (excludes military housing),Median household income in 2009 ($),Total Household poverty level IN 2019,Household poverty level Under 0.50 IN 2019,Household poverty level Under 0.74 IN 2019,Household poverty level Under 0.99 IN 2019,Household poverty level Over 2.0 IN 2019,Total population surveyed on racial data,Native Hawaiian or Pacific,Asian,White,Black or African American,American Indian / Alaska Native,other races,two or more races,Hispanic or Latino,state,county,tract,Territory Median Income
0,"Census Tract 9505, Eastern District, American Samoa",2623.0,305.0,308.0,2044.0,443.0,42.0,369.0,44.0,175.0,164.0,26000.0,2151.0,538.0,338.0,329.0,352.0,2623.0,2021.0,342.0,77.0,0.0,0.0,99.0,84.0,107.0,60,010,950500,26352.0
1,"Census Tract 9503, Eastern District, American Samoa",2415.0,325.0,345.0,2293.0,418.0,35.0,379.0,53.0,173.0,174.0,33631.0,2414.0,448.0,332.0,320.0,565.0,2415.0,2259.0,39.0,6.0,0.0,0.0,1.0,110.0,14.0,60,010,950300,26352.0
2,"Census Tract 9501, Eastern District, American Samoa",1487.0,248.0,214.0,1404.0,269.0,38.0,254.0,39.0,103.0,106.0,24219.0,1485.0,448.0,243.0,146.0,200.0,1487.0,1449.0,3.0,1.0,1.0,0.0,1.0,32.0,6.0,60,010,950100,26352.0
3,"Census Tract 9502, Eastern District, American Samoa",1158.0,164.0,143.0,1096.0,221.0,23.0,163.0,18.0,84.0,75.0,28333.0,1148.0,236.0,172.0,175.0,335.0,1158.0,1092.0,11.0,7.0,0.0,0.0,1.0,47.0,5.0,60,010,950200,26352.0
4,"Census Tract 9506, Eastern District, American Samoa",3218.0,452.0,458.0,3031.0,645.0,57.0,573.0,56.0,216.0,242.0,26970.0,3188.0,774.0,462.0,439.0,666.0,3218.0,2781.0,167.0,9.0,1.0,0.0,5.0,255.0,40.0,60,010,950600,26352.0
5,"Census Tract 9507, Eastern District, American Samoa",2727.0,381.0,364.0,2489.0,569.0,60.0,456.0,48.0,208.0,183.0,29083.0,2612.0,593.0,458.0,315.0,494.0,2727.0,2366.0,217.0,29.0,8.0,2.0,10.0,95.0,38.0,60,010,950700,26352.0
6,"Census Tract 9509, Eastern District, American Samoa",3431.0,512.0,476.0,3239.0,718.0,76.0,682.0,89.0,238.0,244.0,30643.0,3412.0,724.0,510.0,573.0,623.0,3431.0,2988.0,332.0,28.0,0.0,0.0,2.0,81.0,27.0,60,010,950900,26352.0
0,"Census Tract 9518, Manu'a District, American Samoa",832.0,169.0,119.0,784.0,213.0,23.0,148.0,36.0,63.0,63.0,26818.0,831.0,167.0,155.0,181.0,116.0,832.0,800.0,0.0,2.0,0.0,0.0,0.0,30.0,3.0,60,020,951800,26352.0
0,"Census Tract 9519, Rose Island, American Samoa",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60,030,951900,
0,"Census Tract 9520, Swains Island, American Samoa",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60,040,952000,
0,"Census Tract 9510, Western District, American Samoa",3593.0,484.0,488.0,3386.0,738.0,34.0,559.0,25.0,272.0,225.0,24688.0,3572.0,1032.0,693.0,578.0,399.0,3593.0,3002.0,473.0,10.0,0.0,0.0,5.0,103.0,22.0,60,050,951000,26352.0
1,"Census Tract 9511, Western District, American Samoa",6252.0,1007.0,961.0,5818.0,1162.0,80.0,842.0,72.0,477.0,422.0,24227.0,6201.0,2026.0,1206.0,876.0,725.0,6252.0,5765.0,212.0,50.0,1.0,0.0,1.0,223.0,40.0,60,050,951100,26352.0
2,"Census Tract 9512.01, Western District, American Samoa",3406.0,411.0,386.0,2896.0,625.0,33.0,483.0,43.0,239.0,206.0,29107.0,3061.0,723.0,517.0,451.0,408.0,3406.0,3030.0,230.0,12.0,0.0,2.0,4.0,128.0,11.0,60,050,951201,26352.0
3,"Census Tract 9512.02, Western District, American Samoa",4582.0,586.0,616.0,4322.0,906.0,59.0,814.0,64.0,319.0,320.0,31714.0,4535.0,784.0,687.0,577.0,1184.0,4582.0,3903.0,355.0,50.0,3.0,6.0,3.0,262.0,31.0,60,050,951202,26352.0
4,"Census Tract 9512.03, Western District, American Samoa",4994.0,605.0,648.0,4721.0,964.0,89.0,896.0,106.0,330.0,340.0,32196.0,4948.0,1019.0,724.0,626.0,1199.0,4994.0,4308.0,264.0,61.0,3.0,3.0,4.0,351.0,33.0,60,050,951203,26352.0
5,"Census Tract 9513, Western District, American Samoa",3010.0,443.0,419.0,2844.0,632.0,115.0,542.0,107.0,240.0,220.0,27330.0,2994.0,791.0,488.0,384.0,593.0,3010.0,2833.0,97.0,10.0,1.0,0.0,1.0,68.0,11.0,60,050,951300,26352.0
6,"Census Tract 9515, Western District, American Samoa",1689.0,270.0,235.0,1588.0,355.0,77.0,307.0,96.0,139.0,110.0,28611.0,1674.0,484.0,184.0,217.0,246.0,1689.0,1593.0,14.0,2.0,1.0,0.0,0.0,79.0,2.0,60,050,951500,26352.0
7,"Census Tract 9516, Western District, American Samoa",4293.0,629.0,565.0,4077.0,1075.0,306.0,937.0,351.0,311.0,322.0,28958.0,4280.0,990.0,726.0,621.0,889.0,4293.0,3900.0,122.0,20.0,5.0,0.0,2.0,244.0,16.0,60,050,951600,26352.0
0,"Census Tract 9501, Guam, Guam",1347.0,7.0,6.0,77.0,25.0,1.0,14.0,0.0,2.0,29.0,40000.0,80.0,6.0,5.0,12.0,32.0,1347.0,13.0,128.0,764.0,159.0,11.0,63.0,111.0,221.0,66,010,950100,58289.0
1,"Census Tract 9502, Guam, Guam",626.0,9.0,9.0,87.0,23.0,2.0,19.0,2.0,3.0,30.0,43750.0,102.0,6.0,0.0,10.0,44.0,626.0,29.0,91.0,296.0,56.0,3.0,32.0,86.0,81.0,66,010,950200,58289.0
2,"Census Tract 9503, Guam, Guam",629.0,4.0,1.0,6.0,1.0,0.0,1.0,0.0,0.0,1.0,53750.0,6.0,0.0,0.0,0.0,0.0,629.0,3.0,620.0,5.0,0.0,0.0,0.0,1.0,0.0,66,010,950300,58289.0
3,"Census Tract 9504.01, Guam, Guam",5809.0,669.0,581.0,5273.0,1447.0,150.0,1191.0,141.0,151.0,2096.0,53309.0,5688.0,455.0,322.0,376.0,2836.0,5809.0,1592.0,3775.0,54.0,14.0,9.0,3.0,359.0,70.0,66,010,950401,58289.0
4,"Census Tract 9504.02, Guam, Guam",6606.0,735.0,668.0,5972.0,1581.0,168.0,1234.0,157.0,145.0,2198.0,50980.0,6579.0,834.0,472.0,531.0,2759.0,6606.0,2722.0,3135.0,122.0,32.0,0.0,13.0,574.0,101.0,66,010,950402,58289.0
5,"Census Tract 9505.01, Guam, Guam",1834.0,211.0,188.0,1638.0,398.0,45.0,295.0,48.0,35.0,588.0,42054.0,1803.0,309.0,166.0,171.0,646.0,1834.0,1060.0,560.0,23.0,2.0,0.0,1.0,186.0,39.0,66,010,950501,58289.0
6,"Census Tract 9505.02, Guam, Guam",5343.0,660.0,577.0,4952.0,1387.0,115.0,1052.0,86.0,160.0,1932.0,55833.0,5285.0,532.0,206.0,297.0,2911.0,5343.0,1833.0,2813.0,183.0,46.0,2.0,36.0,421.0,110.0,66,010,950502,58289.0
7,"Census Tract 9507.01, Guam, Guam",5213.0,549.0,535.0,4849.0,1327.0,75.0,1088.0,86.0,162.0,1927.0,66023.0,5187.0,316.0,190.0,224.0,3283.0,5213.0,1279.0,2776.0,448.0,64.0,12.0,50.0,556.0,134.0,66,010,950701,58289.0
8,"Census Tract 9507.02, Guam, Guam",4020.0,485.0,449.0,3640.0,930.0,89.0,742.0,61.0,102.0,1338.0,56406.0,3978.0,513.0,294.0,235.0,1894.0,4020.0,2093.0,1426.0,61.0,17.0,3.0,2.0,408.0,44.0,66,010,950702,58289.0
9,"Census Tract 9508.01, Guam, Guam",3921.0,503.0,470.0,3607.0,932.0,127.0,836.0,124.0,66.0,1365.0,51528.0,3898.0,545.0,217.0,210.0,1804.0,3921.0,2058.0,1539.0,36.0,8.0,2.0,4.0,273.0,36.0,66,010,950801,58289.0
10,"Census Tract 9508.02, Guam, Guam",4110.0,483.0,550.0,3846.0,894.0,102.0,746.0,111.0,97.0,1580.0,35372.0,4099.0,691.0,292.0,375.0,1531.0,4110.0,2055.0,1629.0,51.0,3.0,2.0,14.0,352.0,56.0,66,010,950802,58289.0
11,"Census Tract 9509, Guam, Guam",4653.0,531.0,506.0,4404.0,1149.0,94.0,1014.0,94.0,98.0,1837.0,53221.0,4638.0,386.0,196.0,284.0,2672.0,4653.0,1048.0,3251.0,40.0,2.0,5.0,16.0,291.0,57.0,66,010,950900,58289.0
12,"Census Tract 9510, Guam, Guam",3449.0,441.0,400.0,3240.0,821.0,74.0,739.0,57.0,105.0,1277.0,51806.0,3400.0,325.0,175.0,219.0,1743.0,3449.0,1137.0,1993.0,37.0,4.0,3.0,3.0,271.0,50.0,66,010,951000,58289.0
13,"Census Tract 9511, Guam, Guam",6498.0,775.0,751.0,6108.0,1745.0,98.0,1498.0,134.0,216.0,2433.0,55673.0,6472.0,517.0,279.0,356.0,3630.0,6498.0,1974.0,3791.0,155.0,26.0,9.0,23.0,511.0,118.0,66,010,951100,58289.0
14,"Census Tract 9516, Guam, Guam",142.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,142.0,111.0,16.0,3.0,4.0,0.0,1.0,7.0,3.0,66,010,951600,58289.0
15,"Census Tract 9517, Guam, Guam",2239.0,193.0,198.0,1910.0,523.0,24.0,442.0,40.0,57.0,756.0,78333.0,2014.0,138.0,46.0,97.0,1387.0,2239.0,779.0,993.0,149.0,21.0,4.0,22.0,269.0,57.0,66,010,951700,58289.0
16,"Census Tract 9518, Guam, Guam",99.0,18.0,13.0,83.0,27.0,1.0,18.0,3.0,1.0,33.0,50000.0,92.0,2.0,5.0,3.0,36.0,99.0,80.0,14.0,0.0,0.0,0.0,0.0,5.0,0.0,66,010,951800,58289.0
17,"Census Tract 9519.01, Guam, Guam",4081.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,4081.0,797.0,2733.0,197.0,34.0,5.0,24.0,287.0,47.0,66,010,951901,58289.0
18,"Census Tract 9519.02, Guam, Guam",3484.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,3484.0,575.0,2057.0,456.0,52.0,6.0,48.0,262.0,108.0,66,010,951902,58289.0
19,"Census Tract 9522, Guam, Guam",3273.0,322.0,333.0,3102.0,900.0,84.0,749.0,105.0,58.0,1282.0,49355.0,3253.0,267.0,161.0,185.0,1805.0,3273.0,912.0,1845.0,125.0,26.0,1.0,13.0,349.0,47.0,66,010,952200,58289.0
20,"Census Tract 9523, Guam, Guam",2829.0,250.0,289.0,2639.0,776.0,67.0,684.0,71.0,54.0,1080.0,59083.0,2811.0,233.0,141.0,128.0,1763.0,2829.0,848.0,1355.0,242.0,29.0,11.0,27.0,307.0,68.0,66,010,952300,58289.0
21,"Census Tract 9524, Guam, Guam",1637.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,1637.0,614.0,737.0,72.0,16.0,0.0,18.0,176.0,46.0,66,010,952400,58289.0
22,"Census Tract 9527, Guam, Guam",4468.0,537.0,542.0,3973.0,1016.0,78.0,874.0,68.0,117.0,1550.0,67578.0,4302.0,367.0,183.0,208.0,2623.0,4468.0,2929.0,823.0,253.0,19.0,0.0,11.0,423.0,136.0,66,010,952700,58289.0
23,"Census Tract 9528, Guam, Guam",,,,,,,,,,,,,,,,,,,,,,,,,,66,010,952800,
24,"Census Tract 9529, Guam, Guam",5145.0,599.0,604.0,4121.0,1096.0,96.0,971.0,99.0,132.0,1631.0,62031.0,4430.0,349.0,125.0,215.0,2754.0,5145.0,3508.0,779.0,234.0,27.0,9.0,41.0,540.0,133.0,66,010,952900,58289.0
25,"Census Tract 9530, Guam, Guam",3388.0,359.0,419.0,3023.0,775.0,85.0,742.0,100.0,112.0,1222.0,49605.0,3251.0,360.0,178.0,189.0,1777.0,3388.0,1990.0,747.0,157.0,16.0,0.0,22.0,453.0,98.0,66,010,953000,58289.0
26,"Census Tract 9531.01, Guam, Guam",3966.0,530.0,500.0,3720.0,1029.0,110.0,889.0,75.0,111.0,1435.0,76413.0,3947.0,250.0,133.0,202.0,2659.0,3966.0,2585.0,551.0,236.0,15.0,1.0,20.0,544.0,171.0,66,010,953101,58289.0
27,"Census Tract 9531.02, Guam, Guam",3098.0,362.0,380.0,2885.0,776.0,50.0,637.0,80.0,107.0,1078.0,67841.0,3074.0,196.0,114.0,161.0,1948.0,3098.0,1891.0,609.0,122.0,16.0,1.0,13.0,445.0,116.0,66,010,953102,58289.0
28,"Census Tract 9532, Guam, Guam",2611.0,351.0,356.0,2410.0,614.0,72.0,577.0,77.0,48.0,960.0,67813.0,2597.0,286.0,80.0,137.0,1612.0,2611.0,1725.0,343.0,118.0,28.0,3.0,5.0,385.0,105.0,66,010,953200,58289.0
29,"Census Tract 9533, Guam, Guam",3808.0,461.0,449.0,3478.0,902.0,85.0,769.0,94.0,78.0,1352.0,47419.0,3764.0,499.0,256.0,243.0,1889.0,3808.0,2268.0,910.0,137.0,18.0,0.0,11.0,452.0,99.0,66,010,953300,58289.0
30,"Census Tract 9534, Guam, Guam",943.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,943.0,609.0,120.0,61.0,10.0,0.0,9.0,129.0,38.0,66,010,953400,58289.0
31,"Census Tract 9535, Guam, Guam",,,,,,,,,,,,,,,,,,,,,,,,,,66,010,953500,
32,"Census Tract 9536, Guam, Guam",3616.0,418.0,408.0,3349.0,925.0,87.0,773.0,78.0,103.0,1295.0,71711.0,3574.0,278.0,171.0,104.0,2372.0,3616.0,2403.0,427.0,144.0,16.0,5.0,16.0,596.0,278.0,66,010,953600,58289.0
33,"Census Tract 9539, Guam, Guam",4192.0,558.0,522.0,3831.0,946.0,97.0,789.0,74.0,85.0,1456.0,62386.0,4174.0,560.0,256.0,232.0,2215.0,4192.0,3101.0,328.0,176.0,3.0,5.0,39.0,532.0,137.0,66,010,953900,58289.0
34,"Census Tract 9540, Guam, Guam",2106.0,259.0,234.0,1961.0,527.0,29.0,446.0,39.0,61.0,732.0,83889.0,2102.0,111.0,58.0,88.0,1447.0,2106.0,1283.0,277.0,207.0,11.0,1.0,16.0,308.0,82.0,66,010,954000,58289.0
35,"Census Tract 9543, Guam, Guam",1532.0,153.0,172.0,1405.0,426.0,41.0,325.0,39.0,42.0,534.0,77125.0,1526.0,131.0,63.0,58.0,993.0,1532.0,932.0,160.0,160.0,10.0,3.0,14.0,237.0,61.0,66,010,954300,58289.0
36,"Census Tract 9544, Guam, Guam",53.0,7.0,4.0,48.0,14.0,4.0,10.0,2.0,0.0,19.0,92500.0,50.0,8.0,0.0,1.0,32.0,53.0,39.0,4.0,1.0,0.0,0.0,0.0,9.0,0.0,66,010,954400,58289.0
37,"Census Tract 9545, Guam, Guam",2483.0,9.0,7.0,69.0,20.0,1.0,14.0,3.0,3.0,21.0,40000.0,83.0,12.0,15.0,3.0,28.0,2483.0,71.0,203.0,1444.0,257.0,35.0,98.0,219.0,324.0,66,010,954500,58289.0
38,"Census Tract 9547, Guam, Guam",2089.0,287.0,255.0,1910.0,497.0,71.0,407.0,48.0,52.0,729.0,54931.0,2061.0,238.0,108.0,95.0,1129.0,2089.0,1259.0,485.0,111.0,30.0,1.0,3.0,197.0,53.0,66,010,954700,58289.0
39,"Census Tract 9548, Guam, Guam",2426.0,288.0,273.0,2183.0,472.0,99.0,444.0,83.0,31.0,833.0,47969.0,2416.0,504.0,186.0,173.0,979.0,2426.0,1638.0,489.0,69.0,5.0,0.0,2.0,217.0,68.0,66,010,954800,58289.0
40,"Census Tract 9551, Guam, Guam",3550.0,514.0,473.0,3285.0,873.0,78.0,677.0,80.0,69.0,1200.0,80909.0,3518.0,265.0,165.0,162.0,2364.0,3550.0,2554.0,168.0,340.0,28.0,5.0,25.0,409.0,175.0,66,010,955100,58289.0
41,"Census Tract 9552, Guam, Guam",2317.0,367.0,322.0,2116.0,529.0,56.0,416.0,39.0,31.0,799.0,67813.0,2295.0,252.0,105.0,149.0,1358.0,2317.0,2016.0,68.0,59.0,7.0,0.0,3.0,163.0,90.0,66,010,955200,58289.0
42,"Census Tract 9553, Guam, Guam",1604.0,230.0,235.0,1481.0,305.0,50.0,264.0,28.0,25.0,582.0,51667.0,1600.0,267.0,82.0,84.0,726.0,1604.0,1362.0,43.0,65.0,4.0,0.0,1.0,129.0,50.0,66,010,955300,58289.0
43,"Census Tract 9554, Guam, Guam",647.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,-999999999.0,647.0,565.0,10.0,16.0,0.0,2.0,3.0,51.0,24.0,66,010,955400,58289.0
44,"Census Tract 9556, Guam, Guam",1008.0,125.0,118.0,940.0,267.0,11.0,209.0,11.0,22.0,355.0,81250.0,1001.0,63.0,36.0,48.0,640.0,1008.0,683.0,168.0,37.0,8.0,1.0,4.0,106.0,24.0,66,010,955600,58289.0
45,"Census Tract 9557, Guam, Guam",4940.0,625.0,550.0,4642.0,1263.0,132.0,1059.0,105.0,139.0,1803.0,63352.0,4901.0,456.0,235.0,198.0,2988.0,4940.0,2251.0,2010.0,146.0,22.0,2.0,8.0,497.0,95.0,66,010,955700,58289.0
46,"Census Tract 9558, Guam, Guam",7804.0,920.0,801.0,7224.0,1979.0,153.0,1695.0,167.0,267.0,2723.0,66349.0,7738.0,475.0,326.0,410.0,4635.0,7804.0,2631.0,3870.0,393.0,71.0,16.0,48.0,759.0,179.0,66,010,955800,58289.0
47,"Census Tract 9559, Guam, Guam",3185.0,322.0,324.0,3023.0,1011.0,65.0,740.0,66.0,99.0,1230.0,64833.0,3150.0,237.0,90.0,119.0,2201.0,3185.0,650.0,1461.0,562.0,50.0,28.0,39.0,389.0,139.0,66,010,955900,58289.0
48,"Census Tract 9560, Guam, Guam",2572.0,260.0,285.0,2256.0,536.0,46.0,481.0,77.0,42.0,855.0,59375.0,2524.0,337.0,208.0,179.0,1227.0,2572.0,1693.0,419.0,159.0,5.0,3.0,20.0,259.0,60.0,66,010,956000,58289.0
49,"Census Tract 9561, Guam, Guam",2011.0,218.0,236.0,1846.0,528.0,50.0,425.0,36.0,53.0,721.0,77500.0,1981.0,150.0,98.0,84.0,1301.0,2011.0,1306.0,239.0,169.0,12.0,1.0,16.0,263.0,69.0,66,010,956100,58289.0
50,"Census Tract 9562, Guam, Guam",3945.0,469.0,420.0,3270.0,797.0,69.0,747.0,68.0,66.0,1255.0,77455.0,3488.0,220.0,71.0,122.0,2406.0,3945.0,1911.0,833.0,643.0,20.0,2.0,35.0,471.0,158.0,66,010,956200,58289.0
51,"Census Tract 9563, Guam, Guam",2385.0,278.0,250.0,2173.0,592.0,54.0,429.0,64.0,58.0,822.0,57031.0,2346.0,242.0,140.0,121.0,1133.0,2385.0,1316.0,748.0,112.0,7.0,2.0,14.0,181.0,51.0,66,010,956300,58289.0
52,"Census Tract 9801, Guam, Guam",42.0,7.0,9.0,41.0,12.0,0.0,11.0,2.0,1.0,17.0,60000.0,42.0,6.0,0.0,3.0,28.0,42.0,26.0,7.0,0.0,0.0,0.0,0.0,9.0,0.0,66,010,980100,58289.0
53,"Census Tract 9802, Guam, Guam",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,66,010,980200,
54,"Census Tract 9803, Guam, Guam",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,66,010,980300,
55,"Census Tract 9804, Guam, Guam",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,66,010,980400,
56,"Census Tract 9900, Guam, Guam",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,66,010,990000,
0,"Census Tract 9501, Northern Islands Municipality, Commonwealth of the Northern Mariana Islands",7.0,2.0,0.0,7.0,5.0,0.0,0.0,0.0,2.0,0.0,13125.0,7.0,2.0,0.0,1.0,0.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69,085,950100,31362.0
0,"Census Tract 9501, Rota Municipality, Commonwealth of the Northern Mariana Islands",1893.0,347.0,252.0,1810.0,514.0,96.0,374.0,75.0,109.0,119.0,31289.0,1891.0,357.0,175.0,157.0,548.0,1893.0,1212.0,557.0,27.0,1.0,0.0,3.0,92.0,17.0,69,100,950100,31362.0
1,"Census Tract 9900, Rota Municipality, Commonwealth of the Northern Mariana Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69,100,990000,
0,"Census Tract 1, Saipan Municipality, Commonwealth of the Northern Mariana Islands",1206.0,161.0,143.0,1164.0,342.0,40.0,231.0,37.0,112.0,65.0,42000.0,1203.0,130.0,86.0,121.0,427.0,1206.0,613.0,413.0,41.0,1.0,1.0,1.0,136.0,10.0,69,110,000100,31362.0
1,"Census Tract 2, Saipan Municipality, Commonwealth of the Northern Mariana Islands",1399.0,193.0,148.0,1340.0,328.0,39.0,229.0,39.0,91.0,86.0,31250.0,1394.0,280.0,162.0,189.0,362.0,1399.0,817.0,454.0,36.0,0.0,1.0,3.0,88.0,6.0,69,110,000200,31362.0
2,"Census Tract 3, Saipan Municipality, Commonwealth of the Northern Mariana Islands",1616.0,189.0,158.0,1551.0,478.0,53.0,365.0,62.0,93.0,117.0,48281.0,1602.0,147.0,115.0,114.0,754.0,1616.0,497.0,856.0,103.0,3.0,1.0,8.0,148.0,33.0,69,110,000300,31362.0
3,"Census Tract 4, Saipan Municipality, Commonwealth of the Northern Mariana Islands",3096.0,477.0,364.0,2993.0,980.0,123.0,651.0,92.0,140.0,122.0,24056.0,3087.0,423.0,325.0,508.0,622.0,3096.0,457.0,2462.0,47.0,5.0,2.0,8.0,114.0,15.0,69,110,000400,31362.0
4,"Census Tract 5, Saipan Municipality, Commonwealth of the Northern Mariana Islands",2448.0,311.0,269.0,2367.0,645.0,79.0,442.0,61.0,180.0,154.0,30431.0,2440.0,345.0,243.0,307.0,749.0,2448.0,799.0,1393.0,67.0,3.0,0.0,7.0,177.0,41.0,69,110,000500,31362.0
5,"Census Tract 6, Saipan Municipality, Commonwealth of the Northern Mariana Islands",2739.0,337.0,240.0,2633.0,830.0,77.0,562.0,67.0,209.0,170.0,31250.0,2728.0,448.0,181.0,381.0,826.0,2739.0,801.0,1736.0,44.0,2.0,2.0,1.0,153.0,28.0,69,110,000600,31362.0
6,"Census Tract 7, Saipan Municipality, Commonwealth of the Northern Mariana Islands",3071.0,508.0,308.0,2937.0,949.0,67.0,599.0,61.0,171.0,176.0,26417.0,3066.0,504.0,377.0,406.0,701.0,3071.0,973.0,1919.0,33.0,2.0,0.0,3.0,140.0,17.0,69,110,000700,31362.0
7,"Census Tract 8, Saipan Municipality, Commonwealth of the Northern Mariana Islands",1840.0,224.0,183.0,1771.0,494.0,77.0,315.0,53.0,133.0,104.0,23693.0,1657.0,308.0,200.0,224.0,289.0,1840.0,576.0,1131.0,39.0,1.0,0.0,4.0,87.0,5.0,69,110,000800,31362.0
8,"Census Tract 9, Saipan Municipality, Commonwealth of the Northern Mariana Islands",2967.0,438.0,295.0,2870.0,838.0,138.0,520.0,108.0,183.0,172.0,23365.0,2957.0,651.0,395.0,384.0,506.0,2967.0,947.0,1871.0,15.0,2.0,0.0,0.0,132.0,18.0,69,110,000900,31362.0
9,"Census Tract 10, Saipan Municipality, Commonwealth of the Northern Mariana Islands",2566.0,335.0,232.0,2446.0,733.0,89.0,487.0,73.0,153.0,156.0,32500.0,2564.0,372.0,255.0,344.0,797.0,2566.0,906.0,1365.0,32.0,6.0,2.0,4.0,249.0,26.0,69,110,001000,31362.0
10,"Census Tract 11, Saipan Municipality, Commonwealth of the Northern Mariana Islands",1597.0,248.0,178.0,1536.0,471.0,48.0,294.0,50.0,111.0,84.0,23015.0,1594.0,375.0,169.0,187.0,341.0,1597.0,392.0,1099.0,19.0,3.0,0.0,1.0,83.0,14.0,69,110,001100,31362.0
11,"Census Tract 12, Saipan Municipality, Commonwealth of the Northern Mariana Islands",1900.0,321.0,214.0,1835.0,535.0,100.0,327.0,76.0,130.0,101.0,21250.0,1895.0,455.0,207.0,268.0,373.0,1900.0,645.0,1158.0,15.0,0.0,0.0,0.0,82.0,2.0,69,110,001200,31362.0
12,"Census Tract 13, Saipan Municipality, Commonwealth of the Northern Mariana Islands",3236.0,434.0,331.0,3119.0,738.0,117.0,521.0,105.0,234.0,229.0,27850.0,3225.0,777.0,351.0,379.0,746.0,3236.0,1702.0,1264.0,23.0,0.0,0.0,5.0,242.0,44.0,69,110,001300,31362.0
13,"Census Tract 14, Saipan Municipality, Commonwealth of the Northern Mariana Islands",4266.0,565.0,478.0,4081.0,1007.0,132.0,753.0,129.0,311.0,261.0,36086.0,4263.0,674.0,426.0,487.0,1265.0,4266.0,2577.0,1282.0,46.0,8.0,0.0,2.0,350.0,86.0,69,110,001400,31362.0
14,"Census Tract 15, Saipan Municipality, Commonwealth of the Northern Mariana Islands",3306.0,458.0,356.0,3172.0,909.0,88.0,689.0,89.0,194.0,196.0,37550.0,3296.0,456.0,280.0,289.0,1196.0,3306.0,1506.0,1358.0,136.0,12.0,1.0,4.0,282.0,82.0,69,110,001500,31362.0
15,"Census Tract 16, Saipan Municipality, Commonwealth of the Northern Mariana Islands",4027.0,569.0,496.0,3816.0,952.0,139.0,748.0,141.0,326.0,315.0,43073.0,4022.0,653.0,309.0,310.0,1425.0,4027.0,3049.0,469.0,36.0,3.0,0.0,3.0,460.0,30.0,69,110,001600,31362.0
16,"Census Tract 17, Saipan Municipality, Commonwealth of the Northern Mariana Islands",2105.0,264.0,232.0,2020.0,516.0,55.0,417.0,53.0,126.0,107.0,54554.0,2102.0,226.0,103.0,154.0,1115.0,2105.0,1156.0,497.0,204.0,11.0,2.0,5.0,225.0,55.0,69,110,001700,31362.0
17,"Census Tract 9900, Saipan Municipality, Commonwealth of the Northern Mariana Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69,110,990000,
0,"Census Tract 9501.01, Tinian Municipality, Commonwealth of the Northern Mariana Islands",1250.0,230.0,149.0,1207.0,402.0,52.0,262.0,33.0,62.0,54.0,31771.0,1249.0,245.0,124.0,114.0,346.0,1250.0,535.0,589.0,9.0,0.0,0.0,3.0,113.0,21.0,69,120,950101,31362.0
1,"Census Tract 9501.02, Tinian Municipality, Commonwealth of the Northern Mariana Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69,120,950102,
2,"Census Tract 9502.01, Tinian Municipality, Commonwealth of the Northern Mariana Islands",794.0,119.0,95.0,763.0,220.0,28.0,171.0,27.0,38.0,51.0,41875.0,793.0,98.0,73.0,70.0,329.0,794.0,498.0,181.0,13.0,2.0,0.0,0.0,100.0,4.0,69,120,950201,31362.0
3,"Census Tract 9502.02, Tinian Municipality, Commonwealth of the Northern Mariana Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69,120,950202,
4,"Census Tract 9900, Tinian Municipality, Commonwealth of the Northern Mariana Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69,120,990000,
0,"Census Tract 9704, St. Croix Island, United States Virgin Islands",3983.0,666.0,538.0,3362.0,923.0,47.0,881.0,33.0,33.0,82.0,56402.0,3971.0,318.0,71.0,145.0,2742.0,3983.0,0.0,39.0,595.0,2676.0,28.0,430.0,183.0,940.0,78,010,970400,40408.0
1,"Census Tract 9701, St. Croix Island, United States Virgin Islands",1832.0,197.0,170.0,1583.0,475.0,23.0,410.0,23.0,1.0,78.0,75625.0,1767.0,69.0,31.0,48.0,1411.0,1832.0,0.0,28.0,986.0,534.0,20.0,149.0,94.0,267.0,78,010,970100,40408.0
2,"Census Tract 9702, St. Croix Island, United States Virgin Islands",2446.0,369.0,361.0,1947.0,536.0,41.0,501.0,27.0,32.0,39.0,33400.0,2366.0,438.0,90.0,147.0,1154.0,2446.0,0.0,18.0,299.0,1816.0,14.0,181.0,102.0,695.0,78,010,970200,40408.0
3,"Census Tract 9703, St. Croix Island, United States Virgin Islands",3025.0,410.0,459.0,2384.0,638.0,45.0,623.0,50.0,3.0,44.0,34444.0,3002.0,603.0,174.0,219.0,1440.0,3025.0,0.0,17.0,368.0,2081.0,8.0,318.0,215.0,907.0,78,010,970300,40408.0
4,"Census Tract 9705, St. Croix Island, United States Virgin Islands",4814.0,567.0,541.0,3195.0,903.0,84.0,815.0,73.0,20.0,51.0,44196.0,3779.0,371.0,145.0,219.0,2251.0,4814.0,44.0,63.0,530.0,2999.0,31.0,581.0,448.0,1405.0,78,010,970500,40408.0
5,"Census Tract 9706, St. Croix Island, United States Virgin Islands",2074.0,236.0,195.0,1773.0,570.0,31.0,500.0,27.0,5.0,70.0,62750.0,2074.0,111.0,45.0,86.0,1516.0,2074.0,0.0,59.0,531.0,1089.0,13.0,193.0,170.0,344.0,78,010,970600,40408.0
6,"Census Tract 9707, St. Croix Island, United States Virgin Islands",1909.0,280.0,269.0,1651.0,435.0,31.0,400.0,27.0,8.0,34.0,40809.0,1896.0,147.0,80.0,109.0,1179.0,1909.0,0.0,5.0,208.0,1393.0,13.0,184.0,105.0,410.0,78,010,970700,40408.0
7,"Census Tract 9708, St. Croix Island, United States Virgin Islands",3157.0,383.0,397.0,2405.0,585.0,62.0,595.0,91.0,19.0,15.0,29375.0,3086.0,648.0,214.0,244.0,1270.0,3157.0,0.0,5.0,59.0,2181.0,19.0,630.0,230.0,1331.0,78,010,970800,40408.0
8,"Census Tract 9709, St. Croix Island, United States Virgin Islands",914.0,77.0,151.0,665.0,169.0,18.0,231.0,36.0,6.0,9.0,30385.0,911.0,227.0,48.0,50.0,363.0,914.0,0.0,1.0,40.0,779.0,1.0,67.0,22.0,135.0,78,010,970900,40408.0
9,"Census Tract 9710, St. Croix Island, United States Virgin Islands",2041.0,270.0,259.0,1690.0,430.0,44.0,404.0,42.0,25.0,33.0,34550.0,2032.0,340.0,128.0,121.0,1018.0,2041.0,0.0,6.0,278.0,1499.0,11.0,118.0,115.0,270.0,78,010,971000,40408.0
10,"Census Tract 9711, St. Croix Island, United States Virgin Islands",3079.0,399.0,433.0,2317.0,527.0,99.0,627.0,103.0,26.0,35.0,24036.0,3011.0,743.0,265.0,223.0,1087.0,3079.0,0.0,6.0,171.0,2569.0,5.0,203.0,117.0,683.0,78,010,971100,40408.0
11,"Census Tract 9712, St. Croix Island, United States Virgin Islands",3372.0,546.0,506.0,2835.0,712.0,67.0,622.0,44.0,11.0,31.0,34250.0,3366.0,445.0,176.0,200.0,1800.0,3372.0,0.0,21.0,123.0,2708.0,6.0,266.0,215.0,644.0,78,010,971200,40408.0
12,"Census Tract 9713, St. Croix Island, United States Virgin Islands",2678.0,361.0,344.0,2191.0,582.0,63.0,536.0,53.0,25.0,33.0,37132.0,2674.0,306.0,145.0,186.0,1397.0,2678.0,0.0,21.0,161.0,2113.0,15.0,231.0,134.0,585.0,78,010,971300,40408.0
13,"Census Tract 9714, St. Croix Island, United States Virgin Islands",2550.0,297.0,359.0,1759.0,402.0,20.0,467.0,33.0,9.0,14.0,28641.0,2337.0,570.0,110.0,190.0,996.0,2550.0,0.0,6.0,80.0,2067.0,7.0,261.0,98.0,563.0,78,010,971400,40408.0
14,"Census Tract 9715, St. Croix Island, United States Virgin Islands",3130.0,450.0,436.0,2573.0,707.0,26.0,754.0,36.0,14.0,35.0,45292.0,3124.0,255.0,106.0,205.0,1887.0,3130.0,0.0,20.0,129.0,2603.0,11.0,190.0,171.0,557.0,78,010,971500,40408.0
15,"Census Tract 9900, St. Croix Island, United States Virgin Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78,010,990000,
0,"Census Tract 9501, St. John Island, United States Virgin Islands",1233.0,164.0,130.0,1058.0,322.0,34.0,329.0,32.0,8.0,35.0,52143.0,1228.0,97.0,36.0,63.0,799.0,1233.0,0.0,7.0,570.0,532.0,6.0,37.0,75.0,88.0,78,020,950100,40408.0
1,"Census Tract 9502, St. John Island, United States Virgin Islands",2648.0,496.0,401.0,2305.0,853.0,72.0,700.0,99.0,25.0,47.0,49100.0,2623.0,301.0,98.0,134.0,1617.0,2648.0,1.0,13.0,777.0,1420.0,19.0,158.0,248.0,444.0,78,020,950200,40408.0
2,"Census Tract 9900, St. John Island, United States Virgin Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78,020,990000,
0,"Census Tract 9601, St. Thomas Island, United States Virgin Islands",3329.0,485.0,449.0,2761.0,856.0,108.0,861.0,118.0,37.0,39.0,40234.0,3313.0,339.0,160.0,200.0,1840.0,3329.0,0.0,11.0,409.0,2403.0,7.0,90.0,402.0,542.0,78,030,960100,40408.0
1,"Census Tract 9602, St. Thomas Island, United States Virgin Islands",3762.0,556.0,652.0,3101.0,894.0,116.0,871.0,90.0,84.0,22.0,37628.0,3722.0,401.0,175.0,209.0,1959.0,3762.0,0.0,13.0,41.0,3427.0,1.0,62.0,213.0,287.0,78,030,960200,40408.0
2,"Census Tract 9603, St. Thomas Island, United States Virgin Islands",3117.0,516.0,560.0,2708.0,692.0,61.0,766.0,65.0,56.0,45.0,41746.0,3099.0,253.0,160.0,159.0,1926.0,3117.0,0.0,1.0,19.0,2838.0,1.0,59.0,199.0,248.0,78,030,960300,40408.0
3,"Census Tract 9604, St. Thomas Island, United States Virgin Islands",3872.0,494.0,547.0,3303.0,992.0,52.0,1068.0,65.0,14.0,99.0,50602.0,3868.0,284.0,100.0,176.0,2548.0,3872.0,3.0,98.0,1000.0,2369.0,24.0,106.0,239.0,333.0,78,030,960400,40408.0
4,"Census Tract 9605, St. Thomas Island, United States Virgin Islands",5001.0,676.0,580.0,4375.0,1340.0,90.0,1435.0,102.0,14.0,138.0,61962.0,4990.0,253.0,103.0,180.0,3793.0,5001.0,2.0,51.0,1715.0,2608.0,27.0,135.0,430.0,414.0,78,030,960500,40408.0
5,"Census Tract 9606, St. Thomas Island, United States Virgin Islands",3114.0,402.0,423.0,2538.0,743.0,59.0,741.0,79.0,30.0,53.0,39393.0,3081.0,286.0,145.0,212.0,1658.0,3114.0,0.0,71.0,332.0,2220.0,13.0,158.0,302.0,561.0,78,030,960600,40408.0
6,"Census Tract 9607, St. Thomas Island, United States Virgin Islands",2900.0,486.0,379.0,2489.0,783.0,77.0,688.0,64.0,23.0,51.0,44375.0,2897.0,273.0,135.0,147.0,1797.0,2900.0,0.0,15.0,488.0,2088.0,10.0,63.0,224.0,263.0,78,030,960700,40408.0
7,"Census Tract 9608, St. Thomas Island, United States Virgin Islands",3161.0,430.0,428.0,2399.0,687.0,66.0,702.0,88.0,41.0,37.0,41375.0,2837.0,306.0,101.0,207.0,1578.0,3161.0,1.0,33.0,302.0,2351.0,11.0,173.0,278.0,602.0,78,030,960800,40408.0
8,"Census Tract 9609, St. Thomas Island, United States Virgin Islands",3721.0,449.0,501.0,3049.0,930.0,107.0,826.0,119.0,57.0,66.0,38444.0,3700.0,478.0,239.0,241.0,1959.0,3721.0,0.0,82.0,437.0,2662.0,24.0,90.0,419.0,490.0,78,030,960900,40408.0
9,"Census Tract 9610, St. Thomas Island, United States Virgin Islands",4386.0,681.0,620.0,3582.0,1095.0,135.0,976.0,156.0,60.0,29.0,31429.0,4276.0,559.0,301.0,317.0,1861.0,4386.0,0.0,85.0,217.0,3437.0,11.0,123.0,491.0,1050.0,78,030,961000,40408.0
10,"Census Tract 9611, St. Thomas Island, United States Virgin Islands",3780.0,521.0,647.0,3144.0,833.0,114.0,866.0,117.0,24.0,24.0,29135.0,3754.0,511.0,272.0,278.0,1592.0,3780.0,0.0,92.0,84.0,3033.0,5.0,137.0,420.0,623.0,78,030,961100,40408.0
11,"Census Tract 9612, St. Thomas Island, United States Virgin Islands",2118.0,261.0,280.0,1608.0,436.0,53.0,432.0,69.0,59.0,13.0,28523.0,1982.0,354.0,153.0,131.0,751.0,2118.0,0.0,23.0,87.0,1688.0,10.0,85.0,210.0,394.0,78,030,961200,40408.0
12,"Census Tract 9900, St. Thomas Island, United States Virgin Islands",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-666666666.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78,030,990000,
@@ -0,0 +1,260 @@
import pytest
import pandas as pd
from pathlib import Path
from data_pipeline.etl.sources.census_decennial.constants import (
    DEC_TERRITORY_PARAMS,
    DEC_FIELD_NAMES,
    OUTPUT_RACE_FIELDS,
)
from data_pipeline.etl.sources.census_decennial.etl import CensusDecennialETL
from data_pipeline.score import field_names


def _check_fields_exist(df: pd.DataFrame, field_names: list):
    for field in field_names:
        assert field in df.columns


@pytest.fixture
def territory_params_fixture():
    return [
        {
            "state_abbreviation": "as",
            "fips": "60",
            "county_fips": ["010"],
            "xwalk": DEC_TERRITORY_PARAMS[0]["xwalk"],
            "median_income": 26352,
        },
        {
            "state_abbreviation": "gu",
            "fips": "66",
            "county_fips": ["010"],
            "xwalk": DEC_TERRITORY_PARAMS[1]["xwalk"],
            "median_income": 58289,
        },
    ]


@pytest.fixture
def extract_path_fixture() -> Path:
    return Path(__file__).parents[0] / "data/extract"


@pytest.fixture
def transform_path_fixture() -> Path:
    return Path(__file__).parents[0] / "data/transform"


@pytest.fixture
def imputed_path_fixture() -> Path:
    return Path(__file__).parents[0] / "data/imputation"


@pytest.fixture
def extracted_data_fixture(
    transform_path_fixture: Path,
) -> pd.DataFrame:
    return pd.read_csv(
        transform_path_fixture / "usa.csv",
        # Make sure these columns are strings, as in the original data
        dtype={"state": "object", "county": "object", "tract": "object"},
    )


@pytest.fixture
def transformed_data_fixture(
    extracted_data_fixture: pd.DataFrame, imputed_path_fixture: Path
) -> pd.DataFrame:
    """Load the test data and call the ETL transform"""
    dec = CensusDecennialETL()
    dec.df_all = extracted_data_fixture
    dec.transform(imputed_path_fixture / "census-us-territory-geojson.json")
    return dec.df_all


###############
# Extract tests
###############
def test_no_files_found(territory_params_fixture):
    """Test the ETL raises an exception if the file is not found"""
    dec = CensusDecennialETL()
    with pytest.raises(FileNotFoundError):
        dec.extract(
            use_cached_data_sources=True,
            test_territory_params=territory_params_fixture,
            test_path=Path("/path_does_not_exist"),
        )


def test_load_data(extract_path_fixture: Path, territory_params_fixture):
    """Test the ETL loads and translates the data"""
    dec = CensusDecennialETL()
    dec.extract(
        use_cached_data_sources=True,
        test_territory_params=territory_params_fixture,
        test_path=extract_path_fixture,
    )
    df = dec.df_all
    assert len(df) == 64
    assert len(df.columns) == 30
    assert df.index.is_unique

    # Columns should not have any census variable names
    census_vars = list(DEC_TERRITORY_PARAMS[0]["xwalk"].keys()) + list(
        DEC_TERRITORY_PARAMS[1]["xwalk"].keys()
    )
    for var in census_vars:
        assert var not in df.columns

    # Median income is added for entries with population
    assert (
        df.loc[
            df[field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019] > 0,
            DEC_FIELD_NAMES.TERRITORY_MEDIAN_INCOME,
        ]
        > 0
    ).all()
    assert not (
        df.loc[
            df[field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019] == 0,
            DEC_FIELD_NAMES.TERRITORY_MEDIAN_INCOME,
        ]
        > 0
    ).any()


#################
# Transform tests
#################
def test_geo_tract_generation(transformed_data_fixture: pd.DataFrame):
    result = transformed_data_fixture
    assert field_names.GEOID_TRACT_FIELD in result.columns
    assert result[field_names.GEOID_TRACT_FIELD].notnull().all()

    # Grab one GEO ID and test it
    assert (
        result[field_names.GEOID_TRACT_FIELD][0]
        == result["state"][0] + result["county"][0] + result["tract"][0]
    )


def test_merge_tracts(transformed_data_fixture: pd.DataFrame):
    result = transformed_data_fixture
    # 69120950200 exists, but the split tracts 69120950201/69120950202 do not
    assert (
        result.loc[result[field_names.GEOID_TRACT_FIELD] == "69120950200"]
        .any()
        .any()
    )
    assert (
        not result.loc[result[field_names.GEOID_TRACT_FIELD] == "69120950201"]
        .any()
        .any()
    )
    assert (
        not result.loc[result[field_names.GEOID_TRACT_FIELD] == "69120950202"]
        .any()
        .any()
    )


def test_remove_invalid_values(transformed_data_fixture: pd.DataFrame):
    numeric_df = transformed_data_fixture.select_dtypes(include="number")
    assert not (numeric_df < -999).any().any()


def test_race_fields(transformed_data_fixture: pd.DataFrame):
    for race_field_name in OUTPUT_RACE_FIELDS:
        assert race_field_name in transformed_data_fixture.columns
        assert any(
            col.startswith(field_names.PERCENT_PREFIX + race_field_name)
            for col in transformed_data_fixture.columns
        )


def test_transformation_fields(transformed_data_fixture: pd.DataFrame):
    _check_fields_exist(
        transformed_data_fixture,
        [
            field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019,
            field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019,
            field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019,
            field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019,
            field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2019,
            DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_POPULATION,
            DEC_FIELD_NAMES.COLLEGE_ATTENDANCE_PERCENT,
        ],
    )


##################
# Imputation tests
##################
def test_merge_geojson(transformed_data_fixture: pd.DataFrame):
    _check_fields_exist(transformed_data_fixture, ["STATEFP10", "COUNTYFP10"])


def test_imputation_added(transformed_data_fixture: pd.DataFrame):
    assert (
        DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL
        in transformed_data_fixture.columns
    )

    # All rows with population > 0 need to have a value (real or imputed)
    df_has_pop = transformed_data_fixture[
        transformed_data_fixture[
            field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019
        ]
        > 0
    ]
    assert (
        df_has_pop[
            DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL
        ]
        .notnull()
        .all()
    )

    # The imputed value equals the real value when available
    df_has_real_data = transformed_data_fixture[
        transformed_data_fixture[
            field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019
        ].notnull()
    ]
    assert (
        df_has_real_data[
            DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL
        ]
        == df_has_real_data[
            field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019
        ]
    ).all()

    # The imputed value exists when no real value exists
    df_missing_data = transformed_data_fixture[
        transformed_data_fixture[
            field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019
        ].isnull()
    ]
    assert (
        df_missing_data[
            df_missing_data[
                DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL
            ].notnull()
        ][
            DEC_FIELD_NAMES.IMPUTED_PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL
        ]
        .notnull()
        .all()
    )

    # Test the imputation flag is set for missing rows that have population
    df_missing_with_pop = df_missing_data[
        df_missing_data[
            field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2019
        ]
        > 0
    ]
    assert df_missing_with_pop[
        field_names.ISLAND_AREAS_IMPUTED_INCOME_FLAG_FIELD
    ].all()
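The imputation assertions above encode a simple contract: wherever the real poverty measure exists, the imputed column must equal it; wherever it is missing in a populated tract, an estimate must be filled in and the imputation flag set. A minimal pandas sketch of that contract, using hypothetical short column names rather than the pipeline's field constants:

import pandas as pd

df = pd.DataFrame({
    "population": [100, 250, 0, 80],
    "poverty_200_fpl": [0.4, None, None, None],
})

# Keep the real value where it exists; otherwise fall back to an estimate
# (a simple mean of observed values here, purely for illustration).
estimate = df["poverty_200_fpl"].mean()
df["imputed_poverty_200_fpl"] = df["poverty_200_fpl"].fillna(estimate)

# Flag populated rows whose value had to be imputed.
df["imputed_flag"] = df["poverty_200_fpl"].isnull() & (df["population"] > 0)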
@@ -13,6 +13,7 @@ import requests
import urllib3
import yaml
from data_pipeline.config import settings
from data_pipeline.constants import LOG_LEVEL
from data_pipeline.content.schemas.download_schemas import CodebookConfig
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.content.schemas.download_schemas import ExcelConfig

@@ -48,7 +49,7 @@ def get_module_logger(module_name: str) -> logging.Logger:
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    logger.setLevel(LOG_LEVEL)
    logger.propagate = False  # don't send log messages to the parent logger (to avoid duplicate log messages)
    return logger
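Replacing the hard-coded logging.INFO with the shared LOG_LEVEL constant lets operators change pipeline verbosity in one place. The constant's definition is not part of this diff; a plausible sketch (an assumption, not the repository's actual code) reads the level from the environment:

import logging
import os

# Hypothetical: map an optional LOG_LEVEL env var (e.g. "DEBUG") onto a
# logging level, defaulting to INFO when unset or unrecognized.
LOG_LEVEL = getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper(), logging.INFO)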
5084
data/data-pipeline/poetry.lock
generated
File diff suppressed because it is too large
@@ -1,6 +1,6 @@
[tool.poetry]
name = "justice40-data-pipeline"
version = "0.1.0"
version = "1.0.1"
description = "ETL, Score and Map Generation of Justice 40 Tool"
authors = ["Justice40 Engineering <j40-engineering@lists.usds.gov>"]
keywords = ["justice40", "environmental_justice", "python", "etl"]

@@ -17,7 +17,7 @@ packages = [

[tool.poetry.dependencies]
CensusData = "^1.13"
certifi = "^2022.12.07" # explicit callout due to https://pyup.io/v/52365/f17/
certifi = ">= 2024.07.04" # Due to https://data.safetycli.com/v/72083/f17
click = "8.0.4" # pinning for now per https://github.com/psf/black/issues/2964
dynaconf = "^3.1.4"
geopandas = "^0.11.0"

@@ -27,39 +27,38 @@ jupyter = "^1.0.0"
jupyter-contrib-nbextensions = "^0.5.1"
marshmallow-dataclass = "^8.5.3"
marshmallow-enum = "^1.5.1"
matplotlib = "^3.4.2"
numpy = "^1.22.1"
pandas = "^1.2.5"
pandas = "~1.4.3"
pylint = "^2.11.1"
pillow = "9.3.0"
python = "^3.8"
python = "^3.10"
pypandoc = "^1.6.3"
PyYAML = "^6.0"
requests = "^2.25.1"
tqdm = "4.62.0"
tqdm = "^4.66.3"
types-requests = "^2.25.0"
us = "^2.0.2"
xlsxwriter = "^2.0.0"
pydantic = "^1.9.0"
Rtree = "^1.0.0"
fiona = "~1.8.21"

[tool.poetry.dev-dependencies]
black = {version = "^21.6b0", allow-prereleases = true}
flake8 = "^3.9.2"
liccheck = "^0.6.2"
[tool.poetry.group.dev.dependencies]
black = "^21"
flake8 = "^7.1.1"
liccheck = "^0.9.2"
mypy = "^0.910"
openpyxl = "^3.0.7"
pylint = "^2.9.6"
pytest = "^6.2.4"
pytest = "^8.3.3"
safety = "^2.3.1"
tox = "^3.24.0"
pytest-mock = "^3.6.1"
pytest-mock = "^3.14.0"
tox-poetry = "^0.4.1"
pandas-vet = "^0.2.2"
pytest-snapshot = "^0.8.1"
pytest-snapshot = "^0.9.0"
seaborn = "^0.11.2"
papermill = "^2.3.4"
jupyterlab = "3.4.4"
jupyterlab = "^3.6.7"

[build-system]
build-backend = "poetry.core.masonry.api"

@@ -125,6 +124,7 @@ authorized_licenses = [
"apache",
"apache 2.0",
"apache license 2.0",
"apache license, version 2.0",
"apache software license",
"apache software",
"gnu lgpl",

@@ -144,3 +144,5 @@ authorized_licenses = [
"gpl v3",
"historical permission notice and disclaimer (hpnd)",
]
[tool.liccheck.authorized_packages]
ypy-websocket="0.8.4"
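A note on the constraint styles in the hunks above: under Poetry, ^1.2.5 allows any version up to but excluding 2.0.0, ~1.4.3 allows only 1.4.x patch releases, and >= 2024.07.04 sets a floor with no ceiling. The pandas change is therefore a tightening (minor upgrades become opt-in), while the certifi change trades an upper bound for a security floor, per the linked advisory.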
@@ -1,7 +1,7 @@
[tox]
# required because we use pyproject.toml
isolated_build = true
envlist = py38, py39, lint, checkdeps, pytest
envlist = py310, lint, checkdeps, pytest
# only checks python versions installed locally
skip_missing_interpreters = true

@@ -16,7 +16,9 @@ commands = black data_pipeline
# checks the dependencies for security vulnerabilities and open source licenses
allowlist_externals = bash
commands = pip install -U wheel
    safety check --ignore 51457 --ignore 44715 # known issue: https://github.com/pyupio/safety/issues/364
    # known issue: https://github.com/pyupio/safety/issues/364
    # jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
    safety check --ignore 51457 --ignore 44715 --ignore 70612
    bash scripts/run-liccheck.sh

[testenv:pytest]
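With the py38 and py39 environments gone, the tox envlist matches the single interpreter now declared in pyproject.toml. Running tox -e checkdeps from data/data-pipeline should reproduce the dependency audit locally, chaining the wheel upgrade, the safety check with the three documented ignores, and the liccheck script (assuming tox and tox-poetry are installed).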