diff --git a/.github/workflows/data-checks.yml b/.github/workflows/data-checks.yml deleted file mode 100644 index ac111d2a..00000000 --- a/.github/workflows/data-checks.yml +++ /dev/null @@ -1,44 +0,0 @@ -# This runs tox in the two directories under data -name: Data Checks -on: - pull_request: - paths: - - "data/**" -jobs: - data-pipeline: - runs-on: ubuntu-latest - defaults: - run: - working-directory: data/data-pipeline - strategy: - matrix: - # checks all of the versions allowed in pyproject.toml - python-version: [3.10.15] - steps: - # installs Python - # one execution of the tests per version listed above - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Upgrade wheel - run: pip install -U wheel - - name: Print variables to help debug - uses: hmarr/debug-action@v2 - - name: Load cached Poetry installation - id: cached-poetry-dependencies - uses: actions/cache@v4 - with: - path: ~/.cache/pypoetry/virtualenvs - key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/data-checks.yml') }} - - name: Install poetry - uses: snok/install-poetry@v1 - - name: Print Poetry settings - run: poetry show -v - - name: Install dependencies - run: poetry install - # TODO: investigate why caching layer started failing. - # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - - name: Run tox - run: poetry run tox diff --git a/.github/workflows/deploy_backend_main.yml b/.github/workflows/deploy_backend_main.yml index 1a420687..a68beb69 100644 --- a/.github/workflows/deploy_backend_main.yml +++ b/.github/workflows/deploy_backend_main.yml @@ -4,6 +4,9 @@ on: branches: [main] paths: - "data/**" +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true env: CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }} J40_VERSION_LABEL_STRING: ${{ vars.SCORE_VERSION }} @@ -51,15 +54,27 @@ jobs: sudo apt-get update sudo apt-get -y install gdal-bin ogrinfo --version + - name: Cleanup Data + run: | + poetry run python3 -m data_pipeline.application data-cleanup - name: Get Census Data run: | - poetry run python3 data_pipeline/application.py pull-census-data -s aws + poetry run python3 -m data_pipeline.application census-data-download + - name: Extract Data Sources + run: | + poetry run python3 -m data_pipeline.application extract-data-sources + - name: Run ETL + run: | + poetry run python3 -m data_pipeline.application etl-run - name: Generate Score run: | - poetry run python3 data_pipeline/application.py score-full-run + poetry run python3 -m data_pipeline.application score-run + - name: Score Compare + run: | + poetry run python3 -m data_pipeline.comparator compare-score - name: Generate Score Post run: | - poetry run python3 data_pipeline/application.py generate-score-post -s aws + poetry run python3 -m data_pipeline.application generate-score-post - name: Confirm we generated the version of the score we think we did if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' || env.J40_VERSION_LABEL_STRING == 'test' }} run: | @@ -70,7 +85,7 @@ jobs: grep -v "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null - name: Generate Score Geo run: | - poetry run python3 data_pipeline/application.py geo-score + poetry run python3 -m data_pipeline.application geo-score - name: Run smoketest for 1.0 if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }} run: | @@ 
-80,6 +95,7 @@ jobs: poetry run s4cmd put ./data_pipeline/data/score/csv/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/csv --sync-check --recursive --force poetry run s4cmd put ./data_pipeline/files/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/downloadable --sync-check --recursive --force poetry run s4cmd put ./data_pipeline/data/score/downloadable/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/downloadable --sync-check --recursive --force + poetry run s4cmd put ./data_pipeline/data/score/search/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/search --sync-check --recursive --force - name: Deploy 1.0 score post if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }} run: | @@ -116,7 +132,7 @@ jobs: tippecanoe -v - name: Generate Tiles run: | - poetry run python3 data_pipeline/application.py generate-map-tiles + poetry run python3 -m data_pipeline.application generate-map-tiles - name: Deploy Map to Geoplatform AWS run: | poetry run s4cmd put ./data_pipeline/data/score/geojson/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/geojson --sync-check --recursive --force --delete-removed --num-threads=250 diff --git a/.github/workflows/deploy_frontend_main.yml b/.github/workflows/deploy_frontend_main.yml index 662edeaa..c94a3c15 100644 --- a/.github/workflows/deploy_frontend_main.yml +++ b/.github/workflows/deploy_frontend_main.yml @@ -4,6 +4,9 @@ on: branches: [main] paths: - "client/**/*" +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true jobs: build: runs-on: ubuntu-latest diff --git a/.github/workflows/pr_backend.yml b/.github/workflows/pr_backend.yml new file mode 100644 index 00000000..cba7be0f --- /dev/null +++ b/.github/workflows/pr_backend.yml @@ -0,0 +1,166 @@ +name: Pull Request Backend +on: + pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + # JOB to run change detection + detect-be-changes: + name: Detect backend changes + runs-on: ubuntu-latest + # Required permissions + permissions: + pull-requests: read + # Set job outputs to values from filter step + outputs: + backend: ${{ steps.filter.outputs.backend }} + steps: + # For pull requests it's not necessary to checkout the code + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + backend: + - 'data/**' + - '.github/workflows/pr_backend.yml' + code-quality-checks: + name: Code quality checks and tests - ${{ matrix.python-version }} + needs: detect-be-changes + if: ${{ needs.detect-be-changes.outputs.backend == 'true' }} + runs-on: ubuntu-latest + defaults: + run: + working-directory: data/data-pipeline + strategy: + matrix: + python-version: ['3.10'] + environment: PR + steps: + - name: Checkout source + uses: actions/checkout@v4 + - name: Print variables to help debug + uses: hmarr/debug-action@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Load cached Poetry installation + id: cached-poetry-dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pypoetry/virtualenvs + key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }} + - name: Install poetry + uses: 
snok/install-poetry@v1 + - name: Install dependencies + run: poetry install + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + - name: Check code is formatted + run: poetry run black --check data_pipeline/ + - name: Check code style consistency + run: poetry run flake8 -v data_pipeline/ + - name: Run static code analysis + run: poetry run pylint data_pipeline/ + - name: Check library safety + run: poetry run safety check --ignore 51457 --ignore 44715 --ignore 70612 + - name: Run unit tests + run: | + poetry run pytest data_pipeline/ + generate-score-tiles: + name: Score and tile generation - ${{ matrix.python-version }} + needs: detect-be-changes + if: ${{ needs.detect-be-changes.outputs.backend == 'true' }} + runs-on: ubuntu-latest + defaults: + run: + working-directory: data/data-pipeline + strategy: + matrix: + python-version: ['3.10'] + environment: PR + steps: + - name: Checkout source + uses: actions/checkout@v4 + - name: Print variables to help debug + uses: hmarr/debug-action@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Load cached Poetry installation + id: cached-poetry-dependencies + uses: actions/cache@v4 + with: + path: ~/.cache/pypoetry/virtualenvs + key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }} + - name: Install poetry + uses: snok/install-poetry@v1 + - name: Print Poetry settings + run: poetry show -v + - name: Install dependencies + run: poetry add s4cmd && poetry install + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + - name: Install GDAL/ogr2ogr + run: | + sudo apt-get update + sudo apt-get -y install gdal-bin + ogrinfo --version + - name: Cleanup Data + run: | + poetry run python3 -m data_pipeline.application data-cleanup + - name: Get Census Data + run: | + poetry run python3 -m data_pipeline.application census-data-download + - name: Extract Data Sources + run: | + poetry run python3 -m data_pipeline.application extract-data-sources + - name: Run ETL + run: | + poetry run python3 -m data_pipeline.application etl-run + - name: Generate Score + run: | + poetry run python3 -m data_pipeline.application score-run + - name: Score Compare + run: | + poetry run python3 -m data_pipeline.comparator compare-score + - name: Generate Score Post + run: | + poetry run python3 -m data_pipeline.application generate-score-post + - name: Confirm we generated the version of the score we think we did + if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' || env.J40_VERSION_LABEL_STRING == 'test' }} + run: | + grep "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null + - name: Confirm we generated the version of the score we think we did + if: ${{ env.J40_VERSION_LABEL_STRING == '2.0' || env.J40_VERSION_LABEL_STRING == 'beta' }} + run: | + grep -v "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null + - name: Generate Score Geo + run: | + poetry run python3 -m data_pipeline.application geo-score + - name: Run smoketest for 1.0 + if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }} + run: | + poetry run pytest data_pipeline/ -m smoketest + - name: Set timezone for tippecanoe + uses: szenius/set-timezone@v2.0 + with: + timezoneLinux: "America/Los_Angeles" + - name: Get tippecanoe + run: | + sudo apt-get install -y software-properties-common libsqlite3-dev 
zlib1g-dev + sudo apt-add-repository -y ppa:git-core/ppa + sudo mkdir -p /tmp/tippecanoe-src + sudo git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src + - name: Make tippecanoe + working-directory: /tmp/tippecanoe-src + run: | + sudo /usr/bin/bash -c make + mkdir -p /usr/local/bin + cp tippecanoe /usr/local/bin/tippecanoe + tippecanoe -v + - name: Generate Tiles + run: | + poetry run python3 -m data_pipeline.application generate-map-tiles + diff --git a/.github/workflows/pr_frontend.yml b/.github/workflows/pr_frontend.yml new file mode 100644 index 00000000..de4232a2 --- /dev/null +++ b/.github/workflows/pr_frontend.yml @@ -0,0 +1,66 @@ +name: Pull Request Frontend +on: + pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} + cancel-in-progress: true +jobs: + # JOB to run change detection + detect-fe-changes: + name: Detect frontend changes + runs-on: ubuntu-latest + # Required permissions + permissions: + pull-requests: read + # Set job outputs to values from filter step + outputs: + frontend: ${{ steps.filter.outputs.frontend }} + steps: + # For pull requests it's not necessary to checkout the code + - uses: dorny/paths-filter@v3 + id: filter + with: + filters: | + frontend: + - 'client/**' + - '.github/workflows/pr_frontend.yml' + frontend-build: + name: Frontend build - ${{ matrix.node-version }} + needs: detect-fe-changes + if: ${{ needs.detect-fe-changes.outputs.frontend == 'true' }} + runs-on: ubuntu-latest + environment: PR + defaults: + run: + working-directory: client + strategy: + matrix: + node-version: [14.x] + steps: + - uses: actions/checkout@v4 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v2 + with: + node-version: ${{ matrix.node-version }} + - name: Install + run: npm ci + - name: Build + run: npm run build --if-present + env: + # See the client readme for more info on environment variables: + # https://github.com/usds/justice40-tool/blob/main/client/README.md + DATA_SOURCE: cdn + # TODO: Update main URL when either is back up + SITE_URL: "${{ secrets.SITE_URL }}" + MAPBOX_STYLES_READ_TOKEN: "${{ secrets.MAPBOX_STYLES_READ_TOKEN }}" + - name: Get directory contents + run: ls -la public + - name: Lint + run: npm run lint + # Disabling for now due to jsonlint - TODO: put this back + # - name: License Check + # run: npm run licenses + - name: Test + run: npm test + # - name: Check for security vulnerabilities + # run: npm audit --production \ No newline at end of file diff --git a/client/.env.development b/client/.env.development index a9594395..b599cfb5 100644 --- a/client/.env.development +++ b/client/.env.development @@ -12,6 +12,9 @@ GATSBY_DATA_PIPELINE_TRIBAL_PATH=data-pipeline/data/tribal GATSBY_BETA_SCORE_PATH = data-versions/beta/data/score GATSBY_2_0_SCORE_PATH = data-versions/2.0/data/score +GATSBY_DATA_PIPELINE_SEARCH_PATH_LOCAL = data_pipeline/data/score/search/tracts.json +GATSBY_2_0_MAP_TRACT_SEARCH_PATH = data-versions/2.0/data/score/search/tracts.json + GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_XLS=downloadable/beta-communities.xlsx GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_CSV=downloadable/beta-communities.csv GATSBY_FILE_DL_PATH_BETA_SHAPE_FILE_ZIP=downloadable/beta-shapefile-codebook.zip diff --git a/client/.env.production b/client/.env.production index a925c4a2..0e91cc36 100644 --- a/client/.env.production +++ b/client/.env.production @@ -10,6 +10,8 @@ GATSBY_DATA_PIPELINE_TRIBAL_PATH=data-pipeline/data/tribal GATSBY_BETA_SCORE_PATH = 
data-versions/beta/data/score
 GATSBY_2_0_SCORE_PATH = data-versions/2.0/data/score
+GATSBY_2_0_MAP_TRACT_SEARCH_PATH = data-versions/2.0/data/score/search/tracts.json
+
 GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_XLS=downloadable/beta-communities.xlsx
 GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_CSV=downloadable/beta-communities.csv
 GATSBY_FILE_DL_PATH_BETA_SHAPE_FILE_ZIP=downloadable/beta-shapefile-codebook.zip
diff --git a/client/README.md b/client/README.md
index da2079b3..8984887f 100644
--- a/client/README.md
+++ b/client/README.md
@@ -1,7 +1,7 @@
 [![Staging](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_staging.yml/badge.svg)](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_staging.yml)
 [![Production](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_main.yml/badge.svg)](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_main.yml)
 
 # Justice40 Client
 
 This README contains the following content:
 
@@ -213,8 +213,6 @@ When developing, to use a flag:
 5. Install the [CORS chrome extension](https://chrome.google.com/webstore/detail/allow-cors-access-control/lhobafahddgcelffkeicbaginigeejlf?hl=en) in the browser that is launched by the debugger.
 6. Set breakpoints in VS code!
 
-
-
 ## Package Versions
 
 The following attempts to explain why certain package versions have been chosen and what their current limitations are
@@ -225,4 +223,4 @@ The following attempts to explain why certain package versions have been chosen
 | gatsby-cli | 3.14.2 | 4.15.2 | No | When attempting to update, all unit tests break. Compatibility warnings come up with all plugins, but this doesn't seem to affect functionality. This is the latest version we can release without investigating unit tests.|
 | sass | 1.32.12 | 1.52.3 | No | This version is needed to suppress the dart warnings on / as division for each component. See [here](https://github.com/twbs/bootstrap/issues/34051#issuecomment-845884423) for more information |
 | uswds | 2.13.3 | 3.0.2 | No | Needs to stay at 2.13.3 for peer dependency on trussworks|
-| trussworks | 3.1.0 | 3.1.0 | No | latest! |
\ No newline at end of file
+| trussworks | 3.1.0 | 3.1.0 | No | latest! |
diff --git a/client/src/components/AreaDetail/AreaDetail.tsx b/client/src/components/AreaDetail/AreaDetail.tsx
index 2f3656b8..d51c62db 100644
--- a/client/src/components/AreaDetail/AreaDetail.tsx
+++ b/client/src/components/AreaDetail/AreaDetail.tsx
@@ -1,8 +1,8 @@
 /* eslint-disable quotes */
 // External Libs:
-import React from "react";
-import {MessageDescriptor, useIntl} from "gatsby-plugin-intl";
 import {Accordion, Button} from "@trussworks/react-uswds";
+import {MessageDescriptor, useIntl} from "gatsby-plugin-intl";
+import React from "react";
 
 // Components:
 import Category from "../Category";
@@ -15,11 +15,12 @@ import TractInfo from "../TractInfo";
 import TractPrioritization from "../TractPrioritization";
 
 // Styles and constants
-import * as styles from "./areaDetail.module.scss";
 import * as constants from "../../data/constants";
 import * as EXPLORE_COPY from "../../data/copy/explore";
+import * as styles from "./areaDetail.module.scss";
 
 // @ts-ignore
+import IslandCopy from "../IslandCopy/IslandCopy";
 import launchIcon from "/node_modules/uswds/dist/img/usa-icons/launch.svg";
 
 interface IAreaDetailProps {
@@ -1163,6 +1164,9 @@ const AreaDetail = ({properties}: IAreaDetailProps) => {
             null
           }
           percentTractTribal={percentTractTribal}
+          isGrandfathered={
+            properties[constants.IS_GRANDFATHERED]
+          }
         />
 
-      {/* Only show the DonutCopy if Adjacency index is true and the total number of disadv ind == 0 */}
+      {/* Show IslandCopy if the GeoID matches an island prefix */}
+      {constants.TILES_ISLAND_AREA_FIPS_CODES.some((code) => {
+        return properties[constants.GEOID_PROPERTY].startsWith(code);
+      }) && (
+        <IslandCopy
+          povertyPercentile={
+            properties[constants.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE]
+          }
+        />
+      )}
+
+      {/* Only show the DonutCopy if Adjacency index is true, the total number of disadv ind == 0,
+      and not grandfathered. */}
       {properties[constants.ADJACENCY_EXCEEDS_THRESH] &&
-        properties[constants.TOTAL_NUMBER_OF_DISADVANTAGE_INDICATORS] === 0 && (
+        properties[constants.TOTAL_NUMBER_OF_DISADVANTAGE_INDICATORS] === 0 &&
+        !properties[constants.IS_GRANDFATHERED] && (
         <DonutCopy
           isAdjacent={properties[constants.ADJACENCY_EXCEEDS_THRESH]}
           povertyBelow200Percentile={
             properties[constants.POVERTY_BELOW_200_PERCENTILE]
           }
         />
       )}
diff --git a/client/src/components/IslandCopy/IslandCopy.tsx b/client/src/components/IslandCopy/IslandCopy.tsx
new file mode 100644
--- /dev/null
+++ b/client/src/components/IslandCopy/IslandCopy.tsx
@@ -0,0 +1,48 @@
+// External Libs:
+import {useIntl} from "gatsby-plugin-intl";
+import React from "react";
+
+// Components:
+import {IndicatorValue, IndicatorValueSubText} from "../Indicator/Indicator";
+
+// Styles and constants:
+import * as styles from "./islandCopy.module.scss";
+import * as EXPLORE_COPY from "../../data/copy/explore";
+
+export interface IIslandCopyProps {
+  povertyPercentile: number | null
+}
+
+const IslandCopy = ({povertyPercentile}: IIslandCopyProps) => {
+  const intl = useIntl();
+  const percentileWhole = povertyPercentile ?
+    parseFloat((povertyPercentile*100).toFixed()) : null;
+  const threshold = 65;
+
+  return (
+    <div className={styles.islandCopyContainer}>
+      <div className={styles.islandRow}>
+        <div className={styles.islandRowLabel}>
+          {intl.formatMessage(EXPLORE_COPY.ISLAND_COPY.LOW_INC)}
+        </div>
+        <div className={`${styles.valueContainer} ${percentileWhole >= threshold ?
+          styles.invert :
+          styles.noInvert }
+        `}>
+          <IndicatorValue
+            type={'percentile'}
+            displayStat={percentileWhole}
+          />
+        </div>
+      </div>
+      <IndicatorValueSubText
+        value={percentileWhole}
+        isAboveThresh={percentileWhole >= threshold ? true : false}
+        threshold={threshold}
+        type={'percentile'}
+      />
+    </div>
+  );
+};
+
+export default IslandCopy;
diff --git a/client/src/components/PrioritizationCopy/PrioritizationCopy.tsx b/client/src/components/PrioritizationCopy/PrioritizationCopy.tsx
index 6769ebc3..19fffda7 100644
--- a/client/src/components/PrioritizationCopy/PrioritizationCopy.tsx
+++ b/client/src/components/PrioritizationCopy/PrioritizationCopy.tsx
@@ -10,6 +10,7 @@ interface IPrioritizationCopy {
   isAdjacencyThreshMet: boolean,
   isAdjacencyLowIncome: boolean,
   isIslandLowIncome: boolean,
+  isGrandfathered: boolean,
   tribalCountAK: number | null,
   tribalCountUS: null, // when this signal is supported add number type
   percentTractTribal: number | null
@@ -38,6 +39,7 @@ const PrioritizationCopy =
     totalBurdensPrioritized,
     isAdjacencyThreshMet,
     isAdjacencyLowIncome,
+    isGrandfathered,
     isIslandLowIncome,
     tribalCountAK,
     tribalCountUS,
@@ -57,8 +59,10 @@ const PrioritizationCopy =
   } else if (isAdjacencyThreshMet && !isAdjacencyLowIncome) {
     // if 1-2-1
     if ( tribalCountAK === null && tribalCountUS === null) {
+      if (isGrandfathered) {
+        prioCopyRendered = EXPLORE_COPY.PRIORITIZATION_COPY.PRIO_GRANDFATHERED_LI;
       // if 1-2-1-1
-      if (percentTractTribal === null) {
+      } else if (percentTractTribal === null) {
         prioCopyRendered = EXPLORE_COPY.PRIORITIZATION_COPY.NOT_PRIO_SURR_LI;
       // if 1-2-1-2
       } else if (percentTractTribal === 0) {
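For reference, the grandfathering signal these components consume is computed on the backend and shipped in the tiles (see the `SN_GRAND` mapping in `constants.py` later in this diff). A minimal pandas sketch of the relationship between the flags, assuming the column names used in this PR (`SN_C`, `SN_C_V10`, `SN_GRAND`) and assuming the published v2.0 flag is the union of the raw v2.0 criteria and the v1.0 flag; the raw v2.0 column below is a hypothetical stand-in, not a real field name:

```python
import pandas as pd

# Tile-level flags; names follow TILES_SCORE_COLUMNS in this PR.
df = pd.DataFrame(
    {
        "SN_C_V10": [True, True, False, False],  # disadvantaged under v1.0
        "RAW_V2": [True, False, True, False],    # hypothetical: v2.0 criteria alone
    }
)

# Assumption: v2.0 keeps every v1.0 community ("grandfathering"), so the
# published flag is the union of the two signals...
df["SN_C"] = df["RAW_V2"] | df["SN_C_V10"]

# ...and a tract is grandfathered exactly when only the v1.0 signal carries it.
df["SN_GRAND"] = df["SN_C_V10"] & ~df["RAW_V2"]

print(df)
```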
diff --git a/client/src/components/PrioritizationCopy2/PrioritizationCopy2.tsx b/client/src/components/PrioritizationCopy2/PrioritizationCopy2.tsx
index de021bf1..63c92926 100644
--- a/client/src/components/PrioritizationCopy2/PrioritizationCopy2.tsx
+++ b/client/src/components/PrioritizationCopy2/PrioritizationCopy2.tsx
@@ -40,8 +40,8 @@ const PrioritizationCopy2 =
     tribalCountUS,
     percentTractTribal,
   }:IPrioritizationCopy2) => {
-    let noStyles = false;
-    let prioCopy2Rendered;
+    let prioCopy2Rendered = <></>;
+
     // if 1
     if (
@@ -165,13 +165,10 @@ const PrioritizationCopy2 =
     (tribalCountAK !== null && tribalCountAK >= 1)
   ) {
     prioCopy2Rendered = EXPLORE_COPY.getPrioANVCopy(tribalCountAK, false);
-  } else {
-    prioCopy2Rendered = <></>;
-    noStyles = true;
   };
 
   return (
-    <div className={noStyles ? '' : styles.prioritizationCopy2Container}>
+    <div className={prioCopy2Rendered == <></> ? '' : styles.prioritizationCopy2Container}>
       {prioCopy2Rendered}
     </div>
   );
diff --git a/client/src/components/PrioritizationCopy2/__snapshots__/PrioritizationCopy2.test.tsx.snap b/client/src/components/PrioritizationCopy2/__snapshots__/PrioritizationCopy2.test.tsx.snap
index 348e5bbb..393064e7 100644
--- a/client/src/components/PrioritizationCopy2/__snapshots__/PrioritizationCopy2.test.tsx.snap
+++ b/client/src/components/PrioritizationCopy2/__snapshots__/PrioritizationCopy2.test.tsx.snap
@@ -2,7 +2,9 @@
 exports[`rendering of PrioritizationCopy2 Component checks if component renders The lands of Federally Recognized Tribes that cover 2% of this tract are also considered disadvantaged. when totCats = 0, totBurds = 0, isAdj = true, isAdjLI = true, tribal % = 2, 1`] = `
-<div>
+<div
+  class="prioritizationCopy2Container"
+>
   The lands of Federally Recognized Tribes that cover 2% of this tract are also considered disadvantaged.
 </div>
 `;
@@ -10,7 +12,9 @@
 exports[`rendering of PrioritizationCopy2 Component checks if component renders The lands of Federally Recognized Tribes that cover 4% of this tract are also considered disadvantaged. when totCats = 0, totBurds = 1, isAdj = true, isAdjLI = true, tribal % = 4, 1`] = `
-<div>
+<div
+  class="prioritizationCopy2Container"
+>
   The lands of Federally Recognized Tribes that cover 4% of this tract are also considered disadvantaged.
 </div>
 `;
diff --git a/client/src/data/constants.tsx b/client/src/data/constants.tsx
index 2f88b502..585d7eca 100644
--- a/client/src/data/constants.tsx
+++ b/client/src/data/constants.tsx
@@ -71,6 +71,7 @@ export const TOTAL_NUMBER_OF_INDICATORS = "THRHLD";
 export const COUNT_OF_CATEGORIES_DISADV = "CC";
 export const SCORE_N_COMMUNITIES = "SN_C";
 export const SCORE_N_TRIBAL = "SN_T";
+export const IS_GRANDFATHERED = "SN_GRAND";
 
 export const SIDE_PANEL_STATE = "UI_EXP";
 export const SIDE_PANEL_STATE_VALUES = {
@@ -109,6 +110,8 @@ export const IS_EXCEED_BOTH_SOCIO_INDICATORS = "N_EBSI";
 
 export const POVERTY_BELOW_200_PERCENTILE = "P200_I_PFS";
 export const IS_FEDERAL_POVERTY_LEVEL_200 = "FPL200S";
+// Percentile FPL 200 for islands only
+export const CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE = "FPL200P";
 
 export const HIGHER_ED_PERCENTILE = "CA";
 export const IS_HIGHER_ED_PERCENTILE = "CA_LT20";
@@ -387,6 +390,8 @@
 process.env.GATSBY_CDN_TILES_BASE_URL;
 
 export const TILE_PATH = process.env.DATA_SOURCE === "local" ?
 process.env.GATSBY_DATA_PIPELINE_SCORE_PATH_LOCAL :
-process.env.GATSBY_1_0_SCORE_PATH;
+process.env.GATSBY_2_0_SCORE_PATH;
 
-export const MAP_TRACT_SEARCH_PATH = "data_pipeline/data/score/search/tracts.json";
+export const MAP_TRACT_SEARCH_PATH = process.env.DATA_SOURCE === "local" ?
+process.env.GATSBY_DATA_PIPELINE_SEARCH_PATH_LOCAL :
+process.env.GATSBY_2_0_MAP_TRACT_SEARCH_PATH;
diff --git a/client/src/data/copy/explore.tsx b/client/src/data/copy/explore.tsx
index 2429f59d..5ec6c40b 100644
--- a/client/src/data/copy/explore.tsx
+++ b/client/src/data/copy/explore.tsx
@@ -660,6 +660,14 @@ export const PRIORITIZATION_COPY = {
       bold: boldFn,
     }}
   />,
+  PRIO_GRANDFATHERED_LI: <FormattedMessage
+    id={'explore.map.page.side.panel.prio.copy.prio.grandfathered'}
+    defaultMessage={'This tract is considered disadvantaged because it was identified as disadvantaged in version 1.0 of this tool.'}
+    description={'Navigate to the explore the map page. Click on tract, the side panel may show that this tract is considered disadvantaged because it was identified as disadvantaged in version 1.0 of this tool.'}
+    values={{
+      bold: boldFn,
+    }}
+  />,
 };
 
 export const getPrioNBurdenCopy = (burdens:string) => {
@@ -764,6 +772,13 @@ export const DONUT_COPY = defineMessages({
     description: `Navigate to the explore the map page. Click on side panel, this copy may show up`,
   },
 });
+export const ISLAND_COPY = defineMessages({
+  LOW_INC: {
+    id: 'explore.map.page.side.panel.island.copy.low.income',
+    defaultMessage: 'Low income',
+    description: `Navigate to the explore the map page. Click on side panel, this copy may show up`,
+  },
+});
 
 export const COMMUNITY = {
   OF_FOCUS:
AND is located in a U.S. Territory.", "description": "Navigate to the explore the map page. Click on tract, The side panel will show This tract is considered disadvantaged. It is an island territory that meets an adjusted low income threshold."
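Both `.env` files and `constants.tsx` now resolve the tract search index the same way the score tiles are resolved: a local pipeline path when `DATA_SOURCE` is `local`, otherwise the CDN copy under `data-versions/2.0/data/score/search/tracts.json`. A sketch of consuming that file, assuming (as the backend test later in this diff suggests) it is an array of records with `GEOID10`, `INTPTLAT10`, and `INTPTLON10` fields; the path and example GEOID below are illustrative:

```python
import json

# Hypothetical local copy written by the post-score step.
SEARCH_FILE = "data_pipeline/data/score/search/tracts.json"

with open(SEARCH_FILE, encoding="utf-8") as f:
    tracts = json.load(f)

# Map each tract to its internal point so a search hit can center the map.
centers = {
    t["GEOID10"]: (float(t["INTPTLAT10"]), float(t["INTPTLON10"]))
    for t in tracts
}

print(centers.get("01001020100"))  # illustrative GEOID, not a real lookup
```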
diff --git a/client/src/intl/es.json b/client/src/intl/es.json index 7bd2b14d..e6665a9c 100644 --- a/client/src/intl/es.json +++ b/client/src/intl/es.json @@ -213,6 +213,7 @@ "explore.map.page.side.panel.demo.two.or.more": "Dos o más razas", "explore.map.page.side.panel.demo.white": "Personas de raza blanca", "explore.map.page.side.panel.donut.copy.adj.low.income": "Bajos ingresos ajustados", + "explore.map.page.side.panel.island.copy.low.income": "Bajos ingresos", "explore.map.page.side.panel.donut.copy.complete.surround": "completamente rodeada", "explore.map.page.side.panel.exceed.burden.answer.no": "No", "explore.map.page.side.panel.exceed.burden.answer.yes": "Sí", @@ -356,6 +357,7 @@ "explore.map.page.side.panel.prio.copy.prio.akus": "Los {numAKpoints} pueblos nativos de Alaska y las {numUSpoints} tribus de esta zona que están reconocidas a nivel federal también se consideran desfavorecidos.", "explore.map.page.side.panel.prio.copy.prio.anv": "Los {numAKpoints} pueblos nativos de Alaska y las tribus de esta zona que están reconocidas a nivel federal {also} se consideran desfavorecidos.", "explore.map.page.side.panel.prio.copy.prio.donut": "Este distrito censal se considera desfavorecido. Está rodeado de distritos censales desfavorecidos Y cumple con el umbral ajustado de bajos ingresos. El ajuste no corresponde a ninguna de las categorías.", + "explore.map.page.side.panel.prio.copy.prio.grandfathered": "Este distrito censal se considera desfavorecido porque fue identificado como desfavorecido en la versión 1.0 de esta herramienta.", "explore.map.page.side.panel.prio.copy.prio.frt": "Las tierras de las tribus reconocidas a nivel federal que cubren {amount} de esta extensión se consideran {also} desfavorecidas.", "explore.map.page.side.panel.prio.copy.prio.frt.n.points": "Los {numPoints} que son tribus reconocidas a nivel federal en este distrito censal se consideran {also} desfavorecidos.", "explore.map.page.side.panel.prio.copy.prio.n.burden": "Este distrito censal se considera desfavorecido porque cumple con el umbral de carga Y con el umbral socioeconómico asociado.", diff --git a/client/tsconfig.json b/client/tsconfig.json index 09b97a00..7064fdae 100644 --- a/client/tsconfig.json +++ b/client/tsconfig.json @@ -2,7 +2,7 @@ "compilerOptions": { "module": "commonjs", "target": "es6", - "jsx": "preserve", + "jsx": "react", "lib": ["dom", "es2015", "es2017", "es2019"], "strict": true, "noEmit": true, diff --git a/data/data-pipeline/data_pipeline/comparator.py b/data/data-pipeline/data_pipeline/comparator.py index 860be7bb..97e512c3 100644 --- a/data/data-pipeline/data_pipeline/comparator.py +++ b/data/data-pipeline/data_pipeline/comparator.py @@ -17,6 +17,7 @@ pd.set_option("display.width", 10000) pd.set_option("display.colheader_justify", "left") result_text = [] +WORKING_PATH = constants.TMP_PATH / "Comparator" / "Score" def _add_text(text: str): @@ -38,7 +39,12 @@ def _get_result_doc() -> str: def _read_from_file(file_path: Path): - """Read a CSV file into a Dataframe.""" + """ + Read a CSV file into a Dataframe. + + Args: + file_path (Path): the path of the file to read + """ if not file_path.is_file(): logger.error( f"- No score file exists at {file_path}. " @@ -53,6 +59,219 @@ def _read_from_file(file_path: Path): ).sort_index() +def _add_tract_list(tract_list: list[str]): + """ + Adds a list of tracts to the output grouped by Census state. 
+
+    Args:
+        tract_list (list[str]): a list of tracts
+    """
+    if len(tract_list) > 0:
+        _add_text("Those tracts are:\n")
+        # First extract the Census states/territories
+        states_by_tract = []
+        for tract in tract_list:
+            states_by_tract.append(tract[0:2])
+        states = set(states_by_tract)
+        # Now output the grouped tracts
+        for state in sorted(states):
+            tracts_for_state = [
+                item for item in tract_list if item.startswith(state)
+            ]
+            _add_text(
+                f"\t{state} = {len(tracts_for_state)} = {', '.join(tracts_for_state)}\n"
+            )
+
+
+def _compare_score_columns(prod_df: pd.DataFrame, local_df: pd.DataFrame):
+    """
+    Compare the columns between scores.
+
+    Args:
+        prod_df (pd.DataFrame): the production score
+        local_df (pd.DataFrame): the local score
+    """
+    log_info("Comparing columns (production vs local)")
+    _add_text("## Columns\n")
+    local_score_df_columns = sorted(local_df.columns.array.tolist())
+    production_score_df_columns = sorted(prod_df.columns.array.tolist())
+    extra_cols_in_local = set(local_score_df_columns) - set(
+        production_score_df_columns
+    )
+    extra_cols_in_prod = set(production_score_df_columns) - set(
+        local_score_df_columns
+    )
+    if len(extra_cols_in_local) == 0 and len(extra_cols_in_prod) == 0:
+        _add_text("* There are no differences in the column names.\n")
+    else:
+        _add_text(
+            f"* There are {len(extra_cols_in_local)} columns that were added as compared to the production score."
+        )
+        if len(extra_cols_in_local) > 0:
+            _add_text(f" Those columns are:\n{extra_cols_in_local}")
+        _add_text(
+            f"\n* There are {len(extra_cols_in_prod)} columns that were removed as compared to the production score."
+        )
+        if len(extra_cols_in_prod) > 0:
+            _add_text(f" Those columns are:\n{extra_cols_in_prod}")
+
+
+def _compare_score_results(prod_df: pd.DataFrame, local_df: pd.DataFrame):
+    """
+    Compare the scores.
+
+    Args:
+        prod_df (pd.DataFrame): the production score
+        local_df (pd.DataFrame): the local score
+    """
+    log_info("Comparing dataframe contents (production vs local)")
+    _add_text("\n\n## Scores\n")
+
+    production_row_count = len(prod_df.index)
+    local_row_count = len(local_df.index)
+
+    # Tract comparison
+    _add_text(
+        f"* The production score has {production_row_count:,} census tracts, and the freshly calculated score has {local_row_count:,}."
+    )
+    if production_row_count == local_row_count:
+        _add_text(" They match!\n")
+    else:
+        _add_text(" They don't match. The differences are:\n")
+        _add_text(
+            " * New tracts added to the local score are:\n"
+            f"{local_df.index.difference(prod_df.index).to_list()}"
+            "\n * Tracts removed from the local score are:\n"
+            f"{prod_df.index.difference(local_df.index).to_list()}"
+            "\n"
+        )
+
+    # Population comparison
+    production_total_population = prod_df[field_names.TOTAL_POP_FIELD].sum()
+    local_total_population = local_df[field_names.TOTAL_POP_FIELD].sum()
+
+    _add_text(
+        f"* The total population in all census tracts in the production score is {production_total_population:,}. "
+        f"The total population in all census tracts locally is {local_total_population:,}. "
+    )
+    _add_text(
+        "They match!\n"
+        if production_total_population == local_total_population
+        else f"The difference is {abs(production_total_population - local_total_population):,}.\n"
+    )
+
+    dacs_query = f"`{field_names.FINAL_SCORE_N_BOOLEAN}` == True"
+    production_disadvantaged_tracts_df = prod_df.query(dacs_query)
+    local_disadvantaged_tracts_df = local_df.query(dacs_query)
+
+    production_disadvantaged_tracts_set = set(
+        production_disadvantaged_tracts_df.index.array
+    )
+    local_disadvantaged_tracts_set = set(
+        local_disadvantaged_tracts_df.index.array
+    )
+
+    production_pct_of_population_represented = (
+        production_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
+        / production_total_population
+    )
+    local_pct_of_population_represented = (
+        local_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
+        / local_total_population
+    )
+
+    # DACS comparison
+    _add_text(
+        f"* There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score representing"
+        f" {production_pct_of_population_represented:.1%} of the total population, and {len(local_disadvantaged_tracts_set):,}"
+    )
+    _add_text(
+        f" in the locally generated score representing {local_pct_of_population_represented:.1%} of the total population."
+    )
+    _add_text(
+        " The number of tracts matches!\n "
+        if len(production_disadvantaged_tracts_set)
+        == len(local_disadvantaged_tracts_set)
+        else f" The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set))} tract(s).\n "
+    )
+
+    removed_tracts = production_disadvantaged_tracts_set.difference(
+        local_disadvantaged_tracts_set
+    )
+    added_tracts = local_disadvantaged_tracts_set.difference(
+        production_disadvantaged_tracts_set
+    )
+    _add_text(
+        f"* There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the production score that are not disadvantaged in the locally"
+        f" generated score (i.e. disadvantaged tracts that were removed by the new score). "
+    )
+    _add_tract_list(removed_tracts)
+
+    _add_text(
+        f"\n* There are {len(added_tracts):,} tract(s) marked as disadvantaged in the locally generated score that are not disadvantaged in the"
+        f" production score (i.e. disadvantaged tracts that were added by the new score). "
+    )
+    _add_tract_list(added_tracts)
+
+    # Grandfathered tracts from v1.0
+    grandfathered_tracts = local_df.loc[
+        local_df[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0]
+    ].index
+    if len(grandfathered_tracts) > 0:
+        _add_text(
+            f"* This includes {len(grandfathered_tracts)} grandfathered tract(s) from v1.0 scoring."
+        )
+        _add_tract_list(grandfathered_tracts)
+    else:
+        _add_text("* There are NO grandfathered tracts from v1.0 scoring.\n")
+
+
+def _generate_delta(prod_df: pd.DataFrame, local_df: pd.DataFrame):
+    """
+    Generate a delta of the scores.
+
+    Args:
+        prod_df (pd.DataFrame): the production score
+        local_df (pd.DataFrame): the local score
+    """
+    _add_text("\n## Delta\n")
+    # First, make the columns of the two dataframes match so that they can be compared
+    local_score_df_columns = local_df.columns.array.tolist()
+    production_score_df_columns = prod_df.columns.array.tolist()
+    extra_cols_in_local = set(local_score_df_columns) - set(
+        production_score_df_columns
+    )
+    extra_cols_in_prod = set(production_score_df_columns) - set(
+        local_score_df_columns
+    )
+    trimmed_prod_df = prod_df.drop(extra_cols_in_prod, axis=1)
+    trimmed_local_df = local_df.drop(extra_cols_in_local, axis=1)
+    try:
+
+        comparison_results_df = trimmed_prod_df.compare(
+            trimmed_local_df, align_axis=1, keep_shape=False, keep_equal=False
+        ).rename({"self": "Production", "other": "Local"}, axis=1, level=1)
+
+        _add_text(
+            "* I compared all values across all census tracts. Note this ignores any columns that have been added or removed."
+            f" There are {len(comparison_results_df.index):,} tracts with at least one difference.\n"
+        )
+
+        comparison_path = WORKING_PATH / "deltas.csv"
+        comparison_results_df.to_csv(path_or_buf=comparison_path)
+
+        _add_text(f"* Wrote comparison results to {comparison_path}")
+
+    except ValueError as e:
+        _add_text(
+            "* I could not run a full comparison. This is likely because there are column or index (census tract) differences."
+            " Please examine the logs or run the score comparison locally to find out more.\n"
+        )
+        _add_text(
+            f"Encountered an exception while performing the comparison: {repr(e)}\n"
+        )
+
+
 @click.group()
 def cli():
     """
@@ -101,7 +320,6 @@ def compare_score(
     """
     FLOAT_ROUNDING_PLACES = 2
-    WORKING_PATH = constants.TMP_PATH / "Comparator" / "Score"
 
     log_title("Compare Score", "Compare production score to local score")
 
@@ -132,188 +350,21 @@ def compare_score(
     production_score_df = production_score_df.round(FLOAT_ROUNDING_PLACES)
     local_score_df = local_score_df.round(FLOAT_ROUNDING_PLACES)
 
-    local_score_df_columns = sorted(local_score_df.columns.array.tolist())
-    production_score_df_columns = sorted(
-        production_score_df.columns.array.tolist()
-    )
-    extra_cols_in_local = set(local_score_df_columns) - set(
-        production_score_df_columns
-    )
-    extra_cols_in_prod = set(production_score_df_columns) - set(
-        local_score_df_columns
-    )
-
     _add_text("# Score Comparison Summary\n")
     _add_text(
         f"Hi! I'm the Score Comparator. I compared the score in production (version {compare_to_version}) to the"
         " locally calculated score. Here are the results:\n\n"
     )
 
-    #####################
-    # Compare the columns
-    #####################
-    log_info("Comparing columns (production vs local)")
-    _add_text("## Columns\n")
-    if len(extra_cols_in_local) == 0 and len(extra_cols_in_prod) == 0:
-        _add_text("* There are no differences in the column names.\n")
-    else:
-        _add_text(
-            f"* There are {len(extra_cols_in_local)} columns that were added as compared to the production score."
-        )
-        if len(extra_cols_in_local) > 0:
-            _add_text(f" Those colums are:\n{extra_cols_in_local}")
-        _add_text(
-            f"\n* There are {len(extra_cols_in_prod)} columns that were removed as compared to the production score."
-        )
-        if len(extra_cols_in_prod) > 0:
-            _add_text(f" Those colums are:\n{extra_cols_in_prod}")
-
-    ####################
-    # Compare the scores
-    ####################
-    log_info("Comparing dataframe contents (production vs local)")
-    _add_text("\n\n## Scores\n")
-
-    production_row_count = len(production_score_df.index)
-    local_row_count = len(local_score_df.index)
-
-    # Tract comparison
-    _add_text(
-        f"* The production score has {production_row_count:,} census tracts, and the freshly calculated score has {local_row_count:,}."
-    )
-    if production_row_count == local_row_count:
-        _add_text(" They match!\n")
-    else:
-        _add_text(" They don't match. The differences are:\n")
-        _add_text(
-            " * New tracts added to the local score are:\n"
-            f"{local_score_df.index.difference(production_score_df.index).to_list()}"
-            "\n * Tracts removed from the local score are:\n"
-            f"{production_score_df.index.difference(local_score_df.index).to_list()}"
-            "\n"
-        )
-
-    # Population comparison
-    production_total_population = production_score_df[
-        field_names.TOTAL_POP_FIELD
-    ].sum()
-    local_total_population = local_score_df[field_names.TOTAL_POP_FIELD].sum()
-
-    _add_text(
-        f"* The total population in all census tracts in the production score is {production_total_population:,}. "
-        f"The total population in all census tracts locally is {local_total_population:,}. "
-    )
-    _add_text(
-        "They match!\n"
-        if production_total_population == local_total_population
-        else f"The difference is {abs(production_total_population - local_total_population):,}.\n"
-    )
-
-    dacs_query = f"`{field_names.FINAL_SCORE_N_BOOLEAN}` == True"
-    production_disadvantaged_tracts_df = production_score_df.query(dacs_query)
-    local_disadvantaged_tracts_df = local_score_df.query(dacs_query)
-
-    production_disadvantaged_tracts_set = set(
-        production_disadvantaged_tracts_df.index.array
-    )
-    local_disadvantaged_tracts_set = set(
-        local_disadvantaged_tracts_df.index.array
-    )
-
-    production_pct_of_population_represented = (
-        production_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
-        / production_total_population
-    )
-    local_pct_of_population_represented = (
-        local_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
-        / local_total_population
-    )
-
-    # DACS comparison
-    _add_text(
-        f"* There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score representing"
-        f" {production_pct_of_population_represented:.1%} of the total population, and {len(local_disadvantaged_tracts_set):,}"
-    )
-    _add_text(
-        f" in the locally generated score representing {local_pct_of_population_represented:.1%} of the total population."
-    )
-    _add_text(
-        " The number of tracts match!\n "
-        if len(production_disadvantaged_tracts_set)
-        == len(local_disadvantaged_tracts_set)
-        else f" The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set))} tract(s).\n "
-    )
-
-    removed_tracts = production_disadvantaged_tracts_set.difference(
-        local_disadvantaged_tracts_set
-    )
-    added_tracts = local_disadvantaged_tracts_set.difference(
-        production_disadvantaged_tracts_set
-    )
-    _add_text(
-        f"* There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the production score that are not disadvantaged in the locally"
-        f" generated score (i.e. disadvantaged tracts that were removed by the new score). "
-    )
-    if len(removed_tracts) > 0:
-        _add_text(f"Those tracts are:\n{removed_tracts}")
-
-    _add_text(
-        f"\n* There are {len(added_tracts):,} tract(s) marked as disadvantaged in the locally generated score that are not disadvantaged in the"
-        f" production score (i.e. disadvantaged tracts that were added by the new score). "
-    )
-    if len(added_tracts) > 0:
-        _add_text(f"Those tracts are:\n{added_tracts}\n")
-
-    # Grandfathered tracts from v1.0
-    grandfathered_tracts = local_score_df.loc[
-        local_score_df[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0]
-    ].index
-    if len(grandfathered_tracts) > 0:
-        _add_text(
-            f"* This includes {len(grandfathered_tracts)} grandfathered tract(s) from v1.0 scoring. They are:\n"
-            f"{grandfathered_tracts.to_list()}\n"
-        )
-    else:
-        _add_text("* There are NO grandfathered tracts from v1.0 scoring.\n")
-
-    ################
-    # Create a delta
-    ################
-    _add_text("\n## Delta\n")
-    # First we make the columns on two dataframes to be the same to be able to compare
-    trimmed_prod_df = production_score_df.drop(extra_cols_in_prod, axis=1)
-    trimmed_local_df = local_score_df.drop(extra_cols_in_local, axis=1)
-    try:
-
-        comparison_results_df = trimmed_prod_df.compare(
-            trimmed_local_df, align_axis=1, keep_shape=False, keep_equal=False
-        ).rename({"self": "Production", "other": "Local"}, axis=1, level=1)
-
-        _add_text(
-            "* I compared all values across all census tracts. Note this ignores any columns that have been added or removed."
-            f" There are {len(comparison_results_df.index):,} tracts with at least one difference.\n"
-        )
-
-        comparison_path = WORKING_PATH / "deltas.csv"
-        comparison_results_df.to_csv(path_or_buf=comparison_path)
-
-        _add_text(f"* Wrote comparison results to {comparison_path}")
-
-    except ValueError as e:
-        _add_text(
-            "* I could not run a full comparison. This is likely because there are column or index (census tract) differences."
-            " Please examine the logs or run the score comparison locally to find out more.\n"
-        )
-        _add_text(
-            f"Encountered an exception while performing the comparison: {repr(e)}\n"
-        )
+    _compare_score_columns(production_score_df, local_score_df)
+    _compare_score_results(production_score_df, local_score_df)
+    _generate_delta(production_score_df, local_score_df)
 
     result_doc = _get_result_doc()
     print(result_doc)
 
     # Write the report
     summary_path = WORKING_PATH / "comparison-summary.md"
-
     with open(summary_path, "w", encoding="utf-8") as f:
         f.write(result_doc)
     log_info(f"Wrote comparison summary to {summary_path}")
diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
index 964a76f3..ac53d1f9 100644
--- a/data/data-pipeline/data_pipeline/etl/score/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -282,6 +282,7 @@ TILES_SCORE_COLUMNS = {
     # The NEW final score value INCLUDES the adjacency index.
     field_names.FINAL_SCORE_N_BOOLEAN: "SN_C",
     field_names.FINAL_SCORE_N_BOOLEAN_V1_0: "SN_C_V10",
+    field_names.GRANDFATHERED_N_COMMUNITIES_V1_0: "SN_GRAND",
     field_names.IS_TRIBAL_DAC: "SN_T",
     field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
     field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
@@ -346,6 +347,8 @@ TILES_SCORE_COLUMNS = {
     # These are the booleans for socioeconomic indicators
     ## this measures low income boolean
     field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED: "FPL200S",
+    # Percentile FPL 200 for islands only
+    field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE: "FPL200P",
     ## Low high school for t&wd
     field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "N_WKFC_EBSI",
     field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py b/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py
index 81a029a2..4450672f 100644
--- a/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py
@@ -132,7 +132,9 @@ def tile_data_expected():
 
 @pytest.fixture()
 def create_tile_score_data_input():
-    return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl")
+    return pd.read_pickle(
+        pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl"
+    )
 
 
 @pytest.fixture()
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/README.md b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/README.md
index 01ae8488..4c2693ee 100644
--- a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/README.md
+++ b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/README.md
@@ -1,23 +1,25 @@
-These files are used as inputs to unit tests. Some notes in their creation is below.
+# How to generate the sample data in this folder
+
+The sample data in this folder can be easily generated by debugging the `data_pipeline/etl/score/etl_score_post.py` file
+and exporting data using the debugger console. Examples of this exporting are below.
+
+## Why in pickle format?
+
+Exporting as a Pickle file keeps all the metadata about the columns, including the data types. If we were to export as CSV,
+we would need to hard-code the data types in the test fixtures for all the columns for the comparison to be correct.
+
+## Exporting the test data
+
+First, verify the code works as expected before exporting the data. You will not be able to inspect the data exports, as they are in binary.
+You will be using the debugger to export the data. Note that it is best to export a small subset of the data for faster test execution.
+
+### create_tile_data test
+1. Place a breakpoint in `data_pipeline/etl/score/etl_score_post.py` in the `transform` method right after the call to
+`_create_tile_data` and start the debugger running the Generate Post Score command (`generate-score-post`).
+1. Partially export the `output_score_county_state_merged_df` and `self.output_score_tiles_df` data to a pickle file once the debugger pauses
+at the breakpoint. Use these sample commands in the debugger console. Note that we are using head and tail to have territories in the sample data.
-### create_tile_data_expected.pkl
-1. Set a breakpoint in the `test_create_tile_data` method in `data_pipeline/etl/score/tests/test_score_post.py`
-after the call to `_create_tile_data` and debug the test.
-2. Extract a subset of the `output_tiles_df_actual` dataframe. Do not extract the whole score as the file
-will be too big and the test will run slow.
Also, you need to extract the same tracts that are in -the `create_tile_score_data_input.pkl` input data. For example, use the following command once the breakpoint is reached -to extract a few rows at the top and bottom of the score. This will some capture states and territories. ```python -import pandas as pd -pd.concat([output_tiles_df_actual.head(3), output_tiles_df_actual.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl') +pd.concat([output_score_county_state_merged_df.head(3), output_score_county_state_merged_df.tail(4)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl') +pd.concat([self.output_score_tiles_df.head(3), self.output_score_tiles_df.tail(4)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl') ``` - -### create_tile_score_data_input.pkl -1. Set a breakpoint in the transform method in `data_pipeline/etl/score/etl_score_post.py` before the call to -`_create_tile_data` and run the post scoring. -2. Extract a subset of the `output_score_county_state_merged_df` dataframe. Do not extract the whole score as the file -will be too big and the test will run slow. For example, use the following command once the breakpoint is reached -to extract a few rows at the top and bottom of the score. This will some capture states and territories. -```python -pd.concat([output_score_county_state_merged_df.head(3), output_score_county_state_merged_df.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl') -``` \ No newline at end of file diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl index 3257e33c..e01164df 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl index 8d21a5b9..3458d992 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py index d3c762c6..23114378 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py @@ -83,7 +83,9 @@ def test_create_score_data( ) -def test_create_tile_data(etl, create_tile_score_data_input, create_tile_data_expected): +def test_create_tile_data( + etl, create_tile_score_data_input, create_tile_data_expected +): output_tiles_df_actual = etl._create_tile_data(create_tile_score_data_input) pdt.assert_frame_equal( output_tiles_df_actual, @@ -158,8 +160,10 @@ def test_load_downloadable_zip(etl, monkeypatch, score_data_expected): def test_create_tract_search_data(census_geojson_sample_data: gpd.GeoDataFrame): # Sanity check assert len(census_geojson_sample_data) > 0 - - result = 
PostScoreETL()._create_tract_search_data(census_geojson_sample_data)
+
+    result = PostScoreETL()._create_tract_search_data(
+        census_geojson_sample_data
+    )
     assert isinstance(result, pd.DataFrame)
     assert not result.columns.empty
     columns = ["GEOID10", "INTPTLAT10", "INTPTLON10"]
diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py
index 9b548f47..57fd387f 100644
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@@ -200,6 +200,11 @@ CENSUS_DECENNIAL_MEDIAN_INCOME_2019 = (
 CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019 = f"Percentage households below 100% of federal poverty line in {DEC_DATA_YEAR}"
 CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = f"Percentage households below 200% of federal poverty line in {DEC_DATA_YEAR}"
 CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = f"{CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019}, adjusted and imputed"
+CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE = (
+    CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019
+    + ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+    + PERCENTILE_FIELD_SUFFIX
+)
 CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019 = f"Percent individuals age 25 or over with less than high school degree in {DEC_DATA_YEAR}"
 CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019 = (
     f"Unemployment (percent) in {DEC_DATA_YEAR}"
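The new percentile constant above is built purely by string concatenation, and `TILES_SCORE_COLUMNS` earlier in this diff maps the resulting long name to the compact tile code `FPL200P`. A sketch of that composition with the two suffix literals stubbed in (the real literals live in `field_names.py`; the values below are illustrative assumptions only):

```python
DEC_DATA_YEAR = "2019"  # illustrative; field_names.py defines the real value

CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = (
    f"Percentage households below 200% of federal poverty line in {DEC_DATA_YEAR}"
)
CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = (
    f"{CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019}, adjusted and imputed"
)

# Stub suffixes: stand-ins for the real field_names literals.
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
PERCENTILE_FIELD_SUFFIX = " (percentile)"

CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE = (
    CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019
    + ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
    + PERCENTILE_FIELD_SUFFIX
)

print(CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE)
```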
diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py
index 3a42879e..63f1a163 100644
--- a/data/data-pipeline/data_pipeline/score/score_narwhal.py
+++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py
@@ -1053,6 +1053,8 @@ class ScoreNarwhal(Score):
 
         # Now we set the low income flag only for territories, but we need to rank them
         # with all other tracts.
+        # Note: This specific method call will generate the
+        # CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE column in the score.
         (
             self.df,
             island_areas_poverty_200_criteria_field_name,
diff --git a/data/data-pipeline/poetry.lock b/data/data-pipeline/poetry.lock
index 51b54284..6141851e 100644
--- a/data/data-pipeline/poetry.lock
+++ b/data/data-pipeline/poetry.lock
@@ -5053,4 +5053,4 @@ test = ["mypy", "pre-commit", "pytest", "pytest-asyncio", "websockets (>=10.0)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "bdce0f2249243262fbfd1e73df3f2525c8ca624df6da458480636a19db26c4fe"
+content-hash = "04639d2eaf33218ba4fef190f76620b00fb2285d86d58458511d85dafd304658"
diff --git a/data/data-pipeline/pyproject.toml b/data/data-pipeline/pyproject.toml
index e6fff8e8..f7122078 100644
--- a/data/data-pipeline/pyproject.toml
+++ b/data/data-pipeline/pyproject.toml
@@ -60,6 +60,11 @@ seaborn = "^0.11.2"
 papermill = "^2.3.4"
 jupyterlab = "^3.6.7"
 
+
+[tool.poetry.group.test.dependencies]
+openpyxl = "^3.1.5"
+pytest-snapshot = "^0.9.0"
+
 [build-system]
 build-backend = "poetry.core.masonry.api"
 requires = ["poetry-core>=1.0.0"]
diff --git a/data/data-pipeline/tox.ini b/data/data-pipeline/tox.ini
deleted file mode 100644
index ebf462eb..00000000
--- a/data/data-pipeline/tox.ini
+++ /dev/null
@@ -1,27 +0,0 @@
-[tox]
-# required because we use pyproject.toml
-isolated_build = true
-envlist = py310, lint, checkdeps, pytest
-# only checks python versions installed locally
-skip_missing_interpreters = true
-
-[testenv:lint]
-deps = pytest
-# lints python code in src and tests
-commands = black data_pipeline
-           flake8 data_pipeline
-           pylint data_pipeline
-
-[testenv:checkdeps]
-# checks the dependencies for security vulnerabilities and open source licenses
-allowlist_externals = bash
-commands = pip install -U wheel
-           # known issue: https://github.com/pyupio/safety/issues/364
-           # jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
-           safety check --ignore 51457 --ignore 44715 --ignore 70612
-           bash scripts/run-liccheck.sh
-
-[testenv:pytest]
-# Run tests
-deps = pytest
-commands = pytest --full-trace
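With `tox.ini` removed, the lint, dependency-safety, and test environments it defined now exist only as explicit steps in `pr_backend.yml`. For contributors who used to run `tox` locally, a rough Python equivalent of that job's check sequence (assumes a working Poetry environment; the commands mirror the workflow steps above):

```python
import subprocess

# Mirrors the code-quality job in pr_backend.yml
# (formerly the tox lint/checkdeps/pytest environments).
CHECKS = [
    ["poetry", "run", "black", "--check", "data_pipeline/"],
    ["poetry", "run", "flake8", "-v", "data_pipeline/"],
    ["poetry", "run", "pylint", "data_pipeline/"],
    ["poetry", "run", "safety", "check",
     "--ignore", "51457", "--ignore", "44715", "--ignore", "70612"],
    ["poetry", "run", "pytest", "data_pipeline/"],
]

for cmd in CHECKS:
    print("+", " ".join(cmd))
    subprocess.run(cmd, check=True)  # fail fast, like the CI job
```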