Merge branch 'main' into nmb/backend-rollout-plans

Neil Martinsen-Burrell 2024-12-10 10:56:46 -06:00
commit a167e3b35b
32 changed files with 713 additions and 294 deletions

View file

@ -1,44 +0,0 @@
# This runs tox in the two directories under data
name: Data Checks
on:
pull_request:
paths:
- "data/**"
jobs:
data-pipeline:
runs-on: ubuntu-latest
defaults:
run:
working-directory: data/data-pipeline
strategy:
matrix:
# checks all of the versions allowed in pyproject.toml
python-version: [3.10.15]
steps:
# installs Python
# one execution of the tests per version listed above
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade wheel
run: pip install -U wheel
- name: Print variables to help debug
uses: hmarr/debug-action@v2
- name: Load cached Poetry installation
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/data-checks.yml') }}
- name: Install poetry
uses: snok/install-poetry@v1
- name: Print Poetry settings
run: poetry show -v
- name: Install dependencies
run: poetry install
# TODO: investigate why caching layer started failing.
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
- name: Run tox
run: poetry run tox

View file

@ -1,5 +1,8 @@
name: Deploy Backend Main
on: workflow_dispatch
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }}
J40_VERSION_LABEL_STRING: ${{ vars.SCORE_VERSION }}
@ -47,15 +50,27 @@ jobs:
sudo apt-get update
sudo apt-get -y install gdal-bin
ogrinfo --version
- name: Cleanup Data
run: |
poetry run python3 -m data_pipeline.application data-cleanup
- name: Get Census Data
run: |
poetry run python3 data_pipeline/application.py pull-census-data -s aws
poetry run python3 -m data_pipeline.application census-data-download
- name: Extract Data Sources
run: |
poetry run python3 -m data_pipeline.application extract-data-sources
- name: Run ETL
run: |
poetry run python3 -m data_pipeline.application etl-run
- name: Generate Score
run: |
poetry run python3 data_pipeline/application.py score-full-run
poetry run python3 -m data_pipeline.application score-run
- name: Score Compare
run: |
poetry run python3 -m data_pipeline.comparator compare-score
- name: Generate Score Post
run: |
poetry run python3 data_pipeline/application.py generate-score-post -s aws
poetry run python3 -m data_pipeline.application generate-score-post
- name: Confirm we generated the version of the score we think we did
if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' || env.J40_VERSION_LABEL_STRING == 'test' }}
run: |
@ -66,7 +81,7 @@ jobs:
grep -v "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null
- name: Generate Score Geo
run: |
poetry run python3 data_pipeline/application.py geo-score
poetry run python3 -m data_pipeline.application geo-score
- name: Run smoketest for 1.0
if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }}
run: |
@ -76,6 +91,7 @@ jobs:
poetry run s4cmd put ./data_pipeline/data/score/csv/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/csv --sync-check --recursive --force
poetry run s4cmd put ./data_pipeline/files/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/downloadable --sync-check --recursive --force
poetry run s4cmd put ./data_pipeline/data/score/downloadable/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/downloadable --sync-check --recursive --force
poetry run s4cmd put ./data_pipeline/data/score/search/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/search --sync-check --recursive --force
- name: Deploy 1.0 score post
if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }}
run: |
@ -112,7 +128,7 @@ jobs:
tippecanoe -v
- name: Generate Tiles
run: |
poetry run python3 data_pipeline/application.py generate-map-tiles
poetry run python3 -m data_pipeline.application generate-map-tiles
- name: Deploy Map to Geoplatform AWS
run: |
poetry run s4cmd put ./data_pipeline/data/score/geojson/ s3://${{secrets.S3_DATA_BUCKET}}/data-versions/${{env.J40_VERSION_LABEL_STRING}}/data/score/geojson --sync-check --recursive --force --delete-removed --num-threads=250

View file

@ -4,6 +4,9 @@ on:
branches: [main]
paths:
- "client/**/*"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-latest

.github/workflows/pr_backend.yml (vendored, new file, 166 lines)
View file

@ -0,0 +1,166 @@
name: Pull Request Backend
on:
pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
jobs:
# JOB to run change detection
detect-be-changes:
name: Detect backend changes
runs-on: ubuntu-latest
# Required permissions
permissions:
pull-requests: read
# Set job outputs to values from filter step
outputs:
backend: ${{ steps.filter.outputs.backend }}
steps:
# For pull requests it's not necessary to check out the code
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
backend:
- 'data/**'
- '.github/workflows/pr_backend.yml'
code-quality-checks:
name: Code quality checks and tests - ${{ matrix.python-version }}
needs: detect-be-changes
if: ${{ needs.detect-be-changes.outputs.backend == 'true' }}
runs-on: ubuntu-latest
defaults:
run:
working-directory: data/data-pipeline
strategy:
matrix:
python-version: ['3.10']
environment: PR
steps:
- name: Checkout source
uses: actions/checkout@v4
- name: Print variables to help debug
uses: hmarr/debug-action@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Load cached Poetry installation
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
- name: Install poetry
uses: snok/install-poetry@v1
- name: Install dependencies
run: poetry install
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
- name: Check code is formatted
run: poetry run black --check data_pipeline/
- name: Check code style consistency
run: poetry run flake8 -v data_pipeline/
- name: Run static code analysis
run: poetry run pylint data_pipeline/
- name: Check library safety
run: poetry run safety check --ignore 51457 --ignore 44715 --ignore 70612
- name: Run unit tests
run: |
poetry run pytest data_pipeline/
generate-score-tiles:
name: Score and tile generation - ${{ matrix.python-version }}
needs: detect-be-changes
if: ${{ needs.detect-be-changes.outputs.backend == 'true' }}
runs-on: ubuntu-latest
defaults:
run:
working-directory: data/data-pipeline
strategy:
matrix:
python-version: ['3.10']
environment: PR
steps:
- name: Checkout source
uses: actions/checkout@v4
- name: Print variables to help debug
uses: hmarr/debug-action@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Load cached Poetry installation
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: env-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('.github/workflows/deploy_backend_main.yml') }}
- name: Install poetry
uses: snok/install-poetry@v1
- name: Print Poetry settings
run: poetry show -v
- name: Install dependencies
run: poetry add s4cmd && poetry install
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
- name: Install GDAL/ogr2ogr
run: |
sudo apt-get update
sudo apt-get -y install gdal-bin
ogrinfo --version
- name: Cleanup Data
run: |
poetry run python3 -m data_pipeline.application data-cleanup
- name: Get Census Data
run: |
poetry run python3 -m data_pipeline.application census-data-download
- name: Extract Data Sources
run: |
poetry run python3 -m data_pipeline.application extract-data-sources
- name: Run ETL
run: |
poetry run python3 -m data_pipeline.application etl-run
- name: Generate Score
run: |
poetry run python3 -m data_pipeline.application score-run
- name: Score Compare
run: |
poetry run python3 -m data_pipeline.comparator compare-score
- name: Generate Score Post
run: |
poetry run python3 -m data_pipeline.application generate-score-post
- name: Confirm we generated the version of the score we think we did
if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' || env.J40_VERSION_LABEL_STRING == 'test' }}
run: |
grep "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null
- name: Confirm we generated the version of the score we think we did
if: ${{ env.J40_VERSION_LABEL_STRING == '2.0' || env.J40_VERSION_LABEL_STRING == 'beta' }}
run: |
grep -v "Identified as disadvantaged due to tribal overlap" data_pipeline/data/score/downloadable/* > /dev/null
- name: Generate Score Geo
run: |
poetry run python3 -m data_pipeline.application geo-score
- name: Run smoketest for 1.0
if: ${{ env.J40_VERSION_LABEL_STRING == '1.0' }}
run: |
poetry run pytest data_pipeline/ -m smoketest
- name: Set timezone for tippecanoe
uses: szenius/set-timezone@v2.0
with:
timezoneLinux: "America/Los_Angeles"
- name: Get tippecanoe
run: |
sudo apt-get install -y software-properties-common libsqlite3-dev zlib1g-dev
sudo apt-add-repository -y ppa:git-core/ppa
sudo mkdir -p /tmp/tippecanoe-src
sudo git clone https://github.com/mapbox/tippecanoe.git /tmp/tippecanoe-src
- name: Make tippecanoe
working-directory: /tmp/tippecanoe-src
run: |
sudo /usr/bin/bash -c make
mkdir -p /usr/local/bin
cp tippecanoe /usr/local/bin/tippecanoe
tippecanoe -v
- name: Generate Tiles
run: |
poetry run python3 -m data_pipeline.application generate-map-tiles

.github/workflows/pr_frontend.yml (vendored, new file, 66 lines)
View file

@ -0,0 +1,66 @@
name: Pull Request Frontend
on:
pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
jobs:
# JOB to run change detection
detect-fe-changes:
name: Detect frontend changes
runs-on: ubuntu-latest
# Required permissions
permissions:
pull-requests: read
# Set job outputs to values from filter step
outputs:
frontend: ${{ steps.filter.outputs.frontend }}
steps:
# For pull requests it's not necessary to check out the code
- uses: dorny/paths-filter@v3
id: filter
with:
filters: |
frontend:
- 'client/**'
- '.github/workflows/pr_frontend.yml'
frontend-build:
name: Frontend build - ${{ matrix.node-version }}
needs: detect-fe-changes
if: ${{ needs.detect-fe-changes.outputs.frontend == 'true' }}
runs-on: ubuntu-latest
environment: PR
defaults:
run:
working-directory: client
strategy:
matrix:
node-version: [14.x]
steps:
- uses: actions/checkout@v4
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v2
with:
node-version: ${{ matrix.node-version }}
- name: Install
run: npm ci
- name: Build
run: npm run build --if-present
env:
# See the client readme for more info on environment variables:
# https://github.com/usds/justice40-tool/blob/main/client/README.md
DATA_SOURCE: cdn
# TODO: Update main URL when either is back up
SITE_URL: "${{ secrets.SITE_URL }}"
MAPBOX_STYLES_READ_TOKEN: "${{ secrets.MAPBOX_STYLES_READ_TOKEN }}"
- name: Get directory contents
run: ls -la public
- name: Lint
run: npm run lint
# Disabling for now due to jsonlint - TODO: put this back
# - name: License Check
# run: npm run licenses
- name: Test
run: npm test
# - name: Check for security vulnerabilities
# run: npm audit --production

View file

@ -12,6 +12,9 @@ GATSBY_DATA_PIPELINE_TRIBAL_PATH=data-pipeline/data/tribal
GATSBY_BETA_SCORE_PATH = data-versions/beta/data/score
GATSBY_2_0_SCORE_PATH = data-versions/2.0/data/score
GATSBY_DATA_PIPELINE_SEARCH_PATH_LOCAL = data_pipeline/data/score/search/tracts.json
GATSBY_2_0_MAP_TRACT_SEARCH_PATH = data-versions/2.0/data/score/search/tracts.json
GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_XLS=downloadable/beta-communities.xlsx
GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_CSV=downloadable/beta-communities.csv
GATSBY_FILE_DL_PATH_BETA_SHAPE_FILE_ZIP=downloadable/beta-shapefile-codebook.zip

View file

@ -10,6 +10,8 @@ GATSBY_DATA_PIPELINE_TRIBAL_PATH=data-pipeline/data/tribal
GATSBY_BETA_SCORE_PATH = data-versions/beta/data/score
GATSBY_2_0_SCORE_PATH = data-versions/2.0/data/score
GATSBY_2_0_MAP_TRACT_SEARCH_PATH = data-versions/2.0/data/score/search/tracts.json
GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_XLS=downloadable/beta-communities.xlsx
GATSBY_FILE_DL_PATH_BETA_COMMUNITIES_LIST_CSV=downloadable/beta-communities.csv
GATSBY_FILE_DL_PATH_BETA_SHAPE_FILE_ZIP=downloadable/beta-shapefile-codebook.zip

View file

@ -1,7 +1,7 @@
[![Staging](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_staging.yml/badge.svg)](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_staging.yml)
[![Production](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_main.yml/badge.svg)](https://github.com/usds/justice40-tool/actions/workflows/deploy_fe_main.yml)
# Justice40 Client
# Justice40 Clientss
This README contains the following content:
@ -213,8 +213,6 @@ When developing, to use a flag:
5. Install the [CORS chrome extension](https://chrome.google.com/webstore/detail/allow-cors-access-control/lhobafahddgcelffkeicbaginigeejlf?hl=en) in the browser that is launched by the debugger.
6. Set breakpoints in VS code!
## Package Versions
The following attempts to explain why certain package versions have been chosen and what their current limitations are
@ -225,4 +223,4 @@ The following attempts to explain why certain package versions have been chosen
| gatsby-cli | 3.14.2 | 4.15.2 | No | When attempting to update, it breaks all unit tests. Compatibility warnings come up with all plugins, but this doesn't seem to affect functionality. This is the latest version we can release without investigating the unit tests.|
| sass | 1.32.12 | 1.52.3 | No | This version is needed to suppress the Dart Sass warnings about / as division for each component. See [here](https://github.com/twbs/bootstrap/issues/34051#issuecomment-845884423) for more information |
| uswds | 2.13.3 | 3.0.2 | No | Needs to stay at 2.13.3 for peer dependency on trussworks|
| trussworks | 3.1.0 | 3.1.0 | No | latest! |
| trussworks | 3.1.0 | 3.1.0 | No | latest! |

View file

@ -1,8 +1,8 @@
/* eslint-disable quotes */
// External Libs:
import React from "react";
import {MessageDescriptor, useIntl} from "gatsby-plugin-intl";
import {Accordion, Button} from "@trussworks/react-uswds";
import {MessageDescriptor, useIntl} from "gatsby-plugin-intl";
import React from "react";
// Components:
import Category from "../Category";
@ -15,11 +15,12 @@ import TractInfo from "../TractInfo";
import TractPrioritization from "../TractPrioritization";
// Styles and constants
import * as styles from "./areaDetail.module.scss";
import * as constants from "../../data/constants";
import * as EXPLORE_COPY from "../../data/copy/explore";
import * as styles from "./areaDetail.module.scss";
// @ts-ignore
import IslandCopy from "../IslandCopy/IslandCopy";
import launchIcon from "/node_modules/uswds/dist/img/usa-icons/launch.svg";
interface IAreaDetailProps {
@ -1163,6 +1164,9 @@ const AreaDetail = ({properties}: IAreaDetailProps) => {
null
}
percentTractTribal={percentTractTribal}
isGrandfathered={
properties[constants.IS_GRANDFATHERED]
}
/>
<PrioritizationCopy2
totalCategoriesPrioritized={
@ -1189,9 +1193,18 @@ const AreaDetail = ({properties}: IAreaDetailProps) => {
</div>
</div>
{/* Only show the DonutCopy if Adjacency index is true and the total number of disadv ind == 0 */}
{/* Show IslandCopy if the GeoID matches an island prefix */}
{constants.TILES_ISLAND_AREA_FIPS_CODES.some((code) => {
return properties[constants.GEOID_PROPERTY].startsWith(code);
}) && (
<IslandCopy povertyPercentile={ properties[constants.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE]} />
)}
{/* Only show the DonutCopy if Adjacency index is true, the total number of disadv ind == 0,
and not grandfathered. */}
{properties[constants.ADJACENCY_EXCEEDS_THRESH] &&
properties[constants.TOTAL_NUMBER_OF_DISADVANTAGE_INDICATORS] === 0 && (
properties[constants.TOTAL_NUMBER_OF_DISADVANTAGE_INDICATORS] === 0 &&
!properties[constants.IS_GRANDFATHERED] && (
<DonutCopy
isAdjacent={properties[constants.ADJACENCY_EXCEEDS_THRESH]}
povertyBelow200Percentile={

View file

@ -0,0 +1,44 @@
@use '../../styles/design-system.scss' as *;
@import "../utils.scss";
.islandCopyContainer{
@include u-display('flex');
flex-direction: column;
@include u-padding-left(2);
@include u-padding-right(2.5);
@include u-padding-top(2);
.islandRow {
@include u-display('flex');
justify-content: space-between;
.islandRowLabel {
font-weight: bold;
}
.valueSubTextContainer {
@include u-display('flex');
flex-direction: column;
.subTextContainer{
@include indicatorValueSubTextContainer();
}
}
}
.islandRow:first-child {
@include u-padding-bottom("05");
}
.valueContainer {
@include u-text('bold');
}
.invert {
align-self: flex-end;
@include invert();
}
.noInvert {
align-self: flex-end;
}
}

View file

@ -0,0 +1,19 @@
declare namespace IslandCopyNamespace {
export interface IIslandCopyScss {
islandCopyContainer: string;
islandRow: string;
islandRowLabel: string;
invert: string;
noInvert: string;
valueSubTextContainer: string;
valueContainer: string;
subTextContainer: string;
}
}
declare const IslandCopyScssModule: IslandCopyNamespace.IIslandCopyScss & {
/** WARNING: Only available when "css-loader" is used without "style-loader" or "mini-css-extract-plugin" */
locals: IslandCopyNamespace.IIslandCopyScss;
};
export = IslandCopyScssModule;

View file

@ -0,0 +1,51 @@
import {useIntl} from 'gatsby-plugin-intl';
import React from 'react';
import {IndicatorValue, IndicatorValueSubText} from '../Indicator/Indicator';
import * as styles from './IslandCopy.module.scss';
import * as EXPLORE_COPY from '../../data/copy/explore';
export interface IIslandCopyProps {
povertyPercentile: number | null
}
const IslandCopy = ({povertyPercentile}: IIslandCopyProps) => {
const intl = useIntl();
const percentileWhole = povertyPercentile ?
parseFloat((povertyPercentile*100).toFixed()) : null;
const threshold = 65;
return (
<div className={styles.islandCopyContainer}>
<div className={styles.islandRow}>
<div className={styles.islandRowLabel}>
{intl.formatMessage(EXPLORE_COPY.ISLAND_COPY.LOW_INC)}
</div>
<div className={styles.valueSubTextContainer}>
<div className={`${styles.valueContainer}
${ percentileWhole && percentileWhole >= threshold ?
styles.invert :
styles.noInvert }
`}>
<IndicatorValue
type={'percentile'}
displayStat={percentileWhole}
/>
</div>
<div className={styles.subTextContainer}>
<IndicatorValueSubText
value={percentileWhole}
isAboveThresh={percentileWhole && percentileWhole >= threshold ? true : false}
threshold={threshold}
type={'percentile'}
/>
</div>
</div>
</div>
</div>
);
};
export default IslandCopy;

View file

@ -10,6 +10,7 @@ interface IPrioritizationCopy {
isAdjacencyThreshMet: boolean,
isAdjacencyLowIncome: boolean,
isIslandLowIncome: boolean,
isGrandfathered: boolean,
tribalCountAK: number | null,
tribalCountUS: null, // when this signal is supported add number type
percentTractTribal: number | null
@ -38,6 +39,7 @@ const PrioritizationCopy =
totalBurdensPrioritized,
isAdjacencyThreshMet,
isAdjacencyLowIncome,
isGrandfathered,
isIslandLowIncome,
tribalCountAK,
tribalCountUS,
@ -57,8 +59,10 @@ const PrioritizationCopy =
} else if (isAdjacencyThreshMet && !isAdjacencyLowIncome) {
// if 1-2-1
if ( tribalCountAK === null && tribalCountUS === null) {
if (isGrandfathered) {
prioCopyRendered = EXPLORE_COPY.PRIORITIZATION_COPY.PRIO_GRANDFATHERED_LI;
// if 1-2-1-1
if (percentTractTribal === null) {
} else if (percentTractTribal === null) {
prioCopyRendered = EXPLORE_COPY.PRIORITIZATION_COPY.NOT_PRIO_SURR_LI;
// if 1-2-1-2
} else if (percentTractTribal === 0) {

View file

@ -40,8 +40,8 @@ const PrioritizationCopy2 =
tribalCountUS,
percentTractTribal,
}:IPrioritizationCopy2) => {
let noStyles = false;
let prioCopy2Rendered;
let prioCopy2Rendered = <></>;
// if 1
if (
@ -165,13 +165,10 @@ const PrioritizationCopy2 =
(tribalCountAK !== null && tribalCountAK >= 1)
) {
prioCopy2Rendered = EXPLORE_COPY.getPrioANVCopy(tribalCountAK, false);
} else {
prioCopy2Rendered = <></>;
noStyles = true;
};
return (
<div className={noStyles ? '' : styles.prioritizationCopy2Container}>
<div className={prioCopy2Rendered !== <></> ? '' : styles.prioritizationCopy2Container}>
{prioCopy2Rendered}
</div>
);

View file

@ -2,7 +2,9 @@
exports[`rendering of PrioritizationCopy2 Component checks if component renders The lands of Federally Recognized Tribes that cover 2% of this tract are also considered disadvantaged. when totCats = 0, totBurds = 0, isAdj = true, isAdjLI = true, tribal % = 2, 1`] = `
<DocumentFragment>
<div>
<div
class=""
>
The lands of Federally Recognized Tribes that cover 2% of this tract are also considered disadvantaged.
</div>
</DocumentFragment>
@ -10,7 +12,9 @@ exports[`rendering of PrioritizationCopy2 Component checks if component renders
exports[`rendering of PrioritizationCopy2 Component checks if component renders The lands of Federally Recognized Tribes that cover 4% of this tract are also considered disadvantaged. when totCats = 0, totBurds = 1, isAdj = true, isAdjLI = true, tribal % = 4, 1`] = `
<DocumentFragment>
<div>
<div
class=""
>
The lands of Federally Recognized Tribes that cover 4% of this tract are also considered disadvantaged.
</div>
</DocumentFragment>

View file

@ -71,6 +71,7 @@ export const TOTAL_NUMBER_OF_INDICATORS = "THRHLD";
export const COUNT_OF_CATEGORIES_DISADV = "CC";
export const SCORE_N_COMMUNITIES = "SN_C";
export const SCORE_N_TRIBAL = "SN_T";
export const IS_GRANDFATHERED = "SN_GRAND";
export const SIDE_PANEL_STATE = "UI_EXP";
export const SIDE_PANEL_STATE_VALUES = {
@ -109,6 +110,8 @@ export const IS_EXCEED_BOTH_SOCIO_INDICATORS = "N_EBSI";
export const POVERTY_BELOW_200_PERCENTILE = "P200_I_PFS";
export const IS_FEDERAL_POVERTY_LEVEL_200 = "FPL200S";
// Percentile FPL 200 for islands only
export const CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE = "FPL200P";
export const HIGHER_ED_PERCENTILE = "CA";
export const IS_HIGHER_ED_PERCENTILE = "CA_LT20";
@ -387,6 +390,8 @@ process.env.GATSBY_CDN_TILES_BASE_URL;
export const TILE_PATH = process.env.DATA_SOURCE === "local" ?
process.env.GATSBY_DATA_PIPELINE_SCORE_PATH_LOCAL :
process.env.GATSBY_1_0_SCORE_PATH;
process.env.GATSBY_2_0_SCORE_PATH;
export const MAP_TRACT_SEARCH_PATH = "data_pipeline/data/score/search/tracts.json";
export const MAP_TRACT_SEARCH_PATH = process.env.DATA_SOURCE === "local" ?
process.env.GATSBY_DATA_PIPELINE_SEARCH_PATH_LOCAL :
process.env.GATSBY_2_0_MAP_TRACT_SEARCH_PATH;

View file

@ -660,6 +660,14 @@ export const PRIORITIZATION_COPY = {
bold: boldFn,
}}
/>,
PRIO_GRANDFATHERED_LI: <FormattedMessage
id={'explore.map.page.side.panel.prio.copy.prio.grandfathered'}
defaultMessage={'This tract is considered disadvantaged because it was identified as disadvantaged in version 1.0 of the tool.'}
description={`Navigate to the explore the map page. Click on tract, The side panel will show This tract is considered disadvantaged. This tract is considered disadvantaged because it was identified as disadvantaged in version 1.0 of the tool.`}
values={{
bold: boldFn,
}}
/>,
};
export const getPrioNBurdenCopy = (burdens:string) => {
@ -764,6 +772,13 @@ export const DONUT_COPY = defineMessages({
description: `Navigate to the explore the map page. Click on side panel, this copy may show up`,
},
});
export const ISLAND_COPY = defineMessages({
LOW_INC: {
id: 'explore.map.page.side.panel.island.copy.low.income',
defaultMessage: 'Low income',
description: `Navigate to the explore the map page. Click on side panel, this copy may show up`,
},
});
export const COMMUNITY = {
OF_FOCUS: <FormattedMessage
id={'explore.map.page.side.panel.community.of.focus'}

View file

@ -1375,6 +1375,10 @@
"defaultMessage": "Identified as disadvantaged?",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show asking IF the communities is focused on"
},
"explore.map.page.side.panel.island.copy.low.income": {
"defaultMessage": "Low income",
"description": "Navigate to the explore the map page. Click on side panel, this copy may show up"
},
"explore.map.page.side.panel.not.community.of.focus": {
"defaultMessage": "NO",
"description": "Navigate to the explore the map page. When the map is in view, click on the map. The side panel will show the communities the score currently is not focused on"
@ -1427,6 +1431,10 @@
"defaultMessage": "The {numPoints} that are Federally Recognized Tribes in this tract are are {also} considered disadvantaged.",
"description": "Navigate to the explore the map page. Click on tract, The {numPoints} that are Federally Recognized Tribes in this tract ares are {also} considered disadvantaged."
},
"explore.map.page.side.panel.prio.copy.prio.grandfathered": {
"defaultMessage": "This tract is considered disadvantaged because it was identified as disadvantaged in version 1.0 of the tool.",
"description": "Navigate to the explore the map page. Click on tract, The side panel will show This tract is considered disadvantaged. This tract is considered disadvantaged because it was identified as disadvantaged in version 1.0 of the tool."
},
"explore.map.page.side.panel.prio.copy.prio.island.li": {
"defaultMessage": "This tract is considered disadvantaged because it meets the low income threshold <bold>AND</bold> is located in a U.S. Territory.",
"description": "Navigate to the explore the map page. Click on tract, The side panel will show This tract is considered disadvantaged. It is an island territory that meets an adjusted low income threshold."

View file

@ -213,6 +213,7 @@
"explore.map.page.side.panel.demo.two.or.more": "Dos o más razas",
"explore.map.page.side.panel.demo.white": "Personas de raza blanca",
"explore.map.page.side.panel.donut.copy.adj.low.income": "Bajos ingresos ajustados",
"explore.map.page.side.panel.island.copy.low.income": "Bajos ingresos",
"explore.map.page.side.panel.donut.copy.complete.surround": "completamente rodeada",
"explore.map.page.side.panel.exceed.burden.answer.no": "No",
"explore.map.page.side.panel.exceed.burden.answer.yes": "Sí",
@ -356,6 +357,7 @@
"explore.map.page.side.panel.prio.copy.prio.akus": "Los {numAKpoints} pueblos nativos de Alaska y las {numUSpoints} tribus de esta zona que están reconocidas a nivel federal también se consideran desfavorecidos.",
"explore.map.page.side.panel.prio.copy.prio.anv": "Los {numAKpoints} pueblos nativos de Alaska y las tribus de esta zona que están reconocidas a nivel federal {also} se consideran desfavorecidos.",
"explore.map.page.side.panel.prio.copy.prio.donut": "Este distrito censal se considera desfavorecido. Está rodeado de distritos censales desfavorecidos <bold>Y</bold> cumple con el umbral ajustado de bajos ingresos. El ajuste no corresponde a ninguna de las categorías.",
"explore.map.page.side.panel.prio.copy.prio.grandfathered": "Este distrito censal se considera desfavorecido porque fue identificado como desfavorecido en la versión 1.0 de esta herramienta.",
"explore.map.page.side.panel.prio.copy.prio.frt": "Las tierras de las tribus reconocidas a nivel federal que cubren {amount} de esta extensión se consideran {also} desfavorecidas.",
"explore.map.page.side.panel.prio.copy.prio.frt.n.points": "Los {numPoints} que son tribus reconocidas a nivel federal en este distrito censal se consideran {also} desfavorecidos.",
"explore.map.page.side.panel.prio.copy.prio.n.burden": "Este distrito censal se considera desfavorecido porque cumple con el umbral de carga <bold>Y</bold> con el umbral socioeconómico asociado.",

View file

@ -2,7 +2,7 @@
"compilerOptions": {
"module": "commonjs",
"target": "es6",
"jsx": "preserve",
"jsx": "react",
"lib": ["dom", "es2015", "es2017", "es2019"],
"strict": true,
"noEmit": true,

View file

@ -17,6 +17,7 @@ pd.set_option("display.width", 10000)
pd.set_option("display.colheader_justify", "left")
result_text = []
WORKING_PATH = constants.TMP_PATH / "Comparator" / "Score"
def _add_text(text: str):
@ -38,7 +39,12 @@ def _get_result_doc() -> str:
def _read_from_file(file_path: Path):
"""Read a CSV file into a Dataframe."""
"""
Read a CSV file into a Dataframe.
Args:
file_path (Path): the path of the file to read
"""
if not file_path.is_file():
logger.error(
f"- No score file exists at {file_path}. "
@ -53,6 +59,219 @@ def _read_from_file(file_path: Path):
).sort_index()
def _add_tract_list(tract_list: list[str]):
"""
Adds a list of tracts to the output grouped by Census state.
Args:
tract_list (list[str]): a list of tracts
"""
if len(tract_list) > 0:
_add_text("Those tracts are:\n")
# First extract the Census states/territories
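# (An 11-digit census tract GEOID begins with the 2-digit state/territory FIPS
# code, so the first two characters are enough to group tracts by state.)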
states_by_tract = []
for tract in tract_list:
states_by_tract.append(tract[0:2])
states = set(states_by_tract)
# Now output the grouped tracts
for state in sorted(states):
tracts_for_state = [
item for item in tract_list if item.startswith(state)
]
_add_text(
f"\t{state} = {len(tracts_for_state)} = {', '.join(tracts_for_state)}\n"
)
def _compare_score_columns(prod_df: pd.DataFrame, local_df: pd.DataFrame):
"""
Compare the columns between scores.
Args:
prod_df (pd.DataFrame): the production score
local_df (pd.DataFrame): the local score
"""
log_info("Comparing columns (production vs local)")
_add_text("## Columns\n")
local_score_df_columns = sorted(local_df.columns.array.tolist())
production_score_df_columns = sorted(prod_df.columns.array.tolist())
extra_cols_in_local = set(local_score_df_columns) - set(
production_score_df_columns
)
extra_cols_in_prod = set(production_score_df_columns) - set(
local_score_df_columns
)
if len(extra_cols_in_local) == 0 and len(extra_cols_in_prod) == 0:
_add_text("* There are no differences in the column names.\n")
else:
_add_text(
f"* There are {len(extra_cols_in_local)} columns that were added as compared to the production score."
)
if len(extra_cols_in_local) > 0:
_add_text(f" Those colums are:\n{extra_cols_in_local}")
_add_text(
f"\n* There are {len(extra_cols_in_prod)} columns that were removed as compared to the production score."
)
if len(extra_cols_in_prod) > 0:
_add_text(f" Those colums are:\n{extra_cols_in_prod}")
def _compare_score_results(prod_df: pd.DataFrame, local_df: pd.DataFrame):
"""
Compare the scores.
Args:
prod_df (pd.DataFrame): the production score
local_df (pd.DataFrame): the local score
"""
log_info("Comparing dataframe contents (production vs local)")
_add_text("\n\n## Scores\n")
production_row_count = len(prod_df.index)
local_row_count = len(local_df.index)
# Tract comparison
_add_text(
f"* The production score has {production_row_count:,} census tracts, and the freshly calculated score has {local_row_count:,}."
)
if production_row_count == local_row_count:
_add_text(" They match!\n")
else:
_add_text(" They don't match. The differences are:\n")
_add_text(
" * New tracts added to the local score are:\n"
f"{local_df.index.difference(prod_df.index).to_list()}"
"\n * Tracts removed from the local score are:\n"
f"{prod_df.index.difference(local_df.index).to_list()}"
"\n"
)
# Population comparison
production_total_population = prod_df[field_names.TOTAL_POP_FIELD].sum()
local_total_population = local_df[field_names.TOTAL_POP_FIELD].sum()
_add_text(
f"* The total population in all census tracts in the production score is {production_total_population:,}. "
f"The total population in all census tracts locally is {local_total_population:,}. "
)
_add_text(
"They match!\n"
if production_total_population == local_total_population
else f"The difference is {abs(production_total_population - local_total_population):,}.\n"
)
dacs_query = f"`{field_names.FINAL_SCORE_N_BOOLEAN}` == True"
production_disadvantaged_tracts_df = prod_df.query(dacs_query)
local_disadvantaged_tracts_df = local_df.query(dacs_query)
production_disadvantaged_tracts_set = set(
production_disadvantaged_tracts_df.index.array
)
local_disadvantaged_tracts_set = set(
local_disadvantaged_tracts_df.index.array
)
production_pct_of_population_represented = (
production_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
/ production_total_population
)
local_pct_of_population_represented = (
local_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
/ local_total_population
)
# DACS comparison
_add_text(
f"* There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score representing"
f" {production_pct_of_population_represented:.1%} of the total population, and {len(local_disadvantaged_tracts_set):,}"
)
_add_text(
f" in the locally generated score representing {local_pct_of_population_represented:.1%} of the total population."
)
_add_text(
" The number of tracts match!\n "
if len(production_disadvantaged_tracts_set)
== len(local_disadvantaged_tracts_set)
else f" The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set))} tract(s).\n "
)
removed_tracts = production_disadvantaged_tracts_set.difference(
local_disadvantaged_tracts_set
)
added_tracts = local_disadvantaged_tracts_set.difference(
production_disadvantaged_tracts_set
)
_add_text(
f"* There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the production score that are not disadvantaged in the locally"
f" generated score (i.e. disadvantaged tracts that were removed by the new score). "
)
_add_tract_list(removed_tracts)
_add_text(
f"\n* There are {len(added_tracts):,} tract(s) marked as disadvantaged in the locally generated score that are not disadvantaged in the"
f" production score (i.e. disadvantaged tracts that were added by the new score). "
)
_add_tract_list(added_tracts)
# Grandfathered tracts from v1.0
grandfathered_tracts = local_df.loc[
local_df[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0]
].index
if len(grandfathered_tracts) > 0:
_add_text(
f"* This includes {len(grandfathered_tracts)} grandfathered tract(s) from v1.0 scoring."
)
_add_tract_list(grandfathered_tracts)
else:
_add_text("* There are NO grandfathered tracts from v1.0 scoring.\n")
def _generate_delta(prod_df: pd.DataFrame, local_df: pd.DataFrame):
"""
Generate a delta of scores
Args:
prod_df (pd.DataFrame): the production score
local_df (pd.DataFrame): the local score
"""
_add_text("\n## Delta\n")
# First we make the columns on the two dataframes the same so that we can compare them
local_score_df_columns = local_df.columns.array.tolist()
production_score_df_columns = prod_df.columns.array.tolist()
extra_cols_in_local = set(local_score_df_columns) - set(
production_score_df_columns
)
extra_cols_in_prod = set(production_score_df_columns) - set(
local_score_df_columns
)
trimmed_prod_df = prod_df.drop(extra_cols_in_prod, axis=1)
trimmed_local_df = local_df.drop(extra_cols_in_local, axis=1)
try:
comparison_results_df = trimmed_prod_df.compare(
trimmed_local_df, align_axis=1, keep_shape=False, keep_equal=False
).rename({"self": "Production", "other": "Local"}, axis=1, level=1)
_add_text(
"* I compared all values across all census tracts. Note this ignores any columns that have been added or removed."
f" There are {len(comparison_results_df.index):,} tracts with at least one difference.\n"
)
comparison_path = WORKING_PATH / "deltas.csv"
comparison_results_df.to_csv(path_or_buf=comparison_path)
_add_text(f"* Wrote comparison results to {comparison_path}")
except ValueError as e:
_add_text(
"* I could not run a full comparison. This is likely because there are column or index (census tract) differences."
" Please examine the logs or run the score comparison locally to find out more.\n"
)
_add_text(
f"Encountered an exception while performing the comparison: {repr(e)}\n"
)
@click.group()
def cli():
"""
@ -101,7 +320,6 @@ def compare_score(
"""
FLOAT_ROUNDING_PLACES = 2
WORKING_PATH = constants.TMP_PATH / "Comparator" / "Score"
log_title("Compare Score", "Compare production score to local score")
@ -132,188 +350,21 @@ def compare_score(
production_score_df = production_score_df.round(FLOAT_ROUNDING_PLACES)
local_score_df = local_score_df.round(FLOAT_ROUNDING_PLACES)
local_score_df_columns = sorted(local_score_df.columns.array.tolist())
production_score_df_columns = sorted(
production_score_df.columns.array.tolist()
)
extra_cols_in_local = set(local_score_df_columns) - set(
production_score_df_columns
)
extra_cols_in_prod = set(production_score_df_columns) - set(
local_score_df_columns
)
_add_text("# Score Comparison Summary\n")
_add_text(
f"Hi! I'm the Score Comparator. I compared the score in production (version {compare_to_version}) to the"
" locally calculated score. Here are the results:\n\n"
)
#####################
# Compare the columns
#####################
log_info("Comparing columns (production vs local)")
_add_text("## Columns\n")
if len(extra_cols_in_local) == 0 and len(extra_cols_in_prod) == 0:
_add_text("* There are no differences in the column names.\n")
else:
_add_text(
f"* There are {len(extra_cols_in_local)} columns that were added as compared to the production score."
)
if len(extra_cols_in_local) > 0:
_add_text(f" Those colums are:\n{extra_cols_in_local}")
_add_text(
f"\n* There are {len(extra_cols_in_prod)} columns that were removed as compared to the production score."
)
if len(extra_cols_in_prod) > 0:
_add_text(f" Those colums are:\n{extra_cols_in_prod}")
####################
# Compare the scores
####################
log_info("Comparing dataframe contents (production vs local)")
_add_text("\n\n## Scores\n")
production_row_count = len(production_score_df.index)
local_row_count = len(local_score_df.index)
# Tract comparison
_add_text(
f"* The production score has {production_row_count:,} census tracts, and the freshly calculated score has {local_row_count:,}."
)
if production_row_count == local_row_count:
_add_text(" They match!\n")
else:
_add_text(" They don't match. The differences are:\n")
_add_text(
" * New tracts added to the local score are:\n"
f"{local_score_df.index.difference(production_score_df.index).to_list()}"
"\n * Tracts removed from the local score are:\n"
f"{production_score_df.index.difference(local_score_df.index).to_list()}"
"\n"
)
# Population comparison
production_total_population = production_score_df[
field_names.TOTAL_POP_FIELD
].sum()
local_total_population = local_score_df[field_names.TOTAL_POP_FIELD].sum()
_add_text(
f"* The total population in all census tracts in the production score is {production_total_population:,}. "
f"The total population in all census tracts locally is {local_total_population:,}. "
)
_add_text(
"They match!\n"
if production_total_population == local_total_population
else f"The difference is {abs(production_total_population - local_total_population):,}.\n"
)
dacs_query = f"`{field_names.FINAL_SCORE_N_BOOLEAN}` == True"
production_disadvantaged_tracts_df = production_score_df.query(dacs_query)
local_disadvantaged_tracts_df = local_score_df.query(dacs_query)
production_disadvantaged_tracts_set = set(
production_disadvantaged_tracts_df.index.array
)
local_disadvantaged_tracts_set = set(
local_disadvantaged_tracts_df.index.array
)
production_pct_of_population_represented = (
production_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
/ production_total_population
)
local_pct_of_population_represented = (
local_disadvantaged_tracts_df[field_names.TOTAL_POP_FIELD].sum()
/ local_total_population
)
# DACS comparison
_add_text(
f"* There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score representing"
f" {production_pct_of_population_represented:.1%} of the total population, and {len(local_disadvantaged_tracts_set):,}"
)
_add_text(
f" in the locally generated score representing {local_pct_of_population_represented:.1%} of the total population."
)
_add_text(
" The number of tracts match!\n "
if len(production_disadvantaged_tracts_set)
== len(local_disadvantaged_tracts_set)
else f" The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set))} tract(s).\n "
)
removed_tracts = production_disadvantaged_tracts_set.difference(
local_disadvantaged_tracts_set
)
added_tracts = local_disadvantaged_tracts_set.difference(
production_disadvantaged_tracts_set
)
_add_text(
f"* There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the production score that are not disadvantaged in the locally"
f" generated score (i.e. disadvantaged tracts that were removed by the new score). "
)
if len(removed_tracts) > 0:
_add_text(f"Those tracts are:\n{removed_tracts}")
_add_text(
f"\n* There are {len(added_tracts):,} tract(s) marked as disadvantaged in the locally generated score that are not disadvantaged in the"
f" production score (i.e. disadvantaged tracts that were added by the new score). "
)
if len(added_tracts) > 0:
_add_text(f"Those tracts are:\n{added_tracts}\n")
# Grandfathered tracts from v1.0
grandfathered_tracts = local_score_df.loc[
local_score_df[field_names.GRANDFATHERED_N_COMMUNITIES_V1_0]
].index
if len(grandfathered_tracts) > 0:
_add_text(
f"* This includes {len(grandfathered_tracts)} grandfathered tract(s) from v1.0 scoring. They are:\n"
f"{grandfathered_tracts.to_list()}\n"
)
else:
_add_text("* There are NO grandfathered tracts from v1.0 scoring.\n")
################
# Create a delta
################
_add_text("\n## Delta\n")
# First we make the columns on the two dataframes the same so that we can compare them
trimmed_prod_df = production_score_df.drop(extra_cols_in_prod, axis=1)
trimmed_local_df = local_score_df.drop(extra_cols_in_local, axis=1)
try:
comparison_results_df = trimmed_prod_df.compare(
trimmed_local_df, align_axis=1, keep_shape=False, keep_equal=False
).rename({"self": "Production", "other": "Local"}, axis=1, level=1)
_add_text(
"* I compared all values across all census tracts. Note this ignores any columns that have been added or removed."
f" There are {len(comparison_results_df.index):,} tracts with at least one difference.\n"
)
comparison_path = WORKING_PATH / "deltas.csv"
comparison_results_df.to_csv(path_or_buf=comparison_path)
_add_text(f"* Wrote comparison results to {comparison_path}")
except ValueError as e:
_add_text(
"* I could not run a full comparison. This is likely because there are column or index (census tract) differences."
" Please examine the logs or run the score comparison locally to find out more.\n"
)
_add_text(
f"Encountered an exception while performing the comparison: {repr(e)}\n"
)
_compare_score_columns(production_score_df, local_score_df)
_compare_score_results(production_score_df, local_score_df)
_generate_delta(production_score_df, local_score_df)
result_doc = _get_result_doc()
print(result_doc)
# Write the report
summary_path = WORKING_PATH / "comparison-summary.md"
with open(summary_path, "w", encoding="utf-8") as f:
f.write(result_doc)
log_info(f"Wrote comparison summary to {summary_path}")

View file

@ -282,6 +282,7 @@ TILES_SCORE_COLUMNS = {
# The NEW final score value INCLUDES the adjacency index.
field_names.FINAL_SCORE_N_BOOLEAN: "SN_C",
field_names.FINAL_SCORE_N_BOOLEAN_V1_0: "SN_C_V10",
field_names.GRANDFATHERED_N_COMMUNITIES_V1_0: "SN_GRAND",
field_names.IS_TRIBAL_DAC: "SN_T",
field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
@ -346,6 +347,8 @@ TILES_SCORE_COLUMNS = {
# These are the booleans for socioeconomic indicators
## this measures low income boolean
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED: "FPL200S",
# Percentile FPL 200 for islands only
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE: "FPL200P",
## Low high school for t&wd
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "N_WKFC_EBSI",
field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",

View file

@ -132,7 +132,9 @@ def tile_data_expected():
@pytest.fixture()
def create_tile_score_data_input():
return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl")
return pd.read_pickle(
pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl"
)
@pytest.fixture()

View file

@ -1,23 +1,25 @@
These files are used as inputs to unit tests. Some notes on their creation are below.
# How to generate the sample data in this folder
The sample data in this folder can be easily generated by debugging the `data_pipeline/etl/score/etl_score_post.py` file
and exporting data using the debugger console. Examples of this exporting are below.
## Why in pickle format?
Exporting as a pickle file keeps all the metadata about the columns, including the data types. If we were to export as CSV,
we would need to encode the data types in the test fixtures for all the columns for the comparison to be correct.
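A minimal sketch of the difference (assuming a local pandas install; the tiny frame, column names, and file paths below are made up for illustration):
```python
import pandas as pd

# Hypothetical miniature score frame; the column names are illustrative only.
df = pd.DataFrame(
    {"GEOID10_TRACT": ["01001020100"], "SN_C": [True], "P200_I_PFS": [0.87]}
)

df.to_pickle("/tmp/sample.pkl")
# The dtypes (object, bool, float64) survive the pickle round trip.
assert pd.read_pickle("/tmp/sample.pkl").dtypes.equals(df.dtypes)

df.to_csv("/tmp/sample.csv", index=False)
# CSV loses the metadata: the GEOID comes back as int64 (dropping its
# leading zero) unless the dtype is re-declared in the test fixture.
print(pd.read_csv("/tmp/sample.csv").dtypes)
```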
## Exporting the test data
First, verify the code works as expected before exporting the data. You will not be able to inspect the data exports as they are in binary.
You will be using the debugger to export the data. Note that it is best to export a small subset of the data for faster test execution.
### create_tile_data test
1. Place a breakpoint in `data_pipeline/etl/score/etl_score_post.py` in the `transform` method right after the call to
`_create_tile_data` and start the debugger running the Generate Post Score command (`generate-score-post`).
1. Partially export the `output_score_county_state_merged_df` and `self.output_score_tiles_df` data to a pickle file once the debugger pauses
at the breakpoint. Use these sample commands in the debugger console. Note that we use head and tail so that the sample data includes territories.
### create_tile_data_expected.pkl
1. Set a breakpoint in the `test_create_tile_data` method in `data_pipeline/etl/score/tests/test_score_post.py`
after the call to `_create_tile_data` and debug the test.
2. Extract a subset of the `output_tiles_df_actual` dataframe. Do not extract the whole score as the file
will be too big and the test will run slowly. Also, you need to extract the same tracts that are in
the `create_tile_score_data_input.pkl` input data. For example, use the following command once the breakpoint is reached
to extract a few rows at the top and bottom of the score. This will capture some states and territories.
```python
import pandas as pd
pd.concat([output_tiles_df_actual.head(3), output_tiles_df_actual.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl')
pd.concat([output_score_county_state_merged_df.head(3), output_score_county_state_merged_df.tail(4)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl')
pd.concat([self.output_score_tiles_df.head(3), self.output_score_tiles_df.tail(4)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl')
```
### create_tile_score_data_input.pkl
1. Set a breakpoint in the transform method in `data_pipeline/etl/score/etl_score_post.py` before the call to
`_create_tile_data` and run the post scoring.
2. Extract a subset of the `output_score_county_state_merged_df` dataframe. Do not extract the whole score as the file
will be too big and the test will run slowly. For example, use the following command once the breakpoint is reached
to extract a few rows at the top and bottom of the score. This will capture some states and territories.
```python
pd.concat([output_score_county_state_merged_df.head(3), output_score_county_state_merged_df.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl')
```

View file

@ -83,7 +83,9 @@ def test_create_score_data(
)
def test_create_tile_data(etl, create_tile_score_data_input, create_tile_data_expected):
def test_create_tile_data(
etl, create_tile_score_data_input, create_tile_data_expected
):
output_tiles_df_actual = etl._create_tile_data(create_tile_score_data_input)
pdt.assert_frame_equal(
output_tiles_df_actual,
@ -158,8 +160,10 @@ def test_load_downloadable_zip(etl, monkeypatch, score_data_expected):
def test_create_tract_search_data(census_geojson_sample_data: gpd.GeoDataFrame):
# Sanity check
assert len(census_geojson_sample_data) > 0
result = PostScoreETL()._create_tract_search_data(census_geojson_sample_data)
result = PostScoreETL()._create_tract_search_data(
census_geojson_sample_data
)
assert isinstance(result, pd.DataFrame)
assert not result.columns.empty
columns = ["GEOID10", "INTPTLAT10", "INTPTLON10"]

View file

@ -200,6 +200,11 @@ CENSUS_DECENNIAL_MEDIAN_INCOME_2019 = (
CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2019 = f"Percentage households below 100% of federal poverty line in {DEC_DATA_YEAR}"
CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = f"Percentage households below 200% of federal poverty line in {DEC_DATA_YEAR}"
CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019 = f"{CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_FIELD_2019}, adjusted and imputed"
CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE = (
CENSUS_DECENNIAL_ADJUSTED_POVERTY_LESS_THAN_200_FPL_FIELD_2019
+ ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
+ PERCENTILE_FIELD_SUFFIX
)
CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2019 = f"Percent individuals age 25 or over with less than high school degree in {DEC_DATA_YEAR}"
CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2019 = (
f"Unemployment (percent) in {DEC_DATA_YEAR}"

View file

@ -1053,6 +1053,8 @@ class ScoreNarwhal(Score):
# Now we set the low income flag only for territories, but we need to rank them
# with all other tracts.
# Note: This specific method call will generate the
# CENSUS_DECENNIAL_POVERTY_LESS_THAN_200_FPL_PERCENTILE column in the score.
(
self.df,
island_areas_poverty_200_criteria_field_name,

View file

@ -5053,4 +5053,4 @@ test = ["mypy", "pre-commit", "pytest", "pytest-asyncio", "websockets (>=10.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "bdce0f2249243262fbfd1e73df3f2525c8ca624df6da458480636a19db26c4fe"
content-hash = "04639d2eaf33218ba4fef190f76620b00fb2285d86d58458511d85dafd304658"

View file

@ -60,6 +60,11 @@ seaborn = "^0.11.2"
papermill = "^2.3.4"
jupyterlab = "^3.6.7"
[tool.poetry.group.test.dependencies]
openpyxl = "^3.1.5"
pytest-snapshot = "^0.9.0"
[build-system]
build-backend = "poetry.core.masonry.api"
requires = ["poetry-core>=1.0.0"]

View file

@ -1,27 +0,0 @@
[tox]
# required because we use pyproject.toml
isolated_build = true
envlist = py310, lint, checkdeps, pytest
# only checks python versions installed locally
skip_missing_interpreters = true
[testenv:lint]
deps = pytest
# lints python code in src and tests
commands = black data_pipeline
flake8 data_pipeline
pylint data_pipeline
[testenv:checkdeps]
# checks the dependencies for security vulnerabilities and open source licenses
allowlist_externals = bash
commands = pip install -U wheel
# known issue: https://github.com/pyupio/safety/issues/364
# jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
safety check --ignore 51457 --ignore 44715 --ignore 70612
bash scripts/run-liccheck.sh
[testenv:pytest]
# Run tests
deps = pytest
commands = pytest --full-trace