Merge branch 'main' into issue-2119-90th-percentile-bug

Merging main into this branch
Travis Newby 2023-02-28 14:54:40 -06:00
commit 47dd86ab46
97 changed files with 852 additions and 558 deletions

View file

@ -72,7 +72,6 @@ jobs:
run: |
poetry run s4cmd put ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --force --API-ACL=public-read
poetry run s4cmd put ./data_pipeline/files/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable --recursive --force --API-ACL=public-read
- name: Update PR with deployed Score URLs
uses: mshick/add-pr-comment@v1
with:
@ -85,6 +84,15 @@ jobs:
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: "github-actions[bot]"
allow-repeats: false
- name: Perform Score Comparisons
run: |
poetry run python3 data_pipeline/comparator.py compare-score
- name: Update PR with Score Comparisons
uses: mshick/add-pr-comment@v2
with:
message-path: ./data/data-pipeline/data_pipeline/data/tmp/Comparator/Score/comparison-summary.md
repo-token: ${{ secrets.GITHUB_TOKEN }}
allow-repeats: false
- name: Set timezone for tippecanoe
uses: szenius/set-timezone@v1.0
with:
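The new "Perform Score Comparisons" step runs the comparator against the freshly built score, and the "Update PR with Score Comparisons" step posts the generated comparison-summary.md back to the pull request via mshick/add-pr-comment@v2. As a rough local-equivalent sketch (not part of this commit; it assumes the data_pipeline package is installed and a local score CSV has already been generated), the same command can be driven through click's test runner:

# Sketch only: run the comparison the CI step performs, but from a local REPL.
from click.testing import CliRunner
from data_pipeline.comparator import cli  # click group added in this commit

runner = CliRunner()
# Equivalent to: poetry run python3 data_pipeline/comparator.py compare-score -v 1.0
result = runner.invoke(cli, ["compare-score", "--compare-to-version", "1.0"])
assert result.exit_code == 0  # summary lands in data/tmp/Comparator/Score/comparison-summary.md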

View file

@ -23,7 +23,7 @@ GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_XLS=downloadable/1.0-communities.xlsx
GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_CSV=downloadable/1.0-communities.csv
GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_PDF=downloadable/1.0-communities-list.pdf
GATSBY_FILE_DL_PATH_1_0_SHAPE_FILE_ZIP=downloadable/1.0-shapefile-codebook.zip
GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF=downloadable/cejst-instructions-for-federal-agencies.pdf
GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF=downloadable/CEQ-CEJST-Instructions.pdf
GATSBY_FILE_DL_PATH_1_0_COMP_CHART_PDF=downloadable/total-comparison-chart.pdf
GATSBY_FILE_DL_PATH_1_0_TSD_PDF=downloadable/1.0-cejst-technical-support-document.pdf
GATSBY_FILE_DL_PATH_1_0_TOOL_COMP_PDF=downloadable/1.0-climate-and-environmental-burden-tool-comparison.pdf

View file

@ -20,7 +20,7 @@ GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_XLS=downloadable/1.0-communities.xlsx
GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_CSV=downloadable/1.0-communities.csv
GATSBY_FILE_DL_PATH_1_0_COMMUNITIES_LIST_PDF=downloadable/1.0-communities-list.pdf
GATSBY_FILE_DL_PATH_1_0_SHAPE_FILE_ZIP=downloadable/1.0-shapefile-codebook.zip
GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF=downloadable/cejst-instructions-for-federal-agencies.pdf
GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF=downloadable/CEQ-CEJST-Instructions.pdf
GATSBY_FILE_DL_PATH_1_0_COMP_CHART_PDF=downloadable/total-comparison-chart.pdf
GATSBY_FILE_DL_PATH_1_0_TSD_PDF=downloadable/1.0-cejst-technical-support-document.pdf
GATSBY_FILE_DL_PATH_1_0_TOOL_COMP_PDF=downloadable/1.0-climate-and-environmental-burden-tool-comparison.pdf

View file

@ -20,7 +20,7 @@ import * as constants from '../../data/constants';
import * as EXPLORE_COPY from '../../data/copy/explore';
// @ts-ignore
import mailIcon from '/node_modules/uswds/dist/img/usa-icons/mail_outline.svg';
import launchIcon from '/node_modules/uswds/dist/img/usa-icons/launch.svg';
interface IAreaDetailProps {
properties: constants.J40Properties,
@ -886,7 +886,7 @@ const AreaDetail = ({properties}: IAreaDetailProps) => {
</div>
<img
className={styles.buttonImage}
src={mailIcon}
src={launchIcon}
alt={intl.formatMessage(EXPLORE_COPY.COMMUNITY.SEND_FEEDBACK.IMG_ICON.ALT_TAG)}
/>
</div>

View file

@ -254,7 +254,7 @@ exports[`rendering of the Categories checks if component renders 1`] = `
>
historic underinvestment
</a>
OR are at or above the 90th percentile for the
OR are at or above the 90th percentile for
<a
class="usa-link"
href="#house-burden"

View file

@ -43,9 +43,9 @@ exports[`rendering of indicator dataset card checks if component renders 1`] = `
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Census
U.S. Census
</li>
<li>
<span>

View file

@ -12,3 +12,7 @@
justify-content: flex-end;
@include u-margin-bottom(4);
}
.alignItemsCenter {
align-items: center;
}

View file

@ -2,6 +2,7 @@ declare namespace DatasetContainerScssNamespace {
export interface IDatasetContainerScss {
datasetCardsContainer: string;
returnToTop: string;
alignItemsCenter: string;
}
}

View file

@ -27,9 +27,10 @@ const DatasetContainer = () => {
</Grid>
</Grid>
<Grid row gap>
<Grid row gap className={styles.alignItemsCenter}>
<Grid desktop={{col: 7}}>
<p>{intl.formatMessage(METHODOLOGY_COPY.DATASETS.INFO)}</p>
<p>{intl.formatMessage(METHODOLOGY_COPY.DATASETS.CENSUS_DISCLAIMER)}</p>
</Grid>
<Grid desktop={{col: 1}}>
</Grid>

View file

@ -35,6 +35,11 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
The tool's datasets are public and consistent nationwide. They come from different sources and are high quality. The Council on Environmental Quality (CEQ) chose them based on relevance, availability, and quality. They identify climate, environmental, and other burdens on communities.
</p>
<p>
This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
</p>
</div>
<div
@ -114,9 +119,9 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Census
U.S. Census
</li>
<li>
<span>
@ -166,7 +171,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -217,7 +222,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Federal Emergency Management Agency (FEMA)
</li>
@ -268,7 +273,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Federal Emergency Management Agency (FEMA)
</li>
@ -331,7 +336,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Federal Emergency Management Agency (FEMA)
</li>
@ -388,7 +393,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
First Street Foundation
</li>
@ -445,7 +450,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
First Street Foundation
</li>
@ -496,7 +501,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Energy (DOE)
</li>
@ -547,7 +552,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA) Office of Air and Radiation (OAR)
</li>
@ -599,7 +604,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -650,7 +655,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -701,7 +706,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -763,7 +768,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -845,7 +850,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
National Community Reinvestment Coalition (NCRC)
</li>
@ -896,7 +901,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Housing and Urban Development (HUD)
</li>
@ -954,7 +959,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Data from
@ -1043,7 +1048,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Housing and Urban Development (HUD)
</li>
@ -1094,7 +1099,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -1151,7 +1156,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of the Interior (DOI)
</li>
@ -1208,7 +1213,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Army Corps of Engineers
</li>
@ -1259,7 +1264,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -1314,7 +1319,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -1365,7 +1370,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -1416,7 +1421,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -1484,7 +1489,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Transportation (DOT)
</li>
@ -1535,7 +1540,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Transportation (DOT)
</li>
@ -1592,7 +1597,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -1646,7 +1651,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -1697,7 +1702,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -1770,7 +1775,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -1843,7 +1848,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -1916,7 +1921,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -1989,7 +1994,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -2081,7 +2086,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Bureau of Indian Affairs (BIA)
</li>

View file

@ -1,7 +1,6 @@
@use '../../styles/design-system.scss' as *;
.datasetsButtonContainer{
@include u-margin-top(3);
@include u-height(6);
z-index: 2;

View file

@ -343,7 +343,7 @@ exports[`test rendering of Indicator value sub-text renders the "above 90 percen
exports[`test rendering of Indicator value sub-text renders the "below 90 percentile" 1`] = `
<DocumentFragment>
<div>
below 90
not above 90
<sup
style="top: -0.2em;"
>

View file

@ -87,9 +87,9 @@ const ReleaseUpdate = ({}: IReleaseUpdateProps) => {
<div>
<ul>
<li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B1}</li>
<li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2}</li>
<ul>
<li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2}</li>
<li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_1}</li>
<ul>
<li>{DOWNLOAD_COPY.RELEASE_1_0.SECTION1_B2_1_1}</li>

View file

@ -38,10 +38,10 @@ exports[`rendering of ReleaseUpdate Component checks if component renders 1`] =
locations of Alaska Native Villages using data from the Bureau of Indian
Affairs at the U.S. Department of the Interior
</li>
<ul>
<li>
Added new data for indicators of burden
</li>
<ul>
<li>
Climate change
</li>

View file

@ -12,6 +12,8 @@ export const FAQS_LINK = 'https://www.whitehouse.gov/wp-content/uploads/2022/02/
export const FED_RECOGNIZED_INDIAN_ENTITIES = `https://www.federalregister.gov/documents/2022/01/28/2022-01789/indian-entities-recognized-by-and-eligible-to-receive-services-from-the-united-states-bureau-of`;
export const EJSCREEN = 'https://www.epa.gov/ejscreen/how-does-epa-use-ejscreen';
export const CEJST_INSTRUCT = `https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf`;
export const CEJST_MEMO = `https://www.whitehouse.gov/wp-content/uploads/2023/01/M-23-09_Signed_CEQ_CPO.pdf`;
export const PAGE = defineMessages({
TITLE: {
id: 'about.page.title.text',
@ -25,7 +27,7 @@ export const CONTENT = {
<FormattedMessage
id={'about.page.paragraph.1'}
defaultMessage={`
In January of 2020, President Biden issued <link1>Executive Order 14008</link1>. The order directed the Council on Environmental Quality (CEQ) to develop a new tool. This tool is called the Climate and Economic Justice Screening Tool. The tool has an interactive map and uses datasets that are indicators of burdens in eight categories: climate change, energy, health, housing, legacy pollution, transportation, water and wastewater, and workforce development. The tool uses this information to identify communities that are experiencing these burdens. These are the communities that are disadvantaged because they are overburdened and underserved.
In January of 2021, President Biden issued <link1>Executive Order 14008</link1>. The order directed the Council on Environmental Quality (CEQ) to develop a new tool. This tool is called the Climate and Economic Justice Screening Tool. The tool has an interactive map and uses datasets that are indicators of burdens in eight categories: climate change, energy, health, housing, legacy pollution, transportation, water and wastewater, and workforce development. The tool uses this information to identify communities that are experiencing these burdens. These are the communities that are disadvantaged because they are overburdened and underserved.
`}
description={'Navigate to the About page. This is the paragraph 1'}
@ -58,17 +60,23 @@ export const CONTENT = {
<FormattedMessage
id={'about.page.list.item.1'}
defaultMessage={`
Addendum to the Justice40 Initiative Interim Guidance on Using CEJST
<link1>Memorandum</link1> on Using the CEJST for the Justice40 Initiative
`}
description={'Navigate to the About page. This is the list item 1'}
values={{
link1: linkFn(CEJST_MEMO, false, true),
}}
/>,
LI2:
<FormattedMessage
id={'about.page.list.item.2'}
defaultMessage={`
Instructions to Federal Agencies on Using the CEJST
<link1>Instructions</link1> to Federal agencies on using the CEJST
`}
description={'Navigate to the About page. This is the list item 2'}
values={{
link1: linkFn(CEJST_INSTRUCT, false, true),
}}
/>,
PARA4:
<FormattedMessage

View file

@ -70,7 +70,7 @@ export const DOWNLOAD_FILES = {
LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
},
INSTRUCTIONS: {
SIZE: .8, // KB // Todo: Update when actual file is uploaded
SIZE: 228.4, // KB // Todo: Update when actual file is uploaded
URL: getDownloadFileUrl(process.env.GATSBY_FILE_DL_PATH_1_0_INSTRUCT_PDF, false),
LAST_UPDATED: COMMON_COPY.METH_1_0_RELEASE_DATE,
},
@ -598,5 +598,19 @@ export const DOWNLOAD_LINKS = {
/>,
}}
/>,
LINK6: <FormattedMessage
id={'download.page.download.file.6'}
defaultMessage={`<link6>Instructions to Federal agencies on using the CEJST</link6> (.pdf {instructions})`}
description={'Navigate to the download page. This is sixth download file link'}
values={{
link6: COMMON_COPY.linkFn(DOWNLOAD_FILES.NARWAL.INSTRUCTIONS.URL, false, true),
instructions: <FormattedNumber
value={DOWNLOAD_FILES.NARWAL.INSTRUCTIONS.SIZE}
style="unit"
unit="kilobyte"
unitDisplay="narrow"
/>,
}}
/>,
// };
};

View file

@ -1090,7 +1090,7 @@ export const SIDE_PANEL_VALUES = {
BELOW: <FormattedMessage
id={'explore.map.page.side.panel.indicator.value.subtext.below'}
description={'indicating below threshold'}
defaultMessage={`below `}
defaultMessage={`not above `}
/>,
PERCENT: <FormattedMessage
id={'explore.map.page.side.panel.indicator.value.subtext.percent'}

View file

@ -4,7 +4,7 @@ import {defineMessages, FormattedMessage} from 'gatsby-plugin-intl';
import LinkTypeWrapper from '../../components/LinkTypeWrapper';
import {EJSCREEN, EXEC_ORDER_LINK, FED_RECOGNIZED_INDIAN_ENTITIES} from './about';
import {EJSCREEN, EXEC_ORDER_LINK, FED_RECOGNIZED_INDIAN_ENTITIES, CEJST_INSTRUCT, CEJST_MEMO} from './about';
import {boldFn, linkFn, FEEDBACK_EMAIL} from './common';
import {PAGES_ENDPOINTS} from '../constants';
import {EXPLORE_PAGE_LINKS} from './explore';
@ -270,13 +270,20 @@ export const FAQ_ANSWERS = {
/>,
Q1_P3_1: <FormattedMessage
id={ 'faqs.page.answers.Q1_P3_1'}
defaultMessage={ 'Addendum to the Justice40 Interim Guidance'}
defaultMessage={ '<link1>Memorandum</link1> on Using the CEJST for the Justice40 Initiative'}
description={ 'Navigate to the FAQs page, this will be an answer, Q1_P3_1'}
values={{
link1: linkFn(CEJST_MEMO, false, true),
}}
/>,
Q1_P3_2: <FormattedMessage
id={ 'faqs.page.answers.Q1_P3_2'}
defaultMessage={ 'Instructions to Federal Agencies on Using the CEJST'}
defaultMessage={ '<link1>Instructions</link1> to Federal agencies on using the CEJST'}
description={ 'Navigate to the FAQs page, this will be an answer, Q1_P3_2'}
values={{
link1: linkFn(CEJST_INSTRUCT, false, true),
}}
/>,
Q1_P4: <FormattedMessage
id={ 'faqs.page.answers.Q1_P4'}

View file

@ -255,7 +255,7 @@ export const CATEGORIES = {
/>,
IF: <FormattedMessage
id={'methodology.page.indicator.categories.afford.house.if'}
defaultMessage={`Experienced <link0>historic underinvestment</link0> OR are at or above the 90th percentile for the <link1>housing cost</link1> OR <link2>lack of green space</link2> OR <link3>lack of indoor plumbing</link3> OR <link4>lead paint</link4>`}
defaultMessage={`Experienced <link0>historic underinvestment</link0> OR are at or above the 90th percentile for <link1>housing cost</link1> OR <link2>lack of green space</link2> OR <link3>lack of indoor plumbing</link3> OR <link4>lead paint</link4>`}
description={'Navigate to the methodology page. Navigate to the category section. This will set the if portion of the formula'}
values={{
link0: simpleLink('#hist-underinv'),
@ -403,6 +403,13 @@ export const DATASETS = defineMessages({
`,
description: 'Navigate to the Methodology page. This is the description of the dataset section',
},
CENSUS_DISCLAIMER: {
id: 'methodology.page.datasetContainer.census.disclaimer',
defaultMessage: `
This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
`,
description: 'Navigate to the Methodology page. This is the disclaimer when using census data',
},
ADDITIONAL_HEADING: {
id: 'methodology.page.datasetContainer.additional.heading',
defaultMessage: 'Additional Indicators',
@ -434,7 +441,7 @@ export const DATASET_CARD_LABELS = defineMessages({
},
RESP_PARTY: {
id: 'methodology.page.datasetCard.responsible.party',
defaultMessage: 'Responsible Party: ',
defaultMessage: 'Responsible party: ',
description: 'Navigate to the Methodology page. This is the label associated with explaining the card',
},
DATE_RANGE: {
@ -857,7 +864,7 @@ export const INDICATORS = [
}}
/>,
usedIn: CATEGORIES.ALL,
responsibleParty: RESPONSIBLE_PARTIES.CEN,
responsibleParty: RESPONSIBLE_PARTIES.CENSUS,
sources: [
{
source: SOURCE_LINKS.CENSUS_ACS_15_19,

View file

@ -56,15 +56,15 @@
"description": "Navigate to the About page. This is the join the community heading"
},
"about.page.list.item.1": {
"defaultMessage": "Addendum to the Justice40 Initiative Interim Guidance on Using CEJST",
"defaultMessage": "<link1>Memorandum</link1> on Using the CEJST for the Justice40 Initiative",
"description": "Navigate to the About page. This is the list item 1"
},
"about.page.list.item.2": {
"defaultMessage": "Instructions to Federal Agencies on Using the CEJST",
"defaultMessage": "<link1>Instructions</link1> to Federal agencies on using the CEJST",
"description": "Navigate to the About page. This is the list item 2"
},
"about.page.paragraph.1": {
"defaultMessage": "In January of 2020, President Biden issued <link1>Executive Order 14008</link1>. The order directed the Council on Environmental Quality (CEQ) to develop a new tool. This tool is called the Climate and Economic Justice Screening Tool. The tool has an interactive map and uses datasets that are indicators of burdens in eight categories: climate change, energy, health, housing, legacy pollution, transportation, water and wastewater, and workforce development. The tool uses this information to identify communities that are experiencing these burdens. These are the communities that are disadvantaged because they are overburdened and underserved.",
"defaultMessage": "In January of 2021, President Biden issued <link1>Executive Order 14008</link1>. The order directed the Council on Environmental Quality (CEQ) to develop a new tool. This tool is called the Climate and Economic Justice Screening Tool. The tool has an interactive map and uses datasets that are indicators of burdens in eight categories: climate change, energy, health, housing, legacy pollution, transportation, water and wastewater, and workforce development. The tool uses this information to identify communities that are experiencing these burdens. These are the communities that are disadvantaged because they are overburdened and underserved.",
"description": "Navigate to the About page. This is the paragraph 1"
},
"about.page.paragraph.2": {
@ -307,6 +307,10 @@
"defaultMessage": "<link5>How to use the list of communities</link5> (.pdf {howToCommFileSize})",
"description": "Navigate to the download page. This is fifth download file link"
},
"download.page.download.file.6": {
"defaultMessage": "<link6>Instructions to Federal agencies on using the CEJST</link6> (.pdf {instructions})",
"description": "Navigate to the download page. This is sixth download file link"
},
"download.page.files.section.title": {
"defaultMessage": "Version {version} file formats",
"description": "Navigate to the download page. This is first download file link"
@ -1200,7 +1204,7 @@
"description": "indicating above threshold"
},
"explore.map.page.side.panel.indicator.value.subtext.below": {
"defaultMessage": "below",
"defaultMessage": "not above",
"description": "indicating below threshold"
},
"explore.map.page.side.panel.indicator.value.subtext.percent": {
@ -1680,11 +1684,11 @@
"description": "Navigate to the FAQs page, this will be an answer, Q1_P3"
},
"faqs.page.answers.Q1_P3_1": {
"defaultMessage": "Addendum to the Justice40 Interim Guidance",
"defaultMessage": "<link1>Memorandum</link1> on Using the CEJST for the Justice40 Initiative",
"description": "Navigate to the FAQs page, this will be an answer, Q1_P3_1"
},
"faqs.page.answers.Q1_P3_2": {
"defaultMessage": "Instructions to Federal Agencies on Using the CEJST",
"defaultMessage": "<link1>Instructions</link1> to Federal agencies on using the CEJST",
"description": "Navigate to the FAQs page, this will be an answer, Q1_P3_2"
},
"faqs.page.answers.Q1_P4": {
@ -2372,7 +2376,7 @@
"description": "Navigate to the Methodology page. This is the label associated with a NEW card"
},
"methodology.page.datasetCard.responsible.party": {
"defaultMessage": "Responsible Party:",
"defaultMessage": "Responsible party:",
"description": "Navigate to the Methodology page. This is the label associated with explaining the card"
},
"methodology.page.datasetCard.source": {
@ -2395,6 +2399,10 @@
"defaultMessage": "Share data sources with CEQ",
"description": "Navigate to the Methodology page. This is the section heading of which datasets are used in cumulative score with a button labeled: Share data sources with CEQ"
},
"methodology.page.datasetContainer.census.disclaimer": {
"defaultMessage": "This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.",
"description": "Navigate to the Methodology page. This is the disclaimer when using census data"
},
"methodology.page.datasetContainer.heading": {
"defaultMessage": "Datasets used in beta methodology",
"description": "Navigate to the Methodology page. This is the section heading of which datasets are used in cumulative score"
@ -2428,7 +2436,7 @@
"description": "Navigate to the methodology page. This is the methodology page header text"
},
"methodology.page.indicator.categories.afford.house.if": {
"defaultMessage": "Experienced <link0>historic underinvestment</link0> OR are at or above the 90th percentile for the <link1>housing cost</link1> OR <link2>lack of green space</link2> OR <link3>lack of indoor plumbing</link3> OR <link4>lead paint</link4>",
"defaultMessage": "Experienced <link0>historic underinvestment</link0> OR are at or above the 90th percentile for <link1>housing cost</link1> OR <link2>lack of green space</link2> OR <link3>lack of indoor plumbing</link3> OR <link4>lead paint</link4>",
"description": "Navigate to the methodology page. Navigate to the category section. This will set the if portion of the formula"
},
"methodology.page.indicator.categories.afford.housing.methodology": {

View file

@ -8,7 +8,7 @@ import {Grid} from '@trussworks/react-uswds';
import HowYouCanHelp from '../components/HowYouCanHelp';
import J40MainGridContainer from '../components/J40MainGridContainer';
import Layout from '../components/layout';
import PublicEngageButton from '../components/PublicEngageButton';
import DatasetsButton from '../components/DatasetsButton';
import SubPageNav from '../components/SubPageNav';
import * as ABOUT_COPY from '../data/copy/about';
@ -43,7 +43,7 @@ const AboutPage = ({location}: IAboutPageProps) => {
<section className={'page-heading'}>
<h1 data-cy={'about-page-heading'}>{intl.formatMessage(ABOUT_COPY.PAGE.TITLE)}</h1>
<PublicEngageButton />
<DatasetsButton href= {'https://www.surveymonkey.com/r/6G9TQJ8'} />
</section>
<Grid row gap className={'j40-mb5-mt3'}>
@ -57,13 +57,13 @@ const AboutPage = ({location}: IAboutPageProps) => {
<p>
{ABOUT_COPY.CONTENT.PARA2}
</p>
{/* <div className={'j40-p-tag'}>
<div className={'j40-p-tag'}>
{ABOUT_COPY.CONTENT.PARA3}
<ul>
<li> {ABOUT_COPY.CONTENT.LI1}</li>
<li> {ABOUT_COPY.CONTENT.LI2}</li>
</ul>
</div> */}
</div>
<p>
{ABOUT_COPY.CONTENT.PARA4}
</p>

View file

@ -5,7 +5,7 @@ import {useIntl, FormattedMessage} from 'gatsby-plugin-intl';
import J40MainGridContainer from '../components/J40MainGridContainer';
import Layout from '../components/layout';
import LinkTypeWrapper from '../components/LinkTypeWrapper';
import PublicEngageButton from '../components/PublicEngageButton';
import DatasetsButton from '../components/DatasetsButton';
import * as CONTACT_COPY from '../data/copy/contact';
import {FEEDBACK_EMAIL} from '../data/copy/common';
@ -24,7 +24,7 @@ const ContactPage = ({location}: IContactPageProps) => {
<section className={'page-heading'}>
<h1>{intl.formatMessage(CONTACT_COPY.PAGE_INTRO.PAGE_HEADING)}</h1>
<PublicEngageButton />
<DatasetsButton href= {'https://www.surveymonkey.com/r/6G9TQJ8'} />
</section>
<Grid row gap={6}>

View file

@ -5,7 +5,7 @@ import {useWindowSize} from 'react-use';
import J40MainGridContainer from '../components/J40MainGridContainer';
import Layout from '../components/layout';
import PublicEngageButton from '../components/PublicEngageButton';
import DatasetsButton from '../components/DatasetsButton';
import ReleaseUpdate from '../components/ReleaseUpdate';
import SubPageNav from '../components/SubPageNav';
@ -26,7 +26,7 @@ const DownloadsPage = ({location}: IDownloadsPageProps) => {
<section className={'page-heading'}>
<h1>{intl.formatMessage(DOWNLOADS_COPY.PAGE_INTRO.PAGE_HEADING1)}</h1>
<PublicEngageButton />
<DatasetsButton href= {'https://www.surveymonkey.com/r/6G9TQJ8'} />
</section>
<Grid row gap className={'j40-mb5-mt3'}>
@ -54,6 +54,9 @@ const DownloadsPage = ({location}: IDownloadsPageProps) => {
<p>
{DOWNLOADS_COPY.DOWNLOAD_LINKS.LINK4}
</p>
<p>
{DOWNLOADS_COPY.DOWNLOAD_LINKS.LINK6}
</p>
<p>
{DOWNLOADS_COPY.DOWNLOAD_LINKS.LINK5}
</p>

View file

@ -6,7 +6,7 @@ import {useWindowSize} from 'react-use';
import J40MainGridContainer from '../components/J40MainGridContainer';
import Layout from '../components/layout';
import PublicEngageButton from '../components/PublicEngageButton';
import DatasetsButton from '../components/DatasetsButton';
import SubPageNav from '../components/SubPageNav';
import {USWDS_BREAKPOINTS} from '../data/constants';
@ -47,11 +47,11 @@ const FAQPage = ({location}: IFAQPageProps) => {
<>
<p key={1}>{FAQS_COPY.FAQ_ANSWERS.Q1_P1}</p>
<p>{FAQS_COPY.FAQ_ANSWERS.Q1_P2}</p>
{/* <p>{FAQS_COPY.FAQ_ANSWERS.Q1_P3}</p>
<p>{FAQS_COPY.FAQ_ANSWERS.Q1_P3}</p>
<ul>
<li>{FAQS_COPY.FAQ_ANSWERS.Q1_P3_1}</li>
<li>{FAQS_COPY.FAQ_ANSWERS.Q1_P3_2}</li>
</ul> */}
</ul>
<p>{FAQS_COPY.FAQ_ANSWERS.Q1_P4}</p>
</>
),
@ -237,7 +237,7 @@ const FAQPage = ({location}: IFAQPageProps) => {
<section className={'page-heading'}>
<h1>{intl.formatMessage(FAQS_COPY.PAGE_INTRO.PAGE_TILE)}</h1>
<PublicEngageButton />
<DatasetsButton href= {'https://www.surveymonkey.com/r/6G9TQJ8'} />
</section>
<Grid row gap className={'j40-mb5-mt3'}>

View file

@ -6,7 +6,7 @@ import ExploreDataBox from '../components/ExploreDataBox';
import J40Map from '../components/J40Map';
import J40MainGridContainer from '../components/J40MainGridContainer';
import Layout from '../components/layout';
import PublicEngageButton from '../components/PublicEngageButton';
import DatasetsButton from '../components/DatasetsButton';
import * as EXPLORE_COPY from '../data/copy/explore';
@ -26,7 +26,7 @@ const ExporeToolPage = ({location}: IMapPageProps) => {
<section className={'page-heading'}>
<h1>{intl.formatMessage(EXPLORE_COPY.PAGE_INTRO.PAGE_HEADING)}</h1>
<PublicEngageButton />
<DatasetsButton href= {'https://www.surveymonkey.com/r/6G9TQJ8'} />
</section>
<Grid row gap className={'j40-mb5-mt3'}>

View file

@ -8,7 +8,7 @@ import DatasetContainer from '../components/DatasetContainer';
import J40MainGridContainer from '../components/J40MainGridContainer';
import Layout from '../components/layout';
import SubPageNav from '../components/SubPageNav';
import PublicEngageButton from '../components/PublicEngageButton';
import DatasetsButton from '../components/DatasetsButton';
import {USWDS_BREAKPOINTS} from '../data/constants';
import * as METHODOLOGY_COPY from '../data/copy/methodology';
@ -29,7 +29,7 @@ const IndexPage = ({location}: MethodPageProps) => {
<section className={'page-heading'}>
<h1>{intl.formatMessage(METHODOLOGY_COPY.PAGE.HEADING)}</h1>
<PublicEngageButton />
<DatasetsButton href= {'https://www.surveymonkey.com/r/6G9TQJ8'} />
</section>
<Grid row gap className={'j40-mt3'}>

View file

@ -6,7 +6,7 @@ import {Card, CardBody, CardFooter, CardHeader, Grid} from '@trussworks/react-us
import DownloadButton from '../components/DownloadButton';
import J40MainGridContainer from '../components/J40MainGridContainer';
import Layout from '../components/layout';
import PublicEngageButton from '../components/PublicEngageButton';
import DatasetsButton from '../components/DatasetsButton';
import SubPageNav from '../components/SubPageNav';
import * as PREV_VER_COPY from '../data/copy/previousVer';
@ -33,7 +33,7 @@ const PreviousVersions = ({location}: IPreviousVersionsProps) => {
<section className={'page-heading'}>
<h1 data-cy={'about-page-heading'}>{intl.formatMessage(PREV_VER_COPY.PAGE.TITLE)}</h1>
<PublicEngageButton />
<DatasetsButton href= {'https://www.surveymonkey.com/r/6G9TQJ8'} />
</section>
<Grid row gap className={'j40-mb5-mt3'}>

View file

@ -392,27 +392,23 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
>
About
</h1>
<div>
<a
href="/en/public-engagement"
href="https://www.surveymonkey.com/r/6G9TQJ8"
rel="noreferrer"
target="_blank"
>
<button
class="usa-button"
data-testid="button"
type="button"
>
<div>
Share data sources with CEQ
<img
alt="an icon that represents a calendar"
alt="launch icon"
src="test-file-stub"
/>
<div>
Public engagement
</div>
</div>
</button>
</a>
</div>
</section>
<div
class="grid-row grid-gap j40-mb5-mt3"
@ -425,7 +421,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<section>
<p>
In January of 2020, President Biden issued
In January of 2021, President Biden issued
<a
class="usa-link usa-link--external"
data-cy=""
@ -454,6 +450,45 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
. The Justice40 Initiative seeks to deliver 40% of the overall benefits of investments in climate, clean energy, and related areas to disadvantaged communities.
</p>
<div
class="j40-p-tag"
>
Federal agencies should also use the following:
<ul>
<li>
<a
class="usa-link usa-link--external"
data-cy=""
href="https://www.whitehouse.gov/wp-content/uploads/2023/01/M-23-09_Signed_CEQ_CPO.pdf"
rel="noreferrer"
target="_blank"
>
Memorandum
</a>
on Using the CEJST for the Justice40 Initiative
</li>
<li>
<a
class="usa-link usa-link--external"
data-cy=""
href="https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf"
rel="noreferrer"
target="_blank"
>
Instructions
</a>
to Federal agencies on using the CEJST
</li>
</ul>
</div>
<p>
CEQ will update the tool each year based on public feedback, research, and the availability of new data. The current version of the tool is version 1.0.

View file

@ -390,27 +390,23 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<h1>
Contact
</h1>
<div>
<a
href="/en/public-engagement"
href="https://www.surveymonkey.com/r/6G9TQJ8"
rel="noreferrer"
target="_blank"
>
<button
class="usa-button"
data-testid="button"
type="button"
>
<div>
Share data sources with CEQ
<img
alt="an icon that represents a calendar"
alt="launch icon"
src="test-file-stub"
/>
<div>
Public engagement
</div>
</div>
</button>
</a>
</div>
</section>
<div
class="grid-row grid-gap-6"

View file

@ -390,27 +390,23 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<h1>
Downloads
</h1>
<div>
<a
href="/en/public-engagement"
href="https://www.surveymonkey.com/r/6G9TQJ8"
rel="noreferrer"
target="_blank"
>
<button
class="usa-button"
data-testid="button"
type="button"
>
<div>
Share data sources with CEQ
<img
alt="an icon that represents a calendar"
alt="launch icon"
src="test-file-stub"
/>
<div>
Public engagement
</div>
</div>
</button>
</a>
</div>
</section>
<div
class="grid-row grid-gap j40-mb5-mt3"
@ -462,10 +458,10 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
locations of Alaska Native Villages using data from the Bureau of Indian
Affairs at the U.S. Department of the Interior
</li>
<ul>
<li>
Added new data for indicators of burden
</li>
<ul>
<li>
Climate change
</li>
@ -727,6 +723,18 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</a>
(.pdf 4.4MB)
</p>
<p>
<a
class="usa-link usa-link--external"
data-cy=""
href="//"
rel="noreferrer"
target="_blank"
>
Instructions to Federal agencies on using the CEJST
</a>
(.pdf 228.4kB)
</p>
<p>
<a
class="usa-link usa-link--external"

View file

@ -390,27 +390,23 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<h1>
Frequently asked questions
</h1>
<div>
<a
href="/en/public-engagement"
href="https://www.surveymonkey.com/r/6G9TQJ8"
rel="noreferrer"
target="_blank"
>
<button
class="usa-button"
data-testid="button"
type="button"
>
<div>
Share data sources with CEQ
<img
alt="an icon that represents a calendar"
alt="launch icon"
src="test-file-stub"
/>
<div>
Public engagement
</div>
</div>
</button>
</a>
</div>
</section>
<div
class="grid-row grid-gap j40-mb5-mt3"
@ -462,6 +458,35 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<p>
Federal agencies will use the tool for the Justice40 Initiative. It will help them identify disadvantaged communities that should receive 40% of the overall benefits of programs included in the Justice40 Initiative. The Justice40 Initiative seeks to deliver 40% of the overall benefits in climate, clean energy, and other related areas to disadvantaged communities.
</p>
<p>
Other useful links for Federal agencies:
</p>
<ul>
<li>
<a
class="usa-link usa-link--external"
data-cy=""
href="https://www.whitehouse.gov/wp-content/uploads/2023/01/M-23-09_Signed_CEQ_CPO.pdf"
rel="noreferrer"
target="_blank"
>
Memorandum
</a>
on Using the CEJST for the Justice40 Initiative
</li>
<li>
<a
class="usa-link usa-link--external"
data-cy=""
href="https://static-data-screeningtool.geoplatform.gov/data-versions/1.0/data/score/downloadable/CEQ-CEJST-Instructions.pdf"
rel="noreferrer"
target="_blank"
>
Instructions
</a>
to Federal agencies on using the CEJST
</li>
</ul>
<p>
The public can find communities of interest and provide feedback. This feedback will be used to improve the tool.
</p>

View file

@ -390,27 +390,23 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
<h1>
Methodology
</h1>
<div>
<a
href="/en/public-engagement"
href="https://www.surveymonkey.com/r/6G9TQJ8"
rel="noreferrer"
target="_blank"
>
<button
class="usa-button"
data-testid="button"
type="button"
>
<div>
Share data sources with CEQ
<img
alt="an icon that represents a calendar"
alt="launch icon"
src="test-file-stub"
/>
<div>
Public engagement
</div>
</div>
</button>
</a>
</div>
</section>
<div
class="grid-row grid-gap j40-mt3"
@ -702,7 +698,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
>
historic underinvestment
</a>
OR are at or above the 90th percentile for the
OR are at or above the 90th percentile for
<a
class="usa-link"
href="#house-burden"
@ -1048,6 +1044,11 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
The tool's datasets are public and consistent nationwide. They come from different sources and are high quality. The Council on Environmental Quality (CEQ) chose them based on relevance, availability, and quality. They identify climate, environmental, and other burdens on communities.
</p>
<p>
This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
</p>
</div>
<div
@ -1127,9 +1128,9 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Census
U.S. Census
</li>
<li>
<span>
@ -1179,7 +1180,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -1230,7 +1231,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Federal Emergency Management Agency (FEMA)
</li>
@ -1281,7 +1282,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Federal Emergency Management Agency (FEMA)
</li>
@ -1344,7 +1345,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Federal Emergency Management Agency (FEMA)
</li>
@ -1401,7 +1402,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
First Street Foundation
</li>
@ -1458,7 +1459,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
First Street Foundation
</li>
@ -1509,7 +1510,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Energy (DOE)
</li>
@ -1560,7 +1561,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA) Office of Air and Radiation (OAR)
</li>
@ -1612,7 +1613,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -1663,7 +1664,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -1714,7 +1715,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -1776,7 +1777,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Centers for Disease Control and Prevention (CDC)
</li>
@ -1858,7 +1859,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
National Community Reinvestment Coalition (NCRC)
</li>
@ -1909,7 +1910,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Housing and Urban Development (HUD)
</li>
@ -1967,7 +1968,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Data from
@ -2056,7 +2057,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Housing and Urban Development (HUD)
</li>
@ -2107,7 +2108,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -2164,7 +2165,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of the Interior (DOI)
</li>
@ -2221,7 +2222,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Army Corps of Engineers
</li>
@ -2272,7 +2273,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -2327,7 +2328,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -2378,7 +2379,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -2429,7 +2430,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -2497,7 +2498,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Transportation (DOT)
</li>
@ -2548,7 +2549,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Department of Transportation (DOT)
</li>
@ -2605,7 +2606,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -2659,7 +2660,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Environmental Protection Agency (EPA)
</li>
@ -2710,7 +2711,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -2783,7 +2784,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -2856,7 +2857,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -2929,7 +2930,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -3002,7 +3003,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
U.S. Census
</li>
@ -3094,7 +3095,7 @@ exports[`rendering of the DatasetContainer checks if various text fields are vis
</li>
<li>
<span>
Responsible Party:
Responsible party:
</span>
Bureau of Indian Affairs (BIA)
</li>

View file

@ -166,6 +166,7 @@ li.usa-nav__primary-item a:hover::after{
.page-heading {
display: flex;
justify-content: space-between;
align-items: flex-end;
@include at-media-max("tablet"){
flex-direction: column;

View file

@ -22,11 +22,14 @@ from data_pipeline.utils import downloadable_cleanup
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import score_folder_cleanup
from data_pipeline.utils import temp_folder_cleanup
from data_pipeline.utils import geo_score_folder_cleanup
logger = get_module_logger(__name__)
dataset_cli_help = "Grab the data from either 'local' for local access or 'aws' to retrieve from Justice40 S3 repository"
LOG_LINE_WIDTH = 60
@click.group()
def cli():
@ -36,30 +39,34 @@ def cli():
@cli.command(help="Clean up all census data folders")
def census_cleanup():
"""CLI command to clean up the census data folder"""
log_title("Clean Up Census Data")
data_path = settings.APP_ROOT / "data"
# census directories
logger.info("Initializing all census data")
log_info("Cleaning up all census data")
census_reset(data_path)
logger.info("Cleaned up all census data files")
log_goodbye()
sys.exit()
@cli.command(help="Clean up all data folders")
def data_cleanup():
"""CLI command to clean up the all the data folders"""
log_title("Clean Up Data ")
data_path = settings.APP_ROOT / "data"
log_info("Cleaning up all data folders")
census_reset(data_path)
data_folder_cleanup()
tribal_reset(data_path)
score_folder_cleanup()
temp_folder_cleanup()
geo_score_folder_cleanup()
logger.info("Cleaned up all data folders")
log_goodbye()
sys.exit()
@ -75,19 +82,19 @@ def data_cleanup():
def census_data_download(zip_compress):
"""CLI command to download all census shape files from the Census FTP and extract the geojson
to generate national and by state Census Block Group CSVs"""
logger.info("Initializing all census data")
log_title("Download Census Data ")
data_path = settings.APP_ROOT / "data"
census_reset(data_path)
logger.info("Downloading census data")
log_info("Downloading census data")
etl_runner("census")
if zip_compress:
log_info("Zipping census data")
zip_census_data()
logger.info("Completed downloading census data")
log_goodbye()
sys.exit()
@ -101,10 +108,14 @@ def census_data_download(zip_compress):
help=dataset_cli_help,
)
def pull_census_data(data_source: str):
logger.info("Pulling census data from %s", data_source)
log_title("Pull Census Data")
log_info(f"Pulling census data from {data_source}")
data_path = settings.APP_ROOT / "data" / "census"
check_census_data_source(data_path, data_source)
logger.info("Finished pulling census data")
log_goodbye()
sys.exit()
@ -127,8 +138,12 @@ def etl_run(dataset: str):
Returns:
None
"""
log_title("Run ETL")
log_info("Running dataset(s)")
etl_runner(dataset)
log_goodbye()
sys.exit()
@ -137,9 +152,15 @@ def etl_run(dataset: str):
)
def score_run():
"""CLI command to generate the score"""
log_title("Score", "Generate Score")
log_info("Cleaning up data folders")
score_folder_cleanup()
log_info("Generating score")
score_generate()
log_goodbye()
sys.exit()
@ -148,62 +169,25 @@ def score_run():
)
def score_full_run():
"""CLI command to run ETL and generate the score in one command"""
log_title("Score Full Run", "Run ETL and Generate Score (no tiles)")
log_info("Cleaning up data folders")
data_folder_cleanup()
score_folder_cleanup()
temp_folder_cleanup()
log_info("Running all ETLs")
etl_runner()
log_info("Generating score")
score_generate()
sys.exit()
@cli.command(help="Generate Geojson files with scores baked in")
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
def geo_score(data_source: str):
"""CLI command to combine score with GeoJSON data and generate low and high files
Args:
data_source (str): Source for the census data (optional)
Options:
- local: fetch census and score data from the local data directory
- aws: fetch census and score from AWS S3 J40 data repository
Returns:
None
"""
score_geo(data_source=data_source)
log_goodbye()
sys.exit()
@cli.command(
help="Generate map tiles. Pass -t to generate tribal layer as well.",
)
@click.option(
"-t",
"--generate-tribal-layer",
default=False,
required=False,
is_flag=True,
type=bool,
)
def generate_map_tiles(generate_tribal_layer):
"""CLI command to generate the map tiles"""
data_path = settings.APP_ROOT / "data"
generate_tiles(data_path, generate_tribal_layer)
sys.exit()
@cli.command(
help="Run etl_score_post to create score csv, tile csv, and downloadable zip",
help="Run etl_score_post to create score csv, tile csv, and downloadable zip"
)
@click.option(
"-s",
@ -225,9 +209,74 @@ def generate_score_post(data_source: str):
Returns:
None
"""
log_title(
"Generate Score Post ", "Create Score CSV, Tile CSV, Downloadable ZIP"
)
log_info("Cleaning up downloadable folder")
downloadable_cleanup()
log_info("Running score post activities")
score_post(data_source)
log_goodbye()
sys.exit()
@cli.command(help="Generate GeoJSON files with scores baked in")
@click.option(
"-s",
"--data-source",
default="local",
required=False,
type=str,
help=dataset_cli_help,
)
def geo_score(data_source: str):
"""CLI command to combine score with GeoJSON data and generate low and high files
Args:
data_source (str): Source for the census data (optional)
Options:
- local: fetch census and score data from the local data directory
- aws: fetch census and score from AWS S3 J40 data repository
Returns:
None
"""
log_title("Generate GeoJSON", "Combine Score and GeoJSON")
log_info("Cleaning up geo score folder")
geo_score_folder_cleanup()
log_info("Combining score with GeoJSON")
score_geo(data_source=data_source)
log_goodbye()
sys.exit()
@cli.command(
help="Generate map tiles. Pass -t to generate tribal layer as well.",
)
@click.option(
"-t",
"--generate-tribal-layer",
default=False,
required=False,
is_flag=True,
type=bool,
)
def generate_map_tiles(generate_tribal_layer):
"""CLI command to generate the map tiles"""
log_title("Generate Map Tiles")
data_path = settings.APP_ROOT / "data"
log_info("Generating tiles")
generate_tiles(data_path, generate_tribal_layer)
log_goodbye()
sys.exit()
@ -261,49 +310,74 @@ def data_full_run(check: bool, data_source: str):
Returns:
None
"""
log_title("Full Run", "Census DL, ETL, Score, Combine, Generate Tiles")
data_path = settings.APP_ROOT / "data"
if check:
if not check_first_run():
# check if the data full run has been run before
logger.info("*** The data full run was already executed")
log_info("The data full run was already executed")
sys.exit()
else:
# census directories
logger.info("*** Initializing all data folders")
log_info("Cleaning up data folders")
census_reset(data_path)
data_folder_cleanup()
score_folder_cleanup()
temp_folder_cleanup()
if data_source == "local":
logger.info("*** Downloading census data")
log_info("Downloading census data")
etl_runner("census")
logger.info("*** Running all ETLs")
log_info("Running all ETLs")
etl_runner()
logger.info("*** Generating Score")
log_info("Generating score")
score_generate()
logger.info("*** Running Post Score scripts")
log_info("Running post score")
downloadable_cleanup()
score_post(data_source)
logger.info("*** Combining Score with Census Geojson")
log_info("Combining score with census GeoJSON")
score_geo(data_source)
logger.info("*** Generating Map Tiles")
log_info("Generating map tiles")
generate_tiles(data_path, True)
log_info("Completing pipeline")
file = "first_run.txt"
cmd = f"touch {data_path}/{file}"
call(cmd, shell=True)
logger.info("*** Map data ready")
log_goodbye()
sys.exit()
def log_title(title: str, subtitle: str = None):
"""Logs a title in our fancy title format"""
logger.info("-" * LOG_LINE_WIDTH)
logger.info("")
logger.info(f"{title}")
if subtitle:
logger.info(f"{subtitle}")
logger.info("")
logger.info("-" * LOG_LINE_WIDTH)
logger.info("")
def log_info(info: str):
"""Logs a general informational message"""
logger.info(f"- {info}")
def log_goodbye():
"""Logs a goodbye message"""
logger.info("- Finished. Bye!")
if __name__ == "__main__":
cli()
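The application.py changes above thread three small helpers (log_title, log_info, log_goodbye) through every CLI command so each run prints a titled banner, dash-prefixed progress lines, and a closing message. A minimal sketch of the pattern a hypothetical new command would follow (the command name and body below are illustrative, not part of this commit):

# Illustrative only: a hypothetical command reusing the logging helpers
# introduced in application.py. Nothing named example_command exists in the repo.
import sys
import click
from data_pipeline.application import cli, log_title, log_info, log_goodbye

@cli.command(help="Hypothetical command following the new logging pattern")
def example_command():
    log_title("Example", "Demonstrate the shared logging helpers")
    log_info("Doing the actual work")
    # ... real work would go here ...
    log_goodbye()
    sys.exit()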

View file

@ -0,0 +1,174 @@
import sys
import click
import difflib
import pandas as pd
from data_pipeline.etl.score import constants
from data_pipeline.utils import get_module_logger, download_file_from_url
from data_pipeline.application import log_title, log_info, log_goodbye
logger = get_module_logger(__name__)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.width", 10000)
pd.set_option("display.colheader_justify", "left")
@click.group()
def cli():
"""
A helper tool to run comparisons between files in production and those
in the local file system.
"""
@cli.command(
help="Compare score stored in the AWS production environment to the production score. Defaults to checking against version 1.0.",
)
@click.option(
"-v",
"--compare-to-version",
default="1.0",
required=False,
type=str,
)
def compare_score(compare_to_version: str):
"""Compares the score in the production environment to the locally generated score. The
algorithm is pretty simple:
1. Fetch and load both scores into dataframes.
2. Round floats to a number of decimal places to account for differences in the machine
and python versions used to generate the scores. If we skip this step, there are usually
thousands of extremely minor differences.
3. Compare the columns. Print out the deltas.
4. Compare the values. Print out the deltas. Save the deltas to deltas.csv.
5. Save a nice summary to comparison-summary.md. End.
"""
FLOAT_ROUNDING_PLACES = 2
WORKING_PATH = constants.TMP_PATH / "Comparator" / "Score"
summary = "# Score Comparison Summary\n"
summary += f"Hi! I'm the Score Comparator. I compared the score in production (version {compare_to_version}) to the"
summary += " freshly calculated score. Here are the results.\n"
log_title("Compare Score", "Compare production score to local score")
locally_generated_score_path = constants.DATA_SCORE_CSV_FULL_FILE_PATH
if not locally_generated_score_path.is_file():
logger.error(
f"- No score file exists at {locally_generated_score_path}. Please generate the score and try again."
)
sys.exit(1)
# TODO: transition to downloader code when it's available
production_score_url = f"https://justice40-data.s3.amazonaws.com/data-versions/{compare_to_version}/data/score/csv/full/usa.csv"
production_score_path = WORKING_PATH / "usa.csv"
log_info(f"Fetching score version {compare_to_version} from AWS")
production_score_path.parent.mkdir(parents=True, exist_ok=True)
download_file_from_url(
file_url=production_score_url, download_file_name=production_score_path
)
log_info("Loading files into pandas for comparisons")
local_score_df = pd.read_csv(
locally_generated_score_path,
index_col="GEOID10_TRACT",
dtype={"GEOID10_TRACT": str},
low_memory=False,
).sort_index()
production_score_df = pd.read_csv(
production_score_path,
index_col="GEOID10_TRACT",
dtype={"GEOID10_TRACT": str},
low_memory=False,
).sort_index()
# Because of variations in Python versions and machine-level calculations, some of
# our numbers can be really close but not the same. That throws off our comparisons.
# So we're going to round to a reasonable amount of digits before doing anything else.
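# For example (illustrative only): 0.30000000000000004 produced on one machine and
# 0.3 produced on another both round to 0.3 at two decimal places, so platform-level
# float noise no longer registers as a score difference.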
production_score_df = production_score_df.round(FLOAT_ROUNDING_PLACES)
local_score_df = local_score_df.round(FLOAT_ROUNDING_PLACES)
local_score_df_columns = sorted(local_score_df.columns.array.tolist())
production_score_df_columns = sorted(
production_score_df.columns.array.tolist()
)
log_info("Comparing columns (production vs local). Differences are: ")
summary += "\n## Columns\n"
summary += "I compared the columns. Here's what I found.\n"
col_diff = difflib.unified_diff(
production_score_df_columns, local_score_df_columns
)
col_diff_res = ""
for d in col_diff:
col_diff_res += str(d) + "\n"
if len(col_diff_res) == 0:
log_info("None. Columns are the same")
summary += "* There are no differences in the column names.\n"
else:
log_info("There are differences. The diff is:")
log_info(col_diff_res)
summary += f"* There are differences in the column names. Here's a diff:\n{col_diff_res}\n"
log_info("Comparing dataframe contents (production vs local)")
summary += "\n## Scores\n"
summary += "I compared the scores, too. Here's what I found.\n"
production_row_count = len(production_score_df.index)
local_row_count = len(local_score_df.index)
summary += f"* The production score has {production_row_count:,} census tracts, and the freshly calculated score has {local_row_count:,}."
summary += (
" They match!\n"
if production_row_count == local_row_count
else " They don't match.\n"
)
try:
comparison_results_df = production_score_df.compare(
local_score_df, align_axis=1, keep_shape=False, keep_equal=False
).rename({"self": "Production", "other": "Local"}, axis=1, level=1)
summary += f"* I compared all of the census tracts. There are {len(comparison_results_df.index):,} tracts with at least one score difference."
summary += " Please examine the logs or run the score comparison locally to view them all.\n"
log_info(
f"There are {len(comparison_results_df.index)} rows with differences"
)
log_info("Those differences are:")
log_info("\n" + str(comparison_results_df))
comparison_path = WORKING_PATH / "deltas.csv"
comparison_results_df.to_csv(path_or_buf=comparison_path)
log_info(f"Wrote comparison results to {comparison_path}")
except ValueError as e:
summary += "* I could not run a full comparison. This is likely because there are column or index (census tract) differences."
summary += " Please examine the logs or run the score comparison locally to find out more.\n"
log_info(
f"Encountered an exception while performing the comparison: {repr(e)}"
)
summary_path = WORKING_PATH / "comparison-summary.md"
with open(summary_path, "w", encoding="utf-8") as f:
f.write(summary)
log_info(f"Wrote comparison summary to {summary_path}")
log_goodbye()
sys.exit()
if __name__ == "__main__":
cli()

View file

@ -225,8 +225,8 @@ class ExtractTransformLoad:
# TODO: remove this once all ETL classes are converted to using the new
# base class parameters and patterns.
if self.GEO_LEVEL is None:
logger.info(
"Skipping validation step for this class because it does not "
logger.warning(
f"Skipping validation step for {self.__class__.__name__} because it does not "
"seem to be converted to new ETL class patterns."
)
return
@ -331,7 +331,7 @@ class ExtractTransformLoad:
Uses the directory and the file name from `self._get_output_file_path`.
"""
logger.info(f"Saving `{self.NAME}` CSV")
logger.debug(f"Saving `{self.NAME}` CSV")
# Create directory if necessary.
output_file_path = self._get_output_file_path()
@ -342,7 +342,7 @@ class ExtractTransformLoad:
output_file_path, index=False, float_format=float_format
)
logger.info(f"File written to `{output_file_path}`.")
logger.debug(f"File written to `{output_file_path}`.")
# This is a classmethod so it can be used without needing to create an instance of
# the class. This is a use case in `etl_score`.
@ -362,7 +362,7 @@ class ExtractTransformLoad:
f"No file found at `{output_file_path}`."
)
logger.info(
logger.debug(
f"Reading in CSV `{output_file_path}` for ETL of class `{cls}`."
)
output_df = pd.read_csv(

View file

@ -42,6 +42,9 @@ def _get_datasets_to_run(dataset_to_run: str) -> typing.List[dict]:
def _run_one_dataset(dataset: dict) -> None:
"""Runs one etl process."""
logger.info(f"Running ETL for {dataset['name']}")
etl_module = importlib.import_module(
f"data_pipeline.etl.sources.{dataset['module_dir']}.etl"
)
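# As a sketch: a dataset entry whose module_dir is "ejscreen" (a hypothetical value
# here) would resolve to the module data_pipeline.etl.sources.ejscreen.etl.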
@ -49,21 +52,26 @@ def _run_one_dataset(dataset: dict) -> None:
etl_instance = etl_class()
# run extract
logger.debug(f"Extracting {dataset['name']}")
etl_instance.extract()
# run transform
logger.debug(f"Transforming {dataset['name']}")
etl_instance.transform()
# run load
logger.debug(f"Loading {dataset['name']}")
etl_instance.load()
# run validate
logger.debug(f"Validating {dataset['name']}")
etl_instance.validate()
# cleanup
logger.debug(f"Cleaning up {dataset['name']}")
etl_instance.cleanup()
logger.info(f"Finished `etl-run` for dataset `{dataset['name']}`.")
logger.info(f"Finished ETL for dataset {dataset['name']}")
def etl_runner(dataset_to_run: str = None) -> None:
@ -94,7 +102,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
]
if concurrent_datasets:
logger.info("Running concurrent jobs")
logger.info("Running concurrent ETL jobs")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(_run_one_dataset, dataset=dataset)
@ -106,10 +114,10 @@ def etl_runner(dataset_to_run: str = None) -> None:
# Otherwise, the exceptions are silently ignored.
fut.result()
# Note: these high-memory datasets also usually require the Census geojson to be
# generated, and one of them requires the Tribal geojson to be generated.
# Note: these high-memory datasets also usually require the Census GeoJSON to be
# generated, and one of them requires the Tribal GeoJSON to be generated.
if high_memory_datasets:
logger.info("Running high-memory jobs")
logger.info("Running high-memory ETL jobs")
for dataset in high_memory_datasets:
_run_one_dataset(dataset=dataset)

View file

@ -56,8 +56,6 @@ class ScoreETL(ExtractTransformLoad):
self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS: List[str] = []
def extract(self) -> None:
logger.info("Loading data sets from disk.")
# EJSCreen csv Load
ejscreen_csv = constants.DATA_PATH / "dataset" / "ejscreen" / "usa.csv"
self.ejscreen_df = pd.read_csv(
@ -200,7 +198,7 @@ class ScoreETL(ExtractTransformLoad):
)
def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
logger.info("Joining Census Tract dataframes")
logger.debug("Joining Census Tract dataframes")
def merge_function(
left: pd.DataFrame, right: pd.DataFrame
@ -317,7 +315,7 @@ class ScoreETL(ExtractTransformLoad):
~df[field_names.GEOID_TRACT_FIELD].isin(drop_tracts),
np.nan,
)
logger.info(
logger.debug(
f"Creating special case column for percentiles from {input_column_name}"
)
df[
@ -335,7 +333,7 @@ class ScoreETL(ExtractTransformLoad):
# TODO Move a lot of this to the ETL part of the pipeline
def _prepare_initial_df(self) -> pd.DataFrame:
logger.info("Preparing initial dataframe")
logger.debug("Preparing initial dataframe")
# Join all the data sources that use census tracts
census_tract_dfs = [
@ -377,7 +375,7 @@ class ScoreETL(ExtractTransformLoad):
assert (
census_tract_df.shape[0] <= pre_join_len
), "Join against national tract list ADDED rows"
logger.info(
logger.debug(
"Dropped %s tracts not in the 2010 tract data",
pre_join_len
- census_tract_df[field_names.GEOID_TRACT_FIELD].nunique(),
@ -560,7 +558,7 @@ class ScoreETL(ExtractTransformLoad):
for col in boolean_columns:
tmp = df_copy[col].copy()
df_copy[col] = np.where(tmp.notna(), tmp.astype(bool), None)
logger.info(f"{col} contains {df_copy[col].isna().sum()} nulls.")
logger.debug(f"{col} contains {df_copy[col].isna().sum()} nulls.")
# Convert all columns to numeric and do math
# Note that we have a few special conditions here and we handle them explicitly.
@ -591,7 +589,7 @@ class ScoreETL(ExtractTransformLoad):
.astype(bool)
.fillna(False)
][field_names.GEOID_TRACT_FIELD].to_list()
logger.info(
logger.debug(
f"Dropping {len(drop_tracts)} tracts from Agricultural Value Loss"
)
elif numeric_column == field_names.LINGUISTIC_ISO_FIELD:
@ -599,7 +597,7 @@ class ScoreETL(ExtractTransformLoad):
# 72 is the FIPS code for Puerto Rico
df_copy[field_names.GEOID_TRACT_FIELD].str.startswith("72")
][field_names.GEOID_TRACT_FIELD].to_list()
logger.info(
logger.debug(
f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation"
)
@ -615,7 +613,7 @@ class ScoreETL(ExtractTransformLoad):
df_copy[field_names.TOTAL_POP_FIELD].fillna(0)
<= low_population
][field_names.GEOID_TRACT_FIELD].to_list()
logger.info(
logger.debug(
f"Dropping {len(drop_tracts)} tracts from DOT traffic burden"
)
@ -666,7 +664,7 @@ class ScoreETL(ExtractTransformLoad):
)
def _backfill_island_demographics(self, df: pd.DataFrame) -> pd.DataFrame:
logger.info("Backfilling island demographic data")
logger.debug("Backfilling island demographic data")
island_index = self._get_island_areas(df)
for backfill_field_name in self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS:
actual_field_name = backfill_field_name.replace(
@ -684,8 +682,6 @@ class ScoreETL(ExtractTransformLoad):
return df
def transform(self) -> None:
logger.info("Transforming Score Data")
# prepare the df with the right CBG/tract IDs, column names/types, and percentiles
self.df = self._prepare_initial_df()
@ -696,9 +692,6 @@ class ScoreETL(ExtractTransformLoad):
self.df = self._backfill_island_demographics(self.df)
def load(self) -> None:
logger.info(
f"Saving Score CSV to {constants.DATA_SCORE_CSV_FULL_FILE_PATH}."
)
constants.DATA_SCORE_CSV_FULL_DIR.mkdir(parents=True, exist_ok=True)
self.df.to_csv(constants.DATA_SCORE_CSV_FULL_FILE_PATH, index=False)

View file

@ -118,7 +118,7 @@ class GeoScoreETL(ExtractTransformLoad):
fields = [self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME]
# TODO update this join
logger.info("Merging and compressing score CSV with USA GeoJSON")
logger.info("Merging and compressing score csv with USA GeoJSON")
self.geojson_score_usa_high = self.score_usa_df.set_index(
self.GEOID_FIELD_NAME
).merge(
@ -143,7 +143,7 @@ class GeoScoreETL(ExtractTransformLoad):
columns={self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO}
)
logger.info("Converting geojson into geodf with tracts")
logger.info("Converting GeoJSON into GeoDataFrame with tracts")
usa_tracts = gpd.GeoDataFrame(
usa_tracts,
columns=[
@ -154,15 +154,15 @@ class GeoScoreETL(ExtractTransformLoad):
crs="EPSG:4326",
)
logger.info("Creating buckets from tracts")
logger.debug("Creating buckets from tracts")
usa_bucketed, keep_high_zoom_df = self._create_buckets_from_tracts(
usa_tracts, self.NUMBER_OF_BUCKETS
)
logger.info("Aggregating buckets")
logger.debug("Aggregating buckets")
usa_aggregated = self._aggregate_buckets(usa_bucketed, agg_func="mean")
logger.info("Breaking up polygons")
logger.debug("Breaking up polygons")
compressed = self._breakup_multipolygons(
usa_aggregated, self.NUMBER_OF_BUCKETS
)
@ -220,7 +220,7 @@ class GeoScoreETL(ExtractTransformLoad):
len(state_tracts.index) / self.NUMBER_OF_BUCKETS
)
logger.info(
logger.debug(
f"The number of buckets has increased to {self.NUMBER_OF_BUCKETS}"
)
for i in range(len(state_tracts.index)):

View file

@ -62,7 +62,7 @@ class PostScoreETL(ExtractTransformLoad):
# End YAML definition constants
def _extract_counties(self, county_path: Path) -> pd.DataFrame:
logger.info("Reading Counties CSV")
logger.debug("Reading Counties CSV")
return pd.read_csv(
county_path,
sep="\t",
@ -75,7 +75,7 @@ class PostScoreETL(ExtractTransformLoad):
)
def _extract_states(self, state_path: Path) -> pd.DataFrame:
logger.info("Reading States CSV")
logger.debug("Reading States CSV")
return pd.read_csv(
state_path,
dtype={"fips": "string", "state_abbreviation": "string"},
@ -83,7 +83,7 @@ class PostScoreETL(ExtractTransformLoad):
)
def _extract_score(self, score_path: Path) -> pd.DataFrame:
logger.info("Reading Score CSV")
logger.debug("Reading Score CSV")
df = pd.read_csv(
score_path,
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
@ -98,8 +98,6 @@ class PostScoreETL(ExtractTransformLoad):
return df
def extract(self) -> None:
logger.info("Starting Extraction")
# check census data
check_census_data_source(
census_data_path=self.DATA_PATH / "census",
@ -170,7 +168,7 @@ class PostScoreETL(ExtractTransformLoad):
score_df: pd.DataFrame,
) -> pd.DataFrame:
logger.info("Merging county info with score info")
logger.debug("Merging county info with score info")
score_county_merged = score_df.merge(
# We drop state abbreviation so we don't get it twice
counties_df[["GEOID", "County Name"]],
@ -178,7 +176,7 @@ class PostScoreETL(ExtractTransformLoad):
how="left",
)
logger.info("Merging state info with county-score info")
logger.debug("Merging state info with county-score info")
# Here, we need to join on a separate key, since there's no
# entry for the island areas in the counties df (there are no
# counties!) Thus, unless we join state separately from county,
@ -207,7 +205,7 @@ class PostScoreETL(ExtractTransformLoad):
score_county_state_merged_df: pd.DataFrame,
) -> pd.DataFrame:
logger.info("Rounding Decimals")
logger.debug("Rounding Decimals")
# grab all the keys from tiles score columns
tiles_score_column_titles = list(constants.TILES_SCORE_COLUMNS.keys())
@ -218,7 +216,7 @@ class PostScoreETL(ExtractTransformLoad):
# We may not want some states/territories on the map, so this will drop all
# rows with those FIPS codes (first two digits of the census tract)
logger.info(
logger.debug(
f"Dropping specified FIPS codes from tile data: {constants.DROP_FIPS_CODES}"
)
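# For example (illustrative; the actual codes live in constants.DROP_FIPS_CODES): a
# tract GEOID beginning with "69" (Northern Mariana Islands, state FIPS 69) would be
# dropped if "69" were listed.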
tracts_to_drop = []
@ -241,7 +239,7 @@ class PostScoreETL(ExtractTransformLoad):
score_tiles[float_cols] * scale_factor
).apply(np.floor) / scale_factor
logger.info("Adding fields for island areas and Puerto Rico")
logger.debug("Adding fields for island areas and Puerto Rico")
# The below operation constructs variables for the front end.
# Since the Island Areas, Puerto Rico, and the nation all have a different
# set of available data, each has its own user experience.
@ -381,8 +379,6 @@ class PostScoreETL(ExtractTransformLoad):
return final_df
def transform(self) -> None:
logger.info("Transforming data sources for Score + County CSVs")
transformed_counties = self._transform_counties(self.input_counties_df)
transformed_states = self._transform_states(self.input_states_df)
transformed_score = self._transform_score(self.input_score_df)
@ -403,7 +399,7 @@ class PostScoreETL(ExtractTransformLoad):
def _load_score_csv_full(
self, score_county_state_merged: pd.DataFrame, score_csv_path: Path
) -> None:
logger.info("Saving Full Score CSV with County Information")
logger.debug("Saving Full Score CSV with County Information")
score_csv_path.parent.mkdir(parents=True, exist_ok=True)
score_county_state_merged.to_csv(
score_csv_path,
@ -476,7 +472,7 @@ class PostScoreETL(ExtractTransformLoad):
def _load_tile_csv(
self, score_tiles_df: pd.DataFrame, tile_score_path: Path
) -> None:
logger.info("Saving Tile Score CSV")
logger.debug("Saving Tile Score CSV")
tile_score_path.parent.mkdir(parents=True, exist_ok=True)
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
@ -498,13 +494,13 @@ class PostScoreETL(ExtractTransformLoad):
constants.SCORE_VERSIONING_DATA_DOCUMENTATION_ZIP_FILE_PATH
)
logger.info("Writing downloadable excel")
logger.debug("Writing downloadable excel")
excel_config = self._load_excel_from_df(
excel_df=self.output_score_county_state_merged_df,
excel_path=excel_path,
)
logger.info("Writing downloadable csv")
logger.debug("Writing downloadable csv")
# open yaml config
downloadable_csv_config = load_yaml_dict_from_file(
self.CONTENT_CONFIG / "csv.yml", CSVConfig
@ -516,7 +512,7 @@ class PostScoreETL(ExtractTransformLoad):
)
downloadable_df.to_csv(csv_path, index=False)
logger.info("Creating codebook for download zip")
logger.debug("Creating codebook for download zip")
# consolidate all excel fields from the config yml. The codebook
# code takes in a list of fields, but the excel config file
@ -562,17 +558,17 @@ class PostScoreETL(ExtractTransformLoad):
codebook_df.to_csv(codebook_path, index=False)
# zip assets
logger.info("Compressing csv files")
logger.debug("Compressing csv files")
files_to_compress = [csv_path, codebook_path, readme_path]
zip_files(csv_zip_path, files_to_compress)
logger.info("Compressing xls files")
logger.debug("Compressing xls files")
files_to_compress = [excel_path, codebook_path, readme_path]
zip_files(xls_zip_path, files_to_compress)
# Per #1557
# zip file that contains the .xls, .csv, .pdf, tech support document, checksum file
logger.info("Compressing data and documentation files")
logger.debug("Compressing data and documentation files")
files_to_compress = [
excel_path,
csv_path,

View file

@ -47,14 +47,14 @@ def check_score_data_source(
# download from s3 if census_data_source is aws
if score_data_source == "aws":
logger.info("Fetching Score Tile data from AWS S3")
logger.debug("Fetching Score Tile data from AWS S3")
download_file_from_url(
file_url=TILE_SCORE_CSV_S3_URL, download_file_name=TILE_SCORE_CSV
)
else:
# check if score data is found locally
if not os.path.isfile(TILE_SCORE_CSV):
logger.info(
logger.warning(
"No local score tiles data found. Please use '-s aws` to fetch from AWS"
)
sys.exit()
@ -409,7 +409,7 @@ def compare_to_list_of_expected_state_fips_codes(
f"{sorted(list(actual_state_fips_codes_set - expected_states_set))}\n"
)
else:
logger.info(
logger.debug(
"Data matches expected state and territory representation"
f"{dataset_name_phrase}."
)

View file

@ -33,15 +33,12 @@ class CalEnviroScreenETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading CalEnviroScreen Data")
super().extract(
self.CALENVIROSCREEN_FTP_URL,
self.get_tmp_path(),
)
def transform(self) -> None:
logger.info("Transforming CalEnviroScreen Data")
# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:
# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip
# Load comparison index (CalEnviroScreen 4)
@ -70,7 +67,6 @@ class CalEnviroScreenETL(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving CalEnviroScreen CSV")
# write nationwide csv
self.CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df.to_csv(self.CSV_PATH / "data06.csv", index=False)

View file

@ -81,7 +81,6 @@ class CDCLifeExpectancy(ExtractTransformLoad):
return df
def extract(self) -> None:
logger.info("Starting data download.")
all_usa_raw_df = self._download_and_prep_data(
file_url=self.USA_FILE_URL,
@ -102,13 +101,13 @@ class CDCLifeExpectancy(ExtractTransformLoad):
additional_fips_codes_not_expected=self.STATES_MISSING_FROM_USA_FILE,
)
logger.info("Downloading data for Maine")
logger.debug("Downloading data for Maine")
maine_raw_df = self._download_and_prep_data(
file_url=self.MAINE_FILE_URL,
download_file_name=self.get_tmp_path() / "maine.csv",
)
logger.info("Downloading data for Wisconsin")
logger.debug("Downloading data for Wisconsin")
wisconsin_raw_df = self._download_and_prep_data(
file_url=self.WISCONSIN_FILE_URL,
download_file_name=self.get_tmp_path() / "wisconsin.csv",
@ -138,7 +137,6 @@ class CDCLifeExpectancy(ExtractTransformLoad):
self.raw_df = combined_df
def transform(self) -> None:
logger.info("Starting CDC life expectancy transform.")
self.output_df = self.raw_df.rename(
columns={
@ -148,7 +146,6 @@ class CDCLifeExpectancy(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving CDC Life Expectancy CSV")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.output_df[self.COLUMNS_TO_KEEP].to_csv(

View file

@ -44,7 +44,6 @@ class CDCPlacesETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Starting to download 520MB CDC Places file.")
file_path = download_file_from_url(
file_url=self.CDC_PLACES_URL,
download_file_name=self.get_tmp_path() / "census_tract.csv",
@ -57,8 +56,6 @@ class CDCPlacesETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting CDC Places transform")
# Rename GEOID field
self.df.rename(
columns={self.CDC_GEOID_FIELD_NAME: self.GEOID_TRACT_FIELD_NAME},

View file

@ -48,7 +48,6 @@ class CDCSVIIndex(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading 43 MB CDC SVI INDEX")
self.df = pd.read_csv(
filepath_or_buffer=self.CDC_SVI_INDEX_URL,
dtype={self.CDC_SVI_INDEX_TRACTS_FIPS_CODE: "string"},
@ -56,7 +55,6 @@ class CDCSVIIndex(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting CDC SVI INDEX transform")
# Note: In this dataset all US census tracts are ranked against one another.
# Puerto Rico is not included in this dataset
self.df.rename(
@ -109,8 +107,6 @@ class CDCSVIIndex(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving CDC SVI Index Data")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(

View file

@ -70,14 +70,9 @@ class CensusETL(ExtractTransformLoad):
None
"""
shp_file_path = self._path_for_fips_file(fips_code, GeoFileType.SHP)
logger.info(f"Checking if {fips_code} shp file exists")
# check if file exists
if not shp_file_path.is_file():
logger.info(
f"{fips_code} shp file does not exist. Downloading and extracting shape file"
)
tract_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/TRACT/2010/tl_2010_{fips_code}_tract10.zip"
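# For example (illustrative): fips_code "06" (California) yields
# https://www2.census.gov/geo/tiger/TIGER2010/TRACT/2010/tl_2010_06_tract10.zip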
unzip_file_from_url(
tract_state_url,
@ -86,8 +81,11 @@ class CensusETL(ExtractTransformLoad):
)
def extract(self) -> None:
logger.info("Downloading Census Data")
for fips_code in self.STATE_FIPS_CODES:
logger.debug("Extracting census data")
for index, fips_code in enumerate(self.STATE_FIPS_CODES):
logger.debug(
f"Extracting shape for FIPS {fips_code} {index+1} of {len(self.STATE_FIPS_CODES)}"
)
self._extract_shp(fips_code)
def _transform_to_geojson(self, fips_code: str) -> None:
@ -100,11 +98,8 @@ class CensusETL(ExtractTransformLoad):
geojson_file_path = self._path_for_fips_file(
fips_code, GeoFileType.GEOJSON
)
logger.info(f"Checking if {fips_code} geoJSON file exists ")
if not geojson_file_path.is_file():
logger.info(
f"GeoJSON file {fips_code} does not exist. Converting shp to geoJSON"
)
cmd = [
"ogr2ogr",
"-f",
@ -120,9 +115,11 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.debug("Transforming tracts")
for file in self.GEOJSON_BASE_PATH.iterdir():
if file.suffix == ".json":
logger.info(f"Ingesting geoid10 for file {file}")
logger.debug(f"Adding GEOID10 for file {file.name}")
with open(self.GEOJSON_BASE_PATH / file, encoding="utf-8") as f:
geojson = json.load(f)
for feature in geojson["features"]:
@ -142,13 +139,19 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Transforming Census Data")
for fips_code in self.STATE_FIPS_CODES:
logger.debug("Transforming census data")
logger.debug("Transforming SHP files to GeoJSON")
for index, fips_code in enumerate(self.STATE_FIPS_CODES):
logger.debug(
f"Transforming FIPS {fips_code} to GeoJSON {index+1} of {len(self.STATE_FIPS_CODES)}"
)
self._transform_to_geojson(fips_code)
self._generate_tract_table()
def _load_into_state_csvs(self, fips_code: str) -> None:
"""Load state CSVS into individual CSV files
"""Load state CSVs into individual CSV files
Args:
fips_code (str): the FIPS code for the region of interest
@ -182,10 +185,9 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Writing national us.csv file")
logger.debug("Loading national US.csv")
if not self.NATIONAL_TRACT_CSV_PATH.is_file():
logger.info(f"Creating {self.NATIONAL_TRACT_CSV_PATH}")
with open(
self.NATIONAL_TRACT_CSV_PATH,
mode="w",
@ -211,22 +213,21 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Generating national geojson file")
logger.debug("Loading National GeoJson")
usa_df = gpd.GeoDataFrame()
for file_name in self.GEOJSON_BASE_PATH.rglob("*.json"):
logger.info(f"Ingesting {file_name}")
logger.debug(f"Adding national GeoJSON file {file_name.name}")
state_gdf = gpd.read_file(file_name)
usa_df = usa_df.append(state_gdf)
usa_df = usa_df.to_crs(
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
)
logger.info("Writing national geojson file")
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
logger.info("Census tract downloading complete")
logger.debug("Saving national GeoJSON file")
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
def load(self) -> None:
"""Create state CSVs, National CSV, and National GeoJSON
@ -234,8 +235,13 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Saving Census CSV")
logger.debug("Loading census data")
logger.debug("Loading individual state csv files")
for fips_code in self.TRACT_PER_STATE:
self._load_into_state_csvs(fips_code)
self._load_national_csv()
self._load_national_geojson()
logger.debug("Census data complete")

View file

@ -39,7 +39,6 @@ def get_state_fips_codes(data_path: Path) -> list:
"""Returns a list with state data"""
fips_csv_path = data_path / "census" / "csv" / "fips_states_2010.csv"
logger.info("Downloading fips from S3 repository")
unzip_file_from_url(
settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip",
data_path / "tmp",
@ -97,7 +96,6 @@ def check_census_data_source(
# download from s3 if census_data_source is aws
if census_data_source == "aws":
logger.info("Fetching Census data from AWS S3")
unzip_file_from_url(
CENSUS_DATA_S3_URL,
DATA_PATH / "tmp",
@ -106,14 +104,13 @@ def check_census_data_source(
else:
# check if census data is found locally
if not os.path.isfile(census_data_path / "geojson" / "us.json"):
logger.info(
logger.error(
"No local census data found. Please use '-s aws` to fetch from AWS"
)
sys.exit()
def zip_census_data():
logger.info("Compressing census files to data/tmp folder")
CENSUS_DATA_PATH = settings.APP_ROOT / "data" / "census"
TMP_PATH = settings.APP_ROOT / "data" / "tmp"

View file

@ -363,18 +363,16 @@ class CensusACSETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting Census ACS Transform")
df = self.df
# Here we join the geometry of the US to the dataframe so that we can impute
# the income of neighbors. First this looks locally; if there's no local
# geojson file for all of the US, this will read it off of S3.
logger.info("Reading in geojson for the country")
logger.debug("Reading in geojson for the country")
if not os.path.exists(
self.DATA_PATH / "census" / "geojson" / "us.json"
):
logger.info("Fetching Census data from AWS S3")
logger.debug("Fetching Census data from AWS S3")
unzip_file_from_url(
CENSUS_DATA_S3_URL,
self.DATA_PATH / "tmp",
@ -406,7 +404,7 @@ class CensusACSETL(ExtractTransformLoad):
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
]:
missing_value_count = sum(df[field] == -666666666)
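# -666666666 appears to be the Census API's sentinel for a missing/suppressed
# estimate (an assumption based on how it is used here), so these are counted
# and treated as null values.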
logger.info(
logger.debug(
f"There are {missing_value_count} ({int(100*missing_value_count/df[field].count())}%) values of "
+ f"`{field}` being marked as null values."
)
@ -591,7 +589,7 @@ class CensusACSETL(ExtractTransformLoad):
# we impute income for both income measures
## TODO: Convert to pydantic for clarity
logger.info("Imputing income information")
logger.debug("Imputing income information")
ImputeVariables = namedtuple(
"ImputeVariables", ["raw_field_name", "imputed_field_name"]
)
@ -612,7 +610,7 @@ class CensusACSETL(ExtractTransformLoad):
minimum_population_required_for_imputation=self.MINIMUM_POPULATION_REQUIRED_FOR_IMPUTATION,
)
logger.info("Calculating with imputed values")
logger.debug("Calculating with imputed values")
df[
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
@ -644,7 +642,7 @@ class CensusACSETL(ExtractTransformLoad):
== 0
), "Error: not all values were filled..."
logger.info("Renaming columns...")
logger.debug("Renaming columns...")
df = df.rename(
columns={
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME: field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,

View file

@ -88,7 +88,7 @@ def _prepare_dataframe_for_imputation(
][geoid_field].unique()
# Check that imputation is a valid choice for this set of fields
logger.info(f"Imputing values for {len(tract_list)} unique tracts.")
logger.debug(f"Imputing values for {len(tract_list)} unique tracts.")
assert len(tract_list) > 0, "Error: No missing values to impute"
return tract_list, geo_df
@ -156,7 +156,7 @@ def calculate_income_measures(
mask_to_use
][impute_var_pair.raw_field_name].mean()
logger.info("Casting geodataframe as a typical dataframe")
logger.debug("Casting geodataframe as a typical dataframe")
# get rid of the geometry column and cast as a typical df
df = pd.DataFrame(
geo_df[[col for col in geo_df.columns if col != "geometry"]]

View file

@ -30,14 +30,14 @@ def retrieve_census_acs_data(
dfs = []
for fips in get_state_fips_codes(data_path_for_fips_codes):
if fips in CENSUS_ACS_FIPS_CODES_TO_SKIP:
logger.info(
logger.debug(
f"Skipping download for state/territory with FIPS code {fips}"
)
else:
census_api_key = ""
if os.environ.get("CENSUS_API_KEY"):
census_api_key = "with API key"
logger.info(
logger.debug(
f"Downloading data for state/territory with FIPS code {fips} {census_api_key}"
)
@ -55,7 +55,7 @@ def retrieve_census_acs_data(
except ValueError as e:
logger.error(
f"Could not download data for state/territory with FIPS code {fips}"
f"Could not download data for state/territory with FIPS code {fips} because {e}"
)
raise e

View file

@ -100,7 +100,6 @@ class CensusACS2010ETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Starting Census 2010 ACS Transform")
# Define the variables to retrieve
variables = (
self.UNEMPLOYED_FIELDS
@ -118,8 +117,6 @@ class CensusACS2010ETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting Census 2010 ACS Transform")
df = self.df
# Calculate percent unemployment.
@ -184,8 +181,6 @@ class CensusACS2010ETL(ExtractTransformLoad):
self.df = output_df
def load(self) -> None:
logger.info("Saving Census 2010 ACS Data")
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

View file

@ -224,7 +224,6 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
return state_median_incomes_df
def extract(self) -> None:
logger.info("Starting four separate downloads.")
# Load and clean GEOCORR data
# Note: this data is generated by https://mcdc.missouri.edu/applications/geocorr2014.html, at the advice of the Census.
# The specific query used is the following, which takes a couple of minutes to run:
@ -239,7 +238,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
# and with the "target geographies" selected being:
# - Core based statistical area (CBSA)
# - CBSA Type (Metro or Micro)
logger.info("Starting download of 1.5MB Geocorr information.")
logger.debug("Starting download of 1.5MB Geocorr information.")
unzip_file_from_url(
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
@ -265,7 +264,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
low_memory=False,
)
logger.info("Pulling PR tract list down.")
logger.debug("Pulling PR tract list down.")
# This step is necessary because PR is not in geocorr at the level that gets joined
pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv"
download_file_from_url(
@ -282,7 +281,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
self.pr_tracts["State Abbreviation"] = "PR"
# Download MSA median incomes
logger.info("Starting download of MSA median incomes.")
logger.debug("Starting download of MSA median incomes.")
download = requests.get(
self.MSA_MEDIAN_INCOME_URL,
verify=None,
@ -291,7 +290,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
self.msa_median_incomes = json.loads(download.content)
# Download state median incomes
logger.info("Starting download of state median incomes.")
logger.debug("Starting download of state median incomes.")
download_state = requests.get(
self.STATE_MEDIAN_INCOME_URL,
verify=None,
@ -301,8 +300,6 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
## NOTE we already have PR's MI here
def transform(self) -> None:
logger.info("Starting transforms.")
# Run transforms:
geocorr_df = self._transform_geocorr()
msa_median_incomes_df = self._transform_msa_median_incomes()
@ -352,8 +349,6 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
self.output_df = merged_with_state_income_df
def load(self) -> None:
logger.info("Saving Census ACS Median Income CSV")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.output_df[self.COLUMNS_TO_KEEP].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False

View file

@ -352,7 +352,7 @@ class CensusDecennialETL(ExtractTransformLoad):
dfs = []
dfs_vi = []
for island in self.ISLAND_TERRITORIES:
logger.info(
logger.debug(
f"Downloading data for state/territory {island['state_abbreviation']}"
)
for county in island["county_fips"]:
@ -369,7 +369,13 @@ class CensusDecennialETL(ExtractTransformLoad):
timeout=settings.REQUESTS_DEFAULT_TIMOUT,
)
try:
df = json.loads(download.content)
except ValueError as e:
logger.error(
f"Could not load content in census decennial ETL because {e}. Content is {download.content}."
)
# Without parsed JSON there is nothing to build the dataframe from below,
# so skip this county rather than hit a NameError.
continue
# First row is the header
df = pd.DataFrame(df[1:], columns=df[0])
@ -393,8 +399,6 @@ class CensusDecennialETL(ExtractTransformLoad):
self.df_vi = pd.concat(dfs_vi)
def transform(self) -> None:
logger.info("Starting Census Decennial Transform")
# Rename All Fields
self.df.rename(columns=self.FIELD_NAME_XWALK, inplace=True)
self.df_vi.rename(columns=self.FIELD_NAME_XWALK, inplace=True)
@ -489,13 +493,11 @@ class CensusDecennialETL(ExtractTransformLoad):
# Reporting Missing Values
for col in self.df_all.columns:
missing_value_count = self.df_all[col].isnull().sum()
logger.info(
logger.debug(
f"There are {missing_value_count} missing values in the field {col} out of a total of {self.df_all.shape[0]} rows"
)
def load(self) -> None:
logger.info("Saving Census Decennial Data")
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

View file

@ -65,14 +65,12 @@ class ChildOpportunityIndex(ExtractTransformLoad):
self.output_df: pd.DataFrame
def extract(self) -> None:
logger.info("Starting 51MB data download.")
super().extract(
source_url=self.SOURCE_URL,
extract_path=self.get_tmp_path(),
)
def transform(self) -> None:
logger.info("Starting transforms.")
raw_df = pd.read_csv(
filepath_or_buffer=self.get_tmp_path() / "raw.csv",
# The following need to remain as strings for all of their digits, not get

View file

@ -30,7 +30,6 @@ class DOEEnergyBurden(ExtractTransformLoad):
self.output_df: pd.DataFrame
def transform(self) -> None:
logger.info("Starting DOE Energy Burden transforms.")
raw_df: pd.DataFrame = pd.read_csv(
filepath_or_buffer=self.get_tmp_path()
/ "DOE_LEAD_AMI_TRACT_2018_ALL.csv",
@ -41,7 +40,7 @@ class DOEEnergyBurden(ExtractTransformLoad):
low_memory=False,
)
logger.info("Renaming columns and ensuring output format is correct")
logger.debug("Renaming columns and ensuring output format is correct")
output_df = raw_df.rename(
columns={
self.INPUT_ENERGY_BURDEN_FIELD_NAME: self.REVISED_ENERGY_BURDEN_FIELD_NAME,

View file

@ -53,7 +53,6 @@ class TravelCompositeETL(ExtractTransformLoad):
- Renames the Census Tract column to match the other datasets
- Converts to CSV
"""
logger.info("Transforming DOT Travel Disadvantage Data")
# read in the unzipped shapefile from data source
# reformat it to be standard df, remove unassigned rows, and

View file

@ -60,7 +60,6 @@ class AbandonedMineETL(ExtractTransformLoad):
self.output_df: pd.DataFrame
def transform(self) -> None:
logger.info("Starting eAMLIS transforms.")
df = pd.read_csv(
self.get_tmp_path() / "eAMLIS export of all data.tsv",
sep="\t",

View file

@ -44,7 +44,6 @@ class EJSCREENETL(ExtractTransformLoad):
]
def extract(self) -> None:
logger.info("Downloading EJScreen Data")
super().extract(
self.EJSCREEN_FTP_URL,
self.get_tmp_path(),
@ -52,7 +51,6 @@ class EJSCREENETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Transforming EJScreen Data")
self.df = pd.read_csv(
self.EJSCREEN_CSV,
dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},

View file

@ -39,7 +39,7 @@ class EJSCREENAreasOfConcernETL(ExtractTransformLoad):
def extract(self) -> None:
if self.ejscreen_areas_of_concern_data_exists():
logger.info("Loading EJSCREEN Areas of Concern Data Locally")
logger.debug("Loading EJSCREEN Areas of Concern Data Locally")
self.df = pd.read_csv(
filepath_or_buffer=self.EJSCREEN_AREAS_OF_CONCERN_SOURCE_DATA,
dtype={
@ -48,24 +48,24 @@ class EJSCREENAreasOfConcernETL(ExtractTransformLoad):
low_memory=False,
)
else:
logger.info(
logger.warning(
"EJSCREEN areas of concern data does not exist locally. Not loading the data."
)
def transform(self) -> None:
logger.info("Transforming EJSCREEN Areas of Concern Data")
logger.debug("Transforming EJSCREEN Areas of Concern Data")
# TODO: As a one-off, we did all the processing in a separate notebook.
# It can be added here later in a future PR.
def load(self) -> None:
if self.ejscreen_areas_of_concern_data_exists():
logger.info("Saving EJSCREEN Areas of Concern Data")
logger.debug("Saving EJSCREEN Areas of Concern Data")
# write nationwide csv
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df.to_csv(self.OUTPUT_PATH / "usa.csv", index=False)
else:
logger.info(
logger.warning(
"EJSCREEN areas of concern data does not exist locally. Not saving the data."
)

View file

@ -49,8 +49,6 @@ class EnergyDefinitionAlternativeDraft(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Starting data download.")
unzip_file_from_url(
file_url=self.DEFINITION_ALTERNATIVE_FILE_URL,
download_path=self.get_tmp_path(),
@ -70,8 +68,6 @@ class EnergyDefinitionAlternativeDraft(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting transforms.")
self.df = self.df.rename(
columns={
self.TRACT_INPUT_COLUMN_NAME: self.GEOID_TRACT_FIELD_NAME,
@ -105,8 +101,6 @@ class EnergyDefinitionAlternativeDraft(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving CSV")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False

View file

@ -65,8 +65,6 @@ class EPARiskScreeningEnvironmentalIndicatorsETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Starting 2.5 MB data download.")
# the column headers from the above dataset are actually a census tract's data at this point
# We will use this data structure later to specify the column names
input_columns = [
@ -98,8 +96,6 @@ class EPARiskScreeningEnvironmentalIndicatorsETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting transforms.")
score_columns = [x for x in self.df.columns if "SCORE" in x]
# coerce dataframe type to perform correct next steps
@ -157,8 +153,6 @@ class EPARiskScreeningEnvironmentalIndicatorsETL(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving CSV")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False

View file

@ -48,7 +48,6 @@ class FloodRiskETL(ExtractTransformLoad):
- Renames the Census Tract column to match the other datasets
- Calculates share of properties at risk, left-clipping number of properties at 250
"""
logger.info("Transforming National Risk Index Data")
# read in the unzipped csv data source then rename the
# Census Tract column for merging

View file

@ -48,7 +48,6 @@ class WildfireRiskETL(ExtractTransformLoad):
- Renames the Census Tract column to match the other datasets
- Calculates share of properties at risk, left-clipping number of properties at 250
"""
logger.info("Transforming National Risk Index Data")
# read in the unzipped csv data source then rename the
# Census Tract column for merging
df_fsf_fire: pd.DataFrame = pd.read_csv(

View file

@ -16,7 +16,7 @@ logger = get_module_logger(__name__)
def get_tract_geojson(
_tract_data_path: Optional[Path] = None,
) -> gpd.GeoDataFrame:
logger.info("Loading tract geometry data from census ETL")
logger.debug("Loading tract geometry data from census ETL")
GEOJSON_PATH = _tract_data_path
if GEOJSON_PATH is None:
GEOJSON_PATH = CensusETL.NATIONAL_TRACT_JSON_PATH
@ -40,7 +40,7 @@ def get_tract_geojson(
def get_tribal_geojson(
_tribal_data_path: Optional[Path] = None,
) -> gpd.GeoDataFrame:
logger.info("Loading Tribal geometry data from Tribal ETL")
logger.debug("Loading Tribal geometry data from Tribal ETL")
GEOJSON_PATH = _tribal_data_path
if GEOJSON_PATH is None:
GEOJSON_PATH = TribalETL().NATIONAL_TRIBAL_GEOJSON_PATH

View file

@ -34,9 +34,6 @@ class GeoCorrETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info(
"Starting to download 2MB GeoCorr Urban Rural Census Tract Map file."
)
unzip_file_from_url(
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
+ "/geocorr_urban_rural.csv.zip",
@ -53,7 +50,6 @@ class GeoCorrETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting GeoCorr Urban Rural Map transform")
# Put in logic from Jupyter Notebook transform when we switch in the hyperlink to Geocorr
self.output_df = self.df.rename(

View file

@ -43,7 +43,6 @@ class HistoricRedliningETL(ExtractTransformLoad):
self.df: pd.DataFrame
def transform(self) -> None:
logger.info("Transforming Historic Redlining Data")
# this is obviously temporary
historic_redlining_data = pd.read_excel(
self.HISTORIC_REDLINING_FILE_PATH
@ -55,7 +54,7 @@ class HistoricRedliningETL(ExtractTransformLoad):
columns={"HRS2010": self.REDLINING_SCALAR}
)
logger.info(f"{historic_redlining_data.columns}")
logger.debug(f"{historic_redlining_data.columns}")
# Calculate lots of different score thresholds for convenience
for threshold in [3.25, 3.5, 3.75]:

View file

@ -23,7 +23,7 @@ class HousingTransportationETL(ExtractTransformLoad):
dfs = []
zip_file_dir = self.get_tmp_path() / "housing_and_transportation_index"
for fips in get_state_fips_codes(self.DATA_PATH):
logger.info(
logger.debug(
f"Downloading housing data for state/territory with FIPS code {fips}"
)
@ -50,8 +50,6 @@ class HousingTransportationETL(ExtractTransformLoad):
self.df = pd.concat(dfs)
def transform(self) -> None:
logger.info("Transforming Housing and Transportation Data")
# Rename and reformat tract ID
self.df.rename(
columns={"tract": self.GEOID_TRACT_FIELD_NAME}, inplace=True
@ -61,7 +59,5 @@ class HousingTransportationETL(ExtractTransformLoad):
].str.replace('"', "")
def load(self) -> None:
logger.info("Saving Housing and Transportation Data")
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)

View file

@ -56,7 +56,6 @@ class HudHousingETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Extracting 1.09 GB HUD Housing Data")
super().extract(
self.HOUSING_FTP_URL,
self.HOUSING_ZIP_FILE_DIR,
@ -80,8 +79,6 @@ class HudHousingETL(ExtractTransformLoad):
return tmp_df
def transform(self) -> None:
logger.info("Transforming HUD Housing Data")
table_8 = self._read_chas_table("Table8.csv")
table_3 = self._read_chas_table("Table3.csv")

View file

@ -36,7 +36,6 @@ class HudRecapETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading HUD Recap Data")
download = requests.get(
self.HUD_RECAP_CSV_URL,
verify=None,
@ -48,8 +47,6 @@ class HudRecapETL(ExtractTransformLoad):
csv_file.close()
def transform(self) -> None:
logger.info("Transforming HUD Recap Data")
# Load comparison index (CalEnviroScreen 4)
self.df = pd.read_csv(self.HUD_RECAP_CSV, dtype={"GEOID": "string"})
@ -75,7 +72,6 @@ class HudRecapETL(ExtractTransformLoad):
self.df.sort_values(by=self.GEOID_TRACT_FIELD_NAME, inplace=True)
def load(self) -> None:
logger.info("Saving HUD Recap CSV")
# write nationwide csv
self.CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df.to_csv(self.CSV_PATH / "usa.csv", index=False)

View file

@ -39,7 +39,6 @@ class MappingForEJETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading Mapping for EJ Data")
super().extract(
self.MAPPING_FOR_EJ_VA_URL,
self.get_tmp_path(),
@ -50,8 +49,6 @@ class MappingForEJETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Transforming Mapping for EJ Data")
# Join (here, it's just concatenating) the two dataframes from
# CO and VA
self.df = pd.concat(
@ -86,7 +83,6 @@ class MappingForEJETL(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving Mapping for EJ CSV")
# write selected states csv
self.CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(
@ -94,4 +90,4 @@ class MappingForEJETL(ExtractTransformLoad):
)
def validate(self) -> None:
logger.info("Validating Mapping For EJ Data")
logger.debug("Skipping validation for MappingForEJETL")

View file

@ -75,14 +75,12 @@ class MappingInequalityETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading Mapping Inequality Data")
download_file_from_url(
file_url=self.MAPPING_INEQUALITY_CSV_URL,
download_file_name=self.MAPPING_INEQUALITY_CSV,
)
def transform(self) -> None:
logger.info("Transforming Mapping Inequality Data")
df: pd.DataFrame = pd.read_csv(
self.MAPPING_INEQUALITY_CSV,
dtype={self.TRACT_INPUT_FIELD: "string"},
@ -207,7 +205,6 @@ class MappingInequalityETL(ExtractTransformLoad):
self.df = grouped_df
def load(self) -> None:
logger.info("Saving Mapping Inequality CSV")
# write nationwide csv
self.CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(

View file

@ -33,15 +33,13 @@ class MarylandEJScreenETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading 207MB Maryland EJSCREEN Data")
logger.debug("Downloading 207MB Maryland EJSCREEN Data")
super().extract(
self.MARYLAND_EJSCREEN_URL,
self.get_tmp_path(),
)
def transform(self) -> None:
logger.info("Transforming Maryland EJSCREEN Data")
list_of_files = list(glob(str(self.SHAPE_FILES_PATH) + "/*.shp"))
# Ignore counties because this is not the level of measurement
@ -105,7 +103,6 @@ class MarylandEJScreenETL(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving Maryland EJSCREEN CSV")
# write maryland tracts to csv
self.OUTPUT_CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(

View file

@ -33,7 +33,6 @@ class MichiganEnviroScreenETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading Michigan EJSCREEN Data")
self.df = pd.read_csv(
filepath_or_buffer=self.MICHIGAN_EJSCREEN_S3_URL,
dtype={"GEO_ID": "string"},
@ -41,8 +40,6 @@ class MichiganEnviroScreenETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Transforming Michigan EJSCREEN Data")
self.df.rename(
columns={
"GEO_ID": self.GEOID_TRACT_FIELD_NAME,
@ -60,7 +57,6 @@ class MichiganEnviroScreenETL(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving Michigan Environmental Screening Tool to CSV")
# write nationwide csv
self.CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(

View file

@ -69,7 +69,6 @@ class NationalRiskIndexETL(ExtractTransformLoad):
"""Unzips NRI dataset from the FEMA data source and writes the files
to the temporary data folder for use in the transform() method
"""
logger.info("Downloading 405MB National Risk Index Data")
super().extract(
source_url=self.SOURCE_URL,
@ -84,7 +83,6 @@ class NationalRiskIndexETL(ExtractTransformLoad):
- Applies the NRI score for each Census Tract to the Census Block
Groups inside of that Tract
"""
logger.info("Transforming National Risk Index Data")
# read in the unzipped csv from NRI data source then rename the
# Census Tract column for merging

View file

@ -53,7 +53,6 @@ class NatureDeprivedETL(ExtractTransformLoad):
- Renames columns as needed
"""
logger.info("Transforming NLCD Data")
df_ncld: pd.DataFrame = pd.read_csv(
self.INPUT_CSV,

View file

@ -76,8 +76,6 @@ class PersistentPovertyETL(ExtractTransformLoad):
return df
def extract(self) -> None:
logger.info("Starting to download 86MB persistent poverty file.")
unzipped_file_path = self.get_tmp_path()
unzip_file_from_url(
@ -124,7 +122,6 @@ class PersistentPovertyETL(ExtractTransformLoad):
self.df = self._join_input_dfs(temporary_input_dfs)
def transform(self) -> None:
logger.info("Starting persistent poverty transform")
transformed_df = self.df
# Note: the fields are defined as following.

View file

@ -77,7 +77,6 @@ class TreeEquityScoreETL(ExtractTransformLoad):
]
def extract(self) -> None:
logger.info("Downloading Tree Equity Score Data")
for state in self.states:
super().extract(
f"{self.TES_URL}{state}.zip.zip",
@ -85,7 +84,6 @@ class TreeEquityScoreETL(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Transforming Tree Equity Score Data")
tes_state_dfs = []
for state in self.states:
tes_state_dfs.append(
@ -103,7 +101,6 @@ class TreeEquityScoreETL(ExtractTransformLoad):
)
def load(self) -> None:
logger.info("Saving Tree Equity Score CSV")
# write nationwide csv
self.CSV_PATH.mkdir(parents=True, exist_ok=True)
self.df = self.df[

View file

@ -28,7 +28,6 @@ class TribalETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Downloading Tribal Data")
bia_shapefile_zip_url = (
settings.AWS_JUSTICE40_DATASOURCES_URL
@ -77,7 +76,7 @@ class TribalETL(ExtractTransformLoad):
bia_national_lar_df = gpd.read_file(path)
# DELETE
logger.info(f"Columns: {bia_national_lar_df.columns}\n")
logger.debug(f"Columns: {bia_national_lar_df.columns}\n")
bia_national_lar_df.drop(
["GISAcres"],
@ -186,8 +185,6 @@ class TribalETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Transforming Tribal Data")
# Set the filepaths:
bia_national_lar_shapefile = (
self.GEOGRAPHIC_BASE_PATH / "bia_national_lar"
@ -220,7 +217,7 @@ class TribalETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Saving Tribal GeoJson and CSV")
logger.debug("Saving Tribal GeoJson and CSV")
usa_tribal_df = gpd.GeoDataFrame(
pd.concat(self.USA_TRIBAL_DF_LIST, ignore_index=True)
)
@ -228,7 +225,7 @@ class TribalETL(ExtractTransformLoad):
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
)
logger.info("Writing national geojson file")
logger.debug("Writing national geojson file")
usa_tribal_df.to_file(
self.NATIONAL_TRIBAL_GEOJSON_PATH, driver="GeoJSON"
)

View file

@ -94,8 +94,6 @@ class TribalOverlapETL(ExtractTransformLoad):
self.tribal_gdf = get_tribal_geojson()
def transform(self) -> None:
logger.info("Starting tribal overlap transforms.")
# First, calculate whether tracts include any areas from the Tribal areas,
# for both the points in AK and the polygons in the continental US (CONUS).
tribal_overlap_with_tracts = add_tracts_for_geometries(

View file

@ -56,8 +56,6 @@ class USArmyFUDS(ExtractTransformLoad):
self.output_df: pd.DataFrame
def extract(self) -> None:
logger.info("Starting FUDS data download.")
download_file_from_url(
file_url=self.FILE_URL,
download_file_name=self.DOWNLOAD_FILE_NAME,
@ -65,11 +63,10 @@ class USArmyFUDS(ExtractTransformLoad):
)
def transform(self) -> None:
logger.info("Starting FUDS transform.")
# before we try to do any transformation, get the tract data
# so it's loaded and the census ETL is out of scope
logger.info("Loading FUDS data as GeoDataFrame for transform")
logger.debug("Loading FUDS data as GeoDataFrame for transform")
raw_df = gpd.read_file(
filename=self.DOWNLOAD_FILE_NAME,
low_memory=False,

View file

@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
class ScoreA(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score A")
logger.debug("Adding Score A")
self.df[field_names.SCORE_A] = self.df[
[
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,

View file

@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
class ScoreB(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score B")
logger.debug("Adding Score B")
self.df[field_names.SCORE_B] = (
self.df[
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX

View file

@ -72,7 +72,7 @@ class ScoreC(Score):
# "CalEnviroScreen for the US" score
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score C")
logger.debug("Adding Score C")
# Average all the percentile values in each bucket into a single score for each of the four buckets.
for bucket in self.BUCKETS:
self.df[bucket.name] = self.df[bucket.fields].mean(axis=1)

View file

@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
class ScoreD(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Scores D and E")
logger.debug("Adding Scores D and E")
fields_to_use_in_score = [
field_names.UNEMPLOYMENT_FIELD,
field_names.LINGUISTIC_ISO_FIELD,

View file

@ -10,7 +10,7 @@ class ScoreF(Score):
# TODO Make variables and constants clearer (meaning and type)
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score F")
logger.debug("Adding Score F")
ami_and_high_school_field = "Low AMI, Low HS graduation"
meets_socio_field = "Meets socioeconomic criteria"
meets_burden_field = "Meets burden criteria"

View file

@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
class ScoreG(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score G")
logger.debug("Adding Score G")
high_school_cutoff_threshold = 0.05

View file

@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
class ScoreH(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score H")
logger.debug("Adding Score H")
high_school_cutoff_threshold = 0.06

View file

@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
class ScoreI(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score I")
logger.debug("Adding Score I")
high_school_cutoff_threshold = 0.05

View file

@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
class ScoreK(Score):
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score K")
logger.debug("Adding Score K")
high_school_cutoff_threshold = 0.06

View file

@ -52,7 +52,7 @@ class ScoreL(Score):
[column_from_island_areas, column_from_decennial_census]
].mean(axis=1, skipna=True)
logger.info(
logger.debug(
f"Combined field `{combined_column_name}` has "
f"{df[combined_column_name].isnull().sum()} "
f"({df[combined_column_name].isnull().sum() * 100 / len(df):.2f}%) "
@ -64,7 +64,7 @@ class ScoreL(Score):
a=df[combined_column_name], q=threshold_cutoff_for_island_areas
)
logger.info(
logger.debug(
f"For combined field `{combined_column_name}`, "
f"the {threshold_cutoff_for_island_areas*100:.0f} percentile cutoff is a "
f"raw value of {raw_threshold:.3f}."
@ -627,7 +627,7 @@ class ScoreL(Score):
.sum()
)
logger.info(
logger.debug(
f"For workforce criteria in island areas, "
f"{workforce_combined_criteria_for_island_areas.sum()} ("
f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
@ -642,7 +642,7 @@ class ScoreL(Score):
)
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score L")
logger.debug("Adding Score L")
self.df[field_names.THRESHOLD_COUNT] = 0
self.df[field_names.FPL_200_SERIES] = self._create_low_income_threshold(

View file

@ -768,7 +768,7 @@ class ScoreM(Score):
.sum()
)
logger.info(
logger.debug(
f"For workforce criteria in island areas, "
f"{workforce_combined_criteria_for_island_areas.sum()} ("
f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
@ -812,7 +812,7 @@ class ScoreM(Score):
)
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score M")
logger.debug("Adding Score M")
self.df[field_names.THRESHOLD_COUNT] = 0

View file

@ -914,7 +914,7 @@ class ScoreNarwhal(Score):
.sum()
)
logger.info(
logger.debug(
f"For workforce criteria in island areas, "
f"{workforce_combined_criteria_for_island_areas.sum()} ("
f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
@ -972,7 +972,7 @@ class ScoreNarwhal(Score):
We calculate "donut holes" after the initial score generation
"""
logger.info("Marking donut hole tracts")
logger.debug("Marking donut hole tracts")
# This is the boolean we pass to the front end for the donut-hole-specific
# low income criterion
@@ -1050,7 +1050,7 @@ class ScoreNarwhal(Score):
)
def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score Narwhal")
logger.debug("Adding Score Narwhal")
self.df[field_names.THRESHOLD_COUNT] = 0
self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (

View file

@@ -23,7 +23,7 @@ def toy_score_df(scope="module"):
def _helper_test_dropping_tracts(toy_score_df, drop_tracts):
logger.info(drop_tracts)
logger.debug(drop_tracts)
test_frame = toy_score_df[
~toy_score_df[field_names.GEOID_TRACT_FIELD].isin(drop_tracts)
]

View file

@@ -41,12 +41,10 @@ class ExampleETL(ExtractTransformLoad):
/ "input.zip"
)
logger.info(f"Extracting {zip_file_path}")
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
zip_ref.extractall(self.get_tmp_path())
def transform(self):
logger.info(f"Loading file from {self.get_tmp_path() / 'input.csv'}.")
df: pd.DataFrame = pd.read_csv(
self.get_tmp_path() / "input.csv",
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},

View file

@@ -202,7 +202,7 @@ class TestETL:
expected_file_path = data_path / "dataset" / etl.NAME / "usa.csv"
logger.info(f"Expected: {expected_file_path}")
logger.debug(f"Expected: {expected_file_path}")
assert actual_file_path == expected_file_path
@@ -545,7 +545,7 @@ class TestETL:
# Delete output file.
output_file_path = etl._get_output_file_path()
if os.path.exists(output_file_path):
logger.info("Deleting output file created by other tests.")
logger.debug("Deleting output file created by other tests.")
os.remove(output_file_path)
# Run more steps to generate test data.

View file

@@ -39,7 +39,7 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None:
os.mkdir(low_tile_path)
# generate high mbtiles file
logger.info("Generating USA High mbtiles file")
logger.debug("Generating USA High mbtiles file")
cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --layer=blocks "
cmd += "--no-feature-limit --no-tile-size-limit "
@@ -48,7 +48,7 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None:
call(cmd, shell=True)
# generate high mvts
logger.info("Generating USA High mvt folders and files")
logger.debug("Generating USA High mvt folders and files")
cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --no-tile-compression "
cmd += "--no-feature-limit --no-tile-size-limit "
@@ -57,7 +57,7 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None:
call(cmd, shell=True)
# generate low mbtiles file
logger.info("Generating USA Low mbtiles file")
logger.debug("Generating USA Low mbtiles file")
cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --layer=blocks "
cmd += f"--output={low_tile_path}/usa_low.mbtiles "
@@ -65,7 +65,7 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None:
call(cmd, shell=True)
# generate low mvts
logger.info("Generating USA Low mvt folders and files")
logger.debug("Generating USA Low mvt folders and files")
cmd = "tippecanoe "
cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --no-tile-compression "
cmd += "--drop-densest-as-needed "
@@ -86,7 +86,7 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None:
remove_all_from_dir(tribal_tiles_path)
# generate mbtiles file
logger.info("Generating Tribal mbtiles file")
logger.debug("Generating Tribal mbtiles file")
cmd = "tippecanoe "
cmd += "--layer=blocks "
cmd += "--base-zoom=3 "
@@ -96,7 +96,7 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None:
call(cmd, shell=True)
# generate mvts
logger.info("Generating Tribal mvt folders and files")
logger.debug("Generating Tribal mvt folders and files")
cmd = "tippecanoe "
cmd += "--layer=blocks "
cmd += "--base-zoom=3 "

View file

@@ -1,5 +1,4 @@
import datetime
import json
import logging
import os
import shutil
@@ -17,6 +16,9 @@ from data_pipeline.config import settings
from data_pipeline.content.schemas.download_schemas import CodebookConfig
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.content.schemas.download_schemas import ExcelConfig
from data_pipeline.etl.score.constants import (
SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH,
)
from marshmallow import ValidationError
from marshmallow_dataclass import class_schema
@@ -42,11 +44,12 @@ def get_module_logger(module_name: str) -> logging.Logger:
logger = logging.getLogger(module_name)
handler = logging.StreamHandler()
formatter = logging.Formatter(
"%(asctime)s [%(name)-12s] %(levelname)-8s %(message)s"
"%(asctime)s [%(name)40.40s] %(levelname)-8s %(message)s"
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
logger.setLevel(logging.INFO)
logger.propagate = False # don't send log messages to the parent logger (to avoid duplicate log messages)
return logger
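As context for the two changes above: %(name)40.40s pads or truncates the logger name to exactly 40 characters, and propagate = False keeps a record from also being re-emitted by the root logger's handlers. Below is a minimal, self-contained sketch of the duplicate-message behavior this avoids; the logger name is invented for illustration and is not a real pipeline module.

import logging

logging.basicConfig(level=logging.INFO)  # root logger gets its own default handler

handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter("%(asctime)s [%(name)40.40s] %(levelname)-8s %(message)s")
)
demo = logging.getLogger("data_pipeline.etl.sources.example")  # hypothetical name
demo.addHandler(handler)
demo.setLevel(logging.INFO)

demo.info("printed twice: once by demo's handler, once by the root handler")
demo.propagate = False
demo.info("printed once: the root handler no longer receives this record")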
@@ -80,7 +83,6 @@ def remove_files_from_dir(
if not file.endswith(extension):
continue
os.remove(files_path / file)
logger.info(f"Removing {file}")
def remove_all_from_dir(files_path: Path) -> None:
@@ -102,9 +104,8 @@ def remove_all_from_dir(files_path: Path) -> None:
os.remove(files_path / file)
else:
shutil.rmtree(files_path / file)
logger.info(f"Removing {file}")
else:
logger.info(f"The following path does not exist: `{files_path}`.")
logger.warning(f"The following path does not exist: `{files_path}`.")
def remove_all_dirs_from_dir(dir_path: Path) -> None:
@@ -121,7 +122,6 @@ def remove_all_dirs_from_dir(dir_path: Path) -> None:
file_path = os.path.join(dir_path, filename)
if os.path.isdir(file_path):
shutil.rmtree(file_path)
logging.info(f"Removing directory {file_path}")
def download_file_from_url(
@@ -146,7 +146,6 @@ def download_file_from_url(
if not os.path.isdir(download_file_name.parent):
os.mkdir(download_file_name.parent)
logger.info(f"Downloading {file_url}")
response = requests.get(
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
)
@@ -192,7 +191,6 @@ def unzip_file_from_url(
verify=verify,
)
logger.info(f"Extracting {zip_file_path}")
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
zip_ref.extractall(unzipped_file_path)
@@ -205,7 +203,7 @@ def data_folder_cleanup() -> None:
data_path = settings.APP_ROOT / "data"
logger.info("Initializing all dataset directories")
logger.debug("Initializing all dataset directories")
remove_all_from_dir(data_path / "dataset")
@@ -214,13 +212,32 @@ def score_folder_cleanup() -> None:
data_path = settings.APP_ROOT / "data"
logger.info("Initializing all score data")
logger.debug("Initializing all score data")
remove_all_from_dir(data_path / "score" / "csv")
remove_all_from_dir(data_path / "score" / "geojson")
remove_all_from_dir(data_path / "score" / "tiles")
remove_all_from_dir(data_path / "score" / "shapefile")
downloadable_cleanup()
def geo_score_folder_cleanup() -> None:
"""Removes the necessary files to run geo-score. This works out to be
zip files, since if we don't remove them python's zip utils continuously
add to them instead of overwriting the contents."""
data_path = settings.APP_ROOT / "data"
logger.debug("Removing zip files")
remove_files_from_dir(data_path / "score" / "shapefile", ".zip")
shapefile_and_codebook_zipped = (
SCORE_VERSIONING_SHAPEFILE_CODEBOOK_FILE_PATH
)
if os.path.isfile(shapefile_and_codebook_zipped):
os.remove(shapefile_and_codebook_zipped)
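The rationale in the docstring above comes from how Python's zipfile module behaves in append mode: re-opening an existing archive and writing the same entry name again adds a second copy rather than replacing the first, so stale archives have to be deleted before the geo-score step regenerates them. A small, hypothetical sketch follows; example.zip and usa.shp are made-up names, not pipeline paths.

import zipfile

# Two successive runs in append mode ("a") accumulate duplicate entries.
with zipfile.ZipFile("example.zip", "a") as zf:
    zf.writestr("usa.shp", "first run")
with zipfile.ZipFile("example.zip", "a") as zf:
    zf.writestr("usa.shp", "second run")

with zipfile.ZipFile("example.zip") as zf:
    print(zf.namelist())  # ['usa.shp', 'usa.shp'] -- two copies, not an overwrite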
def downloadable_cleanup() -> None:
"""Remove all files from downloadable directory in the local data/score path"""
@@ -233,7 +250,7 @@ def temp_folder_cleanup() -> None:
data_path = settings.APP_ROOT / "data"
logger.info("Initializing all temp directories")
logger.debug("Initializing all temp directories")
remove_all_from_dir(data_path / "tmp")
@@ -289,8 +306,6 @@ def zip_files(zip_file_path: Path, files_to_compress: List[Path]):
with zipfile.ZipFile(zip_file_path, "w") as zf:
for f in files_to_compress:
zf.write(f, arcname=Path(f).name, compress_type=compression)
zip_info = get_zip_info(zip_file_path)
logger.info(json.dumps(zip_info, indent=4, sort_keys=True, default=str))
def zip_directory(
@@ -309,7 +324,6 @@ def zip_directory(
def zipdir(origin_directory: Path, ziph: zipfile.ZipFile):
for root, dirs, files in os.walk(origin_directory):
for file in files:
logger.info(f"Compressing file: {file}")
ziph.write(
os.path.join(root, file),
os.path.relpath(
@@ -319,7 +333,6 @@ def zip_directory(
compress_type=compression,
)
logger.info(f"Compressing {Path(origin_zip_directory).name} directory")
zip_file_name = f"{Path(origin_zip_directory).name}.zip"
# start archiving
@@ -329,10 +342,6 @@ def zip_directory(
zipdir(f"{origin_zip_directory}/", zipf)
zipf.close()
logger.info(
f"Completed compression of {Path(origin_zip_directory).name} directory"
)
def load_yaml_dict_from_file(
yaml_file_path: Path,