Issue 105: Configure and run black and other pre-commit hooks (clean branch) (#1962)

* Configure and run `black` and other pre-commit hooks

Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
This commit is contained in:
Lucas Merrill Brown 2022-10-04 18:08:47 -04:00 committed by GitHub
commit 6e6223cd5e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
162 changed files with 716 additions and 602 deletions

1
.github/CODEOWNERS vendored
View file

@ -1,2 +1 @@
* @esfoobar-usds @vim-usds @emma-nechamkin @mattbowen-usds

View file

@ -56,7 +56,7 @@ body:
id: other-datasource-type
attributes:
label: If "other" above, please specify
description: If you have selected "other" above, please give as much detail as you can as to where this data can live
description: If you have selected "other" above, please give as much detail as you can as to where this data can live
placeholder: www.example.com and www.example2.com
validations:
required: false
@ -110,14 +110,14 @@ body:
attributes:
label: Known data quality issues
description: Please describe any data quality issues you know about with this dataset.
placeholder: Limited accuracy in rural areas, etc.
placeholder: Limited accuracy in rural areas, etc.
validations:
required: false
required: false
- type: input
id: geographic-coverage
attributes:
label: What is the geographic coverage (in percent) of this dataset
description: If known, provide an estimate of the coverage of this dataset vis-a-vis the full United States
description: If known, provide an estimate of the coverage of this dataset vis-a-vis the full United States
placeholder: 90%
validations:
required: false
@ -126,9 +126,9 @@ body:
attributes:
label: Description of geographic coverage estimate
description: (If relevant) Please explain your reasoning behind the above estimate of geographic coverage
placeholder: e.g. Missing information for Puerto Rico, etc.
placeholder: e.g. Missing information for Puerto Rico, etc.
validations:
required: false
required: false
- type: input
id: last-updated-date
attributes:
@ -151,10 +151,10 @@ body:
id: link-to-documentation
attributes:
label: Link to more documentation
description: Please provide a link where one can find additional information
description: Please provide a link where one can find additional information
placeholder: www.example.com
validations:
required: true
required: true
- type: dropdown
id: can-go-in-cloud
attributes:
@ -167,8 +167,8 @@ body:
- type: textarea
id: additional-information
attributes:
label: Additional Information
label: Additional Information
description: Please provide any additional information you have about this dataset
placeholder: e.g. Missing information for Puerto Rico, etc.
placeholder: e.g. Missing information for Puerto Rico, etc.
validations:
required: false

View file

@ -19,4 +19,4 @@ Fixes # (issue number)
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged and published in downstream modules
- [ ] Any dependent changes have been merged and published in downstream modules

View file

@ -79,8 +79,8 @@ jobs:
with:
# Deploy to S3 for the Staging URL
message: |
** Score Deployed! **
Find it here:
** Score Deployed! **
Find it here:
- Score Full usa.csv: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv/full/usa.csv
- Download Zip Packet: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/downloadable/Screening_Tool_Data.zip
repo-token: ${{ secrets.GITHUB_TOKEN }}
@ -116,7 +116,7 @@ jobs:
with:
# Deploy to S3 for the staging URL
message: |
** Map Deployed! **
** Map Deployed! **
Map with Staging Backend: https://screeningtool.geoplatform.gov/en?flags=stage_hash=${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}
Find tiles here: https://justice40-data.s3.amazonaws.com/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/tiles
repo-token: ${{ secrets.GITHUB_TOKEN }}

View file

@ -93,7 +93,7 @@ jobs:
with:
# Deploy to S3 for the staging URL
message: |
**🚢 Here is the frontend staging link: 🚢**
**🚢 Here is the frontend staging link: 🚢**
Find it here: http://usds-geoplatform-justice40-website.s3-website-us-east-1.amazonaws.com/justice40-tool/${{env.DESTINATION_FOLDER}}/en/ !
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: "github-actions[bot]" # The user.login for temporary GitHub tokens
@ -108,9 +108,9 @@ jobs:
uses: mshick/add-pr-comment@v1
with:
message: |
** 👋 Attention translators!! 👋 **
** 👋 Attention translators!! 👋 **
Copy changes have resulted in a new en.json file. Please download en.json file and send to translators: https://github.com/usds/justice40-tool/blob/${{env.COMMIT_HASH}}/client/src/intl/en.json
repo-token: ${{ secrets.GITHUB_TOKEN }}
repo-token-user-login: "github-actions[bot]" # The user.login for temporary GitHub tokens
allow-repeats: true
if: contains(steps.changed-files.outputs.modified_files, 'client/src/intl/en.json')
if: contains(steps.changed-files.outputs.modified_files, 'client/src/intl/en.json')

View file

@ -10,7 +10,7 @@ on:
schedule:
# runs tests every day at 12am ET (4am UTC):
- cron: '0 4 * * *'
jobs:
jobs:
nightly:
runs-on: ubuntu-20.04
env:
@ -25,4 +25,4 @@ jobs:
start: npm start
wait-on: 'http://localhost:8000'
# To run only specific spec/tests:
# spec: cypress/e2e/downloadPacket.spec.js
# spec: cypress/e2e/downloadPacket.spec.js

View file

@ -1,5 +1,5 @@
name: Generate Census
on:
on:
workflow_dispatch:
inputs:
confirm-action:

View file

@ -9,11 +9,11 @@ on:
workflow_dispatch:
inputs:
logLevel:
description: 'Log level'
description: 'Log level'
required: true
default: 'warning'
tags:
description: 'Ping test'
description: 'Ping test'
jobs:
sitePingCheck:
name: Slack Notification

View file

@ -13,4 +13,3 @@ Los mantenedores del proyecto tienen el derecho y la obligación de eliminar, ed
Los casos de abuso, acoso o de otro comportamiento inaceptable se pueden denunciar abriendo un problema o contactando con uno o más de los mantenedores del proyecto en justice40open@usds.gov.
Este Código de conducta es una adaptación de la versión 1.0.0 del Convenio del colaborador ([Contributor Covenant](http://contributor-covenant.org), *en inglés*) disponible en el sitio http://contributor-covenant.org/version/1/0/0/ *(en inglés)*.

View file

@ -12,4 +12,4 @@ Project maintainers have the right and responsibility to remove, edit, or reject
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers at justice40open@usds.gov.
This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)

View file

@ -32,4 +32,4 @@ When participating in Justice40 open source community conversations and spaces,
- Take space and give space. We strive to create an equitable environment in which all are welcome and able to participate. We hope individuals feel comfortable voicing their opinions and providing contributions and will do our best to recognize and make space for individuals who may be struggling to find space here. Likewise, we expect individuals to recognize when they are taking up significant space and take a step back to allow room for others.
- Be present when joining synchronous conversations such as our community chat. Why be here if you're not going to _be here_?
- Be respectful.
- Default to positive. Assume others' contributions are legitimate and valuable and that they are made with good intention.
- Default to positive. Assume others' contributions are legitimate and valuable and that they are made with good intention.

View file

@ -43,4 +43,3 @@ Si desea colaborar con alguna parte del código base, bifurque el repositorio si
* Al menos un revisor autorizado debe aprobar la confirmación (en [CODEOWNERS](https://github.com/usds/justice40-tool/tree/main/.github/CODEOWNERS), en inglés, consulte la lista más reciente de estos revisores).
* Todas las verificaciones de estado obligatorias deben ser aprobadas.
Si hay un desacuerdo importante entre los integrantes del equipo, se organizará una reunión con el fin de determinar el plan de acción para la solicitud de incorporación de cambios.

View file

@ -36,7 +36,7 @@ Homebrew is an easy way to manage software downloads on MacOS. You don't _have_
You should regularly run `brew update` and `brew doctor` to make sure your packages are up to date and in good condition.
### Install Node using NVM
### Install Node using NVM
This will work for both MacOS and Win10. Follow instructions on this [link](https://medium.com/@nodesource/installing-node-js-tutorial-using-nvm-5c6ff5925dd8). Be sure to read through the whole doc to find the sections within each step relevant to you (e.g. if you're using Homebrew, when you get to Step 2 look for the section, "Install NVM with Homebrew").
@ -54,7 +54,7 @@ You should then be able to switch to that version of node by:
To validate you are using node 14, type:
`node -v`
`node -v`
This should return *Now using node 14.x.x (npm v6.x.x)*

View file

@ -28,4 +28,3 @@ Por estos u otros propósitos y motivos, y sin ninguna expectativa de otra consi
c. El Afirmante excluye la responsabilidad de los derechos de compensación de otras personas que se puedan aplicar a la Obra o a cualquier uso de esta, incluidos, entre otros, los Derechos de Autor y Derechos Conexos de cualquier persona sobre la Obra. Además, el Afirmante excluye la responsabilidad de obtener los consentimientos o permisos u otros derechos necesarios que se exijan para cualquier uso de la Obra.
d. El Afirmante entiende y reconoce que Creative Commons no es una parte en este documento y que no tiene ningún derecho u obligación con respecto a esta CC0 o al uso de la Obra.

View file

@ -2,7 +2,7 @@
[![CC0 License](https://img.shields.io/badge/license-CCO--1.0-brightgreen)](https://github.com/usds/justice40-tool/blob/main/LICENSE.md)
*[Read this in English!](README.md)*
Le damos la bienvenida a la comunidad de código abierto de Justice40. Este repositorio contiene el código, los procesos y la documentación que activa los datos y la tecnología de la Herramienta Justice40 para la Vigilancia del Clima y la Justicia Económica (CEJST, por sus siglas en inglés).
## Antecedentes
@ -36,7 +36,7 @@ El equipo central usa el grupo para publicar la información más reciente sobre
Las colaboraciones son siempre bien recibidas. Nos agradan las aportaciones en forma de conversación sobre los temas de este repositorio y las solicitudes para incorporación de cambios en la documentación y el código.
En [CONTRIBUTING-es.md](CONTRIBUTING-es.md), consulte la manera de empezar a participar.
## Instalación
## Instalación
La instalación es una instalación típica de gatsby y los detalles se pueden encontrar en [INSTALLATION-es.md](INSTALLATION-es.md)

View file

@ -11,19 +11,19 @@ The Justice40 initiative and screening tool were announced in an [Executive Orde
Please see our [Open Source Community Orientation](docs/Justice40_Open_Source_Community_Orientation.pptx) deck for more information on the Justice40 initiative, our team, this project, and ways to participate.
## Core team
The core Justice40 team building this tool is a small group of designers, developers, and product managers from the US Digital Service in partnership with the Council on Environmental Quality (CEQ).
The core Justice40 team building this tool is a small group of designers, developers, and product managers from the US Digital Service in partnership with the Council on Environmental Quality (CEQ).
An up-to-date list of core team members can be found in [MAINTAINERS.md](MAINTAINERS.md). The engineering members of the core team who maintain the code in this repo are listed in [.github/CODEOWNERS](.github/CODEOWNERS).
## Community
The Justice40 team is taking a community-first and open source approach to the product development of this tool. We believe government software should be made in the open and be built and licensed such that anyone can take the code, run it themselves without paying money to third parties or using proprietary software, and use it as they will.
The Justice40 team is taking a community-first and open source approach to the product development of this tool. We believe government software should be made in the open and be built and licensed such that anyone can take the code, run it themselves without paying money to third parties or using proprietary software, and use it as they will.
We know that we can learn from a wide variety of communities, including those who will use or will be impacted by the tool, who are experts in data science or technology, or who have experience in climate, economic, or environmental justice work. We are dedicated to creating forums for continuous conversation and feedback to help shape the design and development of the tool.
We know that we can learn from a wide variety of communities, including those who will use or will be impacted by the tool, who are experts in data science or technology, or who have experience in climate, economic, or environmental justice work. We are dedicated to creating forums for continuous conversation and feedback to help shape the design and development of the tool.
We also recognize capacity building as a key part of involving a diverse open source community. We are doing our best to use accessible language, provide technical and process documents in multiple languages, and offer support to our community members of a wide variety of backgrounds and skillsets, directly or in the form of group chats and training. If you have ideas for how we can improve or add to our capacity building efforts and methods for welcoming folks into our community, please let us know in the [Google Group](https://groups.google.com/u/4/g/justice40-open-source) or email us at justice40open@usds.gov.
### Community Guidelines
Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below.
Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below.
### Community Chats
We host open source community chats every third Monday of the month at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
@ -31,15 +31,15 @@ We host open source community chats every third Monday of the month at 5-6pm ET.
Community members are welcome to share updates or propose topics for discussion in community chats. Please do so in the Google Group.
### Google Group
Our [Google Group](https://groups.google.com/u/4/g/justice40-open-source) is open to anyone to join and share their knowledge or experiences, as well as to ask questions of the core Justice40 team or the wider community.
Our [Google Group](https://groups.google.com/u/4/g/justice40-open-source) is open to anyone to join and share their knowledge or experiences, as well as to ask questions of the core Justice40 team or the wider community.
The core team uses the group to post updates on the program and tech/data issues, and to share the agenda and call for community participation in the community chat.
Curious about whether to ask a question here as a GitHub issue or in the Google Group? The general rule of thumb is that issues are for actionable topics related to the tool or data itself (e.g. questions about a specific data set in use, or suggestion for a new tool feature), and the Google Group is for more general topics or questions. If you can't decide, use the Google Group and we'll discuss it there before moving to GitHub if appropriate!
Curious about whether to ask a question here as a GitHub issue or in the Google Group? The general rule of thumb is that issues are for actionable topics related to the tool or data itself (e.g. questions about a specific data set in use, or suggestion for a new tool feature), and the Google Group is for more general topics or questions. If you can't decide, use the Google Group and we'll discuss it there before moving to GitHub if appropriate!
## Contributing
Contributions are always welcome! We encourage contributions in the form of discussion on issues in this repo and pull requests of documentation and code.
Contributions are always welcome! We encourage contributions in the form of discussion on issues in this repo and pull requests of documentation and code.
See [CONTRIBUTING.md](CONTRIBUTING.md) for ways to get started.

View file

@ -0,0 +1,39 @@
---
# Configuration for the `pre-commit` framework (https://pre-commit.com).
# Install the hooks with `pre-commit install`; re-run them on the whole
# repository with `pre-commit run --all-files`.

# Regex of paths that every hook skips: anything under a top-level path
# starting with `client`, and any path containing `.csv`.
exclude: '^client|\.csv'

repos:
  # Generic file hygiene: final newline and trailing whitespace.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: "v4.3.0"
    hooks:
      - id: end-of-file-fixer
      - id: trailing-whitespace

  # Remove unused imports and unused local variables from Python code.
  - repo: https://github.com/lucasmbrown/mirrors-autoflake
    rev: "v1.3"
    hooks:
      - id: autoflake
        args:
          - "--in-place"
          - "--remove-all-unused-imports"
          # Spelled out in full: the truncated `--remove-unused-variable`
          # is only accepted through argparse prefix matching.
          - "--remove-unused-variables"
          - "--ignore-init-module-imports"

  # Sort imports, one import per line, using black-compatible settings.
  - repo: https://github.com/pycqa/isort
    rev: "5.10.1"
    hooks:
      - id: isort
        name: isort (python)
        args:
          - "--force-single-line-imports"
          - "--profile=black"
          - "--line-length=80"
          - "--src-path=.:data/data-pipeline"

  # Format Python code with black, configured from the data pipeline's
  # pyproject.toml. `psf/black` is the canonical repository location
  # (`ambv/black` is the legacy URL).
  - repo: https://github.com/psf/black
    rev: "22.8.0"
    hooks:
      - id: black
        language_version: python3.9
        args:
          - "--config=./data/data-pipeline/pyproject.toml"

View file

@ -243,14 +243,66 @@ If you want to run tile generation, please install TippeCanoe [following these i
- We use [Poetry](https://python-poetry.org/) for managing dependencies and building the application. Please follow the instructions on their site to download.
- Install Poetry requirements with `poetry install`
### Running tox
### Running tox
Our full test and check suite is run using tox. This can be run using commands such
Our full test and check suite is run using tox. This can be run using commands such
as `poetry run tox`.
Each run can take a while to build the whole environment. If you'd like to save time,
you can use the previously built environment by running `poetry run tox -e lint`
which will drastically speed up the process.
you can use the previously built environment by running `poetry run tox -e lint`
which will drastically speed up the linting process.
### Configuring pre-commit hooks
<!-- markdown-link-check-disable -->
To promote consistent code style and quality, we use git pre-commit hooks to
automatically lint and reformat our code before every commit we make to the codebase.
Pre-commit hooks are defined in the file [`.pre-commit-config.yaml`](../.pre-commit-config.yaml).
<!-- markdown-link-check-enable -->
1. First, install [`pre-commit`](https://pre-commit.com/) globally:
$ brew install pre-commit
2. While in the `data/data-pipeline` directory, run `pre-commit install` to install
the specific git hooks used in this repository.
Now, any time you commit code to the repository, the hooks will run on all modified files automatically. If you wish,
you can force a re-run on all files with `pre-commit run --all-files`.
#### Conflicts between backend and frontend git hooks
<!-- markdown-link-check-disable -->
In the front-end part of the codebase (the `justice40-tool/client` folder), we use
`Husky` to run pre-commit hooks for the front-end. This is different than the
`pre-commit` framework we use for the backend. The frontend `Husky` hooks are
configured at
[client/.husky](client/.husky).
It is not possible to run both our `Husky` hooks and `pre-commit` hooks on every
commit; either one or the other will run.
<!-- markdown-link-check-enable -->
`Husky` is installed every time you run `npm install`. To use the `Husky` front-end
hooks during front-end development, simply run `npm install`.
However, running `npm install` overwrites the backend hooks set up by `pre-commit`.
To restore the backend hooks after running `npm install`, do the following:
1. Run `pre-commit install` while in the `data/data-pipeline` directory.
2. The terminal should respond with an error message such as:
```
[ERROR] Cowardly refusing to install hooks with `core.hooksPath` set.
hint: `git config --unset-all core.hooksPath`
```
This error is caused by having previously run `npm install` which used `Husky` to
overwrite the hooks path.
3. Follow the hint and run `git config --unset-all core.hooksPath`.
4. Run `pre-commit install` again.
Now `pre-commit` and the backend hooks should take precedence.
### The Application entrypoint
@ -323,7 +375,7 @@ see [python-markdown docs](https://github.com/ipython-contrib/jupyter_contrib_nb
### Background
<!-- markdown-link-check-disable -->
For this project, we make use of [pytest](https://docs.pytest.org/en/latest/) for testing purposes.
For this project, we make use of [pytest](https://docs.pytest.org/en/latest/) for testing purposes.
<!-- markdown-link-check-enable-->
To run tests, simply run `poetry run pytest` in this directory (i.e., `justice40-tool/data/data-pipeline`).
@ -466,19 +518,19 @@ In order to update the snapshot fixtures of an ETL class, follow the following s
1. If you need to manually update the fixtures, update the "furthest upstream" source
that is called by `_setup_etl_instance_and_run_extract`. For instance, this may
involve creating a new zip file that imitates the source data. (e.g., for the
National Risk Index test, update
`data_pipeline/tests/sources/national_risk_index/data/NRI_Table_CensusTracts.zip`
involve creating a new zip file that imitates the source data. (e.g., for the
National Risk Index test, update
`data_pipeline/tests/sources/national_risk_index/data/NRI_Table_CensusTracts.zip`
which is a 64kb imitation of the 405MB source NRI data.)
2. Run `pytest . -rsx --update_snapshots` to update snapshots for all files, or you
can pass a specific file name to pytest to be more precise (e.g., `pytest
can pass a specific file name to pytest to be more precise (e.g., `pytest
data_pipeline/tests/sources/national_risk_index/test_etl.py -rsx --update_snapshots`)
3. Re-run pytest without the `update_snapshots` flag (e.g., `pytest . -rsx`) to
ensure the tests now pass.
4. Carefully check the `git diff` for the updates to all test fixtures to make sure
these are as expected. This part is very important. For instance, if you changed a
column name, you would only expect the column name to change in the output. If
you modified the calculation of some data, spot check the results to see if the
column name, you would only expect the column name to change in the output. If
you modified the calculation of some data, spot check the results to see if the
numbers in the updated fixtures are as expected.
### Other ETL Unit Tests
@ -520,4 +572,4 @@ These tests are implemented as pytest test, but are skipped by default. To run t
1. Generate a full score with `poetry run python3 data_pipeline/application.py score-full-run`
2. Generate the tile data with `poetry run python3 data_pipeline/application.py generate-score-post`
3. Generate the frontend GEOJSON with `poetry run python3 data_pipeline/application.py geo-score`
4. Select the smoke tests for pytest with `poetry run pytest data_pipeline/tests -k smoketest`
4. Select the smoke tests for pytest with `poetry run pytest data_pipeline/tests -k smoketest`

View file

@ -1,31 +1,27 @@
from subprocess import call
import sys
import click
from subprocess import call
import click
from data_pipeline.config import settings
from data_pipeline.etl.runner import (
etl_runner,
score_generate,
score_geo,
score_post,
)
from data_pipeline.etl.runner import etl_runner
from data_pipeline.etl.runner import score_generate
from data_pipeline.etl.runner import score_geo
from data_pipeline.etl.runner import score_post
from data_pipeline.etl.sources.census.etl_utils import check_census_data_source
from data_pipeline.etl.sources.census.etl_utils import (
check_census_data_source,
reset_data_directories as census_reset,
zip_census_data,
)
from data_pipeline.etl.sources.census.etl_utils import zip_census_data
from data_pipeline.etl.sources.tribal.etl_utils import (
reset_data_directories as tribal_reset,
)
from data_pipeline.tile.generate import generate_tiles
from data_pipeline.utils import (
data_folder_cleanup,
get_module_logger,
score_folder_cleanup,
downloadable_cleanup,
temp_folder_cleanup,
check_first_run,
)
from data_pipeline.utils import check_first_run
from data_pipeline.utils import data_folder_cleanup
from data_pipeline.utils import downloadable_cleanup
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import score_folder_cleanup
from data_pipeline.utils import temp_folder_cleanup
logger = get_module_logger(__name__)
@ -36,8 +32,6 @@ dataset_cli_help = "Grab the data from either 'local' for local access or 'aws'
def cli():
"""Defines a click group for the commands below"""
pass
@cli.command(help="Clean up all census data folders")
def census_cleanup():

View file

@ -2,13 +2,13 @@
## Comparison tool
TODO once the comparison tool has been refactored.
TODO once the comparison tool has been refactored.
## Single comparator score comparisons
The goal of this directory is to create interactive 1-to-1 dac list:cejst comparisons. That means that, when this tool is run, you will have comparisons of two true/false classifications.
The goal of this directory is to create interactive 1-to-1 dac list:cejst comparisons. That means that, when this tool is run, you will have comparisons of two true/false classifications.
This uses `papermill` to parameterize a jupyter notebook, and is meant to be a *lightweight* entry into this analysis. The tool as a whole creates a bunch of comparisons against CEJST data -- but after it runs, you'll have the notebook to re-run and add to if you are so inclined.
This uses `papermill` to parameterize a jupyter notebook, and is meant to be a *lightweight* entry into this analysis. The tool as a whole creates a bunch of comparisons against CEJST data -- but after it runs, you'll have the notebook to re-run and add to if you are so inclined.
To run:
` $ python src/run_tract_comparison.py --template_notebook=TEMPLATE.ipynb --parameter_yaml=PARAMETERS.yaml`
@ -19,52 +19,52 @@ For example, if I am running this from the `comparison_tool` directory within th
__What is the template notebook?__
This gets filled in by the parameters in the yaml file and then executed. Even after execution, it is run-able and interactive. You do not need to change anything in this (with the caveat -- depending on how you run `jupyter lab`, you might need to add `import sys` and then `sys.path.append("../../../../")` to run the notebook live).
This gets filled in by the parameters in the yaml file and then executed. Even after execution, it is run-able and interactive. You do not need to change anything in this (with the caveat -- depending on how you run `jupyter lab`, you might need to add `import sys` and then `sys.path.append("../../../../")` to run the notebook live).
__What is the output?__
When you run this, you'll get back three files:
1. The filled-in parameter notebook that you can run live, with the date appended. This means if you run the script twice in one day, the notebook will get overridden, but if you run the script on two consecutive days, you will get two separate notebooks saved.
2. A graph that shows the relative average of the specified `ADDITIONAL_DEMO_COLUMNS` and `DEMOGRAPHIC_COLUMNS` segmented by CEJST and the comparator you include. This gets overridden with every run.
1. The filled-in parameter notebook that you can run live, with the date appended. This means if you run the script twice in one day, the notebook will get overridden, but if you run the script on two consecutive days, you will get two separate notebooks saved.
2. A graph that shows the relative average of the specified `ADDITIONAL_DEMO_COLUMNS` and `DEMOGRAPHIC_COLUMNS` segmented by CEJST and the comparator you include. This gets overridden with every run.
3. An excel file with many tabs that has summary statistics from the comparison of the two classifications (the cejst and the comparator).
In more detail, the excel file contains the following tabs:
- `Summary`: out of all tracts (even if you keep missing), how many tracts are classified TRUE/FALSE by the comparator and CEJST, by population and number.
- `Tract level stats`: overall, for all tracts classified as TRUE for CEJST and the comparator, how do the demographics of those tracts compare? Here, we think of "demographics" loosely -- whatever columns you include in the parameter yaml will show up. For example, if my additional demographics column in the yaml included `percent of households in linguistic isolation`, I'd see the average percent of households in linguistic isolation for the comparator-identified tracts (where the comparator is TRUE) and for CEJST-identified tracts.
- `Tract level stats`: overall, for all tracts classified as TRUE for CEJST and the comparator, how do the demographics of those tracts compare? Here, we think of "demographics" loosely -- whatever columns you include in the parameter yaml will show up. For example, if my additional demographics column in the yaml included `percent of households in linguistic isolation`, I'd see the average percent of households in linguistic isolation for the comparator-identified tracts (where the comparator is TRUE) and for CEJST-identified tracts.
- `Population level stats`: same demographic variables, looking at population within tract. Since not all tracts have the same number of people, this will be slightly different. This also includes segments of the population -- where you can investigate the disjoint set of tracts identified by a single method (e.g., you could specifically look at tracts identified by CEJST but not by the comparator.)
- `Segmented tract level stats`: segmented version of the tract-level stats.
- (Optional -- requires a non-disjoint set of tracts) `Comparator and CEJST overlap`: shows the overlap from the vantage point of the comparator ("what share of the tracts that the comparator identifies are also identified in CEJST?"). Also lists the states the comparator has information for.
- `Segmented tract level stats`: segmented version of the tract-level stats.
- (Optional -- requires a non-disjoint set of tracts) `Comparator and CEJST overlap`: shows the overlap from the vantage point of the comparator ("what share of the tracts that the comparator identifies are also identified in CEJST?"). Also lists the states the comparator has information for.
__What parameters go in the yaml file?__
- ADDITIONAL_DEMO_COLUMNS: list, demographic columns from the score file that you want to run analyses on. All columns here will appear in the excel file and the graph.
- COMPARATOR_COLUMN: the name of the column that has a boolean (*must be TRUE / FALSE*) for whether or not the tract is prioritized. You provide this!
- DEMOGRAPHIC_COLUMNS: list, demographic columns from another file that you'd like to include in the analysis.
- DEMOGRAPHIC_FILE: the file that has the census demographic information. This name suggests, in theory, that you've run our pipeline and are using the ACS output -- but any file with `GEOID10_TRACT` as the field with census tract IDs will work.
- OUTPUT_DATA_PATH: where you want the output to be. Convention: `output/` followed by a folder named after the data source (e.g. `output/donut_hole_dac`). Note that the folder name of the data source gets read as the "data name" for some of the outputs.
- SCORE_COLUMN: name of the column that holds the CEJST score boolean.
- SCORE_FILE: CEJST full score file. This requires that you've run our pipeline, but in theory, the downloaded file should also work, provided the columns are named appropriately.
- TOTAL_POPULATION_COLUMN: column name for total population. We use `Total Population` currently in our pipeline.
- OTHER_COMPARATOR_COLUMNS: list, other columns from the comparator file you might want to read in for analysis. This is an optional argument. You will keep these columns to perform analysis once you have the notebook -- this will not be included in the excel print out.
- ADDITIONAL_DEMO_COLUMNS: list, demographic columns from the score file that you want to run analyses on. All columns here will appear in the excel file and the graph.
- COMPARATOR_COLUMN: the name of the column that has a boolean (*must be TRUE / FALSE*) for whether or not the tract is prioritized. You provide this!
- DEMOGRAPHIC_COLUMNS: list, demographic columns from another file that you'd like to include in the analysis.
- DEMOGRAPHIC_FILE: the file that has the census demographic information. This name suggests, in theory, that you've run our pipeline and are using the ACS output -- but any file with `GEOID10_TRACT` as the field with census tract IDs will work.
- OUTPUT_DATA_PATH: where you want the output to be. Convention: `output/` followed by a folder named after the data source (e.g. `output/donut_hole_dac`). Note that the folder name of the data source gets read as the "data name" for some of the outputs.
- SCORE_COLUMN: name of the column that holds the CEJST score boolean.
- SCORE_FILE: CEJST full score file. This requires that you've run our pipeline, but in theory, the downloaded file should also work, provided the columns are named appropriately.
- TOTAL_POPULATION_COLUMN: column name for total population. We use `Total Population` currently in our pipeline.
- OTHER_COMPARATOR_COLUMNS: list, other columns from the comparator file you might want to read in for analysis. This is an optional argument. You will keep these columns to perform analysis once you have the notebook -- this will not be included in the excel print out.
- KEEP_MISSING_VALUES_FOR_SEGMENTATION: whether or not to fill NaNs. True keeps missing.
__Cleaning data__
Comparator data should live in a flat csv, just like the CEJST data. Right now, each comparator has a folder in `comparison_tool/data` that contains a notebook to clean the data (this is because the data is often quirky and so live inspection is easier), the `raw` data, and the `clean` data. We can also point the `yaml` to an `ETL` output, for files in which there are multiple important columns, if you want to use one of the data sources the CEJST team has already included in the pipeline (which are already compatible with the tool).
Comparator data should live in a flat csv, just like the CEJST data. Right now, each comparator has a folder in `comparison_tool/data` that contains a notebook to clean the data (this is because the data is often quirky and so live inspection is easier), the `raw` data, and the `clean` data. We can also point the `yaml` to an `ETL` output, for files in which there are multiple important columns, if you want to use one of the data sources the CEJST team has already included in the pipeline (which are already compatible with the tool).
When you make your own output for comparison, make sure to follow the steps below.
When you make your own output for comparison, make sure to follow the steps below.
When you clean the data, it's important that you:
1. Ensure the tract level id is named the same as the field name in score M (specified in `field_names`). Right now, this is `GEOID10_TRACT`.
2. Ensure the identification column is a `bool`.
You will provide the path to the comparator data in the parameter yaml file.
You will provide the path to the comparator data in the parameter yaml file.
__How to use the shell script__
We have also included a shell script, `run_all_comparisons.sh`. This script includes all
of the commands that we have run to generate pairwise comparisons.
We have also included a shell script, `run_all_comparisons.sh`. This script includes all
of the commands that we have run to generate pairwise comparisons.
To run: `$ bash run_all_comparisons.sh`
To add to it: create a new line and include the command line for each notebook run.
To add to it: create a new line and include the command line for each notebook run.

View file

@ -1,3 +1,3 @@
#! /bin/bash
poetry run python3 src/run_tract_comparison.py --template_notebook=src/tract_comparison__template.ipynb --parameter_yaml=src/donut_hole_dacs.yaml
poetry run python3 src/run_tract_comparison.py --template_notebook=src/tract_comparison__template.ipynb --parameter_yaml=src/donut_hole_dacs.yaml

View file

@ -17,7 +17,7 @@ DEMOGRAPHIC_COLUMNS:
DEMOGRAPHIC_FILE: ../../data_pipeline/data/dataset/census_acs_2019/usa.csv
OUTPUT_DATA_PATH: output/donut_hole_dac
SCORE_FILE: ../../data_pipeline/data/score/csv/full/usa.csv
OTHER_COMPARATOR_COLUMNS:
OTHER_COMPARATOR_COLUMNS:
- donut_hole_dac
- P200_PFS
- HSEF

View file

@ -12,12 +12,12 @@ To see more: https://buildmedia.readthedocs.org/media/pdf/papermill/latest/paper
To run:
` $ python src/run_tract_comparison.py --template_notebook=TEMPLATE.ipynb --parameter_yaml=PARAMETERS.yaml`
"""
import os
import datetime
import argparse
import yaml
import datetime
import os
import papermill as pm
import yaml
def _read_param_file(param_file: str) -> dict:

View file

@ -16,7 +16,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"from data_pipeline.score import field_names\n",
"from data_pipeline.comparison_tool.src import utils \n",
"from data_pipeline.comparison_tool.src import utils\n",
"\n",
"pd.options.display.float_format = \"{:,.3f}\".format\n",
"%load_ext lab_black"
@ -128,9 +128,7 @@
"metadata": {},
"outputs": [],
"source": [
"utils.validate_new_data(\n",
" file_path=COMPARATOR_FILE, score_col=COMPARATOR_COLUMN\n",
")"
"utils.validate_new_data(file_path=COMPARATOR_FILE, score_col=COMPARATOR_COLUMN)"
]
},
{
@ -148,20 +146,25 @@
"metadata": {},
"outputs": [],
"source": [
"comparator_cols = [COMPARATOR_COLUMN] + OTHER_COMPARATOR_COLUMNS if OTHER_COMPARATOR_COLUMNS else [COMPARATOR_COLUMN]\n",
"comparator_cols = (\n",
" [COMPARATOR_COLUMN] + OTHER_COMPARATOR_COLUMNS\n",
" if OTHER_COMPARATOR_COLUMNS\n",
" else [COMPARATOR_COLUMN]\n",
")\n",
"\n",
"#papermill_description=Loading_data\n",
"# papermill_description=Loading_data\n",
"joined_df = pd.concat(\n",
" [\n",
" utils.read_file(\n",
" file_path=SCORE_FILE,\n",
" columns=[TOTAL_POPULATION_COLUMN, SCORE_COLUMN] + ADDITIONAL_DEMO_COLUMNS,\n",
" columns=[TOTAL_POPULATION_COLUMN, SCORE_COLUMN]\n",
" + ADDITIONAL_DEMO_COLUMNS,\n",
" geoid=GEOID_COLUMN,\n",
" ),\n",
" utils.read_file(\n",
" file_path=COMPARATOR_FILE,\n",
" columns=comparator_cols,\n",
" geoid=GEOID_COLUMN\n",
" geoid=GEOID_COLUMN,\n",
" ),\n",
" utils.read_file(\n",
" file_path=DEMOGRAPHIC_FILE,\n",
@ -196,13 +199,13 @@
"metadata": {},
"outputs": [],
"source": [
"#papermill_description=Summary_stats\n",
"# papermill_description=Summary_stats\n",
"population_df = utils.produce_summary_stats(\n",
" joined_df=joined_df,\n",
" comparator_column=COMPARATOR_COLUMN,\n",
" score_column=SCORE_COLUMN,\n",
" population_column=TOTAL_POPULATION_COLUMN,\n",
" geoid_column=GEOID_COLUMN\n",
" geoid_column=GEOID_COLUMN,\n",
")\n",
"population_df"
]
@ -224,18 +227,18 @@
"metadata": {},
"outputs": [],
"source": [
"#papermill_description=Tract_stats\n",
"# papermill_description=Tract_stats\n",
"tract_level_by_identification_df = pd.concat(\n",
" [\n",
" utils.get_demo_series(\n",
" grouping_column=COMPARATOR_COLUMN,\n",
" joined_df=joined_df,\n",
" demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS\n",
" demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n",
" ),\n",
" utils.get_demo_series(\n",
" grouping_column=SCORE_COLUMN,\n",
" joined_df=joined_df,\n",
" demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS\n",
" demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n",
" ),\n",
" ],\n",
" axis=1,\n",
@ -256,17 +259,25 @@
" y=\"Variable\",\n",
" x=\"Avg in tracts\",\n",
" hue=\"Definition\",\n",
" data=tract_level_by_identification_df.sort_values(by=COMPARATOR_COLUMN, ascending=False)\n",
" data=tract_level_by_identification_df.sort_values(\n",
" by=COMPARATOR_COLUMN, ascending=False\n",
" )\n",
" .stack()\n",
" .reset_index()\n",
" .rename(\n",
" columns={\"level_0\": \"Variable\", \"level_1\": \"Definition\", 0: \"Avg in tracts\"}\n",
" columns={\n",
" \"level_0\": \"Variable\",\n",
" \"level_1\": \"Definition\",\n",
" 0: \"Avg in tracts\",\n",
" }\n",
" ),\n",
" palette=\"Blues\",\n",
")\n",
"plt.xlim(0, 1)\n",
"plt.title(\"Tract level averages by identification strategy\")\n",
"plt.savefig(os.path.join(OUTPUT_DATA_PATH, \"tract_lvl_avg.jpg\"), bbox_inches='tight')"
"plt.savefig(\n",
" os.path.join(OUTPUT_DATA_PATH, \"tract_lvl_avg.jpg\"), bbox_inches=\"tight\"\n",
")"
]
},
{
@ -276,13 +287,13 @@
"metadata": {},
"outputs": [],
"source": [
"#papermill_description=Tract_stats_grouped\n",
"# papermill_description=Tract_stats_grouped\n",
"tract_level_by_grouping_df = utils.get_tract_level_grouping(\n",
" joined_df=joined_df,\n",
" score_column=SCORE_COLUMN,\n",
" comparator_column=COMPARATOR_COLUMN,\n",
" demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n",
" keep_missing_values=KEEP_MISSING_VALUES_FOR_SEGMENTATION\n",
" keep_missing_values=KEEP_MISSING_VALUES_FOR_SEGMENTATION,\n",
")\n",
"\n",
"tract_level_by_grouping_formatted_df = utils.format_multi_index_for_excel(\n",
@ -315,7 +326,7 @@
"metadata": {},
"outputs": [],
"source": [
"#papermill_description=Population_stats\n",
"# papermill_description=Population_stats\n",
"population_weighted_stats_df = pd.concat(\n",
" [\n",
" utils.construct_weighted_statistics(\n",
@ -363,7 +374,7 @@
"comparator_and_cejst_proportion_series, states = utils.get_final_summary_info(\n",
" population=population_df,\n",
" comparator_file=COMPARATOR_FILE,\n",
" geoid_col=GEOID_COLUMN\n",
" geoid_col=GEOID_COLUMN,\n",
")"
]
},
@ -393,7 +404,7 @@
"metadata": {},
"outputs": [],
"source": [
"#papermill_description=Writing_excel\n",
"# papermill_description=Writing_excel\n",
"utils.write_single_comparison_excel(\n",
" output_excel=OUTPUT_EXCEL,\n",
" population_df=population_df,\n",
@ -401,7 +412,7 @@
" population_weighted_stats_df=population_weighted_stats_df,\n",
" tract_level_by_grouping_formatted_df=tract_level_by_grouping_formatted_df,\n",
" comparator_and_cejst_proportion_series=comparator_and_cejst_proportion_series,\n",
" states_text=states_text\n",
" states_text=states_text,\n",
")"
]
}

View file

@ -1,9 +1,9 @@
import pathlib
import pandas as pd
import xlsxwriter
from data_pipeline.score import field_names
from data_pipeline.etl.sources.census.etl_utils import get_state_information
from data_pipeline.score import field_names
# Some excel parameters
DEFAULT_COLUMN_WIDTH = 18

View file

@ -1,8 +1,7 @@
import pathlib
from dynaconf import Dynaconf
import data_pipeline
from dynaconf import Dynaconf
settings = Dynaconf(
envvar_prefix="DYNACONF",

View file

@ -1,7 +1,7 @@
# This is a temporary file. We should make sure this *type* of information is maintained when we refactor.
# This is a temporary file. We should make sure this *type* of information is maintained when we refactor.
fields:
- score_name: Total threshold criteria exceeded
notes: Lists out the total number of criteria (where each category has one or more criteria) exceeded. For example, a tract that exceeds the 90th percentile for linguistic isolation (1) and unemployment (2), and meets the training and workforce development socioeconomic criteria (high school attainment rate and low percentage of higher ed students) would have a 2 in this field.
notes: Lists out the total number of criteria (where each category has one or more criteria) exceeded. For example, a tract that exceeds the 90th percentile for linguistic isolation (1) and unemployment (2), and meets the training and workforce development socioeconomic criteria (high school attainment rate and low percentage of higher ed students) would have a 2 in this field.
- score_name: Definition M (communities)
notes: True / False variable for whether a tract is a Disadvantaged Community (DAC)
- score_name: Is low income and has a low percent of higher ed students?
@ -43,7 +43,7 @@ fields:
- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?
category: training and workforce development
- score_name: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?
category: training and workforce development
category: training and workforce development
- score_name: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?
category: training and workforce development
- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?

View file

@ -427,7 +427,9 @@
}
],
"source": [
"for col in [col for col in download_codebook.index.to_list() if \"(percentile)\" in col]:\n",
"for col in [\n",
" col for col in download_codebook.index.to_list() if \"(percentile)\" in col\n",
"]:\n",
" print(f\" - column_name: {col}\")\n",
" if \"Low\" not in col:\n",
" print(\n",

View file

@ -1,6 +1,8 @@
from dataclasses import dataclass, field
from dataclasses import dataclass
from dataclasses import field
from enum import Enum
from typing import List, Optional
from typing import List
from typing import Optional
class FieldType(Enum):

View file

@ -5,18 +5,15 @@ import typing
from typing import Optional
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.score.etl_utils import (
compare_to_list_of_expected_state_fips_codes,
)
from data_pipeline.etl.score.schemas.datasets import DatasetsConfig
from data_pipeline.utils import (
load_yaml_dict_from_file,
unzip_file_from_url,
remove_all_from_dir,
get_module_logger,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import load_yaml_dict_from_file
from data_pipeline.utils import remove_all_from_dir
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,5 +1,5 @@
import importlib
import concurrent.futures
import importlib
import typing
from data_pipeline.etl.score.etl_score import ScoreETL

View file

@ -81,7 +81,7 @@ datasets:
load_fields:
- short_name: "he_heat"
df_field_name: "EXTREME_HEAT_FIELD"
long_name: "Summer days above 90F"
long_name: "Summer days above 90F"
field_type: float
include_in_downloadable_files: true
include_in_tiles: true
@ -92,7 +92,7 @@ datasets:
include_in_downloadable_files: true
include_in_tiles: true
- short_name: "he_green"
long_name: "Percent impenetrable surface areas"
long_name: "Percent impenetrable surface areas"
df_field_name: "IMPENETRABLE_SURFACES_FIELD"
field_type: float
include_in_downloadable_files: true
@ -110,7 +110,7 @@ datasets:
load_fields:
- short_name: "EBP_PFS"
df_field_name: "REVISED_ENERGY_BURDEN_FIELD_NAME"
long_name: "Energy burden"
long_name: "Energy burden"
field_type: float
include_in_downloadable_files: true
include_in_tiles: true
@ -121,7 +121,7 @@ datasets:
- short_name: "fuds_count"
df_field_name: "ELIGIBLE_FUDS_COUNT_FIELD_NAME"
long_name: "Count of eligible Formerly Used Defense Site (FUDS) properties centroids"
description_short:
description_short:
"The number of FUDS marked as Eligible and Has Project in the tract."
field_type: int64
include_in_tiles: false
@ -129,7 +129,7 @@ datasets:
- short_name: "not_fuds_ct"
df_field_name: "INELIGIBLE_FUDS_COUNT_FIELD_NAME"
long_name: "Count of ineligible Formerly Used Defense Site (FUDS) properties centroids"
description_short:
description_short:
"The number of FUDS marked as Ineligible or Project in the tract."
field_type: int64
include_in_tiles: false
@ -137,7 +137,7 @@ datasets:
- short_name: "has_fuds"
df_field_name: "ELIGIBLE_FUDS_BINARY_FIELD_NAME"
long_name: "Is there at least one Formerly Used Defense Site (FUDS) in the tract?"
description_short:
description_short:
"Whether the tract has a FUDS"
field_type: bool
include_in_tiles: false
@ -149,7 +149,7 @@ datasets:
- short_name: "has_aml"
df_field_name: "AML_BOOLEAN"
long_name: "Is there at least one abandoned mine in this census tract?"
description_short:
description_short:
"Whether the tract has an abandoned mine"
field_type: bool
include_in_tiles: true
@ -161,7 +161,7 @@ datasets:
load_fields:
- short_name: "EXAMPLE_FIELD"
df_field_name: "Input Field 1"
long_name: "Example Field 1"
long_name: "Example Field 1"
field_type: float
include_in_tiles: true
include_in_downloadable_files: true
@ -172,35 +172,35 @@ datasets:
load_fields:
- short_name: "flood_eligible_properties"
df_field_name: "COUNT_PROPERTIES"
long_name: "Count of properties eligible for flood risk calculation within tract (floor of 250)"
long_name: "Count of properties eligible for flood risk calculation within tract (floor of 250)"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "flood_risk_properties_today"
df_field_name: "PROPERTIES_AT_RISK_FROM_FLOODING_TODAY"
long_name: "Count of properties at risk of flood today"
long_name: "Count of properties at risk of flood today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "flood_risk_properties_30yrs"
df_field_name: "PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS"
long_name: "Count of properties at risk of flood in 30 years"
long_name: "Count of properties at risk of flood in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "flood_risk_share_today"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY"
long_name: "Share of properties at risk of flood today"
long_name: "Share of properties at risk of flood today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: true
- short_name: "flood_risk_share_30yrs"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS"
long_name: "Share of properties at risk of flood in 30 years"
long_name: "Share of properties at risk of flood in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
@ -212,35 +212,35 @@ datasets:
load_fields:
- short_name: "fire_eligible_properties"
df_field_name: "COUNT_PROPERTIES"
long_name: "Count of properties eligible for wildfire risk calculation within tract (floor of 250)"
long_name: "Count of properties eligible for wildfire risk calculation within tract (floor of 250)"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "fire_risk_properties_today"
df_field_name: "PROPERTIES_AT_RISK_FROM_FIRE_TODAY"
long_name: "Count of properties at risk of wildfire today"
long_name: "Count of properties at risk of wildfire today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "fire_risk_properties_30yrs"
df_field_name: "PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS"
long_name: "Count of properties at risk of wildfire in 30 years"
long_name: "Count of properties at risk of wildfire in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "fire_risk_share_today"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_TODAY"
long_name: "Share of properties at risk of fire today"
long_name: "Share of properties at risk of fire today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: true
- short_name: "fire_risk_share_30yrs"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS"
long_name: "Share of properties at risk of fire in 30 years"
long_name: "Share of properties at risk of fire in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
@ -252,7 +252,7 @@ datasets:
load_fields:
- short_name: "travel_burden"
df_field_name: "TRAVEL_BURDEN_FIELD_NAME"
long_name: "DOT Travel Barriers Score"
long_name: "DOT Travel Barriers Score"
field_type: float
include_in_tiles: true
include_in_downloadable_files: true
@ -264,28 +264,28 @@ datasets:
load_fields:
- short_name: "ncld_eligible"
df_field_name: "ELIGIBLE_FOR_NATURE_DEPRIVED_FIELD_NAME"
long_name: "Does the tract have at least 35 acres in it?"
long_name: "Does the tract have at least 35 acres in it?"
field_type: bool
include_in_tiles: true
include_in_downloadable_files: true
create_percentile: false
- short_name: "percent_impervious"
df_field_name: "TRACT_PERCENT_IMPERVIOUS_FIELD_NAME"
long_name: "Share of the tract's land area that is covered by impervious surface as a percent"
long_name: "Share of the tract's land area that is covered by impervious surface as a percent"
field_type: percentage
include_in_tiles: true
include_in_downloadable_files: true
create_percentile: true
- short_name: "percent_nonnatural"
df_field_name: "TRACT_PERCENT_NON_NATURAL_FIELD_NAME"
long_name: "Share of the tract's land area that is covered by impervious surface or cropland as a percent"
long_name: "Share of the tract's land area that is covered by impervious surface or cropland as a percent"
field_type: percentage
include_in_tiles: true
include_in_downloadable_files: true
create_percentile: true
- short_name: "percent_cropland"
df_field_name: "TRACT_PERCENT_CROPLAND_FIELD_NAME"
long_name: "Share of the tract's land area that is covered by cropland as a percent"
long_name: "Share of the tract's land area that is covered by cropland as a percent"
field_type: percentage
include_in_tiles: true
include_in_downloadable_files: true
@ -328,4 +328,4 @@ datasets:
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
create_reverse_percentile: true
create_reverse_percentile: true

View file

@ -1,9 +1,8 @@
import datetime
import os
from pathlib import Path
import datetime
from data_pipeline.config import settings
from data_pipeline.score import field_names
## note: to keep map porting "right" fields, keeping descriptors the same.

View file

@ -1,31 +1,26 @@
import functools
from typing import List
from dataclasses import dataclass
from typing import List
import numpy as np
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.score import constants
from data_pipeline.etl.sources.census_acs.etl import CensusACSETL
from data_pipeline.etl.sources.national_risk_index.etl import (
NationalRiskIndexETL,
)
from data_pipeline.etl.sources.dot_travel_composite.etl import (
TravelCompositeETL,
)
from data_pipeline.etl.sources.fsf_flood_risk.etl import (
FloodRiskETL,
)
from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL
from data_pipeline.etl.sources.fsf_flood_risk.etl import FloodRiskETL
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
from data_pipeline.etl.sources.national_risk_index.etl import (
NationalRiskIndexETL,
)
from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL
from data_pipeline.etl.sources.tribal_overlap.etl import TribalOverlapETL
from data_pipeline.etl.sources.us_army_fuds.etl import USArmyFUDS
from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
from data_pipeline.score.score_runner import ScoreRunner
from data_pipeline.score import field_names
from data_pipeline.etl.score import constants
from data_pipeline.score.score_runner import ScoreRunner
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)
@ -699,7 +694,9 @@ class ScoreETL(ExtractTransformLoad):
self.df = self._backfill_island_demographics(self.df)
def load(self) -> None:
logger.info("Saving Score CSV")
logger.info(
f"Saving Score CSV to {constants.DATA_SCORE_CSV_FULL_FILE_PATH}."
)
constants.DATA_SCORE_CSV_FULL_DIR.mkdir(parents=True, exist_ok=True)
self.df.to_csv(constants.DATA_SCORE_CSV_FULL_FILE_PATH, index=False)

View file

@ -1,24 +1,20 @@
import concurrent.futures
import math
import os
import geopandas as gpd
import numpy as np
import pandas as pd
import geopandas as gpd
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.score import constants
from data_pipeline.etl.sources.census.etl_utils import (
check_census_data_source,
)
from data_pipeline.etl.score.etl_utils import check_score_data_source
from data_pipeline.etl.sources.census.etl_utils import check_census_data_source
from data_pipeline.score import field_names
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.utils import (
get_module_logger,
zip_files,
load_yaml_dict_from_file,
load_dict_from_yaml_object_fields,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import load_dict_from_yaml_object_fields
from data_pipeline.utils import load_yaml_dict_from_file
from data_pipeline.utils import zip_files
logger = get_module_logger(__name__)

View file

@ -1,29 +1,23 @@
from pathlib import Path
import json
from numpy import float64
from pathlib import Path
import numpy as np
import pandas as pd
from data_pipeline.content.schemas.download_schemas import (
CSVConfig,
CodebookConfig,
ExcelConfig,
)
from data_pipeline.content.schemas.download_schemas import CodebookConfig
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.content.schemas.download_schemas import ExcelConfig
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.score.etl_utils import floor_series, create_codebook
from data_pipeline.utils import (
get_module_logger,
zip_files,
load_yaml_dict_from_file,
column_list_from_yaml_object_fields,
load_dict_from_yaml_object_fields,
)
from data_pipeline.etl.score.etl_utils import create_codebook
from data_pipeline.etl.score.etl_utils import floor_series
from data_pipeline.etl.sources.census.etl_utils import check_census_data_source
from data_pipeline.score import field_names
from data_pipeline.utils import column_list_from_yaml_object_fields
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import load_dict_from_yaml_object_fields
from data_pipeline.utils import load_yaml_dict_from_file
from data_pipeline.utils import zip_files
from numpy import float64
from data_pipeline.etl.sources.census.etl_utils import (
check_census_data_source,
)
from . import constants
logger = get_module_logger(__name__)

View file

@ -1,24 +1,21 @@
import os
import sys
import typing
from pathlib import Path
from collections import namedtuple
from pathlib import Path
import numpy as np
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.score.constants import (
TILES_ISLAND_AREA_FIPS_CODES,
TILES_PUERTO_RICO_FIPS_CODE,
TILES_CONTINENTAL_US_FIPS_CODE,
TILES_ALASKA_AND_HAWAII_FIPS_CODE,
)
from data_pipeline.etl.score.constants import TILES_ALASKA_AND_HAWAII_FIPS_CODE
from data_pipeline.etl.score.constants import TILES_CONTINENTAL_US_FIPS_CODE
from data_pipeline.etl.score.constants import TILES_ISLAND_AREA_FIPS_CODES
from data_pipeline.etl.score.constants import TILES_PUERTO_RICO_FIPS_CODE
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
from data_pipeline.utils import (
download_file_from_url,
get_module_logger,
)
from data_pipeline.score import field_names
from data_pipeline.utils import download_file_from_url
from data_pipeline.utils import get_module_logger
from . import constants
logger = get_module_logger(__name__)
@ -99,7 +96,7 @@ def floor_series(series: pd.Series, number_of_decimals: int) -> pd.Series:
if series.isin(unacceptable_values).any():
series.replace(mapping, regex=False, inplace=True)
multiplication_factor = 10 ** number_of_decimals
multiplication_factor = 10**number_of_decimals
# In order to safely cast NaNs
# First coerce series to float type: series.astype(float)

View file

@ -1,6 +1,8 @@
from dataclasses import dataclass, field
from dataclasses import dataclass
from dataclasses import field
from enum import Enum
from typing import List, Optional
from typing import List
from typing import Optional
class FieldType(Enum):

View file

@ -5,7 +5,8 @@ from pathlib import Path
import pandas as pd
import pytest
from data_pipeline import config
from data_pipeline.etl.score import etl_score_post, tests
from data_pipeline.etl.score import etl_score_post
from data_pipeline.etl.score import tests
from data_pipeline.etl.score.etl_score_post import PostScoreETL

View file

@ -1,4 +1,4 @@
fips,state_name,state_abbreviation,region,division
01,Alabama,AL,South,East South Central
02,Alaska,AK,West,Pacific
04,Arizona,AZ,West,Mountain
04,Arizona,AZ,West,Mountain

1 fips state_name state_abbreviation region division
2 01 Alabama AL South East South Central
3 02 Alaska AK West Pacific
4 04 Arizona AZ West Mountain

View file

@ -1,11 +1,10 @@
import pandas as pd
import numpy as np
import pandas as pd
import pytest
from data_pipeline.etl.score.etl_utils import (
floor_series,
compare_to_list_of_expected_state_fips_codes,
)
from data_pipeline.etl.score.etl_utils import floor_series
def test_floor_series():

View file

@ -1,14 +1,11 @@
# pylint: disable=W0212
## Above disables warning about access to underscore-prefixed methods
from importlib import reload
from pathlib import Path
import pandas.api.types as ptypes
import pandas.testing as pdt
from data_pipeline.content.schemas.download_schemas import (
CSVConfig,
)
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.etl.score import constants
from data_pipeline.utils import load_yaml_dict_from_file

View file

@ -1,8 +1,7 @@
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.config import settings
logger = get_module_logger(__name__)

View file

@ -1,13 +1,15 @@
import pathlib
from pathlib import Path
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.etl.score.etl_utils import (
compare_to_list_of_expected_state_fips_codes,
)
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger, download_file_from_url
from data_pipeline.utils import download_file_from_url
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,9 +1,11 @@
import typing
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import get_module_logger, download_file_from_url
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.score import field_names
from data_pipeline.utils import download_file_from_url
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -53,7 +53,7 @@ For SVI 2018, the authors also included two adjunct variables, 1) 2014-2018 ACS
**Important Notes**
1. Tracts with zero estimates for the total population (N = 645 for the U.S.) were removed during the ranking process. These tracts were added back to the SVI databases after ranking.
1. Tracts with zero estimates for the total population (N = 645 for the U.S.) were removed during the ranking process. These tracts were added back to the SVI databases after ranking.
2. The TOTPOP field value is 0, but the percentile ranking fields (RPL_THEME1, RPL_THEME2, RPL_THEME3, RPL_THEME4, and RPL_THEMES) were set to -999.
@ -66,4 +66,4 @@ here: https://www.census.gov/programs-surveys/acs/data/variance-tables.html.
For selected ACS 5-year Detailed Tables, “Users can calculate margins of error for aggregated data by using the variance replicates. Unlike available approximation formulas, this method results in an exact margin of error by using the covariance term.”
MOEs are _not_ included nor considered during this data processing nor for the scoring comparison tool.
MOEs are _not_ included nor considered during this data processing nor for the scoring comparison tool.

View file

@ -1,9 +1,8 @@
import pandas as pd
import numpy as np
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -3,12 +3,12 @@ import json
import subprocess
from enum import Enum
from pathlib import Path
import geopandas as gpd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger, unzip_file_from_url
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -5,13 +5,11 @@ from pathlib import Path
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.utils import (
get_module_logger,
remove_all_dirs_from_dir,
remove_files_from_dir,
unzip_file_from_url,
zip_directory,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import remove_all_dirs_from_dir
from data_pipeline.utils import remove_files_from_dir
from data_pipeline.utils import unzip_file_from_url
from data_pipeline.utils import zip_directory
logger = get_module_logger(__name__)

View file

@ -1,19 +1,19 @@
from collections import namedtuple
import os
import pandas as pd
import geopandas as gpd
from collections import namedtuple
import geopandas as gpd
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.sources.census_acs.etl_utils import (
retrieve_census_acs_data,
)
from data_pipeline.etl.sources.census_acs.etl_imputations import (
calculate_income_measures,
)
from data_pipeline.utils import get_module_logger, unzip_file_from_url
from data_pipeline.etl.sources.census_acs.etl_utils import (
retrieve_census_acs_data,
)
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,7 +1,10 @@
from typing import Any, List, NamedTuple, Tuple
import pandas as pd
import geopandas as gpd
from typing import Any
from typing import List
from typing import NamedTuple
from typing import Tuple
import geopandas as gpd
import pandas as pd
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger

View file

@ -1,10 +1,9 @@
import os
from pathlib import Path
from typing import List
import censusdata
import pandas as pd
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
from data_pipeline.utils import get_module_logger

View file

@ -1,11 +1,10 @@
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.sources.census_acs.etl_utils import (
retrieve_census_acs_data,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,13 +1,14 @@
import json
from pathlib import Path
import numpy as np
import pandas as pd
import requests
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.config import settings
from data_pipeline.utils import unzip_file_from_url, download_file_from_url
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import download_file_from_url
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,14 +1,13 @@
import json
from typing import List
import requests
import numpy as np
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
import requests
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
pd.options.mode.chained_assignment = "raise"

View file

@ -1,7 +1,8 @@
from pathlib import Path
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,8 +1,9 @@
from pathlib import Path
import pandas as pd
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,6 +1,6 @@
# DOT travel barriers
The below description is taken from DOT directly:
The below description is taken from DOT directly:
Consistent with OMB's Interim Guidance for the Justice40 Initiative, DOT's interim definition of DACs includes (a) certain qualifying census tracts, (b) any Tribal land, or (c) any territory or possession of the United States. DOT has provided a mapping tool to assist applicants in identifying whether a project is located in a Disadvantaged Community, available at Transportation Disadvantaged Census Tracts (arcgis.com). A shapefile of the geospatial data is available at Transportation Disadvantaged Census Tracts shapefile (version 2.0, posted 5/10/22).
@ -13,4 +13,4 @@ The DOT interim definition for DACs was developed by an internal and external co
Resilience disadvantage identifies communities vulnerable to hazards caused by climate change. (1)
- Equity disadvantage identifies communities with a high percentile of persons (age 5+) who speak English "less than well." (1)
The CEJST uses only Transportation Access Disadvantage.
The CEJST uses only Transportation Access Disadvantage.

View file

@ -1,10 +1,9 @@
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation
import pandas as pd
import geopandas as gpd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,4 +1,4 @@
The following is the description from eAMLIS as of August 16, 2022.
The following is the description from eAMLIS as of August 16, 2022.
---
e-AMLIS is not a comprehensive database of all AML features or all AML grant activities. e-AMLIS is a national inventory that provides information about known abandoned mine land (AML) features including polluted waters. The majority of the data in e-AMLIS provides information about known coal AML features for the 25 states and 3 tribal SMCRA-approved AML Programs. e-AMLIS also provides limited information on non-coal AML features, and, non-coal reclamation projects as well as AML features for states and tribes that do not have an approved AML Program. Additionally, e-AMLIS only accounts for the direct construction cost to reclaim each AML feature that has been identified by states and Tribes. Other project costs such as planning, design, permitting, and construction oversight are not tracked in e-AMLIS.

View file

@ -1,9 +1,10 @@
from pathlib import Path
import geopandas as gpd
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
from data_pipeline.utils import get_module_logger

View file

@ -1,6 +1,6 @@
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger

View file

@ -1,5 +1,4 @@
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
@ -58,7 +57,6 @@ class EJSCREENAreasOfConcernETL(ExtractTransformLoad):
# TO DO: As a one off we did all the processing in a separate Notebook
# Can add here later for a future PR
pass
def load(self) -> None:
if self.ejscreen_areas_of_concern_data_exists():

View file

@ -1,10 +1,11 @@
from pathlib import Path
import pandas as pd
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger, unzip_file_from_url
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,9 +1,10 @@
from pathlib import Path
import pandas as pd
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger, unzip_file_from_url
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,3 +1,3 @@
# FSF flood risk data
Flood risk computed as 1 in 100 year flood zone
Flood risk computed as 1 in 100 year flood zone

View file

@ -1,10 +1,9 @@
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,3 +1,3 @@
# FSF wildfire risk data
Fire risk computed as >= 0.003 burn risk probability
Fire risk computed as >= 0.003 burn risk probability

View file

@ -1,10 +1,9 @@
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,11 +1,12 @@
"""Utililities for turning geographies into tracts, using census data"""
from functools import lru_cache
from pathlib import Path
from typing import Optional
from functools import lru_cache
import geopandas as gpd
from data_pipeline.etl.sources.tribal.etl import TribalETL
from data_pipeline.utils import get_module_logger
from .census.etl import CensusETL
logger = get_module_logger(__name__)

View file

@ -1,11 +1,9 @@
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import (
get_module_logger,
unzip_file_from_url,
)
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,8 +1,8 @@
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import get_module_logger
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,9 +1,9 @@
import pandas as pd
from pandas.errors import EmptyDataError
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
from data_pipeline.utils import get_module_logger, unzip_file_from_url
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
from pandas.errors import EmptyDataError
logger = get_module_logger(__name__)

View file

@ -1,5 +1,6 @@
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,9 +1,8 @@
import pandas as pd
import requests
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.config import settings
logger = get_module_logger(__name__)

View file

@ -1,10 +1,9 @@
import pandas as pd
import geopandas as gpd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)
@ -96,4 +95,3 @@ class MappingForEJETL(ExtractTransformLoad):
def validate(self) -> None:
logger.info("Validating Mapping For EJ Data")
pass

View file

@ -37,4 +37,4 @@ Oklahoma City,90R,D
Milwaukee Co.,S-D1,D
Milwaukee Co.,S-D2,D
Milwaukee Co.,S-D3,D
Milwaukee Co.,S-D4,D
Milwaukee Co.,S-D4,D

1 city holc_id HOLC Grade (manually mapped)
37 Milwaukee Co. S-D1 D
38 Milwaukee Co. S-D2 D
39 Milwaukee Co. S-D3 D
40 Milwaukee Co. S-D4 D

View file

@ -1,10 +1,11 @@
import pathlib
import numpy as np
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import download_file_from_url, get_module_logger
from data_pipeline.utils import download_file_from_url
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -8,7 +8,7 @@ According to the documentation:
There exist two data categories: Population Burden and Population Characteristics.
There are two indicators within Population Burden: Exposure, and Socioeconomic. Within Population Characteristics, there exist two indicators: Sensitive, Environmental Effects. Each respective indicator contains several relevant covariates, and an averaged score.
There are two indicators within Population Burden: Exposure, and Socioeconomic. Within Population Characteristics, there exist two indicators: Sensitive, Environmental Effects. Each respective indicator contains several relevant covariates, and an averaged score.
The two "Pollution Burden" average scores are then averaged together and the result is multiplied by the average of the "Population Characteristics" categories to get the total EJ Score for each tract.
@ -20,4 +20,4 @@ Furthermore, it was determined that Bladensburg residents are at a higher risk o
Source:
Driver, A.; Mehdizadeh, C.; Bara-Garcia, S.; Bodenreider, C.; Lewis, J.; Wilson, S. Utilization of the Maryland Environmental Justice Screening Tool: A Bladensburg, Maryland Case Study. Int. J. Environ. Res. Public Health 2019, 16, 348.
Driver, A.; Mehdizadeh, C.; Bara-Garcia, S.; Bodenreider, C.; Lewis, J.; Wilson, S. Utilization of the Maryland Environmental Justice Screening Tool: A Bladensburg, Maryland Case Study. Int. J. Environ. Res. Public Health 2019, 16, 348.

View file

@ -1,11 +1,11 @@
from glob import glob
import geopandas as gpd
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -29,4 +29,4 @@ Sources:
* Minnesota Pollution Control Agency. (2015, December 15). Environmental Justice Framework Report.
Retrieved from https://www.pca.state.mn.us/sites/default/files/p-gen5-05.pdf.
* Faust, J., L. August, K. Bangia, V. Galaviz, J. Leichty, S. Prasad… and L. Zeise. (2017, January). Update to the California Communities Environmental Health Screening Tool CalEnviroScreen 3.0. Retrieved from OEHHA website: https://oehha.ca.gov/media/downloads/calenviroscreen/report/ces3report.pdf
* Faust, J., L. August, K. Bangia, V. Galaviz, J. Leichty, S. Prasad… and L. Zeise. (2017, January). Update to the California Communities Environmental Health Screening Tool CalEnviroScreen 3.0. Retrieved from OEHHA website: https://oehha.ca.gov/media/downloads/calenviroscreen/report/ces3report.pdf

View file

@ -1,9 +1,8 @@
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -2,10 +2,9 @@
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -2,7 +2,7 @@
The following dataset was compiled by TPL (Trust for Public Lands) using NLCD data. We define as: AREA - [CROPLAND] - [IMPERVIOUS SURFACES].
## Codebook
## Codebook
- GEOID10 Census tract ID
- SF State Name
- CF County Name
@ -13,7 +13,7 @@ The following dataset was compiled by TPL (Trust for Public Lands) using NCLD da
- AcresCrops Acres crops calculated by summing all cells in the NLCD Cropland Data Layer crop classes.
- PctCrops Formula: AcresCrops/TractAcres*100.
- PctImperv Mean imperviousness for each census tract.
- CAVEAT: Where tracts extend into open water, mean imperviousness may be underestimated.
- CAVEAT: Where tracts extend into open water, mean imperviousness may be underestimated.
- __TO USE__ PctNatural Formula: 100 - PctCrops - PctImperv.
- PctNat90 Tract in or below 10th percentile for PctNatural. 1 = True, 0 = False.
- PctNatural 10th percentile = 28.6439%
@ -24,7 +24,7 @@ The following dataset was compiled by TPL (Trust for Public Lands) using NCLD da
- P200_PFS 65th percentile = 64.0%
- NatureDep ImpOrCrp = 1 AND LowInAndEd = 1.
We added `GEOID10_TRACT` before converting shapefile to csv.
We added `GEOID10_TRACT` before converting shapefile to csv.
## Instructions to recreate

View file

@ -1,10 +1,9 @@
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -1,12 +1,11 @@
import functools
import pandas as pd
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import (
get_module_logger,
unzip_file_from_url,
)
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,11 +1,12 @@
from pathlib import Path
import geopandas as gpd
import pandas as pd
from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger, unzip_file_from_url
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import unzip_file_from_url
logger = get_module_logger(__name__)

View file

@ -1,11 +1,8 @@
from pathlib import Path
from data_pipeline.utils import (
get_module_logger,
remove_all_from_dir,
remove_files_from_dir,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.utils import remove_all_from_dir
from data_pipeline.utils import remove_files_from_dir
logger = get_module_logger(__name__)

View file

@ -1,12 +1,11 @@
import geopandas as gpd
import numpy as np
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.sources.geo_utils import (
add_tracts_for_geometries,
get_tribal_geojson,
get_tract_geojson,
)
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
from data_pipeline.etl.sources.geo_utils import get_tract_geojson
from data_pipeline.etl.sources.geo_utils import get_tribal_geojson
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger

View file

@ -1,11 +1,13 @@
from pathlib import Path
import geopandas as gpd
import pandas as pd
import numpy as np
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import get_module_logger, download_file_from_url
import geopandas as gpd
import numpy as np
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.base import ValidGeoLevel
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
from data_pipeline.utils import download_file_from_url
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)

View file

@ -211,7 +211,9 @@
}
],
"source": [
"tmp = sns.FacetGrid(data=score_m, col=\"Urban Heuristic Flag\", col_wrap=2, height=7)\n",
"tmp = sns.FacetGrid(\n",
" data=score_m, col=\"Urban Heuristic Flag\", col_wrap=2, height=7\n",
")\n",
"tmp.map(\n",
" sns.distplot,\n",
" \"Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)\",\n",
@ -250,7 +252,9 @@
")\n",
"\n",
"nri_with_flag[\"total_ag_loss\"] = nri_with_flag.filter(like=\"EALA\").sum(axis=1)\n",
"nri_with_flag[\"total_ag_loss_pctile\"] = nri_with_flag[\"total_ag_loss\"].rank(pct=True)\n",
"nri_with_flag[\"total_ag_loss_pctile\"] = nri_with_flag[\"total_ag_loss\"].rank(\n",
" pct=True\n",
")\n",
"\n",
"nri_with_flag.groupby(\"Urban Heuristic Flag\")[\"total_ag_loss_pctile\"].mean()"
]
@ -779,9 +783,9 @@
" \"Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?\"\n",
"].astype(int)\n",
"\n",
"score_m_adjusted_tracts = set(score_m[score_m[\"adjusted\"] > 0][\"GEOID10_TRACT\"]).union(\n",
" all_ag_loss_tracts\n",
")\n",
"score_m_adjusted_tracts = set(\n",
" score_m[score_m[\"adjusted\"] > 0][\"GEOID10_TRACT\"]\n",
").union(all_ag_loss_tracts)\n",
"display(len(set(all_scorem_tracts).difference(score_m_adjusted_tracts)))"
]
},
@ -832,7 +836,11 @@
" left_clip = nri_with_flag[nri_with_flag[\"Urban Heuristic Flag\"] == 0][\n",
" \"AGRIVALUE\"\n",
" ].quantile(threshold)\n",
" print(\"At threshold {:.2f}, minimum value is ${:,.0f}\".format(threshold, left_clip))\n",
" print(\n",
" \"At threshold {:.2f}, minimum value is ${:,.0f}\".format(\n",
" threshold, left_clip\n",
" )\n",
" )\n",
" tmp_value = nri_with_flag[\"AGRIVALUE\"].clip(lower=left_clip)\n",
" nri_with_flag[\"total_ag_loss_pctile_{:.2f}\".format(threshold)] = (\n",
" nri_with_flag[\"total_ag_loss\"] / tmp_value\n",
@ -889,7 +897,9 @@
" .set_index(\"Left clip value\")[[\"Rural\", \"Urban\"]]\n",
" .stack()\n",
" .reset_index()\n",
" .rename(columns={\"level_1\": \"Tract classification\", 0: \"Average percentile\"})\n",
" .rename(\n",
" columns={\"level_1\": \"Tract classification\", 0: \"Average percentile\"}\n",
" )\n",
")"
]
},

View file

@ -21,6 +21,7 @@
"source": [
"import os\n",
"import sys\n",
"\n",
"module_path = os.path.abspath(os.path.join(\"../..\"))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)"
@ -94,9 +95,13 @@
"bia_aian_supplemental_geojson = (\n",
" GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_AIAN_Supplemental.json\"\n",
")\n",
"bia_tsa_geojson_geojson = GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_TSA.json\"\n",
"bia_tsa_geojson_geojson = (\n",
" GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_TSA.json\"\n",
")\n",
"alaska_native_villages_geojson = (\n",
" GEOJSON_BASE_PATH / \"alaska_native_villages\" / \"AlaskaNativeVillages.gdb.geojson\"\n",
" GEOJSON_BASE_PATH\n",
" / \"alaska_native_villages\"\n",
" / \"AlaskaNativeVillages.gdb.geojson\"\n",
")"
]
},
@ -131,7 +136,9 @@
"len(\n",
" sorted(\n",
" list(\n",
" bia_national_lar_df.LARName.str.replace(r\"\\(.*\\) \", \"\", regex=True).unique()\n",
" bia_national_lar_df.LARName.str.replace(\n",
" r\"\\(.*\\) \", \"\", regex=True\n",
" ).unique()\n",
" )\n",
" )\n",
")"

View file

@ -45,6 +45,7 @@
"source": [
"# Read in the score geojson file\n",
"from data_pipeline.etl.score.constants import DATA_SCORE_CSV_TILES_FILE_PATH\n",
"\n",
"nation = gpd.read_file(DATA_SCORE_CSV_TILES_FILE_PATH)"
]
},
@ -93,10 +94,14 @@
" random_tile_features = json.loads(f.read())\n",
"\n",
"# Flatten data around the features key:\n",
"flatten_features = pd.json_normalize(random_tile_features, record_path=[\"features\"])\n",
"flatten_features = pd.json_normalize(\n",
" random_tile_features, record_path=[\"features\"]\n",
")\n",
"\n",
"# index into the feature properties, get keys and turn into a sorted list\n",
"random_tile = sorted(list(flatten_features[\"features\"][0][0][\"properties\"].keys()))"
"random_tile = sorted(\n",
" list(flatten_features[\"features\"][0][0][\"properties\"].keys())\n",
")"
]
},
{
@ -291,8 +296,8 @@
}
],
"source": [
"nation_HRS_GEO = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'FUDS_ET']]\n",
"nation_HRS_GEO.loc[nation_HRS_GEO['FUDS_ET'] == '0']"
"nation_HRS_GEO = nation[[\"GEOID10\", \"SF\", \"CF\", \"HRS_ET\", \"AML_ET\", \"FUDS_ET\"]]\n",
"nation_HRS_GEO.loc[nation_HRS_GEO[\"FUDS_ET\"] == \"0\"]"
]
},
{
@ -321,7 +326,7 @@
}
],
"source": [
"nation['HRS_ET'].unique()"
"nation[\"HRS_ET\"].unique()"
]
}
],

View file

@ -163,4 +163,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View file

@ -1,9 +1,6 @@
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

View file

@ -18,7 +18,10 @@
" sys.path.append(module_path)\n",
"\n",
"from data_pipeline.config import settings\n",
"from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries, get_tract_geojson\n"
"from data_pipeline.etl.sources.geo_utils import (\n",
" add_tracts_for_geometries,\n",
" get_tract_geojson,\n",
")"
]
},
{
@ -655,9 +658,9 @@
}
],
"source": [
"adjacent_tracts.groupby(\"ORIGINAL_TRACT\")[[\"included\"]].mean().reset_index().rename(\n",
" columns={\"ORIGINAL_TRACT\": \"GEOID10_TRACT\"}\n",
")"
"adjacent_tracts.groupby(\"ORIGINAL_TRACT\")[\n",
" [\"included\"]\n",
"].mean().reset_index().rename(columns={\"ORIGINAL_TRACT\": \"GEOID10_TRACT\"})"
]
},
{

View file

@ -65,7 +65,8 @@
"tmp_path.mkdir(parents=True, exist_ok=True)\n",
"\n",
"eamlis_path_in_s3 = (\n",
" settings.AWS_JUSTICE40_DATASOURCES_URL + \"/eAMLIS export of all data.tsv.zip\"\n",
" settings.AWS_JUSTICE40_DATASOURCES_URL\n",
" + \"/eAMLIS export of all data.tsv.zip\"\n",
")\n",
"\n",
"unzip_file_from_url(\n",

View file

@ -460,7 +460,9 @@
"outputs": [],
"source": [
"object_ids_to_keep = set(\n",
" merged_exaple_data[merged_exaple_data[\"_merge\"] == \"both\"].OBJECTID.astype(\"int\")\n",
" merged_exaple_data[merged_exaple_data[\"_merge\"] == \"both\"].OBJECTID.astype(\n",
" \"int\"\n",
" )\n",
")\n",
"features = []\n",
"for feature in raw_fuds_geojson[\"features\"]:\n",
@ -476,7 +478,11 @@
"outputs": [],
"source": [
"def make_fake_feature(\n",
" state: str, has_projects: bool, is_eligible: bool, latitude: float, longitude: float\n",
" state: str,\n",
" has_projects: bool,\n",
" is_eligible: bool,\n",
" latitude: float,\n",
" longitude: float,\n",
"):\n",
" \"\"\"For tracts where we don't have a FUDS, fake one.\"\"\"\n",
" make_fake_feature._object_id += 1\n",
@ -537,7 +543,9 @@
"# Create FUDS in CA for each tract that doesn't have a FUDS\n",
"for tract_id, point in points.items():\n",
" for bools in [(True, True), (True, False), (False, False)]:\n",
" features.append(make_fake_feature(\"CA\", bools[0], bools[1], point.y, point.x))"
" features.append(\n",
" make_fake_feature(\"CA\", bools[0], bools[1], point.y, point.x)\n",
" )"
]
},
{
@ -596,9 +604,9 @@
}
],
"source": [
"test_frame_with_tracts_full = test_frame_with_tracts = add_tracts_for_geometries(\n",
" test_frame\n",
")"
"test_frame_with_tracts_full = (\n",
" test_frame_with_tracts\n",
") = add_tracts_for_geometries(test_frame)"
]
},
{
@ -680,7 +688,9 @@
}
],
"source": [
"tracts = test_frame_with_tracts_full[[\"GEOID10_TRACT\", \"geometry\"]].drop_duplicates()\n",
"tracts = test_frame_with_tracts_full[\n",
" [\"GEOID10_TRACT\", \"geometry\"]\n",
"].drop_duplicates()\n",
"tracts[\"lat_long\"] = test_frame_with_tracts_full.geometry.apply(\n",
" lambda point: (point.x, point.y)\n",
")\n",

View file

@ -13,7 +13,7 @@
"import geopandas as gpd\n",
"\n",
"# Read in the above json file\n",
"nation=gpd.read_file(\"/Users/vims/Downloads/usa-high-1822-637b.json\")"
"nation = gpd.read_file(\"/Users/vims/Downloads/usa-high-1822-637b.json\")"
]
},
{
@ -45,7 +45,7 @@
}
],
"source": [
"nation['FUDS_RAW']"
"nation[\"FUDS_RAW\"]"
]
},
{
@ -248,7 +248,18 @@
}
],
"source": [
"nation_new_ind = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'AML_RAW','FUDS_ET', 'FUDS_RAW']]\n",
"nation_new_ind = nation[\n",
" [\n",
" \"GEOID10\",\n",
" \"SF\",\n",
" \"CF\",\n",
" \"HRS_ET\",\n",
" \"AML_ET\",\n",
" \"AML_RAW\",\n",
" \"FUDS_ET\",\n",
" \"FUDS_RAW\",\n",
" ]\n",
"]\n",
"nation_new_ind"
]
},
@ -270,7 +281,7 @@
}
],
"source": [
"nation_new_ind['HRS_ET'].unique()"
"nation_new_ind[\"HRS_ET\"].unique()"
]
},
{
@ -293,7 +304,7 @@
}
],
"source": [
"nation_new_ind['HRS_ET'].value_counts()"
"nation_new_ind[\"HRS_ET\"].value_counts()"
]
},
{
@ -314,7 +325,7 @@
}
],
"source": [
"nation_new_ind['AML_ET'].unique()"
"nation_new_ind[\"AML_ET\"].unique()"
]
},
{
@ -337,7 +348,7 @@
}
],
"source": [
"nation_new_ind['AML_ET'].value_counts()"
"nation_new_ind[\"AML_ET\"].value_counts()"
]
},
{
@ -358,7 +369,7 @@
}
],
"source": [
"nation_new_ind['AML_RAW'].unique()"
"nation_new_ind[\"AML_RAW\"].unique()"
]
},
{
@ -380,7 +391,7 @@
}
],
"source": [
"nation_new_ind['AML_RAW'].value_counts()"
"nation_new_ind[\"AML_RAW\"].value_counts()"
]
},
{
@ -401,7 +412,7 @@
}
],
"source": [
"nation_new_ind['FUDS_ET'].unique()"
"nation_new_ind[\"FUDS_ET\"].unique()"
]
},
{
@ -424,7 +435,7 @@
}
],
"source": [
"nation_new_ind['FUDS_ET'].value_counts()"
"nation_new_ind[\"FUDS_ET\"].value_counts()"
]
},
{
@ -445,7 +456,7 @@
}
],
"source": [
"nation_new_ind['FUDS_RAW'].unique()"
"nation_new_ind[\"FUDS_RAW\"].unique()"
]
},
{
@ -468,7 +479,7 @@
}
],
"source": [
"nation_new_ind['FUDS_RAW'].value_counts()"
"nation_new_ind[\"FUDS_RAW\"].value_counts()"
]
}
],

View file

@ -36,8 +36,8 @@
" engine=\"pyogrio\",\n",
")\n",
"end = time.time()\n",
" \n",
"print(\"Time taken to execute the function using pyogrio is\", end-begin)"
"\n",
"print(\"Time taken to execute the function using pyogrio is\", end - begin)"
]
},
{
@ -59,11 +59,13 @@
"census_tract_gdf = gpd.read_file(\n",
" CensusETL.NATIONAL_TRACT_JSON_PATH,\n",
" engine=\"fiona\",\n",
" include_fields=[\"GEOID10\"]\n",
" include_fields=[\"GEOID10\"],\n",
")\n",
"end2 = time.time()\n",
" \n",
"print(\"Time taken to execute the function using include fields is\", end2-begin2)"
"\n",
"print(\n",
" \"Time taken to execute the function using include fields is\", end2 - begin2\n",
")"
]
},
{

Some files were not shown because too many files have changed in this diff Show more