Mirror of https://github.com/DOI-DO/j40-cejst-2.git, synced 2025-07-26 18:51:17 -07:00
Display score L on map (#849)
* updates to first docker run
* tile constants
* frontend changes
* updating pickles instructions
* pickles
This commit is contained in:
parent 03e59f2abd
commit 053dde0d40
12 changed files with 91 additions and 16 deletions

@@ -1,6 +1,6 @@
-import {LngLatBoundsLike} from 'maplibre-gl';
-import {isMobile as isMobileReactDeviceDetect} from 'react-device-detect';
+import { LngLatBoundsLike } from 'maplibre-gl';
+import { isMobile as isMobileReactDeviceDetect } from 'react-device-detect';
 
 const XYZ_SUFFIX = '{z}/{x}/{y}.pbf';
 export const featureURLForTilesetName = (tilesetName: string): string => {

@@ -305,12 +305,86 @@ In a bit more detail:

#### Updating Pickles

If you update the score, or the inputs or outputs of various methods, it is necessary to create new pickles so that data is validated correctly. To do this:

1. Drop a breakpoint just before the dataframe would otherwise be written to / read from disk. If you're using VSCode, use one of the named run targets within `data-pipeline`, such as `Score Full Run`, and put a breakpoint in the margin just before the actionable step. More on using breakpoints in VSCode [here](https://code.visualstudio.com/docs/editor/debugging#_breakpoints). If you are not using VSCode, you can put the line `breakpoint()` in your code and it will stop where you have placed the line in whatever calling context you are using.
1. In your editor/terminal, run `df.to_pickle("data_pipeline/etl/score/tests/snapshots/YOUR_OUT_PATH_HERE.pkl", protocol=4)` to write the pickle to the appropriate location on disk.
1. Be sure to do this for all inputs/outputs that have changed as a result of your modification. It is often necessary to do this several times for cascading operations.
1. To inspect your pickle, open a Python interpreter, then run `pickle.load(open("data_pipeline/etl/score/tests/snapshots/YOUR_OUT_PATH_HERE.pkl", "rb"))` to get the file contents (see the sketch after this list).
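
For step 4, here is a minimal sketch of what inspection can look like, using `pandas.read_pickle` (equivalent to `pickle.load` for a pickled dataframe); the snapshot filename below is just an example:

```
import pandas as pd

# Load a snapshot pickle for inspection. The path is illustrative --
# substitute whichever snapshot you just rewrote.
df = pd.read_pickle("data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl")

print(df.shape)   # (rows, columns)
print(df.dtypes)  # column types that the test will validate
print(df.head())
```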

It starts with `data_pipeline/etl/score/tests/sample_data/score_data_initial.csv`, which is the first two rows of `score/full/usa.csv`.

To update this file, run a full score generation and then update the file as follows:

```
from pathlib import Path
import pandas as pd

data_path = Path.cwd()

# score data expected
score_csv_path = data_path / "data_pipeline" / "data" / "score" / "csv" / "full" / "usa.csv"
score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10": "string"}, low_memory=False)[:2]
score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" / "score_data_initial.csv", index=False)
```
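
As a quick sanity check (a sketch reusing `data_path` and the imports from the block above), you can read the sample back and confirm it kept exactly two rows and the string-typed `GEOID10`:

```
sample_path = data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" / "score_data_initial.csv"
check_df = pd.read_csv(sample_path, dtype={"GEOID10": "string"}, low_memory=False)

# The sample is meant to be exactly the first two rows of score/full/usa.csv.
assert len(check_df) == 2
assert check_df["GEOID10"].dtype == "string"
```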

We have four pickle files that correspond to expected files (the simplified sketch after this list shows how a test consumes them):

- `score_data_expected.pkl`: Initial score without counties
- `score_transformed_expected.pkl`: Intermediate score with `etl._extract_score` and `etl._transform_score` applied. There's no file for this intermediate process, so we need to capture the pickle mid-process.
- `tile_data_expected.pkl`: Score with columns to be baked into tiles
- `downloadable_data_expected.pkl`: Downloadable csv
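
For orientation, each snapshot test loads one of these pickles and compares it to the freshly computed dataframe with `pdt.assert_frame_equal`. A minimal sketch of that pattern (the helper name is hypothetical, not the actual test code):

```
import pandas as pd
import pandas.testing as pdt

def assert_matches_snapshot(actual_df, snapshot_name):
    """Hypothetical helper: compare a computed dataframe to its pickled snapshot."""
    expected_df = pd.read_pickle(
        f"data_pipeline/etl/score/tests/snapshots/{snapshot_name}"
    )
    pdt.assert_frame_equal(actual_df, expected_df)
```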

To update the pickles, let's go one by one:

For the `score_transformed_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L58), before the `pdt.assert_frame_equal`, and run:
`pytest data_pipeline/etl/score/tests/test_score_post.py::test_transform_score`

Once on the breakpoint, capture the df to a pickle as follows:

```
from pathlib import Path

data_path = Path.cwd()
score_transformed_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "score_transformed_expected.pkl", protocol=4)
```

Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_transform_score`

For the `score_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L78), before the `pdt.assert_frame_equal`, and run:
`pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_score_data`

Once on the breakpoint, capture the df to a pickle as follows:

```
from pathlib import Path

data_path = Path.cwd()
score_data_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "score_data_expected.pkl", protocol=4)
```

Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_score_data`

For the `tile_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L86), before the `pdt.assert_frame_equal`, and run:
`pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_tile_data`

Once on the breakpoint, capture the df to a pickle as follows:

```
from pathlib import Path

data_path = Path.cwd()
output_tiles_df_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "tile_data_expected.pkl", protocol=4)
```

Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_tile_data`

For the `downloadable_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L98), before the `pdt.assert_frame_equal`, and run:
`pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_downloadable_data`

Once on the breakpoint, capture the df to a pickle as follows:

```
from pathlib import Path

data_path = Path.cwd()
output_downloadable_df_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "downloadable_data_expected.pkl", protocol=4)
```

Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_downloadable_data`
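
All four captures follow the same pattern, so once you are paused at the relevant breakpoint you can also use a small helper like this sketch (the function name is hypothetical; the dataframe variable and snapshot filename vary per test, as listed above):

```
from pathlib import Path

SNAPSHOTS = Path.cwd() / "data_pipeline" / "etl" / "score" / "tests" / "snapshots"

def refresh_snapshot(df, filename):
    """Hypothetical helper: rewrite one snapshot pickle.

    protocol=4 matches the protocol used in the steps above.
    """
    df.to_pickle(SNAPSHOTS / filename, protocol=4)

# For example, at the test_create_tile_data breakpoint:
# refresh_snapshot(output_tiles_df_actual, "tile_data_expected.pkl")
```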

#### Future Enhancements

@@ -262,6 +262,7 @@ def data_full_run(check: bool, data_source: str):
     score_generate()
 
     logger.info("*** Running Post Score scripts")
     downloadable_cleanup()
+    score_post(data_source)
 
     logger.info("*** Combining Score with Census Geojson")

@@ -83,9 +83,6 @@ def score_generate() -> None:
     score_gen.transform()
     score_gen.load()
-
-    # Post Score Processing
-    score_post()
 
 
 def score_post(data_source: str = "local") -> None:
     """Posts the score files to the local directory

@@ -69,6 +69,8 @@ TILES_SCORE_COLUMNS = [
     "Score E (top 25th percentile)",
     "Score G (communities)",
     "Score G",
+    "Definition L (communities)",
+    "Definition L (percentile)",
     "Poverty (Less than 200% of federal poverty line) (percentile)",
     "Percent individuals age 25 or over with less than high school degree (percentile)",
     "Linguistic isolation (percent) (percentile)",

@@ -95,6 +97,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
     "Score D (top 25th percentile)",
     "Score E (percentile)",
     "Score E (top 25th percentile)",
+    "Definition L (percentile)",
     "Poverty (Less than 200% of federal poverty line)",
     "Percent individuals age 25 or over with less than high school degree",
     "Linguistic isolation (percent)",

@@ -31,8 +31,8 @@ class GeoScoreETL(ExtractTransformLoad):
             self.DATA_PATH / "census" / "geojson" / "us.json"
         )
 
-        self.TARGET_SCORE_NAME = "Score G"
-        self.TARGET_SCORE_RENAME_TO = "G_SCORE"
+        self.TARGET_SCORE_NAME = "Definition L (percentile)"
+        self.TARGET_SCORE_RENAME_TO = "L_SCORE"
 
         self.NUMBER_OF_BUCKETS = 10
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

@@ -107,7 +107,7 @@ def check_census_data_source(
     # check if census data is found locally
     if not os.path.isfile(census_data_path / "geojson" / "us.json"):
         logger.info(
-            "No local census data found. Please use '-d aws` to fetch from AWS"
+            "No local census data found. Please use '-s aws` to fetch from AWS"
         )
         sys.exit()