mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-26 19:01:17 -07:00
Added grandfathering of v1.0 DACS
This commit is contained in:
parent
77e0996441
commit
e0bb33211a
13 changed files with 74271 additions and 15 deletions
|
@ -9,6 +9,7 @@ from data_pipeline.score import field_names
|
|||
|
||||
# Base Paths
|
||||
DATA_PATH = Path(settings.APP_ROOT) / "data"
|
||||
STATIC_DATA_PATH = Path(settings.APP_ROOT) / "content" / "static_data"
|
||||
TMP_PATH = DATA_PATH / "tmp"
|
||||
FILES_PATH = Path(settings.APP_ROOT) / "files"
|
||||
|
||||
|
@ -275,6 +276,7 @@ TILES_SCORE_COLUMNS = {
|
|||
# temporarily update this so that it's the Narwhal score that gets visualized on the map
|
||||
# The NEW final score value INCLUDES the adjacency index.
|
||||
field_names.FINAL_SCORE_N_BOOLEAN: "SN_C",
|
||||
field_names.FINAL_SCORE_N_BOOLEAN_V1_0: "SN_C_V10",
|
||||
field_names.IS_TRIBAL_DAC: "SN_T",
|
||||
field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
|
||||
|
|
|
@ -54,6 +54,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.eamlis_df: pd.DataFrame
|
||||
self.fuds_df: pd.DataFrame
|
||||
self.tribal_overlap_df: pd.DataFrame
|
||||
self.v1_0_score_results_df: pd.DataFrame
|
||||
|
||||
self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS: List[str] = []
|
||||
|
||||
|
@ -205,6 +206,22 @@ class ScoreETL(ExtractTransformLoad):
|
|||
header=None,
|
||||
)
|
||||
|
||||
# Load v1.0 score results for grandfathering purposes
|
||||
score_v1_0_csv = (
|
||||
constants.STATIC_DATA_PATH / "v1.0-score-results-usa.csv"
|
||||
)
|
||||
self.v1_0_score_results_df = pd.read_csv(
|
||||
score_v1_0_csv,
|
||||
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
self.v1_0_score_results_df.rename(
|
||||
columns={
|
||||
field_names.FINAL_SCORE_N_BOOLEAN: field_names.FINAL_SCORE_N_BOOLEAN_V1_0,
|
||||
},
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
def _join_tract_dfs(self, census_tract_dfs: list) -> pd.DataFrame:
|
||||
logger.debug("Joining Census Tract dataframes")
|
||||
|
||||
|
@ -364,6 +381,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.eamlis_df,
|
||||
self.fuds_df,
|
||||
self.tribal_overlap_df,
|
||||
self.v1_0_score_results_df,
|
||||
]
|
||||
|
||||
# Sanity check each data frame before merging.
|
||||
|
@ -514,6 +532,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
|
||||
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
|
||||
field_names.IS_TRIBAL_DAC,
|
||||
field_names.FINAL_SCORE_N_BOOLEAN_V1_0,
|
||||
]
|
||||
|
||||
# For some columns, high values are "good", so we want to reverse the percentile
|
||||
|
|
|
@ -129,6 +129,16 @@ def tile_data_expected():
|
|||
return pd.read_pickle(pytest.SNAPSHOT_DIR / "tile_data_expected.pkl")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def create_tile_score_data_input():
|
||||
return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_score_data_input.pkl")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def create_tile_data_expected():
|
||||
return pd.read_pickle(pytest.SNAPSHOT_DIR / "create_tile_data_expected.pkl")
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def downloadable_data_expected():
|
||||
return pd.read_pickle(
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
These files are used as inputs to unit tests. Some notes on their creation are below.
|
||||
|
||||
### create_tile_data_expected.pkl
|
||||
1. Set a breakpoint in the `test_create_tile_data` method in `data_pipeline/etl/score/tests/test_score_post.py`
|
||||
after the call to `_create_tile_data` and debug the test.
|
||||
2. Extract a subset of the `output_tiles_df_actual` dataframe. Do not extract the whole score as the file
|
||||
will be too big and the test will run slow. Also, you need to extract the same tracts that are in
|
||||
the `create_tile_score_data_input.pkl` input data. For example, use the following command once the breakpoint is reached
|
||||
to extract a few rows at the top and bottom of the score. This will capture some states and territories.
|
||||
```python
|
||||
import pandas as pd
|
||||
pd.concat([output_tiles_df_actual.head(3), output_tiles_df_actual.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_data_expected.pkl')
|
||||
```
|
||||
|
||||
### create_tile_score_data_input.pkl
|
||||
1. Set a breakpoint in the transform method in `data_pipeline/etl/score/etl_score_post.py` before the call to
|
||||
`_create_tile_data` and run the post scoring.
|
||||
2. Extract a subset of the `output_score_county_state_merged_df` dataframe. Do not extract the whole score as the file
|
||||
will be too big and the test will run slow. For example, use the following command once the breakpoint is reached
|
||||
to extract a few rows at the top and bottom of the score. This will capture some states and territories.
|
||||
```python
|
||||
pd.concat([output_score_county_state_merged_df.head(3), output_score_county_state_merged_df.tail(3)], ignore_index=True).to_pickle('data_pipeline/etl/score/tests/snapshots/create_tile_score_data_input.pkl')
|
||||
```
|
Binary file not shown.
Binary file not shown.
|
@ -80,11 +80,11 @@ def test_create_score_data(
|
|||
)
|
||||
|
||||
|
||||
def test_create_tile_data(etl, score_data_expected, tile_data_expected):
|
||||
output_tiles_df_actual = etl._create_tile_data(score_data_expected)
|
||||
def test_create_tile_data(etl, create_tile_score_data_input, create_tile_data_expected):
|
||||
output_tiles_df_actual = etl._create_tile_data(create_tile_score_data_input)
|
||||
pdt.assert_frame_equal(
|
||||
output_tiles_df_actual,
|
||||
tile_data_expected,
|
||||
create_tile_data_expected,
|
||||
)
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue