Mirror of https://github.com/DOI-DO/j40-cejst-2.git, synced 2025-07-30 12:21:17 -07:00
Fix linter and dependency issues
This commit is contained in: parent 0da80c90d8, commit 3e087a37a4
7 changed files with 619 additions and 730 deletions
@@ -2,7 +2,6 @@
 import pandas as pd
 import pytest
 from data_pipeline.config import settings
 from data_pipeline.etl.score import constants
-from data_pipeline.etl.score.etl_score import ScoreETL
 from data_pipeline.score import field_names
 from data_pipeline.score.score_narwhal import ScoreNarwhal
@@ -13,9 +12,11 @@ logger = get_module_logger(__name__)

 TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"


 @pytest.fixture
 def toy_score_df(scope="module"):
-    return pd.read_csv(TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
+    return pd.read_csv(
+        TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
         dtype={field_names.GEOID_TRACT_FIELD: str},
     )
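The fixture above reads tract GEOIDs as strings, presumably so FIPS-based IDs with leading zeros survive the CSV round trip. A hypothetical usage sketch (made-up test name, relying on this module's existing imports):

def test_toy_score_df_reads_tracts_as_strings(toy_score_df):
    # pytest injects the fixture by matching the parameter name; because the
    # fixture passes dtype=str for the GEOID column, it should come back as
    # pandas object (string) dtype rather than int64
    assert toy_score_df[field_names.GEOID_TRACT_FIELD].dtype == object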
@@ -84,7 +85,8 @@ def test_drop_all_tracts(toy_score_df):


 def test_mark_territory_dacs():
-    test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
+    test_data = pd.read_csv(
+        TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
         dtype={field_names.GEOID_TRACT_FIELD: str},
     )
     # Sanity check on the input data
@@ -92,18 +94,22 @@ def test_mark_territory_dacs():

     scorer = ScoreNarwhal(test_data)
     scorer._mark_territory_dacs()
-    territory_filter = test_data[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES))
     # Check territories are set to true
-    expected_new_dacs_filter = (
-        test_data[field_names.GEOID_TRACT_FIELD].isin(['60050951100', '66010951100', '69110001101', '78010990000'])
+    expected_new_dacs_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
+        ["60050951100", "66010951100", "69110001101", "78010990000"]
     )
-    assert test_data.loc[expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
+    assert test_data.loc[
+        expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
+    ].all()
     # Non-territories are still false
-    assert not test_data.loc[~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
+    assert not test_data.loc[
+        ~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
+    ].all()


 def test_mark_poverty_flag():
-    test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
+    test_data = pd.read_csv(
+        TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
         dtype={field_names.GEOID_TRACT_FIELD: str},
     )
     # Sanity check on the input data
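For context, the territory assertions above exercise one behavior: every tract whose GEOID starts with an island-area FIPS prefix should end up flagged as a disadvantaged community. A minimal sketch of that behavior, with assumed names and FIPS values (not the repo's ScoreNarwhal implementation):

import pandas as pd

# Assumed island-area FIPS prefixes (American Samoa, Guam, Northern Mariana
# Islands, U.S. Virgin Islands); the repo keeps these in
# constants.TILES_ISLAND_AREA_FIPS_CODES.
ISLAND_AREA_FIPS_PREFIXES = ("60", "66", "69", "78")

def mark_territory_dacs(df: pd.DataFrame, geoid_col: str, dac_col: str) -> None:
    """Flag island-territory tracts as disadvantaged communities, in place."""
    is_territory = df[geoid_col].str.startswith(ISLAND_AREA_FIPS_PREFIXES)
    df.loc[is_territory, dac_col] = True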
@@ -111,14 +117,14 @@ def test_mark_poverty_flag():

     scorer = ScoreNarwhal(test_data)
     scorer._mark_poverty_flag()
-    expected_low_income_filter = (
-        test_data[field_names.GEOID_TRACT_FIELD].isin(['36087011302', '66010951100', '78010990000'])
+    expected_low_income_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
+        ["36087011302", "66010951100", "78010990000"]
     )
     # Three tracts are set to true
-    assert (
-        test_data[expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
-    )
+    assert test_data[expected_low_income_filter][
+        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
+    ].all()
     # Everything else is false
-    assert (
-        not test_data[~expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
-    )
+    assert not test_data[~expected_low_income_filter][
+        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
+    ].all()
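The poverty-flag assertions follow the same pattern: tracts at or above a low-income cutoff on the imputed-and-adjusted 200%-of-poverty measure get the flag, and everything else stays false. A hedged sketch with an assumed column name and an assumed cutoff value (the actual threshold is defined inside ScoreNarwhal, not here):

import pandas as pd

LOW_INCOME_PERCENTILE_CUTOFF = 0.65  # assumed cutoff, for illustration only

def mark_poverty_flag(df: pd.DataFrame, percentile_col: str, flag_col: str) -> None:
    """Set the low-income flag where the poverty percentile meets the cutoff."""
    df[flag_col] = df[percentile_col] >= LOW_INCOME_PERCENTILE_CUTOFF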