Fix linter and dependency issues

Carlos Felix 2024-11-19 15:01:27 -05:00 committed by Carlos Felix
commit 3e087a37a4
7 changed files with 619 additions and 730 deletions


@@ -2,7 +2,6 @@
import pandas as pd
import pytest
from data_pipeline.config import settings
from data_pipeline.etl.score import constants
from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.score import field_names
from data_pipeline.score.score_narwhal import ScoreNarwhal
@@ -13,9 +12,11 @@ logger = get_module_logger(__name__)
TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"
@pytest.fixture
def toy_score_df(scope="module"):
-    return pd.read_csv(TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
+    return pd.read_csv(
+        TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
         dtype={field_names.GEOID_TRACT_FIELD: str},
     )
@@ -84,7 +85,8 @@ def test_drop_all_tracts(toy_score_df):
def test_mark_territory_dacs():
-    test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
+    test_data = pd.read_csv(
+        TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
         dtype={field_names.GEOID_TRACT_FIELD: str},
     )
# Sanity check on the input data
@@ -92,18 +94,22 @@ def test_mark_territory_dacs():
scorer = ScoreNarwhal(test_data)
scorer._mark_territory_dacs()
territory_filter = test_data[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES))
# Check territories are set to true
-    expected_new_dacs_filter = (
-        test_data[field_names.GEOID_TRACT_FIELD].isin(['60050951100', '66010951100', '69110001101', '78010990000'])
+    expected_new_dacs_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
+        ["60050951100", "66010951100", "69110001101", "78010990000"]
     )
-    assert test_data.loc[expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
+    assert test_data.loc[
+        expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
+    ].all()
# Non-territories are still false
-    assert not test_data.loc[~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
+    assert not test_data.loc[
+        ~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
+    ].all()
def test_mark_poverty_flag():
-    test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
+    test_data = pd.read_csv(
+        TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
         dtype={field_names.GEOID_TRACT_FIELD: str},
     )
# Sanity check on the input data
@@ -111,14 +117,14 @@ def test_mark_poverty_flag():
scorer = ScoreNarwhal(test_data)
scorer._mark_poverty_flag()
-    expected_low_income_filter = (
-        test_data[field_names.GEOID_TRACT_FIELD].isin(['36087011302', '66010951100', '78010990000'])
+    expected_low_income_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
+        ["36087011302", "66010951100", "78010990000"]
     )
# Three tracts are set to true
-    assert (
-        test_data[expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
-    )
+    assert test_data[expected_low_income_filter][
+        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
+    ].all()
# Everything else is false
-    assert (
-        not test_data[~expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
-    )
+    assert not test_data[~expected_low_income_filter][
+        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
+    ].all()