Territories in the 65th percentile of low income are added as disadvantaged

This commit is contained in:
Carlos Felix 2024-11-13 15:58:47 -05:00 committed by Carlos Felix
commit 0da80c90d8
8 changed files with 177 additions and 125 deletions

View file

@ -2,22 +2,20 @@
import pandas as pd
import pytest
from data_pipeline.config import settings
from data_pipeline.etl.score import constants
from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.score import field_names
from data_pipeline.score.score_narwhal import ScoreNarwhal
from data_pipeline.utils import get_module_logger
# Logger for this module, obtained through the project's logging helper.
logger = get_module_logger(__name__)
# Directory containing the CSV inputs that the tests in this module read.
TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"
@pytest.fixture
def toy_score_df(scope="module"):
    """Load the toy score table from ``test_drop_tracts_from_percentile.csv``.

    The tract identifier column is read as ``str`` so pandas does not parse
    it as a number.

    NOTE(review): ``scope`` is an ordinary defaulted parameter here, not an
    argument to ``pytest.fixture``, so it has no effect on the fixture's
    scope (the fixture is function-scoped). If module-level caching was
    intended, move it to ``@pytest.fixture(scope="module")`` -- but first
    confirm that no test mutates the returned frame.
    """
    return pd.read_csv(
        TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
@ -83,3 +81,44 @@ def test_drop_all_tracts(toy_score_df):
toy_score_df,
drop_tracts=toy_score_df[field_names.GEOID_TRACT_FIELD].to_list(),
), "Percentile in score fails when we drop all tracts"
def test_mark_territory_dacs():
    """Check that ``ScoreNarwhal._mark_territory_dacs`` flags only the expected tracts.

    Loads ``test_mark_territory_dacs.csv``, runs the marking step, and
    verifies that the four listed tracts end up with
    ``field_names.SCORE_N_COMMUNITIES`` set while every other row stays unset.
    """
    test_data = pd.read_csv(
        TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
    score_col = field_names.SCORE_N_COMMUNITIES

    # Sanity check on the input data: no tract starts out flagged.
    # `not .any()` is used instead of `not .all()`, which would pass as soon
    # as a single row is False.
    assert not test_data[score_col].any()

    # The assertions below read `test_data`, so this relies on the scorer
    # updating the frame it was constructed with.
    scorer = ScoreNarwhal(test_data)
    scorer._mark_territory_dacs()

    expected_new_dacs_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
        ["60050951100", "66010951100", "69110001101", "78010990000"]
    )

    # Check the expected territory tracts are set to true
    assert test_data.loc[expected_new_dacs_filter, score_col].all()
    # Every other tract is still false
    assert not test_data.loc[~expected_new_dacs_filter, score_col].any()
def test_mark_poverty_flag():
    """Check that ``ScoreNarwhal._mark_poverty_flag`` flags only the expected tracts.

    Loads ``test_mark_poverty_flag.csv``, runs the marking step, and verifies
    that the three listed tracts end up with
    ``field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED`` set while every other
    row stays unset.
    """
    test_data = pd.read_csv(
        TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
    low_income_col = field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED

    # Sanity check on the input data: no tract starts out flagged.
    # `not .any()` is used instead of `not .all()`, which would pass as soon
    # as a single row is False.
    assert not test_data[low_income_col].any()

    # The assertions below read `test_data`, so this relies on the scorer
    # updating the frame it was constructed with.
    scorer = ScoreNarwhal(test_data)
    scorer._mark_poverty_flag()

    expected_low_income_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
        ["36087011302", "66010951100", "78010990000"]
    )

    # Three tracts are set to true
    assert test_data.loc[expected_low_income_filter, low_income_col].all()
    # Everything else is false
    assert not test_data.loc[~expected_low_income_filter, low_income_col].any()

View file

@ -0,0 +1,8 @@
GEOID10_TRACT,Percentage households below 200% of federal poverty line in 2009,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted","Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Is low income (imputed and adjusted)?
01071950300,,0.1,0.1,False
36087011302,,0.7,0.7,False
72119130701,,0.5,0.5,False
60050951100,0.1,,,False
66010951100,0.7,,,False
69110001100,0.5,,,False
78010990000,0.9,,,False
1 GEOID10_TRACT Percentage households below 200% of federal poverty line in 2009 Percent of individuals below 200% Federal Poverty Line, imputed and adjusted Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile) Is low income (imputed and adjusted)?
2 01071950300 0.1 0.1 False
3 36087011302 0.7 0.7 False
4 72119130701 0.5 0.5 False
5 60050951100 0.1 False
6 66010951100 0.7 False
7 69110001100 0.5 False
8 78010990000 0.9 False

View file

@ -0,0 +1,9 @@
GEOID10_TRACT,Is low income (imputed and adjusted)?,Definition N (communities)
01071950300,True,False
36087011302,False,False
72119130701,True,False
60050951100,True,False
66010951100,True,False
69110001100,False,False
69110001101,True,False
78010990000,True,False
1 GEOID10_TRACT Is low income (imputed and adjusted)? Definition N (communities)
2 01071950300 True False
3 36087011302 False False
4 72119130701 True False
5 60050951100 True False
6 66010951100 True False
7 69110001100 False False
8 69110001101 True False
9 78010990000 True False