checking drop tracts works

2025-08-03 09:24:19 -07:00 · 2022-08-25 16:37:23 -04:00 · 2022-08-25 16:37:23 -04:00 · 9a2193d1a4
commit 9a2193d1a4
parent 4a25a28b0e
2 changed files with 185 additions and 0 deletions
--- a/data/data-pipeline/data_pipeline/tests/score/test_score_narwhal_methods.py
+++ b/data/data-pipeline/data_pipeline/tests/score/test_score_narwhal_methods.py
@@ -0,0 +1,84 @@
 import pandas as pd
 import pytest
 from data_pipeline.config import settings
 import data_pipeline.score.field_names as field_names
 from data_pipeline.etl.score.etl_score import ScoreETL
 from data_pipeline.utils import get_module_logger
 logger = get_module_logger(__name__)
@pytest.fixture
 def toy_score_df():
     """Load the toy tract table used by the percentile tests.

     Reads ``test_drop_tracts_from_percentile.csv`` from
     ``tests/score/test_utils/data`` under ``settings.APP_ROOT``. The tract id
     column is read as ``str`` so ids can be matched against string literals
     such as ``"1"``.

     The function intentionally takes no parameters. pytest reads a fixture's
     scope from the ``pytest.fixture`` decorator, never from the function
     signature, so the former ``scope="module"`` default argument had no
     effect. The fixture keeps pytest's default scope, which means the CSV is
     re-read for every test that requests it.

     Returns:
         pandas.DataFrame: the parsed CSV contents.
     """
     csv_path = (
         settings.APP_ROOT
         / "tests"
         / "score"
         / "test_utils"
         / "data"
         / "test_drop_tracts_from_percentile.csv"
     )
     return pd.read_csv(
         csv_path,
         dtype={field_names.GEOID_TRACT_FIELD: str},
     )
 def _helper_test_dropping_tracts(toy_score_df, drop_tracts):
     """Check ``ScoreETL._add_percentiles_to_df`` against a hand-built ranking.

     The expected value is ``rank(pct=True)`` of ``to_rank`` computed only over
     the rows whose tract id is not in ``drop_tracts``. It is compared with the
     ``to_rank_auto`` percentile column returned by the ETL helper for those
     same tracts.

     Args:
         toy_score_df: frame with a tract id column and a ``to_rank`` column.
         drop_tracts: tract ids to exclude from the percentile calculation.

     Returns:
         bool: ``True`` when the two percentile series are equal according to
         ``Series.equals``.
     """
     logger.info(drop_tracts)

     tract_col = field_names.GEOID_TRACT_FIELD
     percentile_col = "to_rank_auto" + field_names.PERCENTILE_FIELD_SUFFIX

     # Select the surviving rows *before* calling the ETL helper, so this frame
     # only ever holds the columns that were present in the input.
     is_dropped = toy_score_df[tract_col].isin(drop_tracts)
     expected = toy_score_df.loc[~is_dropped]

     scored = ScoreETL._add_percentiles_to_df(
         df=toy_score_df,
         input_column_name="to_rank",
         output_column_name_root="to_rank_auto",
         drop_tracts=drop_tracts,
     )

     hand_ranked = expected["to_rank"].rank(pct=True)
     expected = expected.assign(true_rank=hand_ranked)

     # Default (inner) merge: only tracts present in both frames are compared.
     actual = scored[[tract_col, percentile_col]]
     compared = expected.merge(actual, on=[tract_col])

     return compared["true_rank"].equals(compared[percentile_col])
 def test_drop_0_tracts(toy_score_df):
     """Percentiles match the plain ranking when the drop list is empty."""
     percentiles_match = _helper_test_dropping_tracts(
         toy_score_df, drop_tracts=[]
     )
     assert (
         percentiles_match
     ), "Percentile in score fails when we do not drop any tracts"
 def test_drop_1_tract(toy_score_df):
     """Percentiles match the hand-built ranking when tract "1" is dropped."""
     percentiles_match = _helper_test_dropping_tracts(
         toy_score_df, drop_tracts=["1"]
     )
     assert (
         percentiles_match
     ), "Percentile in score fails when we do drop a single tract"
 def test_drop_2_tracts(toy_score_df):
     """Percentiles match the hand-built ranking with tracts "1" and "2" dropped."""
     percentiles_match = _helper_test_dropping_tracts(
         toy_score_df, drop_tracts=["1", "2"]
     )
     assert (
         percentiles_match
     ), "Percentile in score fails when we drop two tracts"
 def test_drop_many_tracts(toy_score_df):
     """Percentiles match the hand-built ranking with the first five tracts dropped."""
     # The first five tract ids, in the order the rows appear in the fixture.
     all_tract_ids = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
     percentiles_match = _helper_test_dropping_tracts(
         toy_score_df, drop_tracts=all_tract_ids[:5]
     )
     assert (
         percentiles_match
     ), "Percentile in score fails when we drop many tracts"
 def test_drop_all_tracts(toy_score_df):
     """Run the percentile check with every tract id in the drop list."""
     # NOTE(review): the helper removes every dropped tract from the frame it
     # compares, so with all tracts dropped the comparison runs on an empty
     # frame. This mainly confirms the call completes; it does not look at the
     # values assigned to dropped tracts.
     every_tract_id = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
     percentiles_match = _helper_test_dropping_tracts(
         toy_score_df, drop_tracts=every_tract_id
     )
     assert (
         percentiles_match
     ), "Percentile in score fails when we drop all tracts"
--- a/data/data-pipeline/data_pipeline/tests/score/test_utils/data/test_drop_tracts_from_percentile.csv
+++ b/data/data-pipeline/data_pipeline/tests/score/test_utils/data/test_drop_tracts_from_percentile.csv
@@ -0,0 +1,101 @@
 GEOID10_TRACT,to_rank
 1,1
 2,2
 3,3
 4,4
 5,5
 6,6
 7,7
 8,8
 9,9
 10,10
 11,11
 12,12
 13,13
 14,14
 15,15
 16,16
 17,17
 18,18
 19,19
 20,20
 21,21
 22,22
 23,23
 24,24
 25,25
 26,26
 27,27
 28,28
 29,29
 30,30
 31,31
 32,32
 33,33
 34,34
 35,35
 36,36
 37,37
 38,38
 39,39
 40,40
 41,41
 42,42
 43,43
 44,44
 45,45
 46,46
 47,47
 48,48
 49,49
 50,50
 51,51
 52,52
 53,53
 54,54
 55,55
 56,56
 57,57
 58,58
 59,59
 60,60
 61,61
 62,62
 63,63
 64,64
 65,65
 66,66
 67,67
 68,68
 69,69
 70,70
 71,71
 72,72
 73,73
 74,74
 75,75
 76,76
 77,77
 78,78
 79,79
 80,80
 81,81
 82,82
 83,83
 84,84
 85,85
 86,86
 87,87
 88,88
 89,89
 90,90
 91,91
 92,92
 93,93
 94,94
 95,95
 96,96
 97,97
 98,98
 99,99
 100,100