checking drop tracts works

2025-08-24 13:21:40 -07:00 · 2022-08-25 16:37:23 -04:00 · 2022-08-25 16:37:23 -04:00 · 9a2193d1a4
commit 9a2193d1a4
parent 4a25a28b0e
2 changed files with 185 additions and 0 deletions
--- a/data/data-pipeline/data_pipeline/tests/score/test_score_narwhal_methods.py
+++ b/data/data-pipeline/data_pipeline/tests/score/test_score_narwhal_methods.py
@ -0,0 +1,84 @@
+import pandas as pd
+import pytest
+from data_pipeline.config import settings
+import data_pipeline.score.field_names as field_names
+from data_pipeline.etl.score.etl_score import ScoreETL
+from data_pipeline.utils import get_module_logger
+
+# Module-level logger; the helper below uses it to log the tracts being dropped.
+logger = get_module_logger(__name__)
+
+
+@pytest.fixture
+def toy_score_df():
+    """Load the toy tract table used by the percentile tests.
+
+    Reads ``test_drop_tracts_from_percentile.csv`` from
+    ``tests/score/test_utils/data`` under ``settings.APP_ROOT``. The column
+    named by ``field_names.GEOID_TRACT_FIELD`` is read as ``str`` so it can be
+    matched against the string tract IDs the tests pass as ``drop_tracts``
+    (for example ``"1"``).
+
+    The fixture is function-scoped on purpose. It used to declare
+    ``scope="module"`` as a default argument of this function, which pytest
+    never sees (scope must be passed to ``pytest.fixture(...)``), so each
+    test already received a freshly loaded frame. Removing the dead argument
+    makes the real scope explicit.
+
+    NOTE(review): do not switch to ``@pytest.fixture(scope="module")`` without
+    first confirming that ``ScoreETL._add_percentiles_to_df`` leaves its input
+    frame untouched; otherwise all tests would share one mutated frame.
+
+    Returns:
+        pd.DataFrame: the contents of the CSV file.
+    """
+    return pd.read_csv(
+        settings.APP_ROOT
+        / "tests"
+        / "score"
+        / "test_utils"
+        / "data"
+        / "test_drop_tracts_from_percentile.csv",
+        dtype={field_names.GEOID_TRACT_FIELD: str},
+    )
+
+
+def _helper_test_dropping_tracts(toy_score_df, drop_tracts):
+    """Check ETL percentiles against a direct rank over the kept tracts.
+
+    Args:
+        toy_score_df: frame holding the tract ID column and a ``to_rank``
+            column.
+        drop_tracts: tract IDs to exclude from the percentile calculation.
+
+    Returns:
+        bool: True when, for every tract not in ``drop_tracts``, the
+        percentile column returned by ``ScoreETL._add_percentiles_to_df``
+        equals ``rank(pct=True)`` computed over only the kept tracts.
+    """
+    logger.info(drop_tracts)
+
+    tract_id = field_names.GEOID_TRACT_FIELD
+    percentile_column = "to_rank_auto" + field_names.PERCENTILE_FIELD_SUFFIX
+
+    # Filter before calling the ETL method so the expected frame is built
+    # from the input exactly as it was handed to this helper.
+    is_dropped = toy_score_df[tract_id].isin(drop_tracts)
+    kept_tracts = toy_score_df[~is_dropped]
+
+    scored_df = ScoreETL._add_percentiles_to_df(
+        df=toy_score_df,
+        input_column_name="to_rank",
+        output_column_name_root="to_rank_auto",
+        drop_tracts=drop_tracts,
+    )
+
+    # Expected percentile: rank only among the tracts that were kept.
+    expected = kept_tracts.assign(
+        true_rank=kept_tracts["to_rank"].rank(pct=True)
+    )
+    actual = scored_df[[tract_id, percentile_column]]
+
+    # The default inner merge keeps only tracts present in both frames,
+    # i.e. the kept ones.
+    comparison = expected.merge(actual, on=[tract_id])
+
+    return comparison["true_rank"].equals(comparison[percentile_column])
+
+
+def test_drop_0_tracts(toy_score_df):
+    """Percentiles must match a plain rank when no tracts are dropped."""
+    percentiles_match = _helper_test_dropping_tracts(
+        toy_score_df, drop_tracts=[]
+    )
+    assert (
+        percentiles_match
+    ), "Percentile in score fails when we do not drop any tracts"
+
+
+def test_drop_1_tract(toy_score_df):
+    """Percentiles must match when exactly one tract ("1") is dropped."""
+    percentiles_match = _helper_test_dropping_tracts(
+        toy_score_df, drop_tracts=["1"]
+    )
+    assert (
+        percentiles_match
+    ), "Percentile in score fails when we do drop a single tract"
+
+
+def test_drop_2_tracts(toy_score_df):
+    """Percentiles must match when tracts "1" and "2" are dropped."""
+    percentiles_match = _helper_test_dropping_tracts(
+        toy_score_df, drop_tracts=["1", "2"]
+    )
+    assert (
+        percentiles_match
+    ), "Percentile in score fails when we drop two tracts"
+
+
+def test_drop_many_tracts(toy_score_df):
+    """Percentiles must match when the first five tracts are dropped."""
+    first_five_tracts = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()[
+        :5
+    ]
+    percentiles_match = _helper_test_dropping_tracts(
+        toy_score_df, drop_tracts=first_five_tracts
+    )
+    assert (
+        percentiles_match
+    ), "Percentile in score fails when we drop many tracts"
+
+
+def test_drop_all_tracts(toy_score_df):
+    """The helper's comparison must still hold when every tract is dropped."""
+    every_tract = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
+    percentiles_match = _helper_test_dropping_tracts(
+        toy_score_df, drop_tracts=every_tract
+    )
+    assert (
+        percentiles_match
+    ), "Percentile in score fails when we drop all tracts"
--- a/data/data-pipeline/data_pipeline/tests/score/test_utils/data/test_drop_tracts_from_percentile.csv
+++ b/data/data-pipeline/data_pipeline/tests/score/test_utils/data/test_drop_tracts_from_percentile.csv
@ -0,0 +1,101 @@
+GEOID10_TRACT,to_rank
+1,1
+2,2
+3,3
+4,4
+5,5
+6,6
+7,7
+8,8
+9,9
+10,10
+11,11
+12,12
+13,13
+14,14
+15,15
+16,16
+17,17
+18,18
+19,19
+20,20
+21,21
+22,22
+23,23
+24,24
+25,25
+26,26
+27,27
+28,28
+29,29
+30,30
+31,31
+32,32
+33,33
+34,34
+35,35
+36,36
+37,37
+38,38
+39,39
+40,40
+41,41
+42,42
+43,43
+44,44
+45,45
+46,46
+47,47
+48,48
+49,49
+50,50
+51,51
+52,52
+53,53
+54,54
+55,55
+56,56
+57,57
+58,58
+59,59
+60,60
+61,61
+62,62
+63,63
+64,64
+65,65
+66,66
+67,67
+68,68
+69,69
+70,70
+71,71
+72,72
+73,73
+74,74
+75,75
+76,76
+77,77
+78,78
+79,79
+80,80
+81,81
+82,82
+83,83
+84,84
+85,85
+86,86
+87,87
+88,88
+89,89
+90,90
+91,91
+92,92
+93,93
+94,94
+95,95
+96,96
+97,97
+98,98
+99,99
+100,100