checking drop tracts works

This commit is contained in:
Emma Nechamkin 2022-08-25 16:37:23 -04:00
parent 4a25a28b0e
commit 9a2193d1a4
2 changed files with 185 additions and 0 deletions

View file

@ -0,0 +1,84 @@
import pandas as pd
import pytest
from data_pipeline.config import settings
import data_pipeline.score.field_names as field_names
from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.utils import get_module_logger
# Module-level logger; the shared test helper uses it to log the tracts being dropped.
logger = get_module_logger(__name__)
@pytest.fixture
def toy_score_df():
    """Load the toy tract table used by the drop-tracts percentile tests.

    The CSV is read from ``tests/score/test_utils/data`` under
    ``settings.APP_ROOT``. The tract-ID column is read as ``str`` so that it
    compares equal to string IDs such as ``"1"`` passed by the tests.

    The fixture is function-scoped (pytest's default), so every test receives
    a freshly loaded frame. An earlier ``scope="module"`` default on the
    function signature had no effect: pytest only honours ``scope`` when it
    is passed to ``@pytest.fixture(...)``. The dead parameter is removed
    rather than moved to the decorator so the effective behaviour is
    unchanged.

    NOTE(review): switching to module scope would share one frame between
    tests, which is only safe if nothing the tests call modifies the frame
    in place -- confirm before changing.

    Returns:
        pandas.DataFrame: the contents of
        ``test_drop_tracts_from_percentile.csv``.
    """
    data_path = (
        settings.APP_ROOT
        / "tests"
        / "score"
        / "test_utils"
        / "data"
        / "test_drop_tracts_from_percentile.csv"
    )
    return pd.read_csv(
        data_path,
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
def _helper_test_dropping_tracts(toy_score_df, drop_tracts):
    """Compare ETL percentiles against a directly computed percentile rank.

    Args:
        toy_score_df: frame holding a tract-ID column
            (``field_names.GEOID_TRACT_FIELD``) and a ``to_rank`` column.
        drop_tracts: tract IDs to exclude from the percentile calculation.

    Returns:
        bool: True when, for the tracts that are not dropped, the percentile
        column produced by ``ScoreETL._add_percentiles_to_df`` equals
        ``to_rank`` ranked with ``pct=True`` over those same tracts.
    """
    logger.info(drop_tracts)
    percentile_column = "to_rank_auto" + field_names.PERCENTILE_FIELD_SUFFIX

    # Select the kept tracts before calling the ETL method.
    # NOTE(review): this ordering matters if the ETL call adds columns to the
    # frame it is given -- confirm against ScoreETL before reordering.
    is_dropped = toy_score_df[field_names.GEOID_TRACT_FIELD].isin(drop_tracts)
    kept_tracts = toy_score_df[~is_dropped]

    scored_df = ScoreETL._add_percentiles_to_df(
        df=toy_score_df,
        input_column_name="to_rank",
        output_column_name_root="to_rank_auto",
        drop_tracts=drop_tracts,
    )

    # Expected percentile: rank only the tracts that were kept.
    expected = kept_tracts.assign(
        true_rank=kept_tracts["to_rank"].rank(pct=True)
    )
    actual = scored_df[[field_names.GEOID_TRACT_FIELD, percentile_column]]

    # Line the two results up by tract ID, then compare the columns.
    comparison = expected.merge(actual, on=[field_names.GEOID_TRACT_FIELD])
    return comparison["true_rank"].equals(comparison[percentile_column])
def test_drop_0_tracts(toy_score_df):
    """The percentile comparison must hold when the drop list is empty."""
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=[]
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we do not drop any tracts"
def test_drop_1_tract(toy_score_df):
    """The percentile comparison must hold when tract ``"1"`` is dropped."""
    tracts_to_drop = ["1"]
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=tracts_to_drop
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we do drop a single tract"
def test_drop_2_tracts(toy_score_df):
    """The percentile comparison must hold when tracts ``"1"`` and ``"2"`` are dropped."""
    tracts_to_drop = ["1", "2"]
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=tracts_to_drop
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop two tracts"
def test_drop_many_tracts(toy_score_df):
    """The percentile comparison must hold when the first five tract IDs are dropped."""
    all_tract_ids = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df,
        drop_tracts=all_tract_ids[:5],
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop many tracts"
def test_drop_all_tracts(toy_score_df):
    """The percentile comparison must hold when every tract ID in the frame is dropped."""
    all_tract_ids = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df,
        drop_tracts=all_tract_ids,
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop all tracts"

View file

@ -0,0 +1,101 @@
GEOID10_TRACT,to_rank
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
10,10
11,11
12,12
13,13
14,14
15,15
16,16
17,17
18,18
19,19
20,20
21,21
22,22
23,23
24,24
25,25
26,26
27,27
28,28
29,29
30,30
31,31
32,32
33,33
34,34
35,35
36,36
37,37
38,38
39,39
40,40
41,41
42,42
43,43
44,44
45,45
46,46
47,47
48,48
49,49
50,50
51,51
52,52
53,53
54,54
55,55
56,56
57,57
58,58
59,59
60,60
61,61
62,62
63,63
64,64
65,65
66,66
67,67
68,68
69,69
70,70
71,71
72,72
73,73
74,74
75,75
76,76
77,77
78,78
79,79
80,80
81,81
82,82
83,83
84,84
85,85
86,86
87,87
88,88
89,89
90,90
91,91
92,92
93,93
94,94
95,95
96,96
97,97
98,98
99,99
100,100
1 GEOID10_TRACT to_rank
2 1 1
3 2 2
4 3 3
5 4 4
6 5 5
7 6 6
8 7 7
9 8 8
10 9 9
11 10 10
12 11 11
13 12 12
14 13 13
15 14 14
16 15 15
17 16 16
18 17 17
19 18 18
20 19 19
21 20 20
22 21 21
23 22 22
24 23 23
25 24 24
26 25 25
27 26 26
28 27 27
29 28 28
30 29 29
31 30 30
32 31 31
33 32 32
34 33 33
35 34 34
36 35 35
37 36 36
38 37 37
39 38 38
40 39 39
41 40 40
42 41 41
43 42 42
44 43 43
45 44 44
46 45 45
47 46 46
48 47 47
49 48 48
50 49 49
51 50 50
52 51 51
53 52 52
54 53 53
55 54 54
56 55 55
57 56 56
58 57 57
59 58 58
60 59 59
61 60 60
62 61 61
63 62 62
64 63 63
65 64 64
66 65 65
67 66 66
68 67 67
69 68 68
70 69 69
71 70 70
72 71 71
73 72 72
74 73 73
75 74 74
76 75 75
77 76 76
78 77 77
79 78 78
80 79 79
81 80 80
82 81 81
83 82 82
84 83 83
85 84 84
86 85 85
87 86 86
88 87 87
89 88 88
90 89 89
91 90 90
92 91 91
93 92 92
94 93 93
95 94 94
96 95 95
97 96 96
98 97 97
99 98 98
100 99 99
101 100 100