Adding tests to ensure proper calculations (#1871)

* just testing that the boolean is preserved on gha
* checking drop tracts works
* adding a check to the agvalue calculation for nri
* updated with error messages
This commit is contained in:
Emma Nechamkin 2022-08-31 14:26:55 -04:00 committed by GitHub
commit 5201f9e457
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 209 additions and 0 deletions

View file

@ -0,0 +1,86 @@
# pylint: disable=protected-access
import pandas as pd
import pytest
from data_pipeline.config import settings
from data_pipeline.score import field_names
from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.utils import get_module_logger
# Module-level logger; _helper_test_dropping_tracts logs the tract list it is given.
logger = get_module_logger(__name__)
@pytest.fixture
def toy_score_df():
    """Load the toy tract table used by the percentile tests.

    The fixture is function-scoped: the CSV is re-read for every test, so
    each test starts from its own frame.

    The previous signature carried a ``scope="module"`` default argument.
    pytest takes a fixture's scope from the ``pytest.fixture`` decorator, not
    from the function signature, so that argument never changed the scope.
    It has been removed to stop the code from suggesting otherwise.

    NOTE(review): if module scope is actually wanted, pass it to the
    decorator (``@pytest.fixture(scope="module")``), but first confirm that
    ``ScoreETL._add_percentiles_to_df`` does not modify the frame it is
    given -- the frame would then be shared between tests.

    Returns:
        pd.DataFrame: contents of ``test_drop_tracts_from_percentile.csv``
        with the tract id column read as ``str``.
    """
    csv_path = (
        settings.APP_ROOT
        / "tests"
        / "score"
        / "test_utils"
        / "data"
        / "test_drop_tracts_from_percentile.csv"
    )
    return pd.read_csv(
        csv_path,
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
def _helper_test_dropping_tracts(toy_score_df, drop_tracts):
    """Compare the ETL percentile column with a direct rank of the kept tracts.

    Args:
        toy_score_df: frame holding the tract id column and a ``to_rank``
            column.
        drop_tracts: tract ids to leave out of the percentile calculation.

    Returns:
        bool: result of ``Series.equals`` between ``rank(pct=True)`` of
        ``to_rank`` computed only over tracts not in ``drop_tracts`` and the
        percentile column returned by ``ScoreETL._add_percentiles_to_df`` for
        those same tracts.
    """
    logger.info(drop_tracts)

    tract_col = field_names.GEOID_TRACT_FIELD

    # Select the kept rows before the frame is handed to the ETL method, so
    # the expectation is built only from the input columns.
    is_dropped = toy_score_df[tract_col].isin(drop_tracts)
    expected = toy_score_df[~is_dropped]

    scored = ScoreETL._add_percentiles_to_df(
        df=toy_score_df,
        input_column_name="to_rank",
        output_column_name_root="to_rank_auto",
        drop_tracts=drop_tracts,
    )

    percentile_col = "to_rank_auto" + field_names.PERCENTILE_FIELD_SUFFIX
    expected = expected.assign(true_rank=expected["to_rank"].rank(pct=True))

    # Inner merge on tract id lines the expected rank up with the ETL output.
    comparison = expected.merge(
        scored[[tract_col, percentile_col]],
        on=[tract_col],
    )
    return comparison["true_rank"].equals(comparison[percentile_col])
def test_drop_0_tracts(toy_score_df):
    """Percentiles must match a plain rank when the drop list is empty."""
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=[]
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we do not drop any tracts"
def test_drop_1_tract(toy_score_df):
    """Percentiles must match a rank of the remaining tracts when tract "1" is dropped."""
    tracts_to_drop = ["1"]
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=tracts_to_drop
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we do drop a single tract"
def test_drop_2_tracts(toy_score_df):
    """Percentiles must match a rank of the remaining tracts when tracts "1" and "2" are dropped."""
    tracts_to_drop = ["1", "2"]
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=tracts_to_drop
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop two tracts"
def test_drop_many_tracts(toy_score_df):
    """Percentiles must match a rank of the remaining tracts when the first five tract ids are dropped."""
    all_tract_ids = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df,
        drop_tracts=all_tract_ids[:5],
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop many tracts"
def test_drop_all_tracts(toy_score_df):
    """The helper comparison must still hold when every tract id in the fixture is dropped."""
    all_tract_ids = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df,
        drop_tracts=all_tract_ids,
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop all tracts"

View file

@ -0,0 +1,101 @@
GEOID10_TRACT,to_rank
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
10,10
11,11
12,12
13,13
14,14
15,15
16,16
17,17
18,18
19,19
20,20
21,21
22,22
23,23
24,24
25,25
26,26
27,27
28,28
29,29
30,30
31,31
32,32
33,33
34,34
35,35
36,36
37,37
38,38
39,39
40,40
41,41
42,42
43,43
44,44
45,45
46,46
47,47
48,48
49,49
50,50
51,51
52,52
53,53
54,54
55,55
56,56
57,57
58,58
59,59
60,60
61,61
62,62
63,63
64,64
65,65
66,66
67,67
68,68
69,69
70,70
71,71
72,72
73,73
74,74
75,75
76,76
77,77
78,78
79,79
80,80
81,81
82,82
83,83
84,84
85,85
86,86
87,87
88,88
89,89
90,90
91,91
92,92
93,93
94,94
95,95
96,96
97,97
98,98
99,99
100,100
1 GEOID10_TRACT to_rank
2 1 1
3 2 2
4 3 3
5 4 4
6 5 5
7 6 6
8 7 7
9 8 8
10 9 9
11 10 10
12 11 11
13 12 12
14 13 13
15 14 14
16 15 15
17 16 16
18 17 17
19 18 18
20 19 19
21 20 20
22 21 21
23 22 22
24 23 23
25 24 24
26 25 25
27 26 26
28 27 27
29 28 28
30 29 29
31 30 30
32 31 31
33 32 32
34 33 33
35 34 34
36 35 35
37 36 36
38 37 37
39 38 38
40 39 39
41 40 40
42 41 41
43 42 42
44 43 43
45 44 44
46 45 45
47 46 46
48 47 47
49 48 48
50 49 49
51 50 50
52 51 51
53 52 52
54 53 53
55 54 54
56 55 55
57 56 56
58 57 57
59 58 58
60 59 59
61 60 60
62 61 61
63 62 62
64 63 63
65 64 64
66 65 65
67 66 66
68 67 67
69 68 68
70 69 69
71 70 70
72 71 71
73 72 72
74 73 73
75 74 74
76 75 75
77 76 76
78 77 77
79 78 78
80 79 79
81 80 80
82 81 81
83 82 82
84 83 83
85 84 84
86 85 85
87 86 86
88 87 87
89 88 88
90 89 89
91 90 90
92 91 91
93 92 92
94 93 93
95 94 94
96 95 95
97 96 96
98 97 97
99 98 98
100 99 99
101 100 100