Merge branch 'emma-nechamkin/1849-calculation-tests' of github.com:usds/justice40-tool into emma-nechamkin/release/score-narwhal

This commit is contained in:
Emma Nechamkin 2022-08-31 10:25:55 -04:00
commit 7c6a9078e3
4 changed files with 208 additions and 7 deletions

View file

@ -153,6 +153,27 @@ class NationalRiskIndexETL(ExtractTransformLoad):
lower=self.AGRIVALUE_LOWER_BOUND
)
## Check that this clip worked -- that the only place the value has changed is when the clip took effect
base_expectation = (
disaster_agriculture_sum_series
/ df_nri[self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME]
)
assert (
df_nri[
df_nri[self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME]
!= base_expectation
][self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME].max()
<= self.AGRIVALUE_LOWER_BOUND
), (
"Clipping the agrivalue did not work. There are places where the value doesn't "
+ "match an unclipped ratio, even where the agrivalue is above the lower bound!"
)
assert (
df_nri[self.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME]
!= base_expectation
).sum() > 0, "Clipping the agrivalue did nothing!"
# This produces a boolean that is True in the case of non-zero agricultural value
df_nri[self.CONTAINS_AGRIVALUE] = (
df_nri[self.AGRICULTURAL_VALUE_INPUT_FIELD_NAME] > 0

View file

@ -493,13 +493,6 @@ class ScoreNarwhal(Score):
field_names.AML_BOOLEAN
].fillna(False)
logger.info(
f"{ self.df[field_names.AML_BOOLEAN_FILLED_IN].value_counts(dropna=False)}"
)
logger.info(
f"{ self.df[field_names.AML_BOOLEAN].value_counts(dropna=False)}"
)
self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = self.df[
[
field_names.RMP_PCTILE_THRESHOLD,

View file

@ -0,0 +1,86 @@
# pylint: disable=protected-access
import pandas as pd
import pytest
from data_pipeline.config import settings
from data_pipeline.score import field_names
from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)
@pytest.fixture
def toy_score_df():
    """Load the toy tract table used by the percentile-dropping tests.

    The table is read from
    ``tests/score/test_utils/data/test_drop_tracts_from_percentile.csv``
    under ``settings.APP_ROOT``; the tract ID column is read as ``str``.

    Scope: this fixture uses pytest's default (per-test) scope, so every test
    receives a freshly loaded frame. A ``scope="module"`` default argument on
    the fixture *function* does not configure the fixture -- scope is only set
    through ``pytest.fixture(scope=...)`` -- so that unused parameter has been
    dropped rather than moved to the decorator.
    NOTE(review): sharing one frame per module would only be safe if
    ``ScoreETL._add_percentiles_to_df`` never modifies its input; that is not
    visible from this file, so confirm before widening the scope.

    Returns:
        pd.DataFrame: the contents of the toy CSV.
    """
    csv_path = (
        settings.APP_ROOT
        / "tests"
        / "score"
        / "test_utils"
        / "data"
        / "test_drop_tracts_from_percentile.csv"
    )
    return pd.read_csv(
        csv_path,
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
def _helper_test_dropping_tracts(toy_score_df, drop_tracts):
    """Check the score percentile against a hand-computed rank.

    The reference value is ``rank(pct=True)`` of the ``to_rank`` column taken
    over only the rows whose tract ID is not in ``drop_tracts``. It is compared
    with the percentile column that ``ScoreETL._add_percentiles_to_df`` writes
    when given the full frame and the same ``drop_tracts`` list.

    Args:
        toy_score_df: frame holding the tract ID column and a ``to_rank``
            column.
        drop_tracts: tract IDs to leave out of the percentile calculation.

    Returns:
        bool: ``True`` when, for the tracts that were kept, the two percentile
        series are equal according to ``Series.equals``.
    """
    logger.info(drop_tracts)

    tract_column = field_names.GEOID_TRACT_FIELD
    percentile_column = "to_rank_auto" + field_names.PERCENTILE_FIELD_SUFFIX

    # Take the kept rows *before* calling the method under test, so the
    # reference frame is built from the input exactly as it was handed to us.
    is_dropped = toy_score_df[tract_column].isin(drop_tracts)
    kept_rows = toy_score_df[~is_dropped]

    scored_df = ScoreETL._add_percentiles_to_df(
        df=toy_score_df,
        input_column_name="to_rank",
        output_column_name_root="to_rank_auto",
        drop_tracts=drop_tracts,
    )

    # Reference percentile: a plain rank over the kept rows only.
    expected = kept_rows.assign(true_rank=kept_rows["to_rank"].rank(pct=True))

    # Line the two results up by tract ID (inner join keeps only kept tracts).
    actual = scored_df[[tract_column, percentile_column]]
    comparison = pd.merge(expected, actual, on=[tract_column])

    return comparison["true_rank"].equals(comparison[percentile_column])
def test_drop_0_tracts(toy_score_df):
    """Percentiles must match the reference rank when no tract is dropped."""
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=[]
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we do not drop any tracts"
def test_drop_1_tract(toy_score_df):
    """Percentiles must match the reference rank when tract "1" is dropped."""
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=["1"]
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we do drop a single tract"
def test_drop_2_tracts(toy_score_df):
    """Percentiles must match the reference rank with tracts "1" and "2" dropped."""
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df, drop_tracts=["1", "2"]
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop two tracts"
def test_drop_many_tracts(toy_score_df):
    """Percentiles must match the reference rank with the first five tracts dropped."""
    # The drop list is the first five tract IDs in file order.
    first_five_tracts = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()[
        :5
    ]
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df,
        drop_tracts=first_five_tracts,
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop many tracts"
def test_drop_all_tracts(toy_score_df):
    """Percentiles must match the reference rank when every tract is dropped."""
    # Every tract ID in the frame goes into the drop list.
    every_tract = toy_score_df[field_names.GEOID_TRACT_FIELD].to_list()
    percentiles_match = _helper_test_dropping_tracts(
        toy_score_df,
        drop_tracts=every_tract,
    )
    assert (
        percentiles_match
    ), "Percentile in score fails when we drop all tracts"

View file

@ -0,0 +1,101 @@
GEOID10_TRACT,to_rank
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
10,10
11,11
12,12
13,13
14,14
15,15
16,16
17,17
18,18
19,19
20,20
21,21
22,22
23,23
24,24
25,25
26,26
27,27
28,28
29,29
30,30
31,31
32,32
33,33
34,34
35,35
36,36
37,37
38,38
39,39
40,40
41,41
42,42
43,43
44,44
45,45
46,46
47,47
48,48
49,49
50,50
51,51
52,52
53,53
54,54
55,55
56,56
57,57
58,58
59,59
60,60
61,61
62,62
63,63
64,64
65,65
66,66
67,67
68,68
69,69
70,70
71,71
72,72
73,73
74,74
75,75
76,76
77,77
78,78
79,79
80,80
81,81
82,82
83,83
84,84
85,85
86,86
87,87
88,88
89,89
90,90
91,91
92,92
93,93
94,94
95,95
96,96
97,97
98,98
99,99
100,100
1 GEOID10_TRACT to_rank
2 1 1
3 2 2
4 3 3
5 4 4
6 5 5
7 6 6
8 7 7
9 8 8
10 9 9
11 10 10
12 11 11
13 12 12
14 13 13
15 14 14
16 15 15
17 16 16
18 17 17
19 18 18
20 19 19
21 20 20
22 21 21
23 22 22
24 23 23
25 24 24
26 25 25
27 26 26
28 27 27
29 28 28
30 29 29
31 30 30
32 31 31
33 32 32
34 33 33
35 34 34
36 35 35
37 36 36
38 37 37
39 38 38
40 39 39
41 40 40
42 41 41
43 42 42
44 43 43
45 44 44
46 45 45
47 46 46
48 47 47
49 48 48
50 49 49
51 50 50
52 51 51
53 52 52
54 53 53
55 54 54
56 55 55
57 56 56
58 57 57
59 58 58
60 59 59
61 60 60
62 61 61
63 62 62
64 63 63
65 64 64
66 65 65
67 66 66
68 67 67
69 68 68
70 69 69
71 70 70
72 71 71
73 72 72
74 73 73
75 74 74
76 75 75
77 76 76
78 77 77
79 78 78
80 79 79
81 80 80
82 81 81
83 82 82
84 83 83
85 84 84
86 85 85
87 86 86
88 87 87
89 88 88
90 89 89
91 90 90
92 91 91
93 92 92
94 93 93
95 94 94
96 95 95
97 96 96
98 97 97
99 98 98
100 99 99
101 100 100