Score tests (#1847)

* update Python version in README; tuple typing fix

* Alaska tribal points fix (#1821)

* Bump mistune from 0.8.4 to 2.0.3 in /data/data-pipeline (#1777)

Bumps [mistune](https://github.com/lepture/mistune) from 0.8.4 to 2.0.3.
- [Release notes](https://github.com/lepture/mistune/releases)
- [Changelog](https://github.com/lepture/mistune/blob/master/docs/changes.rst)
- [Commits](https://github.com/lepture/mistune/compare/v0.8.4...v2.0.3)

---
updated-dependencies:
- dependency-name: mistune
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* poetry update

* initial pass of score tests

* add threshold tests

* added SES threshold (not donut, not island)

* testing suite -- stopping for the day

* added test for lead proxy indicator

* Refactor score tests to make them less verbose and more direct (#1865)

* Cleanup tests slightly before refactor (#1846)

* Refactor score calculation tests

* Feedback from review

* Refactor output tests like calculation tests (#1846) (#1870)

* Reorganize files (#1846)

* Switch from lru_cache to fixture scopes (#1846)

* Add tests for all factors (#1846)

* Mark smoketests and run as part of backend deploy (#1846)

* Update renamed var (#1846)

* Switch from named tuple to dataclass (#1846)

This is annoying, but pylint under Python 3.8 was crashing while parsing the
NamedTuple. We weren't using any namedtuple-specific features, so I made the
type a dataclass just to get pylint to behave.
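
For illustration only (not part of this commit), the shape of the change is roughly the following, using a trimmed-down PercentileTestConfig; the "before" NamedTuple version is an assumption:

    # Before (assumed): a typed NamedTuple that pylint under Python 3.8 failed to parse.
    # class PercentileTestConfig(NamedTuple):
    #     percentile_column_name: str
    #     threshold: float
    #     percentile_column_need_suffix: bool = True

    # After: the same fields on a dataclass; no namedtuple-specific behavior was needed.
    from dataclasses import dataclass

    @dataclass
    class PercentileTestConfig:
        percentile_column_name: str
        threshold: float
        percentile_column_need_suffix: bool = True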

* Add default timeout to requests (#1846)

* Fix type (#1846)

* Fix merge mistake on poetry.lock (#1846)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Matt Bowen <83967628+mattbowen-usds@users.noreply.github.com>
Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
commit 1c4d3e4142
Emma Nechamkin, 2022-08-26 15:23:20 -04:00, committed by GitHub
19 changed files with 1425 additions and 29 deletions

@@ -52,3 +52,16 @@ def mock_etl(monkeypatch, mock_paths) -> None:
    data_path, tmp_path = mock_paths
    monkeypatch.setattr(ExtractTransformLoad, "DATA_PATH", data_path)
    monkeypatch.setattr(ExtractTransformLoad, "TMP_PATH", tmp_path)


def pytest_collection_modifyitems(config, items):
    keywordexpr = config.option.keyword
    markexpr = config.option.markexpr
    if keywordexpr or markexpr:
        return  # let pytest handle this

    smoketest = "smoketest"
    skip_mymarker = pytest.mark.skip(reason=f"{smoketest} not selected")
    for item in items:
        if smoketest in item.keywords:
            item.add_marker(skip_mymarker)
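
As a usage sketch (not part of the diff): with this collection hook, a plain pytest run skips everything marked smoketest, and the backend deploy selects the mark explicitly. The pytest.main calls below mirror the equivalent CLI invocations; whether the marker is also registered in the project's pytest configuration is an assumption here.

    import pytest

    # Default run: no -k/-m expression is given, so the hook adds a skip
    # marker to every test carrying the "smoketest" keyword.
    pytest.main([])

    # Smoketest run (e.g. during backend deploy): the -m expression makes the
    # hook return early, so the smoketests against the full score CSV execute.
    pytest.main(["-m", "smoketest"])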

@@ -0,0 +1,12 @@
import pandas as pd
import pytest
from data_pipeline.config import settings
from data_pipeline.score import field_names


@pytest.fixture(scope="session")
def final_score_df():
    return pd.read_csv(
        settings.APP_ROOT / "data" / "score" / "csv" / "full" / "usa.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
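
For context on the "lru_cache to fixture scopes" commit above (an assumed sketch, not taken from the diff): scope="session" makes pytest load the full score CSV once and share the DataFrame across every smoketest, which is what a cached module-level loader would otherwise approximate.

    from functools import lru_cache

    import pandas as pd

    # Hypothetical pre-refactor shape: cache the loaded frame on a helper
    # function instead of relying on pytest's fixture scoping.
    @lru_cache(maxsize=1)
    def load_final_score_df() -> pd.DataFrame:
        return pd.read_csv("usa.csv", dtype=str)  # simplified path and dtypes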

@@ -0,0 +1,291 @@
# flake8: noqa: W0613,W0611,F811
from dataclasses import dataclass

import pytest

from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
from data_pipeline.score.score_narwhal import ScoreNarwhal

from .fixtures import final_score_df  # pylint: disable=unused-import

logger = get_module_logger(__name__)

pytestmark = pytest.mark.smoketest


@dataclass
class PercentileTestConfig:
    percentile_column_name: str
    threshold_column_name: str
    threshold: float
    percentile_column_need_suffix: bool = True

    @property
    def full_percentile_column_name(self):
        if self.percentile_column_need_suffix:
            return (
                self.percentile_column_name
                + field_names.PERCENTILE_FIELD_SUFFIX
            )
        return self.percentile_column_name


### TODO: we need to blow this out for all eight categories
def _check_percentile_against_threshold(df, config: PercentileTestConfig):
    """Note - for the purpose of testing, this fills with False"""
    is_minimum_flagged_ok = (
        df[df[config.threshold_column_name].fillna(False)][
            config.full_percentile_column_name
        ].min()
        >= config.threshold
    )
    is_maximum_not_flagged_ok = (
        df[~df[config.threshold_column_name].fillna(False)][
            config.full_percentile_column_name
        ].max()
        < config.threshold
    )
    errors = []
    if not is_minimum_flagged_ok:
        errors.append(
            f"For column {config.threshold_column_name}, there is someone flagged below {config.threshold} percentile!"
        )
    if not is_maximum_not_flagged_ok:
        errors.append(
            f"For column {config.threshold_column_name}, there is someone not flagged above {config.threshold} percentile!"
        )
    return errors


def test_percentile_columns(final_score_df):
    low_income = PercentileTestConfig(
        field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED,
        ScoreNarwhal.LOW_INCOME_THRESHOLD,
    )
    population_loss = PercentileTestConfig(
        field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
        field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    agricultural_loss = PercentileTestConfig(
        field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
        field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    building_loss = PercentileTestConfig(
        field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
        field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    flood = PercentileTestConfig(
        field_names.FUTURE_FLOOD_RISK_FIELD,
        field_names.HIGH_FUTURE_FLOOD_RISK_FIELD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    wildfire = PercentileTestConfig(
        field_names.FUTURE_WILDFIRE_RISK_FIELD,
        field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    low_high_school = PercentileTestConfig(
        field_names.HIGH_SCHOOL_ED_FIELD,
        field_names.LOW_HS_EDUCATION_FIELD,
        ScoreNarwhal.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD,
        percentile_column_need_suffix=False,
    )
    donut_hole_income = PercentileTestConfig(
        field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED_DONUTS,
        ScoreNarwhal.LOW_INCOME_THRESHOLD_DONUT,
    )
    donut_hole_adjacency = PercentileTestConfig(
        (field_names.SCORE_N_COMMUNITIES + field_names.ADJACENCY_INDEX_SUFFIX),
        field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD,
        ScoreNarwhal.SCORE_THRESHOLD_DONUT,
        percentile_column_need_suffix=False,
    )
    diesel = PercentileTestConfig(
        field_names.DIESEL_FIELD,
        field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    dot_burden = PercentileTestConfig(
        field_names.DOT_TRAVEL_BURDEN_FIELD,
        field_names.DOT_BURDEN_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    traffic_proximity = PercentileTestConfig(
        field_names.TRAFFIC_FIELD,
        field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    energy_burden = PercentileTestConfig(
        field_names.ENERGY_BURDEN_FIELD,
        field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    pm25 = PercentileTestConfig(
        field_names.PM25_FIELD,
        field_names.PM25_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    kitchen_plumbing = PercentileTestConfig(
        field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD,
        field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    # Leadpaint is handled below in a separate method
    housing = PercentileTestConfig(
        field_names.HOUSING_BURDEN_FIELD,
        field_names.HOUSING_BURDEN_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    non_natural_space = PercentileTestConfig(
        field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME,
        field_names.NON_NATURAL_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    rmp = PercentileTestConfig(
        field_names.RMP_FIELD,
        field_names.RMP_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    npl = PercentileTestConfig(
        field_names.NPL_FIELD,
        field_names.NPL_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    tsdf = PercentileTestConfig(
        field_names.TSDF_FIELD,
        field_names.TSDF_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    wastewater = PercentileTestConfig(
        field_names.WASTEWATER_FIELD,
        field_names.WASTEWATER_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    ust = PercentileTestConfig(
        field_names.UST_FIELD,
        field_names.UST_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    diabetes = PercentileTestConfig(
        field_names.DIABETES_FIELD,
        field_names.DIABETES_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    asthma = PercentileTestConfig(
        field_names.ASTHMA_FIELD,
        field_names.ASTHMA_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    heart_disease = PercentileTestConfig(
        field_names.HEART_DISEASE_FIELD,
        field_names.HEART_DISEASE_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    low_life_expectancy = PercentileTestConfig(
        field_names.LOW_LIFE_EXPECTANCY_FIELD,
        field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    unemployment = PercentileTestConfig(
        field_names.UNEMPLOYMENT_FIELD,
        field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    low_median_income = PercentileTestConfig(
        field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
        field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    linguist_isolation = PercentileTestConfig(
        field_names.LINGUISTIC_ISO_FIELD,
        field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    poverty = PercentileTestConfig(
        field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
        field_names.POVERTY_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    errors = []
    for threshhold_config in (
        low_income,
        population_loss,
        agricultural_loss,
        building_loss,
        flood,
        wildfire,
        low_high_school,
        donut_hole_income,
        donut_hole_adjacency,
        dot_burden,
        diesel,
        traffic_proximity,
        energy_burden,
        pm25,
        kitchen_plumbing,
        housing,
        non_natural_space,
        rmp,
        npl,
        tsdf,
        wastewater,
        ust,
        diabetes,
        asthma,
        heart_disease,
        low_life_expectancy,
        unemployment,
        low_median_income,
        linguist_isolation,
        poverty,
    ):
        errors.extend(
            _check_percentile_against_threshold(
                final_score_df, threshhold_config
            )
        )
    error_text = "\n".join(errors)
    assert not errors, error_text


def test_lead_paint_indicator(
    final_score_df,
):
    """We need special logic here because this is a combined threshold, so we need this test to have two parts.
    1. We construct our own threshold columns
    2. We make sure it's the same as the threshold column in the dataframe
    """
    lead_pfs = (
        field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
    )
    home_val_pfs = (
        field_names.MEDIAN_HOUSE_VALUE_FIELD
        + field_names.PERCENTILE_FIELD_SUFFIX
    )
    combined_proxy_boolean = field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD
    tmp_lead_threshold = (
        final_score_df[lead_pfs] >= ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD
    )
    tmp_mhv_threshold = (
        final_score_df[home_val_pfs]
        <= ScoreNarwhal.MEDIAN_HOUSE_VALUE_THRESHOLD
    )
    true_combined_proxy = tmp_lead_threshold & tmp_mhv_threshold
    assert (
        tmp_mhv_threshold.sum() > 0
    ), "MHV threshold alone does not capture any homes"
    assert final_score_df[combined_proxy_boolean].equals(
        true_combined_proxy
    ), "Lead proxy calculated improperly"
    assert (
        tmp_lead_threshold.sum() > true_combined_proxy.sum()
    ), "House value is not further limiting this proxy"

@@ -0,0 +1,205 @@
# flake8: noqa: W0613,W0611,F811
from dataclasses import dataclass
from typing import List

import pytest
import pandas as pd

from data_pipeline.score import field_names

from .fixtures import final_score_df  # pylint: disable=unused-import

pytestmark = pytest.mark.smoketest


def _helper_test_count_exceeding_threshold(df, col, error_check=1000):
    """Fills NA with False"""
    return df[df[col].fillna(False)].shape[0] >= error_check


def _helper_single_threshold_test(df, col, socioeconomic_column, score_column):
    """Note that this fills nulls in the threshold column where nulls exist"""
    nulls_dont_exist = (
        df[df[col].fillna(False) & df[socioeconomic_column]][score_column]
        .isna()
        .sum()
        == 0
    )
    only_trues = df[df[col].fillna(False) & df[socioeconomic_column]][
        score_column
    ].min()
    return nulls_dont_exist, only_trues


@dataclass
class ThresholdTestConfig:
    name: str
    threshhold_columns: List[str]
    ses_column_name: str = field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
    score_column_name: str = field_names.SCORE_N_COMMUNITIES

    @property
    def error_message(self):
        return f"Eligibility columns have an error, {self.name}"


def check_for_threshhold_errors(
    df: pd.DataFrame, config: ThresholdTestConfig
) -> List[str]:
    errors = []
    for col in config.threshhold_columns:
        nulls_dont_exist, only_trues = _helper_single_threshold_test(
            df,
            col,
            config.ses_column_name,
            config.score_column_name,
        )
        proper_threshold_identification = (
            _helper_test_count_exceeding_threshold(df, col)
        )
        if not nulls_dont_exist:
            errors.append(
                f"For {col}, threshold is not calculated right -- there are NaNs in Score"
            )
        if not only_trues:
            errors.append(
                f"For {col} and {config.ses_column_name}, threshold is not calculated right "
                f"-- there are Falses where there should only be Trues"
            )
        if not proper_threshold_identification:
            errors.append(
                f"Threshold {col} returns too few tracts, are you sure it's nationally-representative?"
            )
    if errors:
        errors.append(config.error_message)
    return errors


def test_threshholds(final_score_df):
    climate_thresholds = ThresholdTestConfig(
        "climate",
        [
            field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.HIGH_FUTURE_FLOOD_RISK_FIELD,
            field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD,
        ],
    )
    energy_thresholds = ThresholdTestConfig(
        "energy",
        [
            field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD,
            field_names.PM25_EXCEEDS_PCTILE_THRESHOLD,
        ],
    )
    transportation_thresholds = ThresholdTestConfig(
        "transportation",
        [
            field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD,
            field_names.DOT_BURDEN_PCTILE_THRESHOLD,
            field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD,
        ],
    )
    housing_thresholds = ThresholdTestConfig(
        "housing",
        [
            field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
            field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD,
            field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD,
            field_names.HOUSING_BURDEN_PCTILE_THRESHOLD,
            field_names.NON_NATURAL_PCTILE_THRESHOLD,
        ],
    )
    pollution_thresholds = ThresholdTestConfig(
        "pollution",
        [
            field_names.RMP_PCTILE_THRESHOLD,
            field_names.NPL_PCTILE_THRESHOLD,
            field_names.TSDF_PCTILE_THRESHOLD,
            field_names.AML_BOOLEAN,
            field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
        ],
    )
    water_thresholds = ThresholdTestConfig(
        "water",
        [
            field_names.WASTEWATER_PCTILE_THRESHOLD,
            field_names.UST_PCTILE_THRESHOLD,
        ],
    )
    health_thresholds = ThresholdTestConfig(
        "health",
        [
            field_names.DIABETES_PCTILE_THRESHOLD,
            field_names.ASTHMA_PCTILE_THRESHOLD,
            field_names.HEART_DISEASE_PCTILE_THRESHOLD,
            field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD,
        ],
    )
    workforce_base_thresholds = ThresholdTestConfig(
        "workforce (not island areas)",
        [
            field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
            field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
            field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
            field_names.POVERTY_PCTILE_THRESHOLD,
        ],
        ses_column_name=field_names.LOW_HS_EDUCATION_FIELD,
    )
    errors = []
    for threshhold_config in [
        climate_thresholds,
        energy_thresholds,
        transportation_thresholds,
        housing_thresholds,
        pollution_thresholds,
        water_thresholds,
        health_thresholds,
        workforce_base_thresholds,
    ]:
        errors.extend(
            check_for_threshhold_errors(final_score_df, threshhold_config)
        )
    error_text = "\n".join(errors)
    assert not errors, error_text


def test_max_40_percent_DAC(final_score_df):
    score_col_with_donuts = field_names.FINAL_SCORE_N_BOOLEAN
    total_population_col = field_names.TOTAL_POP_FIELD
    assert (
        final_score_df[score_col_with_donuts].isna().sum() == 0
    ), f"Error: {score_col_with_donuts} contains NULLs"
    assert (
        final_score_df[final_score_df[score_col_with_donuts]][
            total_population_col
        ].sum()
        / final_score_df[total_population_col].sum()
    ) < 0.4, "Error: the scoring methodology identifies >40% of people in the US as disadvantaged"
    assert (
        final_score_df[score_col_with_donuts].sum() > 0
    ), "FYI: You've identified no tracts at all!"


def test_donut_hole_addition_to_score_n(final_score_df):
    score_col_with_donuts = field_names.FINAL_SCORE_N_BOOLEAN
    score_col = field_names.SCORE_N_COMMUNITIES
    donut_hole_score_only = (
        field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX
    )
    count_donuts = final_score_df[donut_hole_score_only].sum()
    count_n = final_score_df[score_col].sum()
    count_n_with_donuts = final_score_df[score_col_with_donuts].sum()
    new_donuts = final_score_df[
        final_score_df[donut_hole_score_only] & ~final_score_df[score_col]
    ].shape[0]
    assert (
        new_donuts + count_n == count_n_with_donuts
    ), "The math doesn't work! The number of new donut hole tracts plus score tracts (base) does not equal the total number of tracts identified"
    assert (
        count_donuts < count_n
    ), "There are more donut hole tracts than base tracts. How can it be?"
    assert (
        new_donuts > 0
    ), "FYI: The adjacency index is doing nothing. Consider removing it?"