mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-06 15:14:18 -07:00
Score tests (#1847)
* update Python version on README; tuple typing fix * Alaska tribal points fix (#1821) * Bump mistune from 0.8.4 to 2.0.3 in /data/data-pipeline (#1777) Bumps [mistune](https://github.com/lepture/mistune) from 0.8.4 to 2.0.3. - [Release notes](https://github.com/lepture/mistune/releases) - [Changelog](https://github.com/lepture/mistune/blob/master/docs/changes.rst) - [Commits](https://github.com/lepture/mistune/compare/v0.8.4...v2.0.3) --- updated-dependencies: - dependency-name: mistune dependency-type: indirect ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * poetry update * initial pass of score tests * add threshold tests * added ses threshold (not donut, not island) * testing suite -- stopping for the day * added test for lead proxy indicator * Refactor score tests to make them less verbose and more direct (#1865) * Cleanup tests slightly before refactor (#1846) * Refactor score calculations tests * Feedback from review * Refactor output tests like calculation tests (#1846) (#1870) * Reorganize files (#1846) * Switch from lru_cache to fixture scopes (#1846) * Add tests for all factors (#1846) * Mark smoketests and run as part of be deploy (#1846) * Update renamed var (#1846) * Switch from named tuple to dataclass (#1846) This is annoying, but pylint in python3.8 was crashing parsing the named tuple. We weren't using any namedtuple-specific features, so I made the type a dataclass just to get pylint to behave. 
* Add default timeout to requests (#1846) * Fix type (#1846) * Fix merge mistake on poetry.lock (#1846) Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov> Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Matt Bowen <83967628+mattbowen-usds@users.noreply.github.com> Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
This commit is contained in:
parent
e539db86ab
commit
1c4d3e4142
19 changed files with 1425 additions and 29 deletions
|
@ -52,3 +52,16 @@ def mock_etl(monkeypatch, mock_paths) -> None:
|
|||
data_path, tmp_path = mock_paths
|
||||
monkeypatch.setattr(ExtractTransformLoad, "DATA_PATH", data_path)
|
||||
monkeypatch.setattr(ExtractTransformLoad, "TMP_PATH", tmp_path)
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(config, items):
    """Skip smoketest-marked items unless the run selects tests explicitly.

    When the invocation carries a keyword expression or a marker expression,
    nothing is changed here and pytest applies its own selection. Otherwise
    every collected item that carries the ``smoketest`` keyword receives a
    ``skip`` marker.
    """
    keyword_filter = config.option.keyword
    marker_filter = config.option.markexpr
    if keyword_filter or marker_filter:
        return  # let pytest handle this

    marker_name = "smoketest"
    skip_marker = pytest.mark.skip(reason=f"{marker_name} not selected")
    smoketest_items = (
        collected for collected in items if marker_name in collected.keywords
    )
    for collected in smoketest_items:
        collected.add_marker(skip_marker)
|
||||
|
|
12
data/data-pipeline/data_pipeline/tests/score/fixtures.py
Normal file
12
data/data-pipeline/data_pipeline/tests/score/fixtures.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
import pandas as pd
|
||||
import pytest
|
||||
from data_pipeline.config import settings
|
||||
from data_pipeline.score import field_names
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def final_score_df():
    """Read the full USA score CSV into a DataFrame, once per test session.

    The file is located under ``settings.APP_ROOT`` at
    ``data/score/csv/full/usa.csv``; the tract GEOID column is read as ``str``.
    """
    score_csv_path = (
        settings.APP_ROOT / "data" / "score" / "csv" / "full" / "usa.csv"
    )
    column_dtypes = {field_names.GEOID_TRACT_FIELD: str}
    return pd.read_csv(score_csv_path, dtype=column_dtypes)
|
291
data/data-pipeline/data_pipeline/tests/score/test_calculation.py
Normal file
291
data/data-pipeline/data_pipeline/tests/score/test_calculation.py
Normal file
|
@ -0,0 +1,291 @@
|
|||
# flake8: noqa: W0613,W0611,F811
|
||||
from dataclasses import dataclass
|
||||
import pytest
|
||||
from data_pipeline.score import field_names
|
||||
from data_pipeline.utils import get_module_logger
|
||||
from data_pipeline.score.score_narwhal import ScoreNarwhal
|
||||
from .fixtures import final_score_df # pylint: disable=unused-import
|
||||
|
||||
# Logger for this test module, created via data_pipeline.utils.get_module_logger.
logger = get_module_logger(__name__)
|
||||
|
||||
# Module-level pytest mark: applies the `smoketest` marker to every test in this file.
pytestmark = pytest.mark.smoketest
|
||||
|
||||
|
||||
@dataclass
class PercentileTestConfig:
    """Pairs a percentile column with the boolean threshold column derived from it.

    ``threshold`` is the cutoff the percentile is compared against. When
    ``percentile_column_need_suffix`` is true, the percentile suffix from
    ``field_names`` is appended to ``percentile_column_name`` to obtain the
    column to read; otherwise the name is used unchanged.
    """

    percentile_column_name: str
    threshold_column_name: str
    threshold: float
    percentile_column_need_suffix: bool = True

    @property
    def full_percentile_column_name(self) -> str:
        """Return the percentile column name, with the suffix applied if required."""
        base_name = self.percentile_column_name
        return (
            base_name + field_names.PERCENTILE_FIELD_SUFFIX
            if self.percentile_column_need_suffix
            else base_name
        )
|
||||
|
||||
|
||||
### TODO: we need to blow this out for all eight categories
|
||||
def _check_percentile_against_threshold(df, config: "PercentileTestConfig"):
    """Check that a boolean threshold column agrees with its percentile column.

    Two conditions are verified:

    * every row flagged in ``config.threshold_column_name`` has a percentile
      (``config.full_percentile_column_name``) of at least ``config.threshold``;
    * every row that is not flagged has a percentile strictly below it.

    Note - for the purpose of testing, nulls in the threshold column are
    treated as False (not flagged).

    Args:
        df: DataFrame holding both the threshold and the percentile column.
        config: Names of the two columns plus the threshold value.

    Returns:
        A list of error messages; empty when both conditions hold. If either
        group has no non-null percentile, its min/max is NaN, the comparison
        is False, and the corresponding error is reported.
    """
    # Build the mask once and force a real boolean dtype. Without the cast the
    # result of fillna() may remain object dtype, in which case unary `~`
    # inverts the Python bools bitwise (~True == -2) instead of negating them.
    is_flagged = df[config.threshold_column_name].fillna(False).astype(bool)
    percentiles = df[config.full_percentile_column_name]

    lowest_flagged = percentiles[is_flagged].min()
    highest_not_flagged = percentiles[~is_flagged].max()

    is_minimum_flagged_ok = lowest_flagged >= config.threshold
    is_maximum_not_flagged_ok = highest_not_flagged < config.threshold

    errors = []
    if not is_minimum_flagged_ok:
        errors.append(
            f"For column {config.threshold_column_name}, there is someone flagged below {config.threshold} percentile!"
        )
    if not is_maximum_not_flagged_ok:
        errors.append(
            f"For column {config.threshold_column_name}, there is someone not flagged above {config.threshold} percentile!"
        )
    return errors
|
||||
|
||||
|
||||
def test_percentile_columns(final_score_df):
    """Verify each boolean threshold column against its percentile column.

    One ``PercentileTestConfig`` is built per indicator, every config is run
    through ``_check_percentile_against_threshold``, and the test fails with
    all collected messages if any check reports a problem.
    """

    def burden(percentile_field, flag_field):
        # Most indicators share the environmental burden threshold.
        return PercentileTestConfig(
            percentile_field,
            flag_field,
            ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )

    # Leadpaint is handled below in a separate method
    configs_to_check = (
        # Low income
        PercentileTestConfig(
            field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
            field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED,
            ScoreNarwhal.LOW_INCOME_THRESHOLD,
        ),
        # Population, agricultural and building loss; flood; wildfire
        burden(
            field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
            field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
            field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
            field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.FUTURE_FLOOD_RISK_FIELD,
            field_names.HIGH_FUTURE_FLOOD_RISK_FIELD,
        ),
        burden(
            field_names.FUTURE_WILDFIRE_RISK_FIELD,
            field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD,
        ),
        # Low high-school attainment (column name used without the suffix)
        PercentileTestConfig(
            field_names.HIGH_SCHOOL_ED_FIELD,
            field_names.LOW_HS_EDUCATION_FIELD,
            ScoreNarwhal.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD,
            percentile_column_need_suffix=False,
        ),
        # Donut-hole income
        PercentileTestConfig(
            field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
            field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED_DONUTS,
            ScoreNarwhal.LOW_INCOME_THRESHOLD_DONUT,
        ),
        # Donut-hole adjacency (column name used without the suffix)
        PercentileTestConfig(
            (
                field_names.SCORE_N_COMMUNITIES
                + field_names.ADJACENCY_INDEX_SUFFIX
            ),
            field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD,
            ScoreNarwhal.SCORE_THRESHOLD_DONUT,
            percentile_column_need_suffix=False,
        ),
        # DOT travel burden, diesel, traffic proximity
        burden(
            field_names.DOT_TRAVEL_BURDEN_FIELD,
            field_names.DOT_BURDEN_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.DIESEL_FIELD,
            field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.TRAFFIC_FIELD,
            field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD,
        ),
        # Energy burden, PM2.5
        burden(
            field_names.ENERGY_BURDEN_FIELD,
            field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.PM25_FIELD,
            field_names.PM25_EXCEEDS_PCTILE_THRESHOLD,
        ),
        # Kitchen/plumbing, housing burden, non-natural space
        burden(
            field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD,
            field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.HOUSING_BURDEN_FIELD,
            field_names.HOUSING_BURDEN_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME,
            field_names.NON_NATURAL_PCTILE_THRESHOLD,
        ),
        # RMP, NPL, TSDF
        burden(field_names.RMP_FIELD, field_names.RMP_PCTILE_THRESHOLD),
        burden(field_names.NPL_FIELD, field_names.NPL_PCTILE_THRESHOLD),
        burden(field_names.TSDF_FIELD, field_names.TSDF_PCTILE_THRESHOLD),
        # Wastewater, UST
        burden(
            field_names.WASTEWATER_FIELD,
            field_names.WASTEWATER_PCTILE_THRESHOLD,
        ),
        burden(field_names.UST_FIELD, field_names.UST_PCTILE_THRESHOLD),
        # Diabetes, asthma, heart disease, low life expectancy
        burden(
            field_names.DIABETES_FIELD,
            field_names.DIABETES_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.ASTHMA_FIELD,
            field_names.ASTHMA_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.HEART_DISEASE_FIELD,
            field_names.HEART_DISEASE_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.LOW_LIFE_EXPECTANCY_FIELD,
            field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD,
        ),
        # Unemployment, low median income, linguistic isolation, poverty
        burden(
            field_names.UNEMPLOYMENT_FIELD,
            field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
            field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.LINGUISTIC_ISO_FIELD,
            field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
        ),
        burden(
            field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
            field_names.POVERTY_PCTILE_THRESHOLD,
        ),
    )

    errors = [
        message
        for percentile_config in configs_to_check
        for message in _check_percentile_against_threshold(
            final_score_df, percentile_config
        )
    ]
    assert not errors, "\n".join(errors)
|
||||
|
||||
|
||||
def test_lead_paint_indicator(
    final_score_df,
):
    """Check the combined lead-paint proxy flag.

    This indicator combines two thresholds, so the test has two parts:

    1. Rebuild the flag from the lead paint percentile (at or above the
       environmental burden threshold) and the median house value percentile
       (at or below the median house value threshold).
    2. Confirm the rebuilt flag equals the flag column in the dataframe.

    It also asserts that the house value condition selects at least one row
    and that it removes at least one row from the lead-only selection.
    """
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    lead_percentile = final_score_df[
        field_names.LEAD_PAINT_FIELD + pctile_suffix
    ]
    home_value_percentile = final_score_df[
        field_names.MEDIAN_HOUSE_VALUE_FIELD + pctile_suffix
    ]

    exceeds_lead = (
        lead_percentile >= ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD
    )
    low_home_value = (
        home_value_percentile <= ScoreNarwhal.MEDIAN_HOUSE_VALUE_THRESHOLD
    )
    expected_proxy = exceeds_lead & low_home_value

    assert (
        low_home_value.sum() > 0
    ), "MHV threshold alone does not capture any homes"

    recorded_proxy = final_score_df[
        field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD
    ]
    assert recorded_proxy.equals(
        expected_proxy
    ), "Lead proxy calculated improperly"

    assert (
        exceeds_lead.sum() > expected_proxy.sum()
    ), "House value is not further limiting this proxy"
|
205
data/data-pipeline/data_pipeline/tests/score/test_output.py
Normal file
205
data/data-pipeline/data_pipeline/tests/score/test_output.py
Normal file
|
@ -0,0 +1,205 @@
|
|||
# flake8: noqa: W0613,W0611,F811
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
import pytest
|
||||
import pandas as pd
|
||||
from data_pipeline.score import field_names
|
||||
from .fixtures import final_score_df # pylint: disable=unused-import
|
||||
|
||||
# Module-level pytest mark: applies the `smoketest` marker to every test in this file.
pytestmark = pytest.mark.smoketest
|
||||
|
||||
|
||||
def _helper_test_count_exceeding_threshold(df, col, error_check=1000):
    """Return whether at least ``error_check`` rows are flagged in ``col``.

    Nulls in ``col`` are filled with False before counting.
    """
    is_flagged = df[col].fillna(False)
    flagged_rows = df.loc[is_flagged]
    return len(flagged_rows) >= error_check
|
||||
|
||||
|
||||
def _helper_single_threshold_test(df, col, socioeconomic_column, score_column):
    """Inspect the score of rows flagged by both a threshold and an SES column.

    Note that nulls in the threshold column ``col`` are filled with False
    before it is combined with ``socioeconomic_column``.

    Returns:
        A pair ``(nulls_dont_exist, only_trues)``: whether the selected rows
        have no null in ``score_column``, and the minimum of ``score_column``
        over the selected rows.
    """
    meets_both = df[col].fillna(False) & df[socioeconomic_column]
    selected_scores = df.loc[meets_both, score_column]

    null_count = selected_scores.isna().sum()
    nulls_dont_exist = null_count == 0
    only_trues = selected_scores.min()
    return nulls_dont_exist, only_trues
|
||||
|
||||
|
||||
@dataclass
class ThresholdTestConfig:
    """Groups the threshold columns of one category for the eligibility checks.

    ``name`` labels the category in error output. ``ses_column_name`` and
    ``score_column_name`` are the socioeconomic and score columns the
    threshold columns are checked against; both default to constants from
    ``field_names``.
    """

    name: str
    threshhold_columns: List[str]
    ses_column_name: str = field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
    score_column_name: str = field_names.SCORE_N_COMMUNITIES

    @property
    def error_message(self) -> str:
        """Return the summary line naming the category that had errors."""
        category = self.name
        return f"Eligibility columns have an error, {category}"
|
||||
|
||||
|
||||
def check_for_threshhold_errors(
    df: pd.DataFrame, config: ThresholdTestConfig
) -> List[str]:
    """Run the eligibility checks for every threshold column in ``config``.

    For each column three things are checked: the score has no nulls among
    the selected rows, the minimum score among them is truthy, and enough
    rows are flagged by the column.

    Returns:
        The list of failure messages. When it is non-empty, the category
        summary from ``config.error_message`` is appended at the end.
    """
    problems: List[str] = []
    for column in config.threshhold_columns:
        score_has_no_nulls, lowest_score = _helper_single_threshold_test(
            df,
            column,
            config.ses_column_name,
            config.score_column_name,
        )
        enough_tracts_flagged = _helper_test_count_exceeding_threshold(
            df, column
        )
        # Each entry pairs a check result with the message to report on failure.
        outcomes = (
            (
                score_has_no_nulls,
                f"For {column}, threshold is not calculated right -- there are NaNs in Score",
            ),
            (
                lowest_score,
                f"For {column} and {config.ses_column_name}, threshold is not calculated right "
                f"-- there are Falses where there should only be Trues",
            ),
            (
                enough_tracts_flagged,
                f"Threshold {column} returns too few tracts, are you sure it's nationally-representative?",
            ),
        )
        problems.extend(message for passed, message in outcomes if not passed)
    if problems:
        problems.append(config.error_message)
    return problems
|
||||
|
||||
|
||||
def test_threshholds(final_score_df):
    """Run the eligibility checks for every category of threshold columns.

    Each category is described by a ``ThresholdTestConfig``; all messages
    returned by ``check_for_threshhold_errors`` are collected and the test
    fails with the combined text if there are any.
    """
    category_configs = [
        ThresholdTestConfig(
            name="climate",
            threshhold_columns=[
                field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD,
                field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD,
                field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD,
                field_names.HIGH_FUTURE_FLOOD_RISK_FIELD,
                field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD,
            ],
        ),
        ThresholdTestConfig(
            name="energy",
            threshhold_columns=[
                field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD,
                field_names.PM25_EXCEEDS_PCTILE_THRESHOLD,
            ],
        ),
        ThresholdTestConfig(
            name="transportation",
            threshhold_columns=[
                field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD,
                field_names.DOT_BURDEN_PCTILE_THRESHOLD,
                field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD,
            ],
        ),
        ThresholdTestConfig(
            name="housing",
            threshhold_columns=[
                field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
                field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD,
                field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD,
                field_names.HOUSING_BURDEN_PCTILE_THRESHOLD,
                field_names.NON_NATURAL_PCTILE_THRESHOLD,
            ],
        ),
        ThresholdTestConfig(
            name="pollution",
            threshhold_columns=[
                field_names.RMP_PCTILE_THRESHOLD,
                field_names.NPL_PCTILE_THRESHOLD,
                field_names.TSDF_PCTILE_THRESHOLD,
                field_names.AML_BOOLEAN,
                field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
            ],
        ),
        ThresholdTestConfig(
            name="water",
            threshhold_columns=[
                field_names.WASTEWATER_PCTILE_THRESHOLD,
                field_names.UST_PCTILE_THRESHOLD,
            ],
        ),
        ThresholdTestConfig(
            name="health",
            threshhold_columns=[
                field_names.DIABETES_PCTILE_THRESHOLD,
                field_names.ASTHMA_PCTILE_THRESHOLD,
                field_names.HEART_DISEASE_PCTILE_THRESHOLD,
                field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD,
            ],
        ),
        # Workforce is checked against the low high-school education column
        # rather than the default socioeconomic column.
        ThresholdTestConfig(
            name="workforce (not island areas)",
            threshhold_columns=[
                field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
                field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
                field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
                field_names.POVERTY_PCTILE_THRESHOLD,
            ],
            ses_column_name=field_names.LOW_HS_EDUCATION_FIELD,
        ),
    ]

    errors = [
        message
        for category_config in category_configs
        for message in check_for_threshhold_errors(
            final_score_df, category_config
        )
    ]
    assert not errors, "\n".join(errors)
|
||||
|
||||
|
||||
def test_max_40_percent_DAC(final_score_df):
    """Check the final score flag for nulls, population share and emptiness.

    Asserts, in order, that the final score column has no nulls, that the
    flagged rows hold less than 40% of the summed total population, and that
    at least one row is flagged.
    """
    flag_column = field_names.FINAL_SCORE_N_BOOLEAN
    is_disadvantaged = final_score_df[flag_column]

    missing_flags = is_disadvantaged.isna().sum()
    assert missing_flags == 0, f"Error: {flag_column} contains NULLs"

    population = final_score_df[field_names.TOTAL_POP_FIELD]
    flagged_population_share = (
        population[is_disadvantaged].sum() / population.sum()
    )
    assert (
        flagged_population_share < 0.4
    ), "Error: the scoring methodology identifies >40% of people in the US as disadvantaged"

    flagged_count = is_disadvantaged.sum()
    assert flagged_count > 0, "FYI: You've identified no tracts at all!"
|
||||
|
||||
|
||||
def test_donut_hole_addition_to_score_n(final_score_df):
    """Check how donut-hole tracts combine with the base score.

    Asserts that the rows flagged only by the donut-hole column plus the
    rows flagged by the base score add up to the rows flagged by the final
    score, that donut-hole rows are fewer than base rows, and that at least
    one row is added by the donut-hole column.
    """
    base_column = field_names.SCORE_N_COMMUNITIES
    donut_column = (
        field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX
    )
    final_column = field_names.FINAL_SCORE_N_BOOLEAN

    is_donut = final_score_df[donut_column]
    is_base = final_score_df[base_column]
    is_final = final_score_df[final_column]

    donut_total = is_donut.sum()
    base_total = is_base.sum()
    final_total = is_final.sum()

    # Rows flagged by the donut-hole column that the base score did not flag.
    donut_only_rows = final_score_df[is_donut & ~is_base]
    added_by_donut = donut_only_rows.shape[0]

    assert (
        added_by_donut + base_total == final_total
    ), "The math doesn't work! The number of new donut hole tracts plus score tracts (base) does not equal the total number of tracts identified"

    assert (
        donut_total < base_total
    ), "There are more donut hole tracts than base tracts. How can it be?"

    assert (
        added_by_donut > 0
    ), "FYI: The adjacency index is doing nothing. Consider removing it?"
|
Loading…
Add table
Add a link
Reference in a new issue