Score tests (#1847)

* update Python version in README; tuple typing fix

* Alaska tribal points fix (#1821)

* Bump mistune from 0.8.4 to 2.0.3 in /data/data-pipeline (#1777)

Bumps [mistune](https://github.com/lepture/mistune) from 0.8.4 to 2.0.3.
- [Release notes](https://github.com/lepture/mistune/releases)
- [Changelog](https://github.com/lepture/mistune/blob/master/docs/changes.rst)
- [Commits](https://github.com/lepture/mistune/compare/v0.8.4...v2.0.3)

---
updated-dependencies:
- dependency-name: mistune
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* poetry update

* initial pass of score tests

* add threshold tests

* added SES threshold (not donut, not island)

* testing suite -- stopping for the day

* added test for lead proxy indicator

* Refactor score tests to make them less verbose and more direct (#1865)

* Cleanup tests slightly before refactor (#1846)

* Refactor score calculation tests

* Feedback from review

* Refactor output tests like calculation tests (#1846) (#1870)

* Reorganize files (#1846)

* Switch from lru_cache to fixture scopes (#1846)

* Add tests for all factors (#1846)

* Mark smoketests and run as part of backend deploy (#1846)

* Update renamed var (#1846)

* Switch from named tuple to dataclass (#1846)

This is annoying, but pylint under Python 3.8 was crashing while parsing the
NamedTuple. We weren't using any namedtuple-specific features, so I made the
type a dataclass just to get pylint to behave.
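
For illustration only (not part of this commit), the shape of the change is roughly the following, using a trimmed-down PercentileTestConfig; the "before" NamedTuple version is an assumption:

    # Before (assumed): a typed NamedTuple that pylint under Python 3.8 failed to parse.
    # class PercentileTestConfig(NamedTuple):
    #     percentile_column_name: str
    #     threshold: float
    #     percentile_column_need_suffix: bool = True

    # After: the same fields on a dataclass; no namedtuple-specific behavior was needed.
    from dataclasses import dataclass

    @dataclass
    class PercentileTestConfig:
        percentile_column_name: str
        threshold: float
        percentile_column_need_suffix: bool = True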

* Add default timeout to requests (#1846)

* Fix type (#1846)

* Fix merge mistake on poetry.lock (#1846)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Matt Bowen <83967628+mattbowen-usds@users.noreply.github.com>
Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
commit 1c4d3e4142
Emma Nechamkin, 2022-08-26 15:23:20 -04:00, committed by GitHub
19 changed files with 1425 additions and 29 deletions

@@ -52,3 +52,16 @@ def mock_etl(monkeypatch, mock_paths) -> None:
    data_path, tmp_path = mock_paths
    monkeypatch.setattr(ExtractTransformLoad, "DATA_PATH", data_path)
    monkeypatch.setattr(ExtractTransformLoad, "TMP_PATH", tmp_path)


def pytest_collection_modifyitems(config, items):
    keywordexpr = config.option.keyword
    markexpr = config.option.markexpr
    if keywordexpr or markexpr:
        return  # let pytest handle this

    smoketest = "smoketest"
    skip_mymarker = pytest.mark.skip(reason=f"{smoketest} not selected")
    for item in items:
        if smoketest in item.keywords:
            item.add_marker(skip_mymarker)
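
As a usage sketch (not part of the diff): with this collection hook, a plain pytest run skips everything marked smoketest, and the backend deploy selects the mark explicitly. The pytest.main calls below mirror the equivalent CLI invocations; whether the marker is also registered in the project's pytest configuration is an assumption here.

    import pytest

    # Default run: no -k/-m expression is given, so the hook adds a skip
    # marker to every test carrying the "smoketest" keyword.
    pytest.main([])

    # Smoketest run (e.g. during backend deploy): the -m expression makes the
    # hook return early, so the smoketests against the full score CSV execute.
    pytest.main(["-m", "smoketest"])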

@@ -0,0 +1,12 @@
import pandas as pd
import pytest
from data_pipeline.config import settings
from data_pipeline.score import field_names


@pytest.fixture(scope="session")
def final_score_df():
    return pd.read_csv(
        settings.APP_ROOT / "data" / "score" / "csv" / "full" / "usa.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )
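
For context on the "lru_cache to fixture scopes" commit above (an assumed sketch, not taken from the diff): scope="session" makes pytest load the full score CSV once and share the DataFrame across every smoketest, which is what a cached module-level loader would otherwise approximate.

    from functools import lru_cache

    import pandas as pd

    # Hypothetical pre-refactor shape: cache the loaded frame on a helper
    # function instead of relying on pytest's fixture scoping.
    @lru_cache(maxsize=1)
    def load_final_score_df() -> pd.DataFrame:
        return pd.read_csv("usa.csv", dtype=str)  # simplified path and dtypes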

@@ -0,0 +1,291 @@
# flake8: noqa: W0613,W0611,F811
from dataclasses import dataclass

import pytest

from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger
from data_pipeline.score.score_narwhal import ScoreNarwhal

from .fixtures import final_score_df  # pylint: disable=unused-import

logger = get_module_logger(__name__)

pytestmark = pytest.mark.smoketest


@dataclass
class PercentileTestConfig:
    percentile_column_name: str
    threshold_column_name: str
    threshold: float
    percentile_column_need_suffix: bool = True

    @property
    def full_percentile_column_name(self):
        if self.percentile_column_need_suffix:
            return (
                self.percentile_column_name
                + field_names.PERCENTILE_FIELD_SUFFIX
            )
        return self.percentile_column_name


### TODO: we need to blow this out for all eight categories
def _check_percentile_against_threshold(df, config: PercentileTestConfig):
    """Note - for the purpose of testing, this fills with False"""
    is_minimum_flagged_ok = (
        df[df[config.threshold_column_name].fillna(False)][
            config.full_percentile_column_name
        ].min()
        >= config.threshold
    )
    is_maximum_not_flagged_ok = (
        df[~df[config.threshold_column_name].fillna(False)][
            config.full_percentile_column_name
        ].max()
        < config.threshold
    )
    errors = []
    if not is_minimum_flagged_ok:
        errors.append(
            f"For column {config.threshold_column_name}, there is someone flagged below {config.threshold} percentile!"
        )
    if not is_maximum_not_flagged_ok:
        errors.append(
            f"For column {config.threshold_column_name}, there is someone not flagged above {config.threshold} percentile!"
        )
    return errors


def test_percentile_columns(final_score_df):
    low_income = PercentileTestConfig(
        field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED,
        ScoreNarwhal.LOW_INCOME_THRESHOLD,
    )
    population_loss = PercentileTestConfig(
        field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
        field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    agricultural_loss = PercentileTestConfig(
        field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
        field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    building_loss = PercentileTestConfig(
        field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
        field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    flood = PercentileTestConfig(
        field_names.FUTURE_FLOOD_RISK_FIELD,
        field_names.HIGH_FUTURE_FLOOD_RISK_FIELD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    wildfire = PercentileTestConfig(
        field_names.FUTURE_WILDFIRE_RISK_FIELD,
        field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    low_high_school = PercentileTestConfig(
        field_names.HIGH_SCHOOL_ED_FIELD,
        field_names.LOW_HS_EDUCATION_FIELD,
        ScoreNarwhal.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD,
        percentile_column_need_suffix=False,
    )
    donut_hole_income = PercentileTestConfig(
        field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
        field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED_DONUTS,
        ScoreNarwhal.LOW_INCOME_THRESHOLD_DONUT,
    )
    donut_hole_adjacency = PercentileTestConfig(
        (field_names.SCORE_N_COMMUNITIES + field_names.ADJACENCY_INDEX_SUFFIX),
        field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD,
        ScoreNarwhal.SCORE_THRESHOLD_DONUT,
        percentile_column_need_suffix=False,
    )
    diesel = PercentileTestConfig(
        field_names.DIESEL_FIELD,
        field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    dot_burden = PercentileTestConfig(
        field_names.DOT_TRAVEL_BURDEN_FIELD,
        field_names.DOT_BURDEN_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    traffic_proximity = PercentileTestConfig(
        field_names.TRAFFIC_FIELD,
        field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    energy_burden = PercentileTestConfig(
        field_names.ENERGY_BURDEN_FIELD,
        field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    pm25 = PercentileTestConfig(
        field_names.PM25_FIELD,
        field_names.PM25_EXCEEDS_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    kitchen_plumbing = PercentileTestConfig(
        field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD,
        field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    # Leadpaint is handled below in a separate method
    housing = PercentileTestConfig(
        field_names.HOUSING_BURDEN_FIELD,
        field_names.HOUSING_BURDEN_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    non_natural_space = PercentileTestConfig(
        field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME,
        field_names.NON_NATURAL_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    rmp = PercentileTestConfig(
        field_names.RMP_FIELD,
        field_names.RMP_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    npl = PercentileTestConfig(
        field_names.NPL_FIELD,
        field_names.NPL_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    tsdf = PercentileTestConfig(
        field_names.TSDF_FIELD,
        field_names.TSDF_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    wastewater = PercentileTestConfig(
        field_names.WASTEWATER_FIELD,
        field_names.WASTEWATER_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    ust = PercentileTestConfig(
        field_names.UST_FIELD,
        field_names.UST_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    diabetes = PercentileTestConfig(
        field_names.DIABETES_FIELD,
        field_names.DIABETES_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    asthma = PercentileTestConfig(
        field_names.ASTHMA_FIELD,
        field_names.ASTHMA_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    heart_disease = PercentileTestConfig(
        field_names.HEART_DISEASE_FIELD,
        field_names.HEART_DISEASE_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    low_life_expectancy = PercentileTestConfig(
        field_names.LOW_LIFE_EXPECTANCY_FIELD,
        field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    unemployment = PercentileTestConfig(
        field_names.UNEMPLOYMENT_FIELD,
        field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    low_median_income = PercentileTestConfig(
        field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
        field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    linguist_isolation = PercentileTestConfig(
        field_names.LINGUISTIC_ISO_FIELD,
        field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    poverty = PercentileTestConfig(
        field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
        field_names.POVERTY_PCTILE_THRESHOLD,
        ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD,
    )
    errors = []
    for threshhold_config in (
        low_income,
        population_loss,
        agricultural_loss,
        building_loss,
        flood,
        wildfire,
        low_high_school,
        donut_hole_income,
        donut_hole_adjacency,
        dot_burden,
        diesel,
        traffic_proximity,
        energy_burden,
        pm25,
        kitchen_plumbing,
        housing,
        non_natural_space,
        rmp,
        npl,
        tsdf,
        wastewater,
        ust,
        diabetes,
        asthma,
        heart_disease,
        low_life_expectancy,
        unemployment,
        low_median_income,
        linguist_isolation,
        poverty,
    ):
        errors.extend(
            _check_percentile_against_threshold(
                final_score_df, threshhold_config
            )
        )
    error_text = "\n".join(errors)
    assert not errors, error_text


def test_lead_paint_indicator(
    final_score_df,
):
    """We need special logic here because this is a combined threshold, so we need this test to have two parts.
    1. We construct our own threshold columns
    2. We make sure it's the same as the threshold column in the dataframe
    """
    lead_pfs = (
        field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
    )
    home_val_pfs = (
        field_names.MEDIAN_HOUSE_VALUE_FIELD
        + field_names.PERCENTILE_FIELD_SUFFIX
    )
    combined_proxy_boolean = field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD
    tmp_lead_threshold = (
        final_score_df[lead_pfs] >= ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD
    )
    tmp_mhv_threshold = (
        final_score_df[home_val_pfs]
        <= ScoreNarwhal.MEDIAN_HOUSE_VALUE_THRESHOLD
    )
    true_combined_proxy = tmp_lead_threshold & tmp_mhv_threshold
    assert (
        tmp_mhv_threshold.sum() > 0
    ), "MHV threshold alone does not capture any homes"
    assert final_score_df[combined_proxy_boolean].equals(
        true_combined_proxy
    ), "Lead proxy calculated improperly"
    assert (
        tmp_lead_threshold.sum() > true_combined_proxy.sum()
    ), "House value is not further limiting this proxy"

@@ -0,0 +1,205 @@
# flake8: noqa: W0613,W0611,F811
from dataclasses import dataclass
from typing import List

import pytest
import pandas as pd

from data_pipeline.score import field_names

from .fixtures import final_score_df  # pylint: disable=unused-import

pytestmark = pytest.mark.smoketest


def _helper_test_count_exceeding_threshold(df, col, error_check=1000):
    """Fills NA with False"""
    return df[df[col].fillna(False)].shape[0] >= error_check


def _helper_single_threshold_test(df, col, socioeconomic_column, score_column):
    """Note that this fills nulls in the threshold column where nulls exist"""
    nulls_dont_exist = (
        df[df[col].fillna(False) & df[socioeconomic_column]][score_column]
        .isna()
        .sum()
        == 0
    )
    only_trues = df[df[col].fillna(False) & df[socioeconomic_column]][
        score_column
    ].min()
    return nulls_dont_exist, only_trues


@dataclass
class ThresholdTestConfig:
    name: str
    threshhold_columns: List[str]
    ses_column_name: str = field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
    score_column_name: str = field_names.SCORE_N_COMMUNITIES

    @property
    def error_message(self):
        return f"Eligibility columns have an error, {self.name}"


def check_for_threshhold_errors(
    df: pd.DataFrame, config: ThresholdTestConfig
) -> List[str]:
    errors = []
    for col in config.threshhold_columns:
        nulls_dont_exist, only_trues = _helper_single_threshold_test(
            df,
            col,
            config.ses_column_name,
            config.score_column_name,
        )
        proper_threshold_identification = (
            _helper_test_count_exceeding_threshold(df, col)
        )
        if not nulls_dont_exist:
            errors.append(
                f"For {col}, threshold is not calculated right -- there are NaNs in Score"
            )
        if not only_trues:
            errors.append(
                f"For {col} and {config.ses_column_name}, threshold is not calculated right "
                f"-- there are Falses where there should only be Trues"
            )
        if not proper_threshold_identification:
            errors.append(
                f"Threshold {col} returns too few tracts, are you sure it's nationally-representative?"
            )
    if errors:
        errors.append(config.error_message)
    return errors


def test_threshholds(final_score_df):
    climate_thresholds = ThresholdTestConfig(
        "climate",
        [
            field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.HIGH_FUTURE_FLOOD_RISK_FIELD,
            field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD,
        ],
    )
    energy_thresholds = ThresholdTestConfig(
        "energy",
        [
            field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD,
            field_names.PM25_EXCEEDS_PCTILE_THRESHOLD,
        ],
    )
    transportation_thresholds = ThresholdTestConfig(
        "transportation",
        [
            field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD,
            field_names.DOT_BURDEN_PCTILE_THRESHOLD,
            field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD,
        ],
    )
    housing_thresholds = ThresholdTestConfig(
        "housing",
        [
            field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
            field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD,
            field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD,
            field_names.HOUSING_BURDEN_PCTILE_THRESHOLD,
            field_names.NON_NATURAL_PCTILE_THRESHOLD,
        ],
    )
    pollution_thresholds = ThresholdTestConfig(
        "pollution",
        [
            field_names.RMP_PCTILE_THRESHOLD,
            field_names.NPL_PCTILE_THRESHOLD,
            field_names.TSDF_PCTILE_THRESHOLD,
            field_names.AML_BOOLEAN,
            field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
        ],
    )
    water_thresholds = ThresholdTestConfig(
        "water",
        [
            field_names.WASTEWATER_PCTILE_THRESHOLD,
            field_names.UST_PCTILE_THRESHOLD,
        ],
    )
    health_thresholds = ThresholdTestConfig(
        "health",
        [
            field_names.DIABETES_PCTILE_THRESHOLD,
            field_names.ASTHMA_PCTILE_THRESHOLD,
            field_names.HEART_DISEASE_PCTILE_THRESHOLD,
            field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD,
        ],
    )
    workforce_base_thresholds = ThresholdTestConfig(
        "workforce (not island areas)",
        [
            field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
            field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
            field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
            field_names.POVERTY_PCTILE_THRESHOLD,
        ],
        ses_column_name=field_names.LOW_HS_EDUCATION_FIELD,
    )
    errors = []
    for threshhold_config in [
        climate_thresholds,
        energy_thresholds,
        transportation_thresholds,
        housing_thresholds,
        pollution_thresholds,
        water_thresholds,
        health_thresholds,
        workforce_base_thresholds,
    ]:
        errors.extend(
            check_for_threshhold_errors(final_score_df, threshhold_config)
        )
    error_text = "\n".join(errors)
    assert not errors, error_text


def test_max_40_percent_DAC(final_score_df):
    score_col_with_donuts = field_names.FINAL_SCORE_N_BOOLEAN
    total_population_col = field_names.TOTAL_POP_FIELD
    assert (
        final_score_df[score_col_with_donuts].isna().sum() == 0
    ), f"Error: {score_col_with_donuts} contains NULLs"
    assert (
        final_score_df[final_score_df[score_col_with_donuts]][
            total_population_col
        ].sum()
        / final_score_df[total_population_col].sum()
    ) < 0.4, "Error: the scoring methodology identifies >40% of people in the US as disadvantaged"
    assert (
        final_score_df[score_col_with_donuts].sum() > 0
    ), "FYI: You've identified no tracts at all!"


def test_donut_hole_addition_to_score_n(final_score_df):
    score_col_with_donuts = field_names.FINAL_SCORE_N_BOOLEAN
    score_col = field_names.SCORE_N_COMMUNITIES
    donut_hole_score_only = (
        field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX
    )
    count_donuts = final_score_df[donut_hole_score_only].sum()
    count_n = final_score_df[score_col].sum()
    count_n_with_donuts = final_score_df[score_col_with_donuts].sum()
    new_donuts = final_score_df[
        final_score_df[donut_hole_score_only] & ~final_score_df[score_col]
    ].shape[0]
    assert (
        new_donuts + count_n == count_n_with_donuts
    ), "The math doesn't work! The number of new donut hole tracts plus score tracts (base) does not equal the total number of tracts identified"
    assert (
        count_donuts < count_n
    ), "There are more donut hole tracts than base tracts. How can it be?"
    assert (
        new_donuts > 0
    ), "FYI: The adjacency index is doing nothing. Consider removing it?"