Issue 1910: Do not impute income for 0 population tracts (#1918)

* should be working, has unnecessary loggers
* removing loggers and cleaning up
* updating ejscreen tests
* adding tests and responding to PR feedback
* fixing broken smoke test
* delete smoketest docs
2025-09-30 16:13:18 -07:00 · 2022-09-26 11:00:21 -04:00 · 2022-09-26 11:00:21 -04:00 · 9fb9874a15
commit 9fb9874a15
parent 9e85375d9b
13 changed files with 150 additions and 75 deletions
--- a/data/data-pipeline/data_pipeline/tests/score/fixtures.py
+++ b/data/data-pipeline/data_pipeline/tests/score/fixtures.py
@@ -15,10 +15,10 @@ def final_score_df():


@pytest.fixture()
-def census_df():
-    census_csv = constants.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
+def census_acs_df():
+    census_acs_csv = constants.DATA_PATH / "dataset" / "census_acs" / "usa.csv"
    return pd.read_csv(
-        census_csv,
+        census_acs_csv,
        dtype={GEOID_TRACT_FIELD: "string"},
        low_memory=False,
    )
--- a/data/data-pipeline/data_pipeline/tests/score/test_output.py
+++ b/data/data-pipeline/data_pipeline/tests/score/test_output.py
@@ -11,7 +11,7 @@ from .fixtures import (
    final_score_df,
    ejscreen_df,
    hud_housing_df,
-    census_df,
+    census_acs_df,
    cdc_places_df,
    census_acs_median_incomes_df,
    cdc_life_expectancy_df,
@@ -235,7 +235,7 @@ def test_data_sources(
    final_score_df,
    hud_housing_df,
    ejscreen_df,
-    census_df,
+    census_acs_df,
    cdc_places_df,
    census_acs_median_incomes_df,
    cdc_life_expectancy_df,
@@ -337,3 +337,41 @@ def test_output_tracts(final_score_df, national_tract_df):

 def test_all_tracts_have_scores(final_score_df):
    assert not final_score_df[field_names.SCORE_N_COMMUNITIES].isna().any()
+
+
+def test_imputed_tracts(final_score_df):
+    # Make sure that any tracts with zero population have null imputed income
+    tracts_with_zero_population_df = final_score_df[
+        final_score_df[field_names.TOTAL_POP_FIELD] == 0
+    ]
+    assert (
+        tracts_with_zero_population_df[
+            field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
+        ]
+        .isna()
+        .all()
+    )
+
+    # Make sure that any tracts with null population have null imputed income
+    tracts_with_null_population_df = final_score_df[
+        final_score_df[field_names.TOTAL_POP_FIELD].isnull()
+    ]
+    assert (
+        tracts_with_null_population_df[
+            field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
+        ]
+        .isna()
+        .all()
+    )
+
+    # Make sure that no tracts with population have null imputed income
+    tracts_with_some_population_df = final_score_df[
+        final_score_df[field_names.TOTAL_POP_FIELD] > 0
+    ]
+    assert (
+        not tracts_with_some_population_df[
+            field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
+        ]
+        .isna()
+        .any()
+    )