Issue 1910: Do not impute income for 0 population tracts (#1918)

* should be working, has unnecessary loggers

* removing loggers and cleaning up

* updating ejscreen tests

* adding tests and responding to PR feedback

* fixing broken smoke test

* delete smoketest docs
This commit is contained in:
Lucas Merrill Brown 2022-09-26 11:00:21 -04:00 committed by GitHub
commit 9fb9874a15
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 150 additions and 75 deletions

View file

@ -15,10 +15,10 @@ def final_score_df():
@pytest.fixture()
def census_df():
census_csv = constants.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
def census_acs_df():
census_acs_csv = constants.DATA_PATH / "dataset" / "census_acs" / "usa.csv"
return pd.read_csv(
census_csv,
census_acs_csv,
dtype={GEOID_TRACT_FIELD: "string"},
low_memory=False,
)

View file

@ -11,7 +11,7 @@ from .fixtures import (
final_score_df,
ejscreen_df,
hud_housing_df,
census_df,
census_acs_df,
cdc_places_df,
census_acs_median_incomes_df,
cdc_life_expectancy_df,
@ -235,7 +235,7 @@ def test_data_sources(
final_score_df,
hud_housing_df,
ejscreen_df,
census_df,
census_acs_df,
cdc_places_df,
census_acs_median_incomes_df,
cdc_life_expectancy_df,
@ -337,3 +337,41 @@ def test_output_tracts(final_score_df, national_tract_df):
def test_all_tracts_have_scores(final_score_df):
    """Check that no row of the final score frame has a null score value."""
    # Flag every row whose score column is missing, then require that none are.
    score_is_missing = final_score_df[field_names.SCORE_N_COMMUNITIES].isna()
    assert not score_is_missing.any()
def test_imputed_tracts(final_score_df):
    """Check that imputed income is null exactly where a tract has no population.

    Three groups of rows are inspected, keyed on the total population column:
    zero population, null population, and strictly positive population.
    """
    population = final_score_df[field_names.TOTAL_POP_FIELD]
    # True wherever the imputed income value is missing; computed once and
    # then sliced per population group with a boolean mask.
    imputed_income_is_null = final_score_df[
        field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
    ].isna()

    # Make sure that any tracts with zero population have null imputed income
    has_zero_population = population == 0
    assert imputed_income_is_null[has_zero_population].all()

    # Make sure that any tracts with null population have null imputed income
    has_null_population = population.isnull()
    assert imputed_income_is_null[has_null_population].all()

    # Make sure that no tracts with population have null imputed income
    has_some_population = population > 0
    assert not imputed_income_is_null[has_some_population].any()