fixing tests

This commit is contained in:
lucasmbrown-usds 2022-09-09 12:20:03 -04:00
parent e325f3e28e
commit 3f98206e6b
8 changed files with 46 additions and 27 deletions

View file

@ -85,8 +85,12 @@ class ExtractTransformLoad:
# NULL_REPRESENTATION is how nulls are represented on the input field # NULL_REPRESENTATION is how nulls are represented on the input field
NULL_REPRESENTATION: str = None NULL_REPRESENTATION: str = None
# Whether this ETL contains data for the nation (the US states) # Whether this ETL contains data for the continental nation (DC & the US states
NATION_EXPECTED_IN_DATA: bool = True # except for Alaska and Hawaii)
CONTINENTAL_US_EXPECTED_IN_DATA: bool = True
# Whether this ETL contains data for Alaska and Hawaii
ALASKA_AND_HAWAII_EXPECTED_IN_DATA: bool = True
# Whether this ETL contains data for Puerto Rico # Whether this ETL contains data for Puerto Rico
PUERTO_RICO_EXPECTED_IN_DATA: bool = True PUERTO_RICO_EXPECTED_IN_DATA: bool = True
@ -223,8 +227,6 @@ class ExtractTransformLoad:
""" """
# TODO: remove this once all ETL classes are converted to using the new # TODO: remove this once all ETL classes are converted to using the new
# base class parameters and patterns. # base class parameters and patterns.
# TODO: determine how to use this currently in the partially refactored world.
# https://github.com/usds/justice40-tool/issues/1891
if self.GEO_LEVEL is None: if self.GEO_LEVEL is None:
logger.info( logger.info(
"Skipping validation step for this class because it does not " "Skipping validation step for this class because it does not "
@ -308,15 +310,17 @@ class ExtractTransformLoad:
) )
# Check whether data contains expected states # Check whether data contains expected states
states_in_output_df = list( states_in_output_df = (
self.output_df[self.GEOID_TRACT_FIELD_NAME] self.output_df[self.GEOID_TRACT_FIELD_NAME]
.astype(str)
.str[0:2] .str[0:2]
.unique() .unique()
.tolist()
) )
compare_to_list_of_expected_state_fips_codes( compare_to_list_of_expected_state_fips_codes(
actual_state_fips_codes=states_in_output_df, actual_state_fips_codes=states_in_output_df,
nation_expected=self.NATION_EXPECTED_IN_DATA, continental_us_expected=self.CONTINENTAL_US_EXPECTED_IN_DATA,
alaska_and_hawaii_expected=self.ALASKA_AND_HAWAII_EXPECTED_IN_DATA,
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA, puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA, island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
additional_fips_codes_not_expected=self.EXPECTED_MISSING_STATES, additional_fips_codes_not_expected=self.EXPECTED_MISSING_STATES,

View file

@ -131,9 +131,9 @@ TILES_NATION_THRESHOLD_COUNT = 21
# 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands # 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"] TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"]
TILES_PUERTO_RICO_FIPS_CODE = ["72"] TILES_PUERTO_RICO_FIPS_CODE = ["72"]
TILES_NATION_FIPS_CODE = [ TILES_ALASKA_AND_HAWAII_FIPS_CODE = ["02", "15"]
TILES_CONTINENTAL_US_FIPS_CODE = [
"01", "01",
"02",
"04", "04",
"05", "05",
"06", "06",
@ -143,7 +143,6 @@ TILES_NATION_FIPS_CODE = [
"11", "11",
"12", "12",
"13", "13",
"15",
"16", "16",
"17", "17",
"18", "18",

View file

@ -10,7 +10,8 @@ from data_pipeline.config import settings
from data_pipeline.etl.score.constants import ( from data_pipeline.etl.score.constants import (
TILES_ISLAND_AREA_FIPS_CODES, TILES_ISLAND_AREA_FIPS_CODES,
TILES_PUERTO_RICO_FIPS_CODE, TILES_PUERTO_RICO_FIPS_CODE,
TILES_NATION_FIPS_CODE, TILES_CONTINENTAL_US_FIPS_CODE,
TILES_ALASKA_AND_HAWAII_FIPS_CODE,
) )
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
from data_pipeline.utils import ( from data_pipeline.utils import (
@ -317,7 +318,8 @@ def create_codebook(
# pylint: disable=too-many-arguments # pylint: disable=too-many-arguments
def compare_to_list_of_expected_state_fips_codes( def compare_to_list_of_expected_state_fips_codes(
actual_state_fips_codes: typing.List[str], actual_state_fips_codes: typing.List[str],
nation_expected: bool = True, continental_us_expected: bool = True,
alaska_and_hawaii_expected: bool = True,
puerto_rico_expected: bool = True, puerto_rico_expected: bool = True,
island_areas_expected: bool = True, island_areas_expected: bool = True,
additional_fips_codes_not_expected: typing.List[str] = None, additional_fips_codes_not_expected: typing.List[str] = None,
@ -327,8 +329,10 @@ def compare_to_list_of_expected_state_fips_codes(
Args: Args:
actual_state_fips_codes (List of str): Actual state codes observed in data actual_state_fips_codes (List of str): Actual state codes observed in data
nation_expected (bool, optional): Do you expect the nation (DC & states) to be continental_us_expected (bool, optional): Do you expect the continental nation
represented in data? (DC & states except for Alaska and Hawaii) to be represented in data?
alaska_and_hawaii_expected (bool, optional): Do you expect Alaska and Hawaii
to be represented in the data?
puerto_rico_expected (bool, optional): Do you expect PR to be represented in data? puerto_rico_expected (bool, optional): Do you expect PR to be represented in data?
island_areas_expected (bool, optional): Do you expect Island Areas to be represented in island_areas_expected (bool, optional): Do you expect Island Areas to be represented in
data? data?
@ -354,11 +358,19 @@ def compare_to_list_of_expected_state_fips_codes(
# Start with the list of all FIPS codes for all states and territories. # Start with the list of all FIPS codes for all states and territories.
expected_states_set = set(get_state_fips_codes(settings.DATA_PATH)) expected_states_set = set(get_state_fips_codes(settings.DATA_PATH))
# If nation (states and DC) are not expected to be included, remove it from the # If continental US is not expected to be included, remove it from the
# expected # expected states set.
# states set. if not continental_us_expected:
if not nation_expected: expected_states_set = expected_states_set - set(
expected_states_set = expected_states_set - set(TILES_NATION_FIPS_CODE) TILES_CONTINENTAL_US_FIPS_CODE
)
# If Alaska and Hawaii are not expected to be included, remove them from the
# expected states set.
if not alaska_and_hawaii_expected:
expected_states_set = expected_states_set - set(
TILES_ALASKA_AND_HAWAII_FIPS_CODE
)
# If Puerto Rico is not expected to be included, remove it from the expected # If Puerto Rico is not expected to be included, remove it from the expected
# states set. # states set.

View file

@ -225,5 +225,5 @@ def test_compare_to_list_of_expected_state_fips_codes():
# Should not raise error because Nation is not to be missing # Should not raise error because Nation is not to be missing
compare_to_list_of_expected_state_fips_codes( compare_to_list_of_expected_state_fips_codes(
actual_state_fips_codes=fips_codes_test_4, nation_expected=False actual_state_fips_codes=fips_codes_test_4, continental_us_expected=False
) )

View file

@ -81,7 +81,7 @@ class CDCLifeExpectancy(ExtractTransformLoad):
# Expect that PR, Island Areas, and Maine/Wisconsin are missing # Expect that PR, Island Areas, and Maine/Wisconsin are missing
compare_to_list_of_expected_state_fips_codes( compare_to_list_of_expected_state_fips_codes(
actual_state_fips_codes=states_in_life_expectancy_usa_file, actual_state_fips_codes=states_in_life_expectancy_usa_file,
nation_expected=self.NATION_EXPECTED_IN_DATA, continental_us_expected=self.CONTINENTAL_US_EXPECTED_IN_DATA,
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA, puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA, island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
additional_fips_codes_not_expected=self.STATES_MISSING_FROM_USA_FILE, additional_fips_codes_not_expected=self.STATES_MISSING_FROM_USA_FILE,
@ -117,7 +117,7 @@ class CDCLifeExpectancy(ExtractTransformLoad):
# Expect that PR and Island Areas are the only things now missing # Expect that PR and Island Areas are the only things now missing
compare_to_list_of_expected_state_fips_codes( compare_to_list_of_expected_state_fips_codes(
actual_state_fips_codes=states_in_combined_df, actual_state_fips_codes=states_in_combined_df,
nation_expected=self.NATION_EXPECTED_IN_DATA, continental_us_expected=self.CONTINENTAL_US_EXPECTED_IN_DATA,
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA, puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA, island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
additional_fips_codes_not_expected=[], additional_fips_codes_not_expected=[],

View file

@ -17,9 +17,7 @@ class WildfireRiskETL(ExtractTransformLoad):
SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_fire.zip" SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_fire.zip"
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
PUERTO_RICO_EXPECTED_IN_DATA = False PUERTO_RICO_EXPECTED_IN_DATA = False
ALASKA_AND_HAWAII_EXPECTED_IN_DATA = False
# Alaska and Hawaii are missing
EXPECTED_MISSING_STATES = ["02", "15"]
# Output score variables (values set on datasets.yml) for linting purposes # Output score variables (values set on datasets.yml) for linting purposes
COUNT_PROPERTIES: str COUNT_PROPERTIES: str

View file

@ -20,6 +20,7 @@ class NatureDeprivedETL(ExtractTransformLoad):
) )
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
PUERTO_RICO_EXPECTED_IN_DATA = False PUERTO_RICO_EXPECTED_IN_DATA = False
ALASKA_AND_HAWAII_EXPECTED_IN_DATA = False
# Alaska and Hawaii are missing # Alaska and Hawaii are missing
EXPECTED_MISSING_STATES = ["02", "15"] EXPECTED_MISSING_STATES = ["02", "15"]

View file

@ -11,7 +11,10 @@ import numpy as np
import pandas as pd import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.score.constants import TILES_NATION_FIPS_CODE from data_pipeline.etl.score.constants import (
TILES_CONTINENTAL_US_FIPS_CODE,
TILES_ALASKA_AND_HAWAII_FIPS_CODE,
)
from data_pipeline.tests.sources.example.etl import ExampleETL from data_pipeline.tests.sources.example.etl import ExampleETL
from data_pipeline.utils import get_module_logger from data_pipeline.utils import get_module_logger
@ -97,11 +100,13 @@ class TestETL:
# Set values to match test fixtures # Set values to match test fixtures
etl_class.EXPECTED_MISSING_STATES = [ etl_class.EXPECTED_MISSING_STATES = [
x x
for x in TILES_NATION_FIPS_CODE for x in TILES_CONTINENTAL_US_FIPS_CODE
+ TILES_ALASKA_AND_HAWAII_FIPS_CODE
if x not in states_expected_from_fixtures if x not in states_expected_from_fixtures
] ]
etl_class.PUERTO_RICO_EXPECTED_IN_DATA = False etl_class.PUERTO_RICO_EXPECTED_IN_DATA = False
etl_class.ISLAND_AREAS_EXPECTED_IN_DATA = False etl_class.ISLAND_AREAS_EXPECTED_IN_DATA = False
etl_class.ALASKA_AND_HAWAII_EXPECTED_IN_DATA = True
return etl_class return etl_class