mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
fixing tests
This commit is contained in:
parent
e325f3e28e
commit
3f98206e6b
8 changed files with 46 additions and 27 deletions
|
@ -85,8 +85,12 @@ class ExtractTransformLoad:
|
||||||
# NULL_REPRESENTATION is how nulls are represented on the input field
|
# NULL_REPRESENTATION is how nulls are represented on the input field
|
||||||
NULL_REPRESENTATION: str = None
|
NULL_REPRESENTATION: str = None
|
||||||
|
|
||||||
# Whether this ETL contains data for the nation (the US states)
|
# Whether this ETL contains data for the continental nation (DC & the US states
|
||||||
NATION_EXPECTED_IN_DATA: bool = True
|
# except for Alaska and Hawaii)
|
||||||
|
CONTINENTAL_US_EXPECTED_IN_DATA: bool = True
|
||||||
|
|
||||||
|
# Whether this ETL contains data for Alaska and Hawaii
|
||||||
|
ALASKA_AND_HAWAII_EXPECTED_IN_DATA: bool = True
|
||||||
|
|
||||||
# Whether this ETL contains data for Puerto Rico
|
# Whether this ETL contains data for Puerto Rico
|
||||||
PUERTO_RICO_EXPECTED_IN_DATA: bool = True
|
PUERTO_RICO_EXPECTED_IN_DATA: bool = True
|
||||||
|
@ -223,8 +227,6 @@ class ExtractTransformLoad:
|
||||||
"""
|
"""
|
||||||
# TODO: remove this once all ETL classes are converted to using the new
|
# TODO: remove this once all ETL classes are converted to using the new
|
||||||
# base class parameters and patterns.
|
# base class parameters and patterns.
|
||||||
# TODO: determine how to use this currently in the partially refactored world.
|
|
||||||
# https://github.com/usds/justice40-tool/issues/1891
|
|
||||||
if self.GEO_LEVEL is None:
|
if self.GEO_LEVEL is None:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Skipping validation step for this class because it does not "
|
"Skipping validation step for this class because it does not "
|
||||||
|
@ -308,15 +310,17 @@ class ExtractTransformLoad:
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check whether data contains expected states
|
# Check whether data contains expected states
|
||||||
states_in_output_df = list(
|
states_in_output_df = (
|
||||||
self.output_df[self.GEOID_TRACT_FIELD_NAME]
|
self.output_df[self.GEOID_TRACT_FIELD_NAME]
|
||||||
.astype(str)
|
|
||||||
.str[0:2]
|
.str[0:2]
|
||||||
.unique()
|
.unique()
|
||||||
|
.tolist()
|
||||||
)
|
)
|
||||||
|
|
||||||
compare_to_list_of_expected_state_fips_codes(
|
compare_to_list_of_expected_state_fips_codes(
|
||||||
actual_state_fips_codes=states_in_output_df,
|
actual_state_fips_codes=states_in_output_df,
|
||||||
nation_expected=self.NATION_EXPECTED_IN_DATA,
|
continental_us_expected=self.CONTINENTAL_US_EXPECTED_IN_DATA,
|
||||||
|
alaska_and_hawaii_expected=self.ALASKA_AND_HAWAII_EXPECTED_IN_DATA,
|
||||||
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
|
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
|
||||||
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
|
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
|
||||||
additional_fips_codes_not_expected=self.EXPECTED_MISSING_STATES,
|
additional_fips_codes_not_expected=self.EXPECTED_MISSING_STATES,
|
||||||
|
|
|
@ -131,9 +131,9 @@ TILES_NATION_THRESHOLD_COUNT = 21
|
||||||
# 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
|
# 60: American Samoa, 66: Guam, 69: N. Mariana Islands, 78: US Virgin Islands
|
||||||
TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"]
|
TILES_ISLAND_AREA_FIPS_CODES = ["60", "66", "69", "78"]
|
||||||
TILES_PUERTO_RICO_FIPS_CODE = ["72"]
|
TILES_PUERTO_RICO_FIPS_CODE = ["72"]
|
||||||
TILES_NATION_FIPS_CODE = [
|
TILES_ALASKA_AND_HAWAII_FIPS_CODE = ["02", "15"]
|
||||||
|
TILES_CONTINENTAL_US_FIPS_CODE = [
|
||||||
"01",
|
"01",
|
||||||
"02",
|
|
||||||
"04",
|
"04",
|
||||||
"05",
|
"05",
|
||||||
"06",
|
"06",
|
||||||
|
@ -143,7 +143,6 @@ TILES_NATION_FIPS_CODE = [
|
||||||
"11",
|
"11",
|
||||||
"12",
|
"12",
|
||||||
"13",
|
"13",
|
||||||
"15",
|
|
||||||
"16",
|
"16",
|
||||||
"17",
|
"17",
|
||||||
"18",
|
"18",
|
||||||
|
|
|
@ -10,7 +10,8 @@ from data_pipeline.config import settings
|
||||||
from data_pipeline.etl.score.constants import (
|
from data_pipeline.etl.score.constants import (
|
||||||
TILES_ISLAND_AREA_FIPS_CODES,
|
TILES_ISLAND_AREA_FIPS_CODES,
|
||||||
TILES_PUERTO_RICO_FIPS_CODE,
|
TILES_PUERTO_RICO_FIPS_CODE,
|
||||||
TILES_NATION_FIPS_CODE,
|
TILES_CONTINENTAL_US_FIPS_CODE,
|
||||||
|
TILES_ALASKA_AND_HAWAII_FIPS_CODE,
|
||||||
)
|
)
|
||||||
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
|
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
|
||||||
from data_pipeline.utils import (
|
from data_pipeline.utils import (
|
||||||
|
@ -317,7 +318,8 @@ def create_codebook(
|
||||||
# pylint: disable=too-many-arguments
|
# pylint: disable=too-many-arguments
|
||||||
def compare_to_list_of_expected_state_fips_codes(
|
def compare_to_list_of_expected_state_fips_codes(
|
||||||
actual_state_fips_codes: typing.List[str],
|
actual_state_fips_codes: typing.List[str],
|
||||||
nation_expected: bool = True,
|
continental_us_expected: bool = True,
|
||||||
|
alaska_and_hawaii_expected: bool = True,
|
||||||
puerto_rico_expected: bool = True,
|
puerto_rico_expected: bool = True,
|
||||||
island_areas_expected: bool = True,
|
island_areas_expected: bool = True,
|
||||||
additional_fips_codes_not_expected: typing.List[str] = None,
|
additional_fips_codes_not_expected: typing.List[str] = None,
|
||||||
|
@ -327,8 +329,10 @@ def compare_to_list_of_expected_state_fips_codes(
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
actual_state_fips_codes (List of str): Actual state codes observed in data
|
actual_state_fips_codes (List of str): Actual state codes observed in data
|
||||||
nation_expected (bool, optional): Do you expect the nation (DC & states) to be
|
continental_us_expected (bool, optional): Do you expect the continental nation
|
||||||
represented in data?
|
(DC & states except for Alaska and Hawaii) to be represented in data?
|
||||||
|
alaska_and_hawaii_expected (bool, optional): Do you expect Alaska and Hawaii
|
||||||
|
to be represented in the data?
|
||||||
puerto_rico_expected (bool, optional): Do you expect PR to be represented in data?
|
puerto_rico_expected (bool, optional): Do you expect PR to be represented in data?
|
||||||
island_areas_expected (bool, optional): Do you expect Island Areas to be represented in
|
island_areas_expected (bool, optional): Do you expect Island Areas to be represented in
|
||||||
data?
|
data?
|
||||||
|
@ -354,11 +358,19 @@ def compare_to_list_of_expected_state_fips_codes(
|
||||||
# Start with the list of all FIPS codes for all states and territories.
|
# Start with the list of all FIPS codes for all states and territories.
|
||||||
expected_states_set = set(get_state_fips_codes(settings.DATA_PATH))
|
expected_states_set = set(get_state_fips_codes(settings.DATA_PATH))
|
||||||
|
|
||||||
# If nation (states and DC) are not expected to be included, remove it from the
|
# If continental US is not expected to be included, remove it from the
|
||||||
# expected
|
# expected states set.
|
||||||
# states set.
|
if not continental_us_expected:
|
||||||
if not nation_expected:
|
expected_states_set = expected_states_set - set(
|
||||||
expected_states_set = expected_states_set - set(TILES_NATION_FIPS_CODE)
|
TILES_CONTINENTAL_US_FIPS_CODE
|
||||||
|
)
|
||||||
|
|
||||||
|
# If Alaska and Hawaii are not expected to be included, remove them from the
|
||||||
|
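# expected states set.
# NOTE(review): the guard that follows tests `continental_us_expected`, not `alaska_and_hawaii_expected`, so the Alaska/Hawaii flag is never consulted here; presumably a copy-paste slip, confirm before relying on it.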
|
||||||
|
if not continental_us_expected:
|
||||||
|
expected_states_set = expected_states_set - set(
|
||||||
|
TILES_ALASKA_AND_HAWAII_FIPS_CODE
|
||||||
|
)
|
||||||
|
|
||||||
# If Puerto Rico is not expected to be included, remove it from the expected
|
# If Puerto Rico is not expected to be included, remove it from the expected
|
||||||
# states set.
|
# states set.
|
||||||
|
|
|
@ -225,5 +225,5 @@ def test_compare_to_list_of_expected_state_fips_codes():
|
||||||
|
|
||||||
# Should not raise error because Nation is not to be missing
|
# Should not raise an error because the continental US is not expected in the data
|
||||||
compare_to_list_of_expected_state_fips_codes(
|
compare_to_list_of_expected_state_fips_codes(
|
||||||
actual_state_fips_codes=fips_codes_test_4, nation_expected=False
|
actual_state_fips_codes=fips_codes_test_4, continental_us_expected=False
|
||||||
)
|
)
|
||||||
|
|
|
@ -81,7 +81,7 @@ class CDCLifeExpectancy(ExtractTransformLoad):
|
||||||
# Expect that PR, Island Areas, and Maine/Wisconsin are missing
|
# Expect that PR, Island Areas, and Maine/Wisconsin are missing
|
||||||
compare_to_list_of_expected_state_fips_codes(
|
compare_to_list_of_expected_state_fips_codes(
|
||||||
actual_state_fips_codes=states_in_life_expectancy_usa_file,
|
actual_state_fips_codes=states_in_life_expectancy_usa_file,
|
||||||
nation_expected=self.NATION_EXPECTED_IN_DATA,
|
continental_us_expected=self.CONTINENTAL_US_EXPECTED_IN_DATA,
|
||||||
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
|
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
|
||||||
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
|
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
|
||||||
additional_fips_codes_not_expected=self.STATES_MISSING_FROM_USA_FILE,
|
additional_fips_codes_not_expected=self.STATES_MISSING_FROM_USA_FILE,
|
||||||
|
@ -117,7 +117,7 @@ class CDCLifeExpectancy(ExtractTransformLoad):
|
||||||
# Expect that PR and Island Areas are the only things now missing
|
# Expect that PR and Island Areas are the only things now missing
|
||||||
compare_to_list_of_expected_state_fips_codes(
|
compare_to_list_of_expected_state_fips_codes(
|
||||||
actual_state_fips_codes=states_in_combined_df,
|
actual_state_fips_codes=states_in_combined_df,
|
||||||
nation_expected=self.NATION_EXPECTED_IN_DATA,
|
continental_us_expected=self.CONTINENTAL_US_EXPECTED_IN_DATA,
|
||||||
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
|
puerto_rico_expected=self.PUERTO_RICO_EXPECTED_IN_DATA,
|
||||||
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
|
island_areas_expected=self.ISLAND_AREAS_EXPECTED_IN_DATA,
|
||||||
additional_fips_codes_not_expected=[],
|
additional_fips_codes_not_expected=[],
|
||||||
|
|
|
@ -17,9 +17,7 @@ class WildfireRiskETL(ExtractTransformLoad):
|
||||||
SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_fire.zip"
|
SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_fire.zip"
|
||||||
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
||||||
PUERTO_RICO_EXPECTED_IN_DATA = False
|
PUERTO_RICO_EXPECTED_IN_DATA = False
|
||||||
|
ALASKA_AND_HAWAII_EXPECTED_IN_DATA = False
|
||||||
# Alaska and Hawaii are missing
|
|
||||||
EXPECTED_MISSING_STATES = ["02", "15"]
|
|
||||||
|
|
||||||
# Output score variables (values set on datasets.yml) for linting purposes
|
# Output score variables (values set on datasets.yml) for linting purposes
|
||||||
COUNT_PROPERTIES: str
|
COUNT_PROPERTIES: str
|
||||||
|
|
|
@ -20,6 +20,7 @@ class NatureDeprivedETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
||||||
PUERTO_RICO_EXPECTED_IN_DATA = False
|
PUERTO_RICO_EXPECTED_IN_DATA = False
|
||||||
|
ALASKA_AND_HAWAII_EXPECTED_IN_DATA = False
|
||||||
|
|
||||||
# Alaska and Hawaii are missing
|
# Alaska and Hawaii are missing
|
||||||
EXPECTED_MISSING_STATES = ["02", "15"]
|
EXPECTED_MISSING_STATES = ["02", "15"]
|
||||||
|
|
|
@ -11,7 +11,10 @@ import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||||
from data_pipeline.etl.score.constants import TILES_NATION_FIPS_CODE
|
from data_pipeline.etl.score.constants import (
|
||||||
|
TILES_CONTINENTAL_US_FIPS_CODE,
|
||||||
|
TILES_ALASKA_AND_HAWAII_FIPS_CODE,
|
||||||
|
)
|
||||||
from data_pipeline.tests.sources.example.etl import ExampleETL
|
from data_pipeline.tests.sources.example.etl import ExampleETL
|
||||||
from data_pipeline.utils import get_module_logger
|
from data_pipeline.utils import get_module_logger
|
||||||
|
|
||||||
|
@ -97,11 +100,13 @@ class TestETL:
|
||||||
# Set values to match test fixtures
|
# Set values to match test fixtures
|
||||||
etl_class.EXPECTED_MISSING_STATES = [
|
etl_class.EXPECTED_MISSING_STATES = [
|
||||||
x
|
x
|
||||||
for x in TILES_NATION_FIPS_CODE
|
for x in TILES_CONTINENTAL_US_FIPS_CODE
|
||||||
|
+ TILES_ALASKA_AND_HAWAII_FIPS_CODE
|
||||||
if x not in states_expected_from_fixtures
|
if x not in states_expected_from_fixtures
|
||||||
]
|
]
|
||||||
etl_class.PUERTO_RICO_EXPECTED_IN_DATA = False
|
etl_class.PUERTO_RICO_EXPECTED_IN_DATA = False
|
||||||
etl_class.ISLAND_AREAS_EXPECTED_IN_DATA = False
|
etl_class.ISLAND_AREAS_EXPECTED_IN_DATA = False
|
||||||
|
etl_class.ALASKA_AND_HAWAII_EXPECTED_IN_DATA = True
|
||||||
|
|
||||||
return etl_class
|
return etl_class
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue