mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-30 14:41:18 -07:00
Add tests for all non-census sources (#1899)
* Refactor CDC life-expectancy (#1554) * Update to new tract list (#1554) * Adjust for tests (#1848) * Add tests for cdc_places (#1848) * Add EJScreen tests (#1848) * Add tests for HUD housing (#1848) * Add tests for GeoCorr (#1848) * Add persistent poverty tests (#1848) * Update for sources without zips, for new validation (#1848) * Update tests for new multi-CSV bug (#1848) Lucas updated the CDC life expectancy data to handle a bug where two states are missing from the US Overall download. Since virtually none of our other ETL classes download multiple CSVs directly like this, it required a pretty invasive new mocking strategy. * Add basic tests for nature deprived (#1848) * Add wildfire tests (#1848) * Add flood risk tests (#1848) * Add DOT travel tests (#1848) * Add historic redlining tests (#1848) * Add tests for ME and WI (#1848) * Update now that validation exists (#1848) * Adjust for validation (#1848) * Add health insurance back to cdc places (#1848) Oops * Update tests with new field (#1848) * Test for blank tract removal (#1848) * Add tracts for clipping behavior * Test clipping and zfill behavior (#1848) * Fix bad test assumption (#1848) * Simplify class, add test for tract padding (#1848) * Fix percentage inversion, update tests (#1848) Looking through the transformations, I noticed that we were subtracting a percentage that is usually between 0-100 from 1 instead of 100, and so were ending up with some surprising results. Confirmed with lucasmbrown-usds * Add note about first street data (#1848)
This commit is contained in:
parent
4d02525bb3
commit
876655d2b2
88 changed files with 2032 additions and 178 deletions
|
@ -0,0 +1,2 @@
|
|||
"Tract ID","STATE2KX","CNTY2KX","TRACT2KX","e(0)","se(e(0))","Abridged life table flag"
|
||||
23001010100,23,001,010100,72.3,2.2928,3
|
|
|
@ -0,0 +1,16 @@
|
|||
Tract ID,STATE2KX,CNTY2KX,TRACT2KX,e(0),se(e(0)),Abridged life table flag
|
||||
15001021010,15,001,021010,77.4,1.6548,2
|
||||
15001021101,15,001,021101,82.5,3.9086,3
|
||||
15001021402,15,001,021402,80.4,1.093,2
|
||||
15001021800,15,001,021800,79.5,1.132,2
|
||||
15003010201,15,003,010201,79.4,1.5261,3
|
||||
15007040603,15,007,040603,86.3,2.2285,3
|
||||
15007040604,15,007,040604,84.9,2.1995,3
|
||||
15007040700,15,007,040700,80.4,0.7571,2
|
||||
15009030100,15,009,030100,77.2,1.8736,3
|
||||
15009030402,15,009,030402,83.5,1.8267,3
|
||||
15009030800,15,009,030800,82.2,1.6251,3
|
||||
06027000800,06,007,040500,99.1,3.1415,3
|
||||
06069000802,06,001,020100,99.1,3.1415,3
|
||||
06061021322,06,007,040300,99.1,3.1415,3
|
||||
15009030201,15,009,030201,99.1,3.1415,3
|
|
|
@ -0,0 +1,2 @@
|
|||
"Tract ID","STATE2KX","CNTY2KX","TRACT2KX","e(0)","se(e(0))","Abridged life table flag"
|
||||
55001950201,55,001,950201,74.5,2.5471,3
|
|
|
@ -0,0 +1,16 @@
|
|||
Tract ID,STATE2KX,CNTY2KX,TRACT2KX,e(0),se(e(0)),Abridged life table flag
|
||||
15001021010,15,1,21010,77.4000000000,1.6548000000,2
|
||||
15001021101,15,1,21101,82.5000000000,3.9086000000,3
|
||||
15001021402,15,1,21402,80.4000000000,1.0930000000,2
|
||||
15001021800,15,1,21800,79.5000000000,1.1320000000,2
|
||||
15003010201,15,3,10201,79.4000000000,1.5261000000,3
|
||||
15007040603,15,7,40603,86.3000000000,2.2285000000,3
|
||||
15007040604,15,7,40604,84.9000000000,2.1995000000,3
|
||||
15007040700,15,7,40700,80.4000000000,0.7571000000,2
|
||||
15009030100,15,9,30100,77.2000000000,1.8736000000,3
|
||||
15009030402,15,9,30402,83.5000000000,1.8267000000,3
|
||||
15009030800,15,9,30800,82.2000000000,1.6251000000,3
|
||||
6027000800,6,7,40500,99.1000000000,3.1415000000,3
|
||||
6069000802,6,1,20100,99.1000000000,3.1415000000,3
|
||||
6061021322,6,7,40300,99.1000000000,3.1415000000,3
|
||||
15009030201,15,9,30201,99.1000000000,3.1415000000,3
|
|
|
@ -0,0 +1,18 @@
|
|||
GEOID10_TRACT,Life expectancy (years)
|
||||
15001021010,77.4000000000
|
||||
15001021101,82.5000000000
|
||||
15001021402,80.4000000000
|
||||
15001021800,79.5000000000
|
||||
15003010201,79.4000000000
|
||||
15007040603,86.3000000000
|
||||
15007040604,84.9000000000
|
||||
15007040700,80.4000000000
|
||||
15009030100,77.2000000000
|
||||
15009030402,83.5000000000
|
||||
15009030800,82.2000000000
|
||||
06027000800,99.1000000000
|
||||
06069000802,99.1000000000
|
||||
06061021322,99.1000000000
|
||||
15009030201,99.1000000000
|
||||
23001010100,72.3000000000
|
||||
55001950201,74.5000000000
|
|
|
@ -0,0 +1,18 @@
|
|||
GEOID10_TRACT,STATE2KX,CNTY2KX,TRACT2KX,Life expectancy (years),se(e(0)),Abridged life table flag
|
||||
15001021010,15,1,21010,77.4000000000,1.6548000000,2
|
||||
15001021101,15,1,21101,82.5000000000,3.9086000000,3
|
||||
15001021402,15,1,21402,80.4000000000,1.0930000000,2
|
||||
15001021800,15,1,21800,79.5000000000,1.1320000000,2
|
||||
15003010201,15,3,10201,79.4000000000,1.5261000000,3
|
||||
15007040603,15,7,40603,86.3000000000,2.2285000000,3
|
||||
15007040604,15,7,40604,84.9000000000,2.1995000000,3
|
||||
15007040700,15,7,40700,80.4000000000,0.7571000000,2
|
||||
15009030100,15,9,30100,77.2000000000,1.8736000000,3
|
||||
15009030402,15,9,30402,83.5000000000,1.8267000000,3
|
||||
15009030800,15,9,30800,82.2000000000,1.6251000000,3
|
||||
06027000800,06,7,40500,99.1000000000,3.1415000000,3
|
||||
06069000802,06,1,20100,99.1000000000,3.1415000000,3
|
||||
06061021322,06,7,40300,99.1000000000,3.1415000000,3
|
||||
15009030201,15,9,30201,99.1000000000,3.1415000000,3
|
||||
23001010100,23,1,10100,72.3000000000,2.2928000000,3
|
||||
55001950201,55,1,950201,74.5000000000,2.5471000000,3
|
|
|
@ -0,0 +1,112 @@
|
|||
# pylint: disable=protected-access
|
||||
import pathlib
|
||||
from unittest import mock
|
||||
import requests
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
from data_pipeline.etl.sources.cdc_life_expectancy.etl import CDCLifeExpectancy
|
||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
# Logger for this test module, obtained from the project's
# `get_module_logger` helper and keyed on the module's dotted name.
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class TestCDCLifeExpectency(TestETL):
    """Tests the CDC Life Expectancy ETL.

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
    data_pipeline/tests/sources/cdc_life_expectancy/test_etl.py::TestCDCLifeExpectency::<testname>
    --snapshot-update
    """

    # ETL class under test.
    _ETL_CLASS = CDCLifeExpectancy

    # Fixture inputs live in the `data` directory next to this file.
    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "US_A.CSV"
    # No zip fixture for this source; `extract` is fed individual fixture
    # files through the mocked `requests.get` below.
    _SAMPLE_DATA_ZIP_FILE_NAME = None
    # NOTE(review): this looks truncated (no trailing "y"); confirm what the
    # parent test class / ETL expects before changing it.
    _EXTRACT_TMP_FOLDER_NAME = "CDCLifeExpectanc"
    _EXTRACT_CSV_FILE_NAME = "extract.csv"
    # Extend the shared fixture tracts with one tract each for Wisconsin and
    # Maine.
    _FIXTURES_SHARED_TRACT_IDS = TestETL._FIXTURES_SHARED_TRACT_IDS + [
        "55001950201",  # WI
        "23001010100",  # ME
    ]

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.

        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    def _setup_etl_instance_and_run_extract(
        self, mock_etl, mock_paths
    ) -> ExtractTransformLoad:
        """Build the ETL with its downloads mocked, run `extract`, return it.

        Two things are patched for the duration of the call:

        * `data_pipeline.utils.requests`: its `get` is replaced so that every
          URL is answered with the bytes of the fixture file in
          `self._DATA_DIRECTORY_FOR_TEST` whose name equals the last path
          segment of the URL.
        * `data_pipeline.etl.score.etl_utils.get_state_fips_codes`: returns
          the first two characters of each tract ID in
          `self._FIXTURES_SHARED_TRACT_IDS`.

        Args:
            mock_etl: pytest fixture; not referenced directly in this method.
            mock_paths: pytest fixture, a sequence whose second item is the
                temporary path used for this test.

        Returns:
            The ETL instance after `extract()` has been run.
        """
        with mock.patch(
            "data_pipeline.utils.requests"
        ) as requests_mock, mock.patch(
            "data_pipeline.etl.score.etl_utils.get_state_fips_codes"
        ) as mock_get_state_fips_codes:
            tmp_path = mock_paths[1]

            def fake_get(url, *_args, **_kwargs):
                """Serve the fixture file named by the last segment of `url`."""
                file_name = url.split("/")[-1]
                with open(
                    self._DATA_DIRECTORY_FOR_TEST / file_name,
                    "rb",
                ) as file:
                    file_contents = file.read()

                response_mock = requests.Response()
                response_mock.status_code = 200
                # Return the fixture bytes as the response body by setting
                # the private `_content` attribute directly.
                response_mock._content = file_contents
                return response_mock

            requests_mock.get = fake_get
            mock_get_state_fips_codes.return_value = [
                x[0:2] for x in self._FIXTURES_SHARED_TRACT_IDS
            ]

            # Instantiate the ETL class.
            etl = self._get_instance_of_etl_class()

            # Monkey-patch the temporary directory to the one used in the test
            etl.TMP_PATH = tmp_path

            # Run the extract method.
            etl.extract()
        return etl

    def test_init(self, mock_etl, mock_paths):
        """Tests that a fresh ETL instance exposes the expected constants."""
        etl = self._ETL_CLASS()
        data_path, _ = mock_paths
        assert etl.DATA_PATH == data_path
        assert etl.COLUMNS_TO_KEEP == [
            "GEOID10_TRACT",
            "Life expectancy (years)",
        ]
        assert etl.INPUT_GEOID_TRACT_FIELD_NAME == "Tract ID"
        assert etl.LIFE_EXPECTANCY_FIELD_NAME == "Life expectancy (years)"

    def test_get_output_file_path(self, mock_etl, mock_paths):
        """Tests the right file name is returned."""
        etl = self._ETL_CLASS()
        # Only the data path is needed here; the temporary path is ignored.
        data_path, _ = mock_paths

        output_file_path = etl._get_output_file_path()
        expected_output_file_path = (
            data_path / "dataset" / "cdc_life_expectancy" / "usa.csv"
        )
        assert output_file_path == expected_output_file_path
|
Loading…
Add table
Add a link
Reference in a new issue