mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-23 16:20:18 -07:00
Add abandoned mine lands data (#1824)
* Add notebook to generate test data (#1780) * Add Abandoned Mine Land data (#1780) Using a similar structure but simpler apporach compared to FUDs, add an indicator for whether a tract has an abandonded mine. * Adding some detail to dataset readmes Just a thought! * Apply feedback from revieiw (#1780) * Fixup bad string that broke test (#1780) * Update a string that I should have renamed (#1780) * Reduce number of threads to reduce memory pressure (#1780) * Try not running geo data (#1780) * Run the high-memory sets separately (#1780) * Actually deduplicate (#1780) * Add flag for memory intensive ETLs (#1780) * Document new flag for datasets (#1780) * Add flag for new datasets fro rebase (#1780) Co-authored-by: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
This commit is contained in:
parent
5e378aea81
commit
49623e4da0
13 changed files with 2815 additions and 1 deletions
Binary file not shown.
|
@ -0,0 +1,16 @@
|
|||
AMLIS Key State/Tribe County Congressional District Quadrangle Name Watershed HUC Code FIPS Code Latitude Longitude Funding Source / Program Problem Area Name Problem Area Number Planning Unit Name Planning Unit Number Problem Priority Problem Type Mining Type Ore Types Date Prepared Date Revised Private Owner % State Owner % Other Federal Owner % Park Service Owner % Forest Service Owner % Indian Owner % BLM Owner % Unfunded Standard Units Unfunded Costs Unfunded GPRA Acres Unfunded Metric Units Funded Standard Units Funded Costs Funded GPRA Acres Funded Metric Units Completed Standard Units Completed Costs Completed GPRA Acres Completed Metric Units Unnamed: 40
|
||||
CA000001 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.25161281807095 -117.11772856883819 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
CA000002 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.5498780497345 -121.0070599015156 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
CA000003 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 38.84602113669345 -121.40564726784282 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000004 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.49784370888389 -155.10321769858746 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000005 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.446650238354696 -154.89548634140738 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000006 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.904412260968197 -159.43665201302525 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000007 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.94208315793464 -159.52362041178708 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000008 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.72796381691298 -156.14177664396527 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000009 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.86486713282688 -156.2497797752935 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000010 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.516629328900667 -155.91378867633992 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000011 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.164406070883054 -155.81110884967674 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000012 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.825369670478306 -156.33064622489087 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000013 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.9170439162332 -156.54289869319305 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000014 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.556464980367483 -157.89225964427064 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000015 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.90754283544759 -159.48416846823164 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
|
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Is there at least one abandoned mine in this census tract?
|
||||
06027000800,True
|
||||
06069000802,True
|
||||
06061021322,True
|
||||
15001021010,True
|
||||
15001021101,True
|
||||
15007040603,True
|
||||
15007040700,True
|
||||
15009030100,True
|
||||
15009030201,True
|
||||
15001021402,True
|
||||
15001021800,True
|
||||
15009030402,True
|
||||
15009030800,True
|
||||
15003010201,True
|
||||
15007040604,True
|
|
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Is there at least one abandoned mine in this census tract?
|
||||
06027000800,True
|
||||
06069000802,True
|
||||
06061021322,True
|
||||
15001021010,True
|
||||
15001021101,True
|
||||
15007040603,True
|
||||
15007040700,True
|
||||
15009030100,True
|
||||
15009030201,True
|
||||
15001021402,True
|
||||
15001021800,True
|
||||
15009030402,True
|
||||
15009030800,True
|
||||
15003010201,True
|
||||
15007040604,True
|
|
|
@ -0,0 +1,152 @@
|
|||
# pylint: disable=protected-access
|
||||
from unittest import mock
|
||||
import pathlib
|
||||
from data_pipeline.etl.base import ValidGeoLevel
|
||||
|
||||
from data_pipeline.etl.sources.eamlis.etl import (
|
||||
AbandonedMineETL,
|
||||
)
|
||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
def _fake_add_tracts_for_geometries(df):
|
||||
"""The actual geojoin is too slow for tests. Use precomputed results."""
|
||||
lookups = {
|
||||
(-117.1177285688382, 36.25161281807095): "06027000800",
|
||||
(-121.0070599015156, 36.5498780497345): "06069000802",
|
||||
(-121.40564726784282, 38.84602113669345): "06061021322",
|
||||
(-155.10321769858746, 19.49784370888389): "15001021010",
|
||||
(-154.89548634140738, 19.446650238354696): "15001021101",
|
||||
(-159.43665201302525, 21.9044122609682): "15007040603",
|
||||
(-159.52362041178708, 21.94208315793464): "15007040700",
|
||||
(-156.14177664396527, 20.72796381691298): "15009030100",
|
||||
(-156.2497797752935, 20.86486713282688): "15009030201",
|
||||
(-155.91378867633992, 19.516629328900667): "15001021402",
|
||||
(-155.81110884967674, 20.164406070883054): "15001021800",
|
||||
(-156.33064622489087, 20.825369670478302): "15009030402",
|
||||
(-156.54289869319305, 20.9170439162332): "15009030800",
|
||||
(-157.89225964427064, 21.556464980367483): "15003010201",
|
||||
(-159.48416846823164, 21.90754283544759): "15007040604",
|
||||
}
|
||||
df["GEOID10_TRACT"] = df.geometry.apply(
|
||||
lambda point: lookups[(point.x, point.y)]
|
||||
)
|
||||
return df
|
||||
|
||||
|
||||
class TestAbandondedLandMineETL(TestETL):
|
||||
"""Tests the Abandoned Mine Dataset ETL
|
||||
|
||||
This uses pytest-snapshot.
|
||||
To update individual snapshots: $ poetry run pytest
|
||||
data_pipeline/tests/sources/eamlis/test_etl.py::TestClassNameETL::<testname>
|
||||
--snapshot-update
|
||||
"""
|
||||
|
||||
_ETL_CLASS = AbandonedMineETL
|
||||
|
||||
_SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
|
||||
_SAMPLE_DATA_FILE_NAME = "eAMLIS export of all data.tsv"
|
||||
_SAMPLE_DATA_ZIP_FILE_NAME = "eAMLIS export of all data.tsv.zip"
|
||||
_EXTRACT_TMP_FOLDER_NAME = "AbandonedMineETL"
|
||||
|
||||
def setup_method(self, _method, filename=__file__):
|
||||
"""Invoke `setup_method` from Parent, but using the current file name.
|
||||
|
||||
This code can be copied identically between all child classes.
|
||||
"""
|
||||
super().setup_method(_method=_method, filename=filename)
|
||||
|
||||
def test_init(self, mock_etl, mock_paths):
|
||||
"""Tests that the mock NationalRiskIndexETL class instance was
|
||||
initiliazed correctly.
|
||||
"""
|
||||
# setup
|
||||
etl = self._ETL_CLASS()
|
||||
# validation
|
||||
assert etl.GEOID_FIELD_NAME == "GEOID10"
|
||||
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
|
||||
assert etl.NAME == "eamlis"
|
||||
assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
|
||||
assert etl.COLUMNS_TO_KEEP == [
|
||||
etl.GEOID_TRACT_FIELD_NAME,
|
||||
etl.AML_BOOLEAN,
|
||||
]
|
||||
|
||||
def test_get_output_file_path(self, mock_etl, mock_paths):
|
||||
"""Tests the right file name is returned."""
|
||||
etl = self._ETL_CLASS()
|
||||
data_path, tmp_path = mock_paths
|
||||
|
||||
output_file_path = etl._get_output_file_path()
|
||||
expected_output_file_path = (
|
||||
data_path / "dataset" / self._ETL_CLASS.NAME / "usa.csv"
|
||||
)
|
||||
assert output_file_path == expected_output_file_path
|
||||
|
||||
def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
|
||||
with mock.patch(
|
||||
"data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
|
||||
new=_fake_add_tracts_for_geometries,
|
||||
):
|
||||
return super().test_fixtures_contain_shared_tract_ids_base(
|
||||
mock_etl, mock_paths
|
||||
)
|
||||
|
||||
def test_transform_base(self, snapshot, mock_etl, mock_paths):
|
||||
with mock.patch(
|
||||
"data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
|
||||
new=_fake_add_tracts_for_geometries,
|
||||
):
|
||||
super().test_transform_base(
|
||||
snapshot=snapshot, mock_etl=mock_etl, mock_paths=mock_paths
|
||||
)
|
||||
|
||||
def test_transform_sets_output_df_base(self, mock_etl, mock_paths):
|
||||
with mock.patch(
|
||||
"data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
|
||||
new=_fake_add_tracts_for_geometries,
|
||||
):
|
||||
super().test_transform_sets_output_df_base(
|
||||
mock_etl=mock_etl, mock_paths=mock_paths
|
||||
)
|
||||
|
||||
def test_validate_base(self, mock_etl, mock_paths):
|
||||
with mock.patch(
|
||||
"data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
|
||||
new=_fake_add_tracts_for_geometries,
|
||||
):
|
||||
super().test_validate_base(mock_etl=mock_etl, mock_paths=mock_paths)
|
||||
|
||||
def test_full_etl_base(self, mock_etl, mock_paths):
|
||||
with mock.patch(
|
||||
"data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
|
||||
new=_fake_add_tracts_for_geometries,
|
||||
):
|
||||
return super().test_full_etl_base(mock_etl, mock_paths)
|
||||
|
||||
def test_get_data_frame_base(self, mock_etl, mock_paths):
|
||||
with mock.patch(
|
||||
"data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
|
||||
new=_fake_add_tracts_for_geometries,
|
||||
):
|
||||
return super().test_get_data_frame_base(mock_etl, mock_paths)
|
||||
|
||||
def test_tracts_without_fuds_not_in_results(self, mock_etl, mock_paths):
|
||||
with mock.patch(
|
||||
"data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
|
||||
new=_fake_add_tracts_for_geometries,
|
||||
):
|
||||
etl = self._setup_etl_instance_and_run_extract(
|
||||
mock_etl=mock_etl, mock_paths=mock_paths
|
||||
)
|
||||
etl.transform()
|
||||
etl.validate()
|
||||
etl.load()
|
||||
df = etl.get_data_frame()
|
||||
assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
|
||||
self._FIXTURES_SHARED_TRACT_IDS
|
||||
)
|
Loading…
Add table
Add a link
Reference in a new issue