mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Merge branch 'emma-nechamkin/release/score-narwhal' of github.com:usds/justice40-tool into emma-nechamkin/release/score-narwhal
This commit is contained in:
commit
2e05b1d60c
13 changed files with 2815 additions and 1 deletions
|
@ -3,56 +3,67 @@ DATASET_LIST = [
|
|||
"name": "cdc_places",
|
||||
"module_dir": "cdc_places",
|
||||
"class_name": "CDCPlacesETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "national_risk_index",
|
||||
"module_dir": "national_risk_index",
|
||||
"class_name": "NationalRiskIndexETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "travel_composite",
|
||||
"module_dir": "dot_travel_composite",
|
||||
"class_name": "TravelCompositeETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "tree_equity_score",
|
||||
"module_dir": "tree_equity_score",
|
||||
"class_name": "TreeEquityScoreETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "census_decennial",
|
||||
"module_dir": "census_decennial",
|
||||
"class_name": "CensusDecennialETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "housing_and_transportation",
|
||||
"module_dir": "housing_and_transportation",
|
||||
"class_name": "HousingTransportationETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "mapping_for_ej",
|
||||
"module_dir": "mapping_for_ej",
|
||||
"class_name": "MappingForEJETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "fsf_flood_risk",
|
||||
"module_dir": "fsf_flood_risk",
|
||||
"class_name": "FloodRiskETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "fsf_wildfire_risk",
|
||||
"module_dir": "fsf_wildfire_risk",
|
||||
"class_name": "WildfireRiskETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "ejscreen",
|
||||
"module_dir": "ejscreen",
|
||||
"class_name": "EJSCREENETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "hud_housing",
|
||||
"module_dir": "hud_housing",
|
||||
"class_name": "HudHousingETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "ncld_nature_deprived",
|
||||
|
@ -63,97 +74,122 @@ DATASET_LIST = [
|
|||
"name": "census_acs_median_income",
|
||||
"module_dir": "census_acs_median_income",
|
||||
"class_name": "CensusACSMedianIncomeETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "cdc_life_expectancy",
|
||||
"module_dir": "cdc_life_expectancy",
|
||||
"class_name": "CDCLifeExpectancy",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "doe_energy_burden",
|
||||
"module_dir": "doe_energy_burden",
|
||||
"class_name": "DOEEnergyBurden",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "geocorr",
|
||||
"module_dir": "geocorr",
|
||||
"class_name": "GeoCorrETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "child_opportunity_index",
|
||||
"module_dir": "child_opportunity_index",
|
||||
"class_name": "ChildOpportunityIndex",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "mapping_inequality",
|
||||
"module_dir": "mapping_inequality",
|
||||
"class_name": "MappingInequalityETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "persistent_poverty",
|
||||
"module_dir": "persistent_poverty",
|
||||
"class_name": "PersistentPovertyETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "ejscreen_areas_of_concern",
|
||||
"module_dir": "ejscreen_areas_of_concern",
|
||||
"class_name": "EJSCREENAreasOfConcernETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "calenviroscreen",
|
||||
"module_dir": "calenviroscreen",
|
||||
"class_name": "CalEnviroScreenETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "hud_recap",
|
||||
"module_dir": "hud_recap",
|
||||
"class_name": "HudRecapETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "epa_rsei",
|
||||
"module_dir": "epa_rsei",
|
||||
"class_name": "EPARiskScreeningEnvironmentalIndicatorsETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "energy_definition_alternative_draft",
|
||||
"module_dir": "energy_definition_alternative_draft",
|
||||
"class_name": "EnergyDefinitionAlternativeDraft",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "michigan_ejscreen",
|
||||
"module_dir": "michigan_ejscreen",
|
||||
"class_name": "MichiganEnviroScreenETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "cdc_svi_index",
|
||||
"module_dir": "cdc_svi_index",
|
||||
"class_name": "CDCSVIIndex",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "maryland_ejscreen",
|
||||
"module_dir": "maryland_ejscreen",
|
||||
"class_name": "MarylandEJScreenETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "historic_redlining",
|
||||
"module_dir": "historic_redlining",
|
||||
"class_name": "HistoricRedliningETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
# This has to come after us.json exists
|
||||
{
|
||||
"name": "census_acs",
|
||||
"module_dir": "census_acs",
|
||||
"class_name": "CensusACSETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "census_acs_2010",
|
||||
"module_dir": "census_acs_2010",
|
||||
"class_name": "CensusACS2010ETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "us_army_fuds",
|
||||
"module_dir": "us_army_fuds",
|
||||
"class_name": "USArmyFUDS",
|
||||
"is_memory_intensive": True,
|
||||
},
|
||||
{
|
||||
"name": "eamlis",
|
||||
"module_dir": "eamlis",
|
||||
"class_name": "AbandonedMineETL",
|
||||
"is_memory_intensive": True,
|
||||
},
|
||||
]
|
||||
|
||||
|
|
|
@ -77,10 +77,27 @@ def etl_runner(dataset_to_run: str = None) -> None:
|
|||
"""
|
||||
dataset_list = _get_datasets_to_run(dataset_to_run)
|
||||
|
||||
# Because we are memory constrained on our infrastructure,
|
||||
# we split datasets into those that are not memory intensive
|
||||
# (is_memory_intensive == False) and thereby can be safely
|
||||
# run in parallel, and those that require more RAM and thus
|
||||
# should be run sequentially. The is_memory_intensive flag is
|
||||
# set manually in constants.py based on experience running
|
||||
# the pipeline
|
||||
concurrent_datasets = [
|
||||
dataset
|
||||
for dataset in dataset_list
|
||||
if not dataset["is_memory_intensive"]
|
||||
]
|
||||
high_memory_datasets = [
|
||||
dataset for dataset in dataset_list if dataset["is_memory_intensive"]
|
||||
]
|
||||
|
||||
logger.info("Running concurrent jobs")
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = {
|
||||
executor.submit(_run_one_dataset, dataset=dataset)
|
||||
for dataset in dataset_list
|
||||
for dataset in concurrent_datasets
|
||||
}
|
||||
|
||||
for fut in concurrent.futures.as_completed(futures):
|
||||
|
@ -88,6 +105,10 @@ def etl_runner(dataset_to_run: str = None) -> None:
|
|||
# Otherwise, the exceptions are silently ignored.
|
||||
fut.result()
|
||||
|
||||
logger.info("Running high-memory jobs")
|
||||
for dataset in high_memory_datasets:
|
||||
_run_one_dataset(dataset=dataset)
|
||||
|
||||
|
||||
def score_generate() -> None:
|
||||
"""Generates the score and saves it on the local data directory
|
||||
|
|
|
@ -145,6 +145,18 @@ datasets:
|
|||
field_type: bool
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: false
|
||||
- long_name: "Abandoned Mine Land Inventory System"
|
||||
short_name: "eAMLIS"
|
||||
module_name: "eamlis"
|
||||
load_fields:
|
||||
- short_name: "has_aml"
|
||||
df_field_name: "AML_BOOLEAN"
|
||||
long_name: "Is there at least one abandoned mine in this census tract?"
|
||||
description_short:
|
||||
"Whether the tract has an abandoned mine"
|
||||
field_type: bool
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
- long_name: "Example ETL"
|
||||
short_name: "Example"
|
||||
module_name: "example_dataset"
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
The following is the description from eAMLIS as of August 16, 2022.
|
||||
---
|
||||
|
||||
e-AMLIS is not a comprehensive database of all AML features or all AML grant activities. e-AMLIS is a national inventory that provides information about known abandoned mine land (AML) features including polluted waters. The majority of the data in e-AMLIS provides information about known coal AML features for the 25 states and 3 tribal SMCRA-approved AML Programs. e-AMLIS also provides limited information on non-coal AML features and non-coal reclamation projects, as well as AML features for states and tribes that do not have an approved AML Program. Additionally, e-AMLIS only accounts for the direct construction cost to reclaim each AML feature that has been identified by states and Tribes. Other project costs such as planning, design, permitting, and construction oversight are not tracked in e-AMLIS.
|
||||
|
||||
The figures in e-AMLIS are further broken down into 3 cost categories:
|
||||
|
||||
Unfunded Cost represents pre-construction estimates to reclaim the AML feature;
|
||||
Funded Cost indicates that construction has been approved by OSM and these figures may change during construction;
|
||||
Completed Cost is the actual cost to complete construction and reclamation of the AML feature.
|
||||
DOI/OSMRE’s Financial Business & Management System is the system of record to obtain comprehensive information about all AML grant expenditures.
|
||||
|
||||
An inventory of land and water impacted by past mining (primarily coal mining) is maintained by OSMRE to provide information needed to implement the Surface Mining Control and Reclamation Act of 1977 (SMCRA). The inventory contains information on the location, type, and extent of AML impacts, as well as information on the cost associated with the reclamation of those problems. The inventory is based upon field surveys by State, Tribal, and OSMRE program officials. It is dynamic to the extent that it is modified as new problems are identified and existing problems are reclaimed.
|
||||
|
||||
The Abandoned Mine Land Reclamation Act (AMRA) of 1990, amended SMCRA. The amended law expanded the scope of data OSMRE must collect regarding AML reclamation programs and progress. On December 20, 2006, SMCRA was amended under the Tax Relief and Health Care Act of 2006 to add sources of program funding, emphasize high priority coal reclamation, and expand OSMRE’s responsibilities towards implementation and management of the AML Inventory.
|
||||
|
||||
WHO MAINTAINS THE INFORMATION IN THE AML INVENTORY?
|
||||
The information is developed and/or updated by the States and Indian Tribes managing their own AML programs under SMCRA or by the OSMRE office responsible for States and Indian Tribes not managing their own AML problems.
|
||||
|
||||
TYPES OF PROBLEMS
|
||||
"High Priority"
|
||||
The most serious AML problems are those posing a threat to health, safety and general welfare of people (Priority 1 and Priority 2, or "high priority"). These are the only problems which the law requires to be inventoried. There are 17 Priority 1 and 2 problem types.
|
||||
|
||||
Emergencies
|
||||
Under the 2006 amendments to SMCRA, AML grants to states and tribes increased from $145 million in FY 2007 to $395 million in FY 2011. The increase in funding allowed states to take responsibility for their AML emergencies as part of their regular AML programs.
|
||||
|
||||
Until FY 2011, OSMRE provided Abandoned Mine Land (AML) State Emergency grants to the 15 states that manage their own emergency programs under the Abandoned Mine Land Reclamation Program. Thirteen other states and tribes that had approved AML programs did not receive emergency grants. OSMRE managed emergencies in those 13 states and tribes as well as in Federal Program States without AML programs.
|
||||
|
||||
OSMRE officially notified the state and tribal officials and Congressional delegations that, starting on October 1, 2010, they would fully assume responsibility for funding their emergency programs. OSMRE then worked with states and tribes to ensure a smooth transition to the states’ assumption of responsibility for administering state emergency programs. New funding and carryover balances were used during the transition to address immediate needs.
|
||||
|
||||
Overall, OSMRE successfully transitioned the financial responsibility to the states in FY 2011, and continues to provide technical and program assistance when needed. States with AML programs are now in a position to effectively handle emergency programs.
|
||||
|
||||
Environmental
|
||||
AML problems impacting the environment are known as Priority 3 problems. While SMCRA does not require OSMRE to inventory every unreclaimed priority 3 problem, some program States and Indian tribes have chosen to submit such information. Information for priority 3 problem types is required when reclamation activities are funded and information on completed reclamation of priority 3 problems is kept in the inventory.
|
||||
|
||||
Other Coal Mine Related Problems
|
||||
Information is also kept on lower priority coal related AML problems such as lower priority coal-related projects involving public facilities, and the development of publicly-owned land. The lower priority problems are also categorized-- Priority 4 and 5 problem types.
|
||||
|
||||
Non-coal Mine Related AML Problems
|
||||
The non-coal problems are primarily problems reclaimed by States/Indian tribes that had "Certified" having addressed all known eligible coal related problems. States and Indian tribes managing their own AML programs reclaimed non-coal problems prior to addressing all their coal related problems under SMCRA SEC. 409-- FILLING VOIDS AND SEALING TUNNELS at the request of the Governor of the state or the governing body of the Indian tribe if the Secretary of the Department of the Interior determines such problems meet the criteria for a priority 1, extreme hazard, problem. This Program Area contains historical reclamation accomplishments for Certified Programs reclaiming Priority 1, 2, and 3 non-coal Problem Type features with pre-AML Reauthorization SMCRA funds distributed prior to October 1, 2007.
|
62
data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py
Normal file
62
data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
from pathlib import Path
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
from data_pipeline.config import settings
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class AbandonedMineETL(ExtractTransformLoad):
    """ETL for the eAMLIS abandoned mine inventory.

    The source data comes from the Office Of Surface Mining Reclamation and
    Enforcement's eAMLIS export; each row carries the coordinates of an
    abandoned mine feature. The transform reduces that to one row per
    census tract containing at least one such point.
    """

    # Metadata for the baseclass
    NAME = "eamlis"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Declared (not assigned) here for easy code completion.
    # NOTE(review): presumably populated by the base class from the dataset
    # config -- confirm against ExtractTransformLoad.
    AML_BOOLEAN: str

    def __init__(self):
        self.output_df: pd.DataFrame

        self.TRACT_INPUT_COLUMN_NAME = self.INPUT_GEOID_TRACT_FIELD_NAME

        # Only the tract identifier and the boolean flag are written out.
        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            self.AML_BOOLEAN,
        ]

        dataset_dir = self.DATA_PATH / "dataset"
        self.OUTPUT_PATH: Path = (
            dataset_dir / "abandoned_mine_land_inventory_system"
        )

        archive_name = "/eAMLIS export of all data.tsv.zip"
        self.SOURCE_URL = (
            settings.AWS_JUSTICE40_DATASOURCES_URL + archive_name
        )

    def transform(self) -> None:
        """Build ``output_df``: one row per tract that contains a mine point."""
        logger.info("Starting eAMLIS transforms.")

        tsv_path = self.get_tmp_path() / "eAMLIS export of all data.tsv"
        mines = pd.read_csv(tsv_path, sep="\t", low_memory=False)

        # Turn the Longitude/Latitude columns into point geometries.
        mine_points = gpd.points_from_xy(
            x=mines["Longitude"],
            y=mines["Latitude"],
        )
        mines_gdf = gpd.GeoDataFrame(
            mines,
            geometry=mine_points,
            crs="epsg:4326",
        )

        # Collapse repeated locations before the (expensive) tract lookup,
        # keeping the last row seen for each point.
        unique_mines = mines_gdf.drop_duplicates(
            subset=["geometry"], keep="last"
        )

        # One row per tract is enough: the output is only a presence flag.
        tracts_with_mines = add_tracts_for_geometries(
            unique_mines
        ).drop_duplicates(self.GEOID_TRACT_FIELD_NAME)
        tracts_with_mines[self.AML_BOOLEAN] = True

        self.output_df = tracts_with_mines[self.COLUMNS_TO_KEEP]
|
2443
data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb
Normal file
2443
data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb
Normal file
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -0,0 +1,16 @@
|
|||
AMLIS Key State/Tribe County Congressional District Quadrangle Name Watershed HUC Code FIPS Code Latitude Longitude Funding Source / Program Problem Area Name Problem Area Number Planning Unit Name Planning Unit Number Problem Priority Problem Type Mining Type Ore Types Date Prepared Date Revised Private Owner % State Owner % Other Federal Owner % Park Service Owner % Forest Service Owner % Indian Owner % BLM Owner % Unfunded Standard Units Unfunded Costs Unfunded GPRA Acres Unfunded Metric Units Funded Standard Units Funded Costs Funded GPRA Acres Funded Metric Units Completed Standard Units Completed Costs Completed GPRA Acres Completed Metric Units Unnamed: 40
|
||||
CA000001 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.25161281807095 -117.11772856883819 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
CA000002 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.5498780497345 -121.0070599015156 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
CA000003 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 38.84602113669345 -121.40564726784282 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000004 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.49784370888389 -155.10321769858746 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000005 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.446650238354696 -154.89548634140738 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000006 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.904412260968197 -159.43665201302525 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000007 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.94208315793464 -159.52362041178708 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000008 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.72796381691298 -156.14177664396527 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000009 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.86486713282688 -156.2497797752935 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000010 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.516629328900667 -155.91378867633992 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000011 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.164406070883054 -155.81110884967674 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000012 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.825369670478306 -156.33064622489087 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000013 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.9170439162332 -156.54289869319305 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000014 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.556464980367483 -157.89225964427064 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
||||
HI000015 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.90754283544759 -159.48416846823164 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
|
|
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Is there at least one abandoned mine in this census tract?
|
||||
06027000800,True
|
||||
06069000802,True
|
||||
06061021322,True
|
||||
15001021010,True
|
||||
15001021101,True
|
||||
15007040603,True
|
||||
15007040700,True
|
||||
15009030100,True
|
||||
15009030201,True
|
||||
15001021402,True
|
||||
15001021800,True
|
||||
15009030402,True
|
||||
15009030800,True
|
||||
15003010201,True
|
||||
15007040604,True
|
|
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Is there at least one abandoned mine in this census tract?
|
||||
06027000800,True
|
||||
06069000802,True
|
||||
06061021322,True
|
||||
15001021010,True
|
||||
15001021101,True
|
||||
15007040603,True
|
||||
15007040700,True
|
||||
15009030100,True
|
||||
15009030201,True
|
||||
15001021402,True
|
||||
15001021800,True
|
||||
15009030402,True
|
||||
15009030800,True
|
||||
15003010201,True
|
||||
15007040604,True
|
|
|
@ -0,0 +1,152 @@
|
|||
# pylint: disable=protected-access
|
||||
from unittest import mock
|
||||
import pathlib
|
||||
from data_pipeline.etl.base import ValidGeoLevel
|
||||
|
||||
from data_pipeline.etl.sources.eamlis.etl import (
|
||||
AbandonedMineETL,
|
||||
)
|
||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
def _fake_add_tracts_for_geometries(df):
    """Stand-in for the real geojoin, which is too slow for tests.

    Each point in ``df.geometry`` is looked up in a table of precomputed
    tract IDs and the result is stored in a ``GEOID10_TRACT`` column.

    Points are matched on coordinates rounded to ``precision`` decimal
    places rather than on exact float equality: the coordinates arrive via
    a CSV float parser, and an exact-equality lookup breaks whenever the
    parser yields a value that differs from the table in the last bit.

    Args:
        df: frame whose ``geometry`` entries expose ``x`` and ``y``
            attributes.

    Returns:
        The same ``df`` object, modified in place with the new column.

    Raises:
        KeyError: if a point has no entry in the precomputed table.
    """
    # Far coarser than float noise, far finer than the spacing between
    # the fixture points, so keys stay unique.
    precision = 6

    precomputed = {
        (-117.1177285688382, 36.25161281807095): "06027000800",
        (-121.0070599015156, 36.5498780497345): "06069000802",
        (-121.40564726784282, 38.84602113669345): "06061021322",
        (-155.10321769858746, 19.49784370888389): "15001021010",
        (-154.89548634140738, 19.446650238354696): "15001021101",
        (-159.43665201302525, 21.9044122609682): "15007040603",
        (-159.52362041178708, 21.94208315793464): "15007040700",
        (-156.14177664396527, 20.72796381691298): "15009030100",
        (-156.2497797752935, 20.86486713282688): "15009030201",
        (-155.91378867633992, 19.516629328900667): "15001021402",
        (-155.81110884967674, 20.164406070883054): "15001021800",
        (-156.33064622489087, 20.825369670478302): "15009030402",
        (-156.54289869319305, 20.9170439162332): "15009030800",
        (-157.89225964427064, 21.556464980367483): "15003010201",
        (-159.48416846823164, 21.90754283544759): "15007040604",
    }
    lookups = {
        (round(x, precision), round(y, precision)): tract
        for (x, y), tract in precomputed.items()
    }

    df["GEOID10_TRACT"] = df.geometry.apply(
        lambda point: lookups[
            (round(point.x, precision), round(point.y, precision))
        ]
    )
    return df
|
||||
|
||||
|
||||
class TestAbandondedLandMineETL(TestETL):
    """Tests the Abandoned Mine Dataset ETL

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
    data_pipeline/tests/sources/eamlis/test_etl.py::TestAbandondedLandMineETL::<testname>
    --snapshot-update
    """

    _ETL_CLASS = AbandonedMineETL

    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "eAMLIS export of all data.tsv"
    _SAMPLE_DATA_ZIP_FILE_NAME = "eAMLIS export of all data.tsv.zip"
    _EXTRACT_TMP_FOLDER_NAME = "AbandonedMineETL"

    # The geojoin must be patched where the ETL module looks it up, not
    # where it is defined.
    _GEOJOIN_PATCH_TARGET = (
        "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries"
    )

    def _patched_geojoin(self):
        """Return a patcher that swaps the geojoin for the precomputed fake."""
        return mock.patch(
            self._GEOJOIN_PATCH_TARGET,
            new=_fake_add_tracts_for_geometries,
        )

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.

        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    def test_init(self, mock_etl, mock_paths):
        """Tests that the mock AbandonedMineETL class instance was
        initialized correctly.
        """
        # setup
        etl = self._ETL_CLASS()
        # validation
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.NAME == "eamlis"
        assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
        assert etl.COLUMNS_TO_KEEP == [
            etl.GEOID_TRACT_FIELD_NAME,
            etl.AML_BOOLEAN,
        ]

    def test_get_output_file_path(self, mock_etl, mock_paths):
        """Tests the right file name is returned."""
        etl = self._ETL_CLASS()
        data_path, tmp_path = mock_paths

        output_file_path = etl._get_output_file_path()
        expected_output_file_path = (
            data_path / "dataset" / self._ETL_CLASS.NAME / "usa.csv"
        )
        assert output_file_path == expected_output_file_path

    # The overrides below run the inherited base tests unchanged, only with
    # the slow geojoin replaced by the precomputed lookup.

    def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
        with self._patched_geojoin():
            return super().test_fixtures_contain_shared_tract_ids_base(
                mock_etl, mock_paths
            )

    def test_transform_base(self, snapshot, mock_etl, mock_paths):
        with self._patched_geojoin():
            super().test_transform_base(
                snapshot=snapshot, mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_transform_sets_output_df_base(self, mock_etl, mock_paths):
        with self._patched_geojoin():
            super().test_transform_sets_output_df_base(
                mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_validate_base(self, mock_etl, mock_paths):
        with self._patched_geojoin():
            super().test_validate_base(mock_etl=mock_etl, mock_paths=mock_paths)

    def test_full_etl_base(self, mock_etl, mock_paths):
        with self._patched_geojoin():
            return super().test_full_etl_base(mock_etl, mock_paths)

    def test_get_data_frame_base(self, mock_etl, mock_paths):
        with self._patched_geojoin():
            return super().test_get_data_frame_base(mock_etl, mock_paths)

    def test_tracts_without_fuds_not_in_results(self, mock_etl, mock_paths):
        """Runs the full ETL and checks the output has exactly as many rows
        as there are shared fixture tract IDs.
        """
        with self._patched_geojoin():
            etl = self._setup_etl_instance_and_run_extract(
                mock_etl=mock_etl, mock_paths=mock_paths
            )
            etl.transform()
            etl.validate()
            etl.load()
            df = etl.get_data_frame()
            assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
                self._FIXTURES_SHARED_TRACT_IDS
            )
|
Loading…
Add table
Reference in a new issue