mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-26 18:51:17 -07:00
Adding eamlis and fuds data to legacy pollution in score (#1832)
Update to add EAMLIS and FUDS data to score
This commit is contained in:
parent
6e41e0d9f0
commit
cb4866b93f
14 changed files with 93 additions and 24 deletions
|
@ -322,4 +322,16 @@ fields:
|
||||||
format: percentage
|
format: percentage
|
||||||
- score_name: Does the tract have at least 35 acres in it?
|
- score_name: Does the tract have at least 35 acres in it?
|
||||||
label: Does the tract have at least 35 acres in it?
|
label: Does the tract have at least 35 acres in it?
|
||||||
|
format: bool
|
||||||
|
- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||||
|
label: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||||
|
format: bool
|
||||||
|
- score_name: Is there at least one abandoned mine in this census tract?
|
||||||
|
label: Is there at least one abandoned mine in this census tract?
|
||||||
|
format: bool
|
||||||
|
- score_name: There is at least one abandoned mine in this census tract and the tract is low income.
|
||||||
|
label: There is at least one abandoned mine in this census tract and the tract is low income.
|
||||||
|
format: bool
|
||||||
|
- score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
|
||||||
|
label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
|
||||||
format: bool
|
format: bool
|
|
@ -326,4 +326,16 @@ sheets:
|
||||||
format: percentage
|
format: percentage
|
||||||
- score_name: Does the tract have at least 35 acres in it?
|
- score_name: Does the tract have at least 35 acres in it?
|
||||||
label: Does the tract have at least 35 acres in it?
|
label: Does the tract have at least 35 acres in it?
|
||||||
|
format: bool
|
||||||
|
- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||||
|
label: Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||||
|
format: bool
|
||||||
|
- score_name: Is there at least one abandoned mine in this census tract?
|
||||||
|
label: Is there at least one abandoned mine in this census tract?
|
||||||
|
format: bool
|
||||||
|
- score_name: There is at least one abandoned mine in this census tract and the tract is low income.
|
||||||
|
label: There is at least one abandoned mine in this census tract and the tract is low income.
|
||||||
|
format: bool
|
||||||
|
- score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
|
||||||
|
label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.
|
||||||
format: bool
|
format: bool
|
|
@ -93,21 +93,23 @@ def etl_runner(dataset_to_run: str = None) -> None:
|
||||||
dataset for dataset in dataset_list if dataset["is_memory_intensive"]
|
dataset for dataset in dataset_list if dataset["is_memory_intensive"]
|
||||||
]
|
]
|
||||||
|
|
||||||
logger.info("Running concurrent jobs")
|
if concurrent_datasets:
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
logger.info("Running concurrent jobs")
|
||||||
futures = {
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||||
executor.submit(_run_one_dataset, dataset=dataset)
|
futures = {
|
||||||
for dataset in concurrent_datasets
|
executor.submit(_run_one_dataset, dataset=dataset)
|
||||||
}
|
for dataset in concurrent_datasets
|
||||||
|
}
|
||||||
|
|
||||||
for fut in concurrent.futures.as_completed(futures):
|
for fut in concurrent.futures.as_completed(futures):
|
||||||
# Calling result will raise an exception if one occurred.
|
# Calling result will raise an exception if one occurred.
|
||||||
# Otherwise, the exceptions are silently ignored.
|
# Otherwise, the exceptions are silently ignored.
|
||||||
fut.result()
|
fut.result()
|
||||||
|
|
||||||
logger.info("Running high-memory jobs")
|
if high_memory_datasets:
|
||||||
for dataset in high_memory_datasets:
|
logger.info("Running high-memory jobs")
|
||||||
_run_one_dataset(dataset=dataset)
|
for dataset in high_memory_datasets:
|
||||||
|
_run_one_dataset(dataset=dataset)
|
||||||
|
|
||||||
|
|
||||||
def score_generate() -> None:
|
def score_generate() -> None:
|
||||||
|
|
|
@ -312,6 +312,8 @@ TILES_SCORE_COLUMNS = {
|
||||||
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
|
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
|
||||||
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
|
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
|
||||||
|
field_names.AML_BOOLEAN: "AML_ET",
|
||||||
|
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET"
|
||||||
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
||||||
## FPL_200 (there is no higher ed in narwhal)
|
## FPL_200 (there is no higher ed in narwhal)
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,8 @@ from data_pipeline.etl.sources.dot_travel_composite.etl import (
|
||||||
from data_pipeline.etl.sources.fsf_flood_risk.etl import (
|
from data_pipeline.etl.sources.fsf_flood_risk.etl import (
|
||||||
FloodRiskETL,
|
FloodRiskETL,
|
||||||
)
|
)
|
||||||
|
from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL
|
||||||
|
from data_pipeline.etl.sources.us_army_fuds.etl import USArmyFUDS
|
||||||
from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL
|
from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL
|
||||||
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
|
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
|
||||||
from data_pipeline.score.score_runner import ScoreRunner
|
from data_pipeline.score.score_runner import ScoreRunner
|
||||||
|
@ -49,6 +51,8 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
self.fsf_flood_df: pd.DataFrame
|
self.fsf_flood_df: pd.DataFrame
|
||||||
self.fsf_fire_df: pd.DataFrame
|
self.fsf_fire_df: pd.DataFrame
|
||||||
self.nature_deprived_df: pd.DataFrame
|
self.nature_deprived_df: pd.DataFrame
|
||||||
|
self.eamlis_df: pd.DataFrame
|
||||||
|
self.fuds_df: pd.DataFrame
|
||||||
|
|
||||||
def extract(self) -> None:
|
def extract(self) -> None:
|
||||||
logger.info("Loading data sets from disk.")
|
logger.info("Loading data sets from disk.")
|
||||||
|
@ -139,6 +143,12 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
# Load NLCD Nature-Deprived Communities data
|
# Load NLCD Nature-Deprived Communities data
|
||||||
self.nature_deprived_df = NatureDeprivedETL.get_data_frame()
|
self.nature_deprived_df = NatureDeprivedETL.get_data_frame()
|
||||||
|
|
||||||
|
# Load eAMLIS dataset
|
||||||
|
self.eamlis_df = AbandonedMineETL.get_data_frame()
|
||||||
|
|
||||||
|
# Load FUDS dataset
|
||||||
|
self.fuds_df = USArmyFUDS.get_data_frame()
|
||||||
|
|
||||||
# Load GeoCorr Urban Rural Map
|
# Load GeoCorr Urban Rural Map
|
||||||
geocorr_urban_rural_csv = (
|
geocorr_urban_rural_csv = (
|
||||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||||
|
@ -362,6 +372,8 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
self.fsf_flood_df,
|
self.fsf_flood_df,
|
||||||
self.fsf_fire_df,
|
self.fsf_fire_df,
|
||||||
self.nature_deprived_df,
|
self.nature_deprived_df,
|
||||||
|
self.eamlis_df,
|
||||||
|
self.fuds_df,
|
||||||
]
|
]
|
||||||
|
|
||||||
# Sanity check each data frame before merging.
|
# Sanity check each data frame before merging.
|
||||||
|
@ -457,6 +469,8 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
|
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
|
||||||
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
|
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
|
||||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||||
|
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
|
||||||
|
field_names.AML_BOOLEAN,
|
||||||
]
|
]
|
||||||
|
|
||||||
# For some columns, high values are "good", so we want to reverse the percentile
|
# For some columns, high values are "good", so we want to reverse the percentile
|
||||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -55,7 +55,7 @@ class USArmyFUDS(ExtractTransformLoad):
|
||||||
# before we try to do any transformation, get the tract data
|
# before we try to do any transformation, get the tract data
|
||||||
# so it's loaded and the census ETL is out of scope
|
# so it's loaded and the census ETL is out of scope
|
||||||
|
|
||||||
logger.info("Loading FUDs data as GeoDataFrame for transform")
|
logger.info("Loading FUDS data as GeoDataFrame for transform")
|
||||||
raw_df = gpd.read_file(
|
raw_df = gpd.read_file(
|
||||||
filename=self.DOWNLOAD_FILE_NAME,
|
filename=self.DOWNLOAD_FILE_NAME,
|
||||||
low_memory=False,
|
low_memory=False,
|
||||||
|
@ -88,7 +88,7 @@ class USArmyFUDS(ExtractTransformLoad):
|
||||||
.size()
|
.size()
|
||||||
)
|
)
|
||||||
self.output_df = (
|
self.output_df = (
|
||||||
self.output_df.fillna(0).astype("int64").sort_index().reset_index()
|
self.output_df.fillna(0).astype(np.int64).sort_index().reset_index()
|
||||||
)
|
)
|
||||||
|
|
||||||
self.output_df[self.ELIGIBLE_FUDS_BINARY_FIELD_NAME] = np.where(
|
self.output_df[self.ELIGIBLE_FUDS_BINARY_FIELD_NAME] = np.where(
|
||||||
|
|
|
@ -340,6 +340,12 @@ MOBILE_HOME = "Mobile Home"
|
||||||
SINGLE_PARENT = "Single Parent"
|
SINGLE_PARENT = "Single Parent"
|
||||||
TRANSPORTATION_COSTS = "Transportation Costs"
|
TRANSPORTATION_COSTS = "Transportation Costs"
|
||||||
|
|
||||||
|
# eAMLIS and FUDS variables
|
||||||
|
AML_BOOLEAN = "Is there at least one abandoned mine in this census tract?"
|
||||||
|
ELIGIBLE_FUDS_BINARY_FIELD_NAME = (
|
||||||
|
"Is there at least one Formerly Used Defense Site (FUDS) in the tract?"
|
||||||
|
)
|
||||||
|
|
||||||
#####
|
#####
|
||||||
# Names for individual factors being exceeded
|
# Names for individual factors being exceeded
|
||||||
|
|
||||||
|
@ -399,6 +405,10 @@ HAZARDOUS_WASTE_LOW_INCOME_FIELD = (
|
||||||
f" for proximity to hazardous waste facilities and is low income?"
|
f" for proximity to hazardous waste facilities and is low income?"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
AML_LOW_INCOME_FIELD = "There is at least one abandoned mine in this census tract and the tract is low income."
|
||||||
|
ELIGIBLE_FUDS_LOW_INCOME_FIELD = "There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income."
|
||||||
|
|
||||||
|
|
||||||
# Critical Clean Water and Waste Infrastructure
|
# Critical Clean Water and Waste Infrastructure
|
||||||
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?"
|
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?"
|
||||||
UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?"
|
UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?"
|
||||||
|
|
|
@ -464,6 +464,8 @@ class ScoreNarwhal(Score):
|
||||||
field_names.RMP_LOW_INCOME_FIELD,
|
field_names.RMP_LOW_INCOME_FIELD,
|
||||||
field_names.SUPERFUND_LOW_INCOME_FIELD,
|
field_names.SUPERFUND_LOW_INCOME_FIELD,
|
||||||
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
|
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
|
||||||
|
field_names.AML_LOW_INCOME_FIELD,
|
||||||
|
field_names.ELIGIBLE_FUDS_LOW_INCOME_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
self.df[field_names.RMP_PCTILE_THRESHOLD] = (
|
self.df[field_names.RMP_PCTILE_THRESHOLD] = (
|
||||||
|
@ -483,10 +485,15 @@ class ScoreNarwhal(Score):
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = (
|
self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = self.df[
|
||||||
self.df[field_names.RMP_PCTILE_THRESHOLD]
|
[
|
||||||
| self.df[field_names.NPL_PCTILE_THRESHOLD]
|
field_names.RMP_PCTILE_THRESHOLD,
|
||||||
) | self.df[field_names.TSDF_PCTILE_THRESHOLD]
|
field_names.NPL_PCTILE_THRESHOLD,
|
||||||
|
field_names.TSDF_PCTILE_THRESHOLD,
|
||||||
|
field_names.AML_BOOLEAN,
|
||||||
|
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
|
||||||
|
]
|
||||||
|
].any(axis="columns")
|
||||||
|
|
||||||
# individual series-by-series
|
# individual series-by-series
|
||||||
self.df[field_names.RMP_LOW_INCOME_FIELD] = (
|
self.df[field_names.RMP_LOW_INCOME_FIELD] = (
|
||||||
|
@ -502,6 +509,16 @@ class ScoreNarwhal(Score):
|
||||||
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.df[field_names.AML_LOW_INCOME_FIELD] = (
|
||||||
|
self.df[field_names.AML_BOOLEAN]
|
||||||
|
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||||
|
)
|
||||||
|
|
||||||
|
self.df[field_names.ELIGIBLE_FUDS_LOW_INCOME_FIELD] = (
|
||||||
|
self.df[field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME]
|
||||||
|
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||||
|
)
|
||||||
|
|
||||||
self._increment_total_eligibility_exceeded(
|
self._increment_total_eligibility_exceeded(
|
||||||
pollution_eligibility_columns,
|
pollution_eligibility_columns,
|
||||||
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
||||||
|
|
|
@ -61,9 +61,9 @@ class TestAbandondedLandMineETL(TestETL):
|
||||||
super().setup_method(_method=_method, filename=filename)
|
super().setup_method(_method=_method, filename=filename)
|
||||||
|
|
||||||
def test_init(self, mock_etl, mock_paths):
|
def test_init(self, mock_etl, mock_paths):
|
||||||
"""Tests that the mock NationalRiskIndexETL class instance was
|
"""Tests that the mock class instance was
|
||||||
initiliazed correctly.
|
initiliazed correctly.
|
||||||
"""
|
"""
|
||||||
# setup
|
# setup
|
||||||
etl = self._ETL_CLASS()
|
etl = self._ETL_CLASS()
|
||||||
# validation
|
# validation
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue