mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 13:51:16 -07:00
Adding NLCD data (#1826)
Adding NLCD's natural space indicator end to end to the score.
This commit is contained in:
parent
49623e4da0
commit
7d89d41e49
18 changed files with 288 additions and 18 deletions
|
@ -35,7 +35,6 @@ datasets:
|
|||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- short_name: "ex_ag_loss"
|
||||
df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME"
|
||||
long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)"
|
||||
|
@ -54,7 +53,6 @@ datasets:
|
|||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- short_name: "ex_bldg_loss"
|
||||
df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME"
|
||||
long_name: "Expected building loss rate (Natural Hazards Risk Index)"
|
||||
|
@ -72,7 +70,6 @@ datasets:
|
|||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- short_name: "has_ag_val"
|
||||
df_field_name: "CONTAINS_AGRIVALUE"
|
||||
long_name: "Contains agricultural value"
|
||||
|
@ -168,7 +165,6 @@ datasets:
|
|||
field_type: float
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
|
||||
- long_name: "First Street Foundation Flood Risk"
|
||||
short_name: "FSF Flood Risk"
|
||||
module_name: fsf_flood_risk
|
||||
|
@ -209,7 +205,6 @@ datasets:
|
|||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- long_name: "First Street Foundation Wildfire Risk"
|
||||
short_name: "FSF Wildfire Risk"
|
||||
module_name: fsf_wildfire_risk
|
||||
|
@ -250,7 +245,6 @@ datasets:
|
|||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- long_name: "DOT Travel Disadvantage Index"
|
||||
short_name: "DOT"
|
||||
module_name: "travel_composite"
|
||||
|
@ -263,3 +257,36 @@ datasets:
|
|||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
- long_name: "National Land Cover Database (NLCD) Lack of Green Space / Nature-Deprived Communities dataset, as compiled by TPL"
|
||||
short_name: "nlcd_nature_deprived"
|
||||
module_name: "nlcd_nature_deprived"
|
||||
input_geoid_tract_field_name: "GEOID10_TRACT"
|
||||
load_fields:
|
||||
- short_name: "ncld_eligible"
|
||||
df_field_name: "ELIGIBLE_FOR_NATURE_DEPRIVED_FIELD_NAME"
|
||||
long_name: "Does the tract have at least 35 acres in it?"
|
||||
field_type: bool
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "percent_impervious"
|
||||
df_field_name: "TRACT_PERCENT_IMPERVIOUS_FIELD_NAME"
|
||||
long_name: "Share of the tract's land area that is covered by impervious surface as a percent"
|
||||
field_type: percentage
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
- short_name: "percent_nonnatural"
|
||||
df_field_name: "TRACT_PERCENT_NON_NATURAL_FIELD_NAME"
|
||||
long_name: "Share of the tract's land area that is covered by impervious surface or cropland as a percent"
|
||||
field_type: percentage
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
- short_name: "percent_cropland"
|
||||
df_field_name: "TRACT_PERCENT_CROPLAND_FIELD_NAME"
|
||||
long_name: "Share of the tract's land area that is covered by cropland as a percent"
|
||||
field_type: percentage
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
|
@ -305,6 +305,9 @@ TILES_SCORE_COLUMNS = {
|
|||
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
|
||||
field_names.HIGH_FUTURE_FLOOD_RISK_FIELD: "FLD_ET",
|
||||
field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WF_ET",
|
||||
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
|
||||
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
|
||||
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
||||
## FPL_200 (there is no higher ed in narwhal)
|
||||
}
|
||||
|
@ -361,4 +364,6 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FUTURE_WILDFIRE_RISK_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
|
|
|
@ -14,6 +14,7 @@ from data_pipeline.etl.sources.dot_travel_composite.etl import (
|
|||
from data_pipeline.etl.sources.fsf_flood_risk.etl import (
|
||||
FloodRiskETL,
|
||||
)
|
||||
from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL
|
||||
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
|
||||
from data_pipeline.score.score_runner import ScoreRunner
|
||||
from data_pipeline.score import field_names
|
||||
|
@ -47,6 +48,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.dot_travel_disadvantage_df: pd.DataFrame
|
||||
self.fsf_flood_df: pd.DataFrame
|
||||
self.fsf_fire_df: pd.DataFrame
|
||||
self.nature_deprived_df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Loading data sets from disk.")
|
||||
|
@ -134,6 +136,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Load flood risk data
|
||||
self.fsf_flood_df = FloodRiskETL.get_data_frame()
|
||||
|
||||
# Load NLCD Nature-Deprived Communities data
|
||||
self.nature_deprived_df = NatureDeprivedETL.get_data_frame()
|
||||
|
||||
# Load GeoCorr Urban Rural Map
|
||||
geocorr_urban_rural_csv = (
|
||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||
|
@ -356,6 +361,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.dot_travel_disadvantage_df,
|
||||
self.fsf_flood_df,
|
||||
self.fsf_fire_df,
|
||||
self.nature_deprived_df,
|
||||
]
|
||||
|
||||
# Sanity check each data frame before merging.
|
||||
|
@ -439,9 +445,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||
field_names.UST_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD,
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
field_names.FUTURE_FLOOD_RISK_FIELD,
|
||||
field_names.FUTURE_WILDFIRE_RISK_FIELD,
|
||||
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
||||
]
|
||||
|
||||
|
@ -449,6 +455,8 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.GEOID_TRACT_FIELD_NAME,
|
||||
field_names.PERSISTENT_POVERTY_FIELD,
|
||||
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
|
||||
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
]
|
||||
|
||||
# For some columns, high values are "good", so we want to reverse the percentile
|
||||
|
@ -500,7 +508,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
|
||||
|
||||
# Convert all columns to numeric and do math
|
||||
# Note that we have a few special conditions here, that we handle explicitly.
|
||||
# Note that we have a few special conditions here and we handle them explicitly.
|
||||
# For *Linguistic Isolation*, we do NOT want to include Puerto Rico in the percentile
|
||||
# calculation. This is because linguistic isolation as a category doesn't make much sense
|
||||
# in Puerto Rico, where Spanish is a recognized language. Thus, we construct a list
|
||||
|
@ -509,6 +517,10 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# For *Expected Agricultural Loss*, we only want to include in the percentile tracts
|
||||
# in which there is some agricultural value. This helps us adjust the data such that we have
|
||||
# the ability to discern which tracts truly are at the 90th percentile, since many tracts have 0 value.
|
||||
#
|
||||
# For *Non-Natural Space*, we may only want to include tracts that have at least 35 acres, I think. This will
|
||||
# get rid of tracts that we think are aberrations statistically. Right now, we have left this out
|
||||
# pending ground-truthing.
|
||||
|
||||
for numeric_column in numeric_columns:
|
||||
drop_tracts = []
|
||||
|
@ -524,7 +536,6 @@ class ScoreETL(ExtractTransformLoad):
|
|||
logger.info(
|
||||
f"Dropping {len(drop_tracts)} tracts from Agricultural Value Loss"
|
||||
)
|
||||
|
||||
elif numeric_column == field_names.LINGUISTIC_ISO_FIELD:
|
||||
drop_tracts = df_copy[
|
||||
# 72 is the FIPS code for Puerto Rico
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue