mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 22:01:16 -07:00
Adding first street foundation data (#1823)
Adding FSF flood and wildfire risk datasets to the score.
This commit is contained in:
parent
ebac552d75
commit
5e378aea81
21 changed files with 430 additions and 82 deletions
|
@ -34,6 +34,16 @@ DATASET_LIST = [
|
|||
"module_dir": "mapping_for_ej",
|
||||
"class_name": "MappingForEJETL",
|
||||
},
|
||||
{
|
||||
"name": "fsf_flood_risk",
|
||||
"module_dir": "fsf_flood_risk",
|
||||
"class_name": "FloodRiskETL",
|
||||
},
|
||||
{
|
||||
"name": "fsf_wildfire_risk",
|
||||
"module_dir": "fsf_wildfire_risk",
|
||||
"class_name": "WildfireRiskETL",
|
||||
},
|
||||
{
|
||||
"name": "ejscreen",
|
||||
"module_dir": "ejscreen",
|
||||
|
|
|
@ -157,6 +157,88 @@ datasets:
|
|||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
|
||||
- long_name: "First Street Foundation Flood Risk"
|
||||
short_name: "FSF Flood Risk"
|
||||
module_name: fsf_flood_risk
|
||||
input_geoid_tract_field_name: "GEOID"
|
||||
load_fields:
|
||||
- short_name: "flood_eligible_properties"
|
||||
df_field_name: "COUNT_PROPERTIES"
|
||||
long_name: "Count of properties eligible for flood risk calculation within tract (floor of 250)"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "flood_risk_properties_today"
|
||||
df_field_name: "PROPERTIES_AT_RISK_FROM_FLOODING_TODAY"
|
||||
long_name: "Count of properties at risk of flood today"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "flood_risk_properties_30yrs"
|
||||
df_field_name: "PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS"
|
||||
long_name: "Count of properties at risk of flood in 30 years"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "flood_risk_share_today"
|
||||
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY"
|
||||
long_name: "Share of properties at risk of flood today"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
- short_name: "flood_risk_share_30yrs"
|
||||
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS"
|
||||
long_name: "Share of properties at risk of flood in 30 years"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- long_name: "First Street Foundation Wildfire Risk"
|
||||
short_name: "FSF Wildfire Risk"
|
||||
module_name: fsf_wildfire_risk
|
||||
input_geoid_tract_field_name: "GEOID"
|
||||
load_fields:
|
||||
- short_name: "fire_eligible_properties"
|
||||
df_field_name: "COUNT_PROPERTIES"
|
||||
long_name: "Count of properties eligible for wildfire risk calculation within tract (floor of 250)"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "fire_risk_properties_today"
|
||||
df_field_name: "PROPERTIES_AT_RISK_FROM_FIRE_TODAY"
|
||||
long_name: "Count of properties at risk of wildfire today"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "fire_risk_properties_30yrs"
|
||||
df_field_name: "PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS"
|
||||
long_name: "Count of properties at risk of wildfire in 30 years"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "fire_risk_share_today"
|
||||
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_TODAY"
|
||||
long_name: "Share of properties at risk of fire today"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
- short_name: "fire_risk_share_30yrs"
|
||||
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS"
|
||||
long_name: "Share of properties at risk of fire in 30 years"
|
||||
field_type: float
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- long_name: "DOT Travel Disadvantage Index"
|
||||
short_name: "DOT"
|
||||
module_name: "travel_composite"
|
||||
|
|
|
@ -293,12 +293,18 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.WORKFORCE_THRESHOLD_EXCEEDED: "M_WKFC_EOMI",
|
||||
# These are the booleans for socioeconomic indicators
|
||||
## this measures low income boolean
|
||||
field_names.FPL_200_SERIES: "FPL200S",
|
||||
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED: "FPL200S",
|
||||
## Low high school for t&wd
|
||||
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
|
||||
field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS"
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS",
|
||||
field_names.FUTURE_FLOOD_RISK_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "FLD_PFS",
|
||||
field_names.FUTURE_WILDFIRE_RISK_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
|
||||
field_names.HIGH_FUTURE_FLOOD_RISK_FIELD: "FLD_ET",
|
||||
field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WF_ET",
|
||||
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
||||
## FPL_200 (there is no higher ed in narwhal)
|
||||
}
|
||||
|
@ -352,4 +358,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
|
||||
field_names.COLLEGE_ATTENDANCE_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FUTURE_WILDFIRE_RISK_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
|
|
|
@ -11,6 +11,10 @@ from data_pipeline.etl.sources.national_risk_index.etl import (
|
|||
from data_pipeline.etl.sources.dot_travel_composite.etl import (
|
||||
TravelCompositeETL,
|
||||
)
|
||||
from data_pipeline.etl.sources.fsf_flood_risk.etl import (
|
||||
FloodRiskETL,
|
||||
)
|
||||
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
|
||||
from data_pipeline.score.score_runner import ScoreRunner
|
||||
from data_pipeline.score import field_names
|
||||
from data_pipeline.etl.score import constants
|
||||
|
@ -41,6 +45,8 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.child_opportunity_index_df: pd.DataFrame
|
||||
self.hrs_df: pd.DataFrame
|
||||
self.dot_travel_disadvantage_df: pd.DataFrame
|
||||
self.fsf_flood_df: pd.DataFrame
|
||||
self.fsf_fire_df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Loading data sets from disk.")
|
||||
|
@ -122,6 +128,12 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Load DOT Travel Disadvantage
|
||||
self.dot_travel_disadvantage_df = TravelCompositeETL.get_data_frame()
|
||||
|
||||
# Load fire risk data
|
||||
self.fsf_fire_df = WildfireRiskETL.get_data_frame()
|
||||
|
||||
# Load flood risk data
|
||||
self.fsf_flood_df = FloodRiskETL.get_data_frame()
|
||||
|
||||
# Load GeoCorr Urban Rural Map
|
||||
geocorr_urban_rural_csv = (
|
||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||
|
@ -342,6 +354,8 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.child_opportunity_index_df,
|
||||
self.hrs_df,
|
||||
self.dot_travel_disadvantage_df,
|
||||
self.fsf_flood_df,
|
||||
self.fsf_fire_df,
|
||||
]
|
||||
|
||||
# Sanity check each data frame before merging.
|
||||
|
@ -426,6 +440,8 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.UST_FIELD,
|
||||
field_names.DOT_TRAVEL_BURDEN_FIELD,
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
field_names.FUTURE_FLOOD_RISK_FIELD,
|
||||
field_names.FUTURE_WILDFIRE_RISK_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
||||
]
|
||||
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,3 @@
|
|||
# FSF flood risk data
|
||||
|
||||
Flood risk is computed as properties falling within the 1-in-100-year flood zone
|
|
@ -0,0 +1,93 @@
|
|||
# pylint: disable=unsubscriptable-object
|
||||
# pylint: disable=unsupported-assignment-operation
|
||||
|
||||
import pandas as pd
|
||||
from data_pipeline.config import settings
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class FloodRiskETL(ExtractTransformLoad):
    """ETL class for the First Street Foundation flood risk dataset.

    Reads the unzipped tract-level FSF flood CSV, aggregates duplicate
    tracts, computes the share of properties at risk of flooding today
    and in 30 years, and exposes the result on ``self.output_df`` for
    the base class's load() step.
    """

    NAME = "fsf_flood_risk"
    SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_flood.zip"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Output score variables (values set on datasets.yml) for linting purposes
    COUNT_PROPERTIES: str
    PROPERTIES_AT_RISK_FROM_FLOODING_TODAY: str
    PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS: str

    def __init__(self):
        # Full path for the unzipped input CSV file.
        self.INPUT_CSV = (
            self.get_tmp_path() / "fsf_flood" / "flood_tract_2010.csv"
        )

        # Main dataframe placeholder; populated during the ETL run.
        self.df: pd.DataFrame

        # Dataset-specific source column names and constants.
        self.COUNT_PROPERTIES_NATIVE_FIELD_NAME = "count_properties"
        self.COUNT_PROPERTIES_AT_RISK_TODAY = "mid_depth_100_year00"
        self.COUNT_PROPERTIES_AT_RISK_30_YEARS = "mid_depth_100_year30"
        # Left-clip floor for the property count used as the share denominator.
        self.CLIP_PROPERTIES_COUNT = 250

    def transform(self) -> None:
        """Reads the unzipped data file into memory and applies the following
        transformations to prepare it for the load() method:

        - Renames the Census Tract column to match the other datasets
        - Aggregates rows listed under the same tract
        - Calculates share of properties at risk, left-clipping number of properties at 250
        """
        # NOTE: message previously said "National Risk Index" — a copy-paste
        # error from the ETL this file was based on.
        logger.info("Transforming First Street Foundation Flood Risk Data")

        # read in the unzipped csv data source then rename the
        # Census Tract column for merging
        df_fsf_flood_disagg: pd.DataFrame = pd.read_csv(
            self.INPUT_CSV,
            dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
            low_memory=False,
        )

        # Zero-pad GEOIDs to the canonical 11-character tract format.
        df_fsf_flood_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_flood_disagg[
            self.INPUT_GEOID_TRACT_FIELD_NAME
        ].str.zfill(11)

        # Some tracts are listed more than once in the source data, so we
        # aggregate by tract GEOID.
        # TODO(review): confirm with the data provider that summing duplicate
        # tract rows is the correct aggregation.
        df_fsf_flood = (
            df_fsf_flood_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
            .sum()
            .reset_index()
        )

        # Floor the denominator at CLIP_PROPERTIES_COUNT so tiny tracts do
        # not produce extreme shares.
        df_fsf_flood[self.COUNT_PROPERTIES] = df_fsf_flood[
            self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
        ].clip(lower=self.CLIP_PROPERTIES_COUNT)

        df_fsf_flood[self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY] = (
            df_fsf_flood[self.COUNT_PROPERTIES_AT_RISK_TODAY]
            / df_fsf_flood[self.COUNT_PROPERTIES]
        )
        df_fsf_flood[
            self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS
        ] = (
            df_fsf_flood[self.COUNT_PROPERTIES_AT_RISK_30_YEARS]
            / df_fsf_flood[self.COUNT_PROPERTIES]
        )

        # Assign the final df to the class' output_df for the load method,
        # renaming the native risk-count columns to their score field names.
        self.output_df = df_fsf_flood.rename(
            columns={
                self.COUNT_PROPERTIES_AT_RISK_TODAY: self.PROPERTIES_AT_RISK_FROM_FLOODING_TODAY,
                self.COUNT_PROPERTIES_AT_RISK_30_YEARS: self.PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS,
            }
        )
|
|
@ -0,0 +1,3 @@
|
|||
# FSF wildfire risk data
|
||||
|
||||
Fire risk is computed as properties with a burn risk probability >= 0.003
|
|
@ -0,0 +1,91 @@
|
|||
# pylint: disable=unsubscriptable-object
|
||||
# pylint: disable=unsupported-assignment-operation
|
||||
|
||||
import pandas as pd
|
||||
from data_pipeline.config import settings
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class WildfireRiskETL(ExtractTransformLoad):
    """ETL class for the First Street Foundation wildfire risk dataset.

    Reads the unzipped tract-level FSF fire CSV, aggregates duplicate
    tracts, computes the share of properties at risk of wildfire today
    and in 30 years, and exposes the result on ``self.output_df`` for
    the base class's load() step.
    """

    NAME = "fsf_wildfire_risk"
    SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_fire.zip"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Output score variables (values set on datasets.yml) for linting purposes
    COUNT_PROPERTIES: str
    PROPERTIES_AT_RISK_FROM_FIRE_TODAY: str
    PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_TODAY: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS: str

    def __init__(self):
        # Full path for the unzipped input CSV file.
        self.INPUT_CSV = (
            self.get_tmp_path() / "fsf_fire" / "fire_tract_2010.csv"
        )

        # Main dataframe placeholder; populated during the ETL run.
        self.df: pd.DataFrame

        # Dataset-specific source column names and constants.
        self.COUNT_PROPERTIES_NATIVE_FIELD_NAME = "count_properties"
        self.COUNT_PROPERTIES_AT_RISK_TODAY = "burnprob_year00_flag"
        self.COUNT_PROPERTIES_AT_RISK_30_YEARS = "burnprob_year30_flag"
        # Left-clip floor for the property count used as the share denominator.
        self.CLIP_PROPERTIES_COUNT = 250

    def transform(self) -> None:
        """Reads the unzipped data file into memory and applies the following
        transformations to prepare it for the load() method:

        - Renames the Census Tract column to match the other datasets
        - Aggregates rows listed under the same tract
        - Calculates share of properties at risk, left-clipping number of properties at 250
        """
        # NOTE: message previously said "National Risk Index" — a copy-paste
        # error from the ETL this file was based on.
        logger.info("Transforming First Street Foundation Wildfire Risk Data")

        # read in the unzipped csv data source then rename the
        # Census Tract column for merging
        df_fsf_fire_disagg: pd.DataFrame = pd.read_csv(
            self.INPUT_CSV,
            dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
            low_memory=False,
        )

        # Zero-pad GEOIDs to the canonical 11-character tract format.
        df_fsf_fire_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_fire_disagg[
            self.INPUT_GEOID_TRACT_FIELD_NAME
        ].str.zfill(11)

        # Some tracts are listed more than once in the source data, so we
        # aggregate by tract GEOID.
        # TODO(review): confirm with the data provider that summing duplicate
        # tract rows is the correct aggregation.
        df_fsf_fire = (
            df_fsf_fire_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
            .sum()
            .reset_index()
        )

        # Floor the denominator at CLIP_PROPERTIES_COUNT so tiny tracts do
        # not produce extreme shares.
        df_fsf_fire[self.COUNT_PROPERTIES] = df_fsf_fire[
            self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
        ].clip(lower=self.CLIP_PROPERTIES_COUNT)

        df_fsf_fire[self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_TODAY] = (
            df_fsf_fire[self.COUNT_PROPERTIES_AT_RISK_TODAY]
            / df_fsf_fire[self.COUNT_PROPERTIES]
        )
        df_fsf_fire[self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS] = (
            df_fsf_fire[self.COUNT_PROPERTIES_AT_RISK_30_YEARS]
            / df_fsf_fire[self.COUNT_PROPERTIES]
        )

        # Assign the final df to the class' output_df for the load method,
        # renaming the native risk-count columns to their score field names.
        self.output_df = df_fsf_fire.rename(
            columns={
                self.COUNT_PROPERTIES_AT_RISK_TODAY: self.PROPERTIES_AT_RISK_FROM_FIRE_TODAY,
                self.COUNT_PROPERTIES_AT_RISK_30_YEARS: self.PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS,
            }
        )
|
Loading…
Add table
Add a link
Reference in a new issue