mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-29 20:21:17 -07:00
NRI dataset and initial score YAML configuration (#1534)
* update be staging gha * NRI dataset and initial score YAML configuration * checkpoint * adding data checks for release branch * passing tests * adding INPUT_EXTRACTED_FILE_NAME to base class * lint * columns to keep and tests * update be staging gha * checkpoint * update be staging gha * NRI dataset and initial score YAML configuration * checkpoint * adding data checks for release branch * passing tests * adding INPUT_EXTRACTED_FILE_NAME to base class * lint * columns to keep and tests * checkpoint * PR Review * removing source url * tests * stop execution of ETL if there's a YAML schema issue * update be staging gha * adding source url as class var again * clean up * force cache bust * gha cache bust * dynamically set score vars from YAML * docstrings * removing last updated year - optional reverse percentile * passing tests * sort order * column ordering * PR review * class level vars * Updating DatasetsConfig * fix pylint errors * moving metadata hint back to code Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
1833e3e794
commit
1c448a77f9
15 changed files with 272 additions and 3485 deletions
|
@ -0,0 +1,79 @@
|
|||
---
|
||||
datasets:
|
||||
- long_name: "FEMA National Risk Index"
|
||||
short_name: "nri"
|
||||
module_name: national_risk_index
|
||||
input_geoid_tract_field_name: "TRACTFIPS"
|
||||
load_fields:
|
||||
- short_name: "ex_loss"
|
||||
df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME"
|
||||
long_name: "FEMA Risk Index Expected Annual Loss Score"
|
||||
field_type: float
|
||||
number_of_decimals_in_output: 6
|
||||
|
||||
- short_name: "ex_pop_loss"
|
||||
df_field_name: "EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME"
|
||||
long_name: "Expected population loss rate (Natural Hazards Risk Index)"
|
||||
description_short:
|
||||
"Rate of fatalities and injuries resulting from natural hazards each year"
|
||||
description_long:
|
||||
"Rate relative to the population of fatalities and injuries due to fourteen
|
||||
types of natural hazards each year that have some link to climate change:
|
||||
avalanche, coastal flooding, cold wave, drought, hail, heat wave, hurricane,
|
||||
ice storm, landslide, riverine flooding, strong wind, tornado, wildfire, and
|
||||
winter weather. Population loss is defined as the Spatial Hazard Events and
|
||||
Losses and National Centers for Environmental Information’s (NCEI) reported
|
||||
number of fatalities and injuries caused by the hazard occurrence. To combine
|
||||
fatalities and injuries for the computation of population loss value, an
|
||||
injury is counted as one-tenth (1/10) of a fatality. The NCEI Storm Events
|
||||
Database classifies injuries and fatalities as direct or indirect. Both direct
|
||||
and indirect injuries and fatalities are counted as population loss. This
|
||||
total number of injuries and fatalities is then divided by the population in
|
||||
the census tract to get a per-capita rate of population risk."
|
||||
field_type: float
|
||||
number_of_decimals_in_output: 6
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- short_name: "ex_ag_loss"
|
||||
df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME"
|
||||
long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)"
|
||||
description_short:
|
||||
"Economic loss rate to agricultural value resulting from natural hazards each
|
||||
year"
|
||||
description_long:
|
||||
"Percent of agricultural value at risk from losses due to fourteen types of
|
||||
natural hazards that have some link to climate change: avalanche, coastal
|
||||
flooding, cold wave, drought, hail, heat wave, hurricane, ice storm,
|
||||
landslide, riverine flooding, strong wind, tornado, wildfire, and winter
|
||||
weather. Rate calculated by dividing the agricultural value at risk in a
|
||||
census tract by the total agricultural value in that census tract."
|
||||
field_type: float
|
||||
number_of_decimals_in_output: 6
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- short_name: "ex_bldg_loss"
|
||||
df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME"
|
||||
long_name: "Expected building loss rate (Natural Hazards Risk Index)"
|
||||
description_short:
|
||||
"Economic loss rate to building value resulting from natural hazards each year"
|
||||
description_long:
|
||||
"Percent of building value at risk from losses due to fourteen types of
|
||||
natural hazards that have some link to climate change: avalanche, coastal
|
||||
flooding, cold wave, drought, hail, heat wave, hurricane, ice storm,
|
||||
landslide, riverine flooding, strong wind, tornado, wildfire, and winter
|
||||
weather. Rate calculated by dividing the building value at risk in a census
|
||||
tract by the total building value in that census tract."
|
||||
field_type: float
|
||||
number_of_decimals_in_output: 6
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
|
||||
- short_name: "has_ag_val"
|
||||
df_field_name: "CONTAINS_AGRIVALUE"
|
||||
long_name: "Contains agricultural value"
|
||||
field_type: bool
|
|
@ -480,6 +480,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# for instance, 3rd grade reading level : Low 3rd grade reading level.
|
||||
# This low field will not exist yet, it is only calculated for the
|
||||
# percentile.
|
||||
# TODO: This will come from the YAML dataset config
|
||||
ReversePercentile(
|
||||
field_name=field_names.READING_FIELD,
|
||||
low_field_name=field_names.LOW_READING_FIELD,
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class FieldType(Enum):
    """Closed set of type names a load field may declare as its `field_type`.

    Every member's value is the lowercase string spelling of the type, so a
    member can be looked up from that string, e.g. ``FieldType("float")``.
    """

    # Textual data.
    STRING = "string"

    # Numeric and boolean data.
    INT64 = "int64"
    BOOL = "bool"
    FLOAT = "float"
    PERCENTAGE = "percentage"
|
||||
|
||||
|
||||
@dataclass
class DatasetsConfig:
    """Root of the datasets configuration: holds the list of `Dataset` entries."""

    @dataclass
    class Dataset:
        """A single dataset together with the fields to load from it.

        Attributes:
            long_name (str): Human-readable title of the dataset.
            short_name (str): Prefix used to compose the short variable names
                for tiles/arcgis. Every short variable name is prepended with
                the short name of the dataset it comes from, e.g.
                `nri__ex_loss`.
            module_name (str): String matching both the Python module name of
                the dataset and the `NAME` property on its ETL class.
            input_geoid_tract_field_name (str): NOTE(review): presumably the
                name of the census tract GEOID column in the dataset's input
                -- confirm against the ETL base class.
            load_fields (List[LoadField]): The fields that drive the score ETL
                and its side effects (tiles, downloadables).
        """

        @dataclass
        class LoadField:
            """A field to be saved on the dataset's output.

            The score generation ETL later imports these fields.

            Attributes:
                short_name (str): Combined with the dataset's `short_name` in
                    files where short names are needed.
                df_field_name (str): Name of the field in the ETL class.
                long_name (str): Column name in the dataset's output csv.
                field_type (FieldType): Enum stating what type of field this
                    is.
                description_short (Optional str): Description shown if the
                    field appears in the side panel.
                description_long (Optional str): Description shown if the
                    field appears in the Methodology page.
                number_of_decimals_in_output (Optional int): Number of
                    decimals used in side effects, like Excel. Defaults to 2.
                include_in_tiles (Optional bool): Include this field in the
                    tile export. Defaults to False.
                include_in_downloadable_files (Optional bool): Include this
                    field in the CSV and Excel exports. Defaults to False.
                create_percentile (Optional bool): Whether the backend
                    processing should create a percentile field (ranked in
                    ascending order) from this field's values. Defaults to
                    False.
                create_reverse_percentile (Optional bool): Whether the backend
                    processing should create a "reverse percentile" field
                    (ranked in descending order) from this field's values.
                    Defaults to False.
                include_in_comparison_tool_as_index (Optional bool): Whether
                    to include this field in the comparison tool as an index
                    used for comparison (e.g., a state or national index that
                    identifies priority communities). The field itself must be
                    a boolean for the comparison tool to work appropriately.
                    Defaults to False.
                include_in_comparison_tool_as_statistical_descriptor (Optional bool):
                    Whether to include this field in the comparison tool as a
                    statistical descriptor of census tracts (e.g., this field
                    might be income levels, life expectancy, etc). This is
                    used to generate reports such as "tracts identified by
                    Index A but not Index B have higher income levels but
                    lower life expectancy". Defaults to False.
            """

            # Required attributes.
            short_name: str
            df_field_name: str
            long_name: str

            # Used on the `etl_score_post` for the data manipulation. The
            # `by_value` metadata prop loads the enum by its value rather than
            # by its member name, i.e. "string" and not STRING.
            field_type: FieldType = field(metadata={"by_value": True})

            # Optional descriptive text.
            description_short: Optional[str] = None
            description_long: Optional[str] = None

            # Optional output and processing switches.
            number_of_decimals_in_output: Optional[int] = 2
            include_in_tiles: Optional[bool] = False
            include_in_downloadable_files: Optional[bool] = False
            create_percentile: Optional[bool] = False
            create_reverse_percentile: Optional[bool] = False
            include_in_comparison_tool_as_index: Optional[bool] = False
            include_in_comparison_tool_as_statistical_descriptor: Optional[bool] = False

        long_name: str
        short_name: str
        module_name: str
        input_geoid_tract_field_name: str
        load_fields: List[LoadField]

    datasets: List[Dataset]
|
Loading…
Add table
Add a link
Reference in a new issue