mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
removing last updated year - optional reverse percentile
This commit is contained in:
parent
7415a15bc0
commit
e5b84dc28d
6 changed files with 23 additions and 29 deletions
|
@ -123,7 +123,6 @@ class ExtractTransformLoad:
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
# set some of the basic fields
|
# set some of the basic fields
|
||||||
cls.LAST_UPDATED_YEAR = dataset_config["last_updated_year"]
|
|
||||||
cls.INPUT_GEOID_TRACT_FIELD_NAME = dataset_config[
|
cls.INPUT_GEOID_TRACT_FIELD_NAME = dataset_config[
|
||||||
"input_geoid_tract_field_name"
|
"input_geoid_tract_field_name"
|
||||||
]
|
]
|
||||||
|
@ -150,16 +149,10 @@ class ExtractTransformLoad:
|
||||||
if cls.NAME is None:
|
if cls.NAME is None:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
f"Child ETL class needs to specify `cls.NAME` (currently "
|
f"Child ETL class needs to specify `cls.NAME` (currently "
|
||||||
f"{cls.NAME}) and `cls.LAST_UPDATED_YEAR` (currently "
|
f"{cls.NAME})."
|
||||||
f"{cls.LAST_UPDATED_YEAR})."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
output_file_path = (
|
output_file_path = cls.DATA_PATH / "dataset" / f"{cls.NAME}" / "usa.csv"
|
||||||
cls.DATA_PATH
|
|
||||||
/ "dataset"
|
|
||||||
/ f"{cls.NAME}_{cls.LAST_UPDATED_YEAR}"
|
|
||||||
/ "usa.csv"
|
|
||||||
)
|
|
||||||
return output_file_path
|
return output_file_path
|
||||||
|
|
||||||
def get_tmp_path(self) -> pathlib.Path:
|
def get_tmp_path(self) -> pathlib.Path:
|
||||||
|
|
|
@ -3,7 +3,6 @@ datasets:
|
||||||
- long_name: "FEMA National Risk Index"
|
- long_name: "FEMA National Risk Index"
|
||||||
short_name: "nri"
|
short_name: "nri"
|
||||||
module_name: national_risk_index
|
module_name: national_risk_index
|
||||||
last_updated_year: 2020
|
|
||||||
description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."
|
description: "Dataset from FEMA that identifies communities most at risk to 18 natural hazards."
|
||||||
input_geoid_tract_field_name: "TRACTFIPS"
|
input_geoid_tract_field_name: "TRACTFIPS"
|
||||||
load_fields:
|
load_fields:
|
||||||
|
@ -11,34 +10,34 @@ datasets:
|
||||||
df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME"
|
df_field_name: "RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME"
|
||||||
long_name: "FEMA Risk Index Expected Annual Loss Score"
|
long_name: "FEMA Risk Index Expected Annual Loss Score"
|
||||||
field_type: float
|
field_type: float
|
||||||
tile_include: true
|
include_in_tiles: true
|
||||||
csv_download: true
|
include_in_csv: true
|
||||||
excel_download: true
|
include_in_excel: true
|
||||||
- short_name: "ex_pop_loss"
|
- short_name: "ex_pop_loss"
|
||||||
df_field_name: "EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME"
|
df_field_name: "EXPECTED_POPULATION_LOSS_RATE_FIELD_NAME"
|
||||||
long_name: "Expected population loss rate (Natural Hazards Risk Index)"
|
long_name: "Expected population loss rate (Natural Hazards Risk Index)"
|
||||||
field_type: float
|
field_type: float
|
||||||
tile_include: true
|
include_in_tiles: true
|
||||||
csv_download: true
|
include_in_csv: true
|
||||||
excel_download: true
|
include_in_excel: true
|
||||||
- short_name: "ex_ag_loss"
|
- short_name: "ex_ag_loss"
|
||||||
df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME"
|
df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME"
|
||||||
long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)"
|
long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)"
|
||||||
field_type: float
|
field_type: float
|
||||||
tile_include: true
|
include_in_tiles: true
|
||||||
csv_download: true
|
include_in_csv: true
|
||||||
excel_download: true
|
include_in_excel: true
|
||||||
- short_name: "ex_bldg_loss"
|
- short_name: "ex_bldg_loss"
|
||||||
df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME"
|
df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME"
|
||||||
long_name: "Expected building loss rate (Natural Hazards Risk Index)"
|
long_name: "Expected building loss rate (Natural Hazards Risk Index)"
|
||||||
field_type: float
|
field_type: float
|
||||||
tile_include: true
|
include_in_tiles: true
|
||||||
csv_download: true
|
include_in_csv: true
|
||||||
excel_download: true
|
include_in_excel: true
|
||||||
- short_name: "has_ag_val"
|
- short_name: "has_ag_val"
|
||||||
df_field_name: "CONTAINS_AGRIVALUE"
|
df_field_name: "CONTAINS_AGRIVALUE"
|
||||||
long_name: "Contains agricultural value"
|
long_name: "Contains agricultural value"
|
||||||
field_type: bool
|
field_type: bool
|
||||||
tile_include: true
|
include_in_tiles: true
|
||||||
csv_download: true
|
include_in_csv: true
|
||||||
excel_download: true
|
include_in_excel: true
|
||||||
|
|
|
@ -480,6 +480,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
# for instance, 3rd grade reading level : Low 3rd grade reading level.
|
# for instance, 3rd grade reading level : Low 3rd grade reading level.
|
||||||
# This low field will not exist yet, it is only calculated for the
|
# This low field will not exist yet, it is only calculated for the
|
||||||
# percentile.
|
# percentile.
|
||||||
|
# TODO: This will come from the YAML dataset config
|
||||||
ReversePercentile(
|
ReversePercentile(
|
||||||
field_name=field_names.READING_FIELD,
|
field_name=field_names.READING_FIELD,
|
||||||
low_field_name=field_names.LOW_READING_FIELD,
|
low_field_name=field_names.LOW_READING_FIELD,
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
|
|
||||||
|
|
||||||
class FieldType(Enum):
|
class FieldType(Enum):
|
||||||
|
@ -40,6 +40,8 @@ class DatasetsConfig:
|
||||||
field_type (FieldType): An enum that dictates what type of field this is. This will be used on the `etl_score_post`
|
field_type (FieldType): An enum that dictates what type of field this is. This will be used on the `etl_score_post`
|
||||||
for the data manipulation.
|
for the data manipulation.
|
||||||
The `by_value` metadata prop will load the field type's Enum value instead of the index, i.e. "string" and not STRING
|
The `by_value` metadata prop will load the field type's Enum value instead of the index, i.e. "string" and not STRING
|
||||||
|
reverse_percentile (Optional[bool]): Optional flag that marks this field as a reverse-percentile field.
|
||||||
|
TODO: Use this flag to replace the hard-coded ReversePercentile entries at data/data-pipeline/data_pipeline/etl/score/etl_score.py:477
|
||||||
include_in_tiles (bool): Include this field on the tile export.
|
include_in_tiles (bool): Include this field on the tile export.
|
||||||
include_in_csv (bool): Include this field on the CSV export.
|
include_in_csv (bool): Include this field on the CSV export.
|
||||||
include_in_excel (bool): Include this field on the Excel export.
|
include_in_excel (bool): Include this field on the Excel export.
|
||||||
|
@ -49,6 +51,7 @@ class DatasetsConfig:
|
||||||
df_field_name: str
|
df_field_name: str
|
||||||
long_name: str
|
long_name: str
|
||||||
field_type: FieldType = field(metadata={"by_value": True})
|
field_type: FieldType = field(metadata={"by_value": True})
|
||||||
|
reverse_percentile: Optional[bool]
|
||||||
include_in_tiles: bool
|
include_in_tiles: bool
|
||||||
include_in_csv: bool
|
include_in_csv: bool
|
||||||
include_in_excel: bool
|
include_in_excel: bool
|
||||||
|
|
|
@ -127,7 +127,6 @@ class TestETL:
|
||||||
|
|
||||||
# Also make sure all parameters that need to be non-null are non-null
|
# Also make sure all parameters that need to be non-null are non-null
|
||||||
assert etl.NAME is not None
|
assert etl.NAME is not None
|
||||||
assert etl.LAST_UPDATED_YEAR is not None
|
|
||||||
assert etl.GEO_LEVEL is not None
|
assert etl.GEO_LEVEL is not None
|
||||||
assert etl.COLUMNS_TO_KEEP is not None
|
assert etl.COLUMNS_TO_KEEP is not None
|
||||||
assert len(etl.COLUMNS_TO_KEEP) > 0
|
assert len(etl.COLUMNS_TO_KEEP) > 0
|
||||||
|
|
|
@ -87,7 +87,6 @@ class TestNationalRiskIndexETL(TestETL):
|
||||||
assert etl.GEOID_FIELD_NAME == "GEOID10"
|
assert etl.GEOID_FIELD_NAME == "GEOID10"
|
||||||
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
|
assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
|
||||||
assert etl.NAME == "national_risk_index"
|
assert etl.NAME == "national_risk_index"
|
||||||
assert etl.LAST_UPDATED_YEAR == 2020
|
|
||||||
assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
|
assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
|
||||||
assert etl.COLUMNS_TO_KEEP == [
|
assert etl.COLUMNS_TO_KEEP == [
|
||||||
etl.GEOID_TRACT_FIELD_NAME,
|
etl.GEOID_TRACT_FIELD_NAME,
|
||||||
|
@ -105,6 +104,6 @@ class TestNationalRiskIndexETL(TestETL):
|
||||||
|
|
||||||
output_file_path = etl._get_output_file_path()
|
output_file_path = etl._get_output_file_path()
|
||||||
expected_output_file_path = (
|
expected_output_file_path = (
|
||||||
data_path / "dataset" / "national_risk_index_2020" / "usa.csv"
|
data_path / "dataset" / "national_risk_index" / "usa.csv"
|
||||||
)
|
)
|
||||||
assert output_file_path == expected_output_file_path
|
assert output_file_path == expected_output_file_path
|
||||||
|
|
Loading…
Add table
Reference in a new issue