mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 09:11:17 -07:00
Adding first street foundation data (#1823)
Adding FSF flood and wildfire risk datasets to the score.
This commit is contained in:
parent
ebac552d75
commit
5e378aea81
21 changed files with 430 additions and 82 deletions
|
@ -0,0 +1,28 @@
|
|||
# How to add variables to a score
|
||||
|
||||
So, there's a variable you want to add to the score! Once you have the data source created in `etl/sources`, what should you do? There are 6 steps across a minimum of 7 files.
|
||||
|
||||
__Updating `field_names.py`__
|
||||
For each indicator, you (usually) need to define three variables that are used in other files.
|
||||
- raw variable: this is the name of the variable's raw data, not scaled into a percentile
|
||||
- variable with threshold exceeded: this is a boolean for whether the tract meets the threshold for the indicator alone
|
||||
- variable with threshold exceeded and socioeconomic criterion exceeded: this is a boolean for whether the tract will be a disadvantaged community (DAC) based on the socioeconomic criterion and the indicator
|
||||
|
||||
__Updating `etl_score.py`__
|
||||
- add the dataframe from the source to the ScoreETL constructor and add a line to read the dataframe into memory
|
||||
- then, add the dataframe into the list of `census_tract_dfs`
|
||||
- finally, add columns you want to include as percentiles to the `numeric_columns` list
|
||||
|
||||
__Updating `score_narwhal.py`__ (or whatever the score file is)
|
||||
- per factor, add the columns that show that both the threshold and the socioeconomic criterion are exceeded to the `eligibility_columns` list
|
||||
- construct all columns specified in `field_names`, using the factor method as a guide
|
||||
|
||||
__Updating `constants.py`__
|
||||
- add the columns' short names to the tiles dictionary (using Vim's UI sheet as a guide for the short names)
|
||||
- add the columns that contain floats to the list of floats
|
||||
|
||||
__Updating `csv.yml` and `excel.yml`__
|
||||
- make sure each column you want to be in the downloadable files is listed here
|
||||
|
||||
__Update the fixtures__
|
||||
Follow the instructions in the repo to modify tiles so that `test_etl_post.py` doesn't fail. Then, confirm the results.
|
|
@ -1,8 +1,5 @@
|
|||
# Suffixes
|
||||
PERCENTILE_FIELD_SUFFIX = " (percentile)"
|
||||
PERCENTILE_URBAN_RURAL_FIELD_SUFFIX = " (percentile urban/rural)"
|
||||
MIN_MAX_FIELD_SUFFIX = " (min-max normalized)"
|
||||
TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)"
|
||||
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
|
||||
|
||||
# Geographic field names
|
||||
|
@ -11,38 +8,6 @@ STATE_FIELD = "State/Territory"
|
|||
COUNTY_FIELD = "County Name"
|
||||
|
||||
# Score file field names
|
||||
SCORE_A = "Score A"
|
||||
SCORE_B = "Score B"
|
||||
SCORE_C = "Score C"
|
||||
C_SOCIOECONOMIC = "Socioeconomic Factors"
|
||||
C_SENSITIVE = "Sensitive populations"
|
||||
C_ENVIRONMENTAL = "Environmental effects"
|
||||
C_EXPOSURES = "Exposures"
|
||||
SCORE_D = "Score D"
|
||||
SCORE_E = "Score E"
|
||||
SCORE_F_COMMUNITIES = "Score F (communities)"
|
||||
SCORE_G = "Score G"
|
||||
SCORE_G_COMMUNITIES = "Score G (communities)"
|
||||
SCORE_H = "Score H"
|
||||
SCORE_H_COMMUNITIES = "Score H (communities)"
|
||||
SCORE_I = "Score I"
|
||||
SCORE_I_COMMUNITIES = "Score I (communities)"
|
||||
SCORE_K = "NMTC (communities)"
|
||||
SCORE_K_COMMUNITIES = "Score K (communities)"
|
||||
|
||||
# Definition L fields
|
||||
SCORE_L = "Definition L"
|
||||
SCORE_L_COMMUNITIES = "Definition L (communities)"
|
||||
L_CLIMATE = "Climate Factor (Definition L)"
|
||||
L_ENERGY = "Energy Factor (Definition L)"
|
||||
L_TRANSPORTATION = "Transportation Factor (Definition L)"
|
||||
L_HOUSING = "Housing Factor (Definition L)"
|
||||
L_POLLUTION = "Pollution Factor (Definition L)"
|
||||
L_WATER = "Water Factor (Definition L)"
|
||||
L_HEALTH = "Health Factor (Definition L)"
|
||||
L_WORKFORCE = "Workforce Factor (Definition L)"
|
||||
L_NON_WORKFORCE = "Any Non-Workforce Factor (Definition L)"
|
||||
|
||||
# Definition M fields
|
||||
SCORE_M = "Definition M"
|
||||
SCORE_M_COMMUNITIES = "Definition M (communities)"
|
||||
|
@ -85,25 +50,6 @@ WORKFORCE_SOCIO_INDICATORS_EXCEEDED = (
|
|||
"Both workforce socioeconomic indicators exceeded"
|
||||
)
|
||||
|
||||
# For now, these are not used. Will delete after following up with Vim.
|
||||
POLLUTION_SOCIO_INDICATORS_EXCEEDED = (
|
||||
"Both pollution socioeconomic indicators exceeded"
|
||||
)
|
||||
CLIMATE_SOCIO_INDICATORS_EXCEEDED = (
|
||||
"Both climate socioeconomic indicators exceeded"
|
||||
)
|
||||
ENERGY_SOCIO_INDICATORS_EXCEEDED = (
|
||||
"Both energy socioeconomic indicators exceeded"
|
||||
)
|
||||
HOUSING_SOCIO_INDICATORS_EXCEEDED = (
|
||||
"Both housing socioeconomic indicators exceeded"
|
||||
)
|
||||
WATER_SOCIO_INDICATORS_EXCEEDED = "Both water socioeconomic indicators exceeded"
|
||||
|
||||
HEALTH_SOCIO_INDICATORS_EXCEEDED = (
|
||||
"Both health socioeconomic indicators exceeded"
|
||||
)
|
||||
|
||||
# Poverty / Income
|
||||
POVERTY_FIELD = "Poverty (Less than 200% of federal poverty line)"
|
||||
|
||||
|
@ -156,6 +102,8 @@ EXPECTED_AGRICULTURE_LOSS_RATE_FIELD = (
|
|||
EXPECTED_POPULATION_LOSS_RATE_FIELD = (
|
||||
"Expected population loss rate (Natural Hazards Risk Index)"
|
||||
)
|
||||
FUTURE_FLOOD_RISK_FIELD = "Share of properties at risk of flood in 30 years"
|
||||
FUTURE_WILDFIRE_RISK_FIELD = "Share of properties at risk of fire in 30 years"
|
||||
|
||||
# Environment
|
||||
DIESEL_FIELD = "Diesel particulate matter exposure"
|
||||
|
@ -408,6 +356,15 @@ EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = (
|
|||
)
|
||||
AGRICULTURAL_VALUE_BOOL_FIELD = "Contains agricultural value"
|
||||
|
||||
HIGH_FUTURE_FLOOD_RISK_LOW_INCOME_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for share of "
|
||||
"properties at risk of flood in 30 years and is low income?"
|
||||
)
|
||||
HIGH_FUTURE_WILDFIRE_RISK_LOW_INCOME_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for "
|
||||
"share of properties at risk of fire in 30 years and is low income?"
|
||||
)
|
||||
|
||||
# Clean energy and efficiency
|
||||
PM25_EXPOSURE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for PM2.5 exposure and is low income?"
|
||||
ENERGY_BURDEN_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for energy burden and is low income?"
|
||||
|
@ -670,6 +627,16 @@ LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD = (
|
|||
UNEMPLOYMENT_PCTILE_THRESHOLD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for unemployment"
|
||||
)
|
||||
HIGH_FUTURE_FLOOD_RISK_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for share of properties "
|
||||
"at risk of flood in 30 years"
|
||||
)
|
||||
HIGH_FUTURE_WILDFIRE_RISK_FIELD = (
|
||||
f"Greater than or equal to the {PERCENTILE}th percentile for share of properties "
|
||||
"at risk of fire in 30 years"
|
||||
)
|
||||
|
||||
|
||||
LINGUISTIC_ISOLATION_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for households in linguistic isolation"
|
||||
POVERTY_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
|
||||
LOW_MEDIAN_INCOME_PCTILE_THRESHOLD = (
|
||||
|
|
|
@ -122,8 +122,13 @@ class ScoreNarwhal(Score):
|
|||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.HIGH_FUTURE_FLOOD_RISK_LOW_INCOME_FIELD,
|
||||
field_names.HIGH_FUTURE_WILDFIRE_RISK_LOW_INCOME_FIELD,
|
||||
]
|
||||
|
||||
# TODO: When we refactor this... it's the same code over and over and over again
|
||||
# We should make a function, _get_all_columns(), that returns all three of these columns
|
||||
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
] = (
|
||||
|
@ -152,6 +157,22 @@ class ScoreNarwhal(Score):
|
|||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.HIGH_FUTURE_FLOOD_RISK_FIELD] = (
|
||||
self.df[
|
||||
field_names.FUTURE_FLOOD_RISK_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD] = (
|
||||
self.df[
|
||||
field_names.FUTURE_WILDFIRE_RISK_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||
)
|
||||
|
||||
self.df[field_names.CLIMATE_THRESHOLD_EXCEEDED] = (
|
||||
self.df[
|
||||
field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
|
@ -162,6 +183,8 @@ class ScoreNarwhal(Score):
|
|||
| self.df[
|
||||
field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD
|
||||
]
|
||||
| self.df[field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD]
|
||||
| self.df[field_names.HIGH_FUTURE_FLOOD_RISK_FIELD]
|
||||
)
|
||||
|
||||
self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
|
||||
|
@ -183,6 +206,16 @@ class ScoreNarwhal(Score):
|
|||
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||
)
|
||||
|
||||
self.df[field_names.HIGH_FUTURE_FLOOD_RISK_LOW_INCOME_FIELD] = (
|
||||
self.df[field_names.HIGH_FUTURE_FLOOD_RISK_FIELD]
|
||||
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||
)
|
||||
|
||||
self.df[field_names.HIGH_FUTURE_WILDFIRE_RISK_LOW_INCOME_FIELD] = (
|
||||
self.df[field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD]
|
||||
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||
)
|
||||
|
||||
self._increment_total_eligibility_exceeded(
|
||||
climate_eligibility_columns,
|
||||
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
||||
|
@ -865,11 +898,6 @@ class ScoreNarwhal(Score):
|
|||
|
||||
self.df[field_names.THRESHOLD_COUNT] = 0
|
||||
|
||||
# TODO: move this inside of
|
||||
# `_create_low_income_and_low_college_attendance_threshold`
|
||||
# and change the return signature of that method.
|
||||
# Create a standalone field that captures the college attendance boolean
|
||||
# threshold.
|
||||
self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (
|
||||
self.df[
|
||||
# UPDATE: Pull the imputed poverty statistic
|
||||
|
|
|
@ -1,13 +1,4 @@
|
|||
import pandas as pd
|
||||
from data_pipeline.score.score_a import ScoreA
|
||||
from data_pipeline.score.score_b import ScoreB
|
||||
from data_pipeline.score.score_c import ScoreC
|
||||
from data_pipeline.score.score_f import ScoreF
|
||||
from data_pipeline.score.score_g import ScoreG
|
||||
from data_pipeline.score.score_h import ScoreH
|
||||
from data_pipeline.score.score_i import ScoreI
|
||||
from data_pipeline.score.score_k import ScoreK
|
||||
from data_pipeline.score.score_l import ScoreL
|
||||
from data_pipeline.score.score_m import ScoreM
|
||||
from data_pipeline.score.score_narwhal import ScoreNarwhal
|
||||
|
||||
|
@ -23,15 +14,6 @@ class ScoreRunner:
|
|||
|
||||
def calculate_scores(self) -> pd.DataFrame:
|
||||
# Index scores
|
||||
self.df = ScoreA(df=self.df).add_columns()
|
||||
self.df = ScoreB(df=self.df).add_columns()
|
||||
self.df = ScoreC(df=self.df).add_columns()
|
||||
self.df = ScoreF(df=self.df).add_columns()
|
||||
self.df = ScoreG(df=self.df).add_columns()
|
||||
self.df = ScoreH(df=self.df).add_columns()
|
||||
self.df = ScoreI(df=self.df).add_columns()
|
||||
self.df = ScoreK(df=self.df).add_columns()
|
||||
self.df = ScoreL(df=self.df).add_columns()
|
||||
self.df = ScoreM(df=self.df).add_columns()
|
||||
self.df = ScoreNarwhal(df=self.df).add_columns()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue