mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 13:41:17 -07:00
Imputing income using geographic neighbors (#1559)
Imputes the income field, with a light refactor. Needs more refactoring and more tests (I only spot-checked). The next ticket will check and address this, but a lot of the "narwhal" architecture is here.
This commit is contained in:
parent
485a9a8316
commit
f047ca9d83
16 changed files with 1245 additions and 81 deletions
|
@ -5,6 +5,9 @@ from data_pipeline.config import settings
|
|||
|
||||
from data_pipeline.score import field_names
|
||||
|
||||
## Note: to keep the map porting the "right" fields, we are keeping the descriptors the same.
|
||||
|
||||
|
||||
# Base Paths
|
||||
DATA_PATH = Path(settings.APP_ROOT) / "data"
|
||||
TMP_PATH = DATA_PATH / "tmp"
|
||||
|
@ -179,6 +182,8 @@ TILES_SCORE_COLUMNS = {
|
|||
+ field_names.PERCENTILE_FIELD_SUFFIX: "P100_PFS",
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "P200_PFS",
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "P200_I_PFS",
|
||||
field_names.LEAD_PAINT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LPF_PFS",
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "NPL_PFS",
|
||||
|
@ -198,7 +203,8 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.M_HOUSING: "M_HSG",
|
||||
field_names.M_POLLUTION: "M_PLN",
|
||||
field_names.M_HEALTH: "M_HLTH",
|
||||
field_names.SCORE_M_COMMUNITIES: "SM_C",
|
||||
# temporarily update this so that it's the Narwhal score that gets visualized on the map
|
||||
field_names.SCORE_N_COMMUNITIES: "SM_C",
|
||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
|
||||
|
@ -283,7 +289,7 @@ TILES_SCORE_COLUMNS = {
|
|||
## Low high school and low higher ed for t&wd
|
||||
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
|
||||
## FPL 200 and low higher ed for all others
|
||||
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "M_EBSI",
|
||||
field_names.FPL_200_SERIES: "M_EBSI",
|
||||
}
|
||||
|
||||
# columns to round floats to 2 decimals
|
||||
|
@ -311,6 +317,8 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
@ -332,7 +340,6 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD,
|
||||
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
|
||||
field_names.COLLEGE_ATTENDANCE_FIELD,
|
||||
]
|
||||
|
|
|
@ -405,6 +405,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
|
||||
)
|
||||
|
||||
# QQ: why don't we just filter to the numeric columns by type?
|
||||
numeric_columns = [
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
|
@ -458,6 +459,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||
# We have to pass this boolean here in order to include it in ag value loss percentiles.
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
||||
]
|
||||
|
||||
non_numeric_columns = [
|
||||
|
|
|
@ -29,7 +29,7 @@ from . import constants
|
|||
logger = get_module_logger(__name__)
|
||||
|
||||
# Define the DAC variable
|
||||
DISADVANTAGED_COMMUNITIES_FIELD = field_names.SCORE_M_COMMUNITIES
|
||||
DISADVANTAGED_COMMUNITIES_FIELD = field_names.SCORE_N_COMMUNITIES
|
||||
|
||||
|
||||
class PostScoreETL(ExtractTransformLoad):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue