mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
Changing LHE in tiles to a boolean (#1767)
also includes merging / clean up of the release
This commit is contained in:
parent
daf188c1f3
commit
bbb5bbc60a
5 changed files with 18 additions and 89 deletions
|
@ -342,8 +342,6 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
# Island areas HS degree attainment rate
|
# Island areas HS degree attainment rate
|
||||||
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
|
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009,
|
||||||
field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
|
|
||||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
|
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
|
||||||
field_names.COLLEGE_ATTENDANCE_FIELD,
|
field_names.COLLEGE_ATTENDANCE_FIELD,
|
||||||
|
|
|
@ -274,26 +274,6 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
E.g., "PM2.5 exposure (percentile)".
|
E.g., "PM2.5 exposure (percentile)".
|
||||||
This will be for the entire country.
|
This will be for the entire country.
|
||||||
|
|
||||||
For an "apples-to-apples" comparison of urban tracts to other urban tracts,
|
|
||||||
and compare rural tracts to other rural tracts.
|
|
||||||
|
|
||||||
This percentile will be created and returned as
|
|
||||||
f"{output_column_name_root}{field_names.PERCENTILE_URBAN_RURAL_FIELD_SUFFIX}".
|
|
||||||
E.g., "PM2.5 exposure (percentile urban/rural)".
|
|
||||||
This field exists for every tract, but for urban tracts this value will be the
|
|
||||||
percentile compared to other urban tracts, and for rural tracts this value
|
|
||||||
will be the percentile compared to other rural tracts.
|
|
||||||
|
|
||||||
Specific methodology:
|
|
||||||
1. Decide a methodology for confirming whether a tract counts as urban or
|
|
||||||
rural. Currently in the codebase, we use Geocorr to identify the % rural of
|
|
||||||
a tract, and mark the tract as rural if the percentage is >50% and urban
|
|
||||||
otherwise. This may or may not be the right methodology.
|
|
||||||
2. Once tracts are marked as urban or rural, create one percentile rank
|
|
||||||
that only ranks urban tracts, and one percentile rank that only ranks rural
|
|
||||||
tracts.
|
|
||||||
3. Combine into a single field.
|
|
||||||
|
|
||||||
`output_column_name_root` is different from `input_column_name` to enable the
|
`output_column_name_root` is different from `input_column_name` to enable the
|
||||||
reverse percentile use case. In that use case, `input_column_name` may be
|
reverse percentile use case. In that use case, `input_column_name` may be
|
||||||
something like "3rd grade reading proficiency" and `output_column_name_root`
|
something like "3rd grade reading proficiency" and `output_column_name_root`
|
||||||
|
@ -413,7 +393,6 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.NPL_FIELD,
|
field_names.NPL_FIELD,
|
||||||
field_names.WASTEWATER_FIELD,
|
field_names.WASTEWATER_FIELD,
|
||||||
field_names.LEAD_PAINT_FIELD,
|
field_names.LEAD_PAINT_FIELD,
|
||||||
field_names.UST_FIELD,
|
|
||||||
field_names.UNDER_5_FIELD,
|
field_names.UNDER_5_FIELD,
|
||||||
field_names.OVER_64_FIELD,
|
field_names.OVER_64_FIELD,
|
||||||
field_names.LINGUISTIC_ISO_FIELD,
|
field_names.LINGUISTIC_ISO_FIELD,
|
||||||
|
@ -436,6 +415,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.EXTREME_HEAT_FIELD,
|
field_names.EXTREME_HEAT_FIELD,
|
||||||
field_names.HEALTHY_FOOD_FIELD,
|
field_names.HEALTHY_FOOD_FIELD,
|
||||||
field_names.IMPENETRABLE_SURFACES_FIELD,
|
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||||
|
field_names.UST_FIELD,
|
||||||
# We have to pass this boolean here in order to include it in ag value loss percentiles.
|
# We have to pass this boolean here in order to include it in ag value loss percentiles.
|
||||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
||||||
|
@ -488,7 +468,13 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
df_copy = df[columns_to_keep].copy()
|
df_copy = df[columns_to_keep].copy()
|
||||||
|
|
||||||
df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric)
|
assert len(numeric_columns) == len(
|
||||||
|
set(numeric_columns)
|
||||||
|
), "You have a double-entered column in the numeric columns list"
|
||||||
|
|
||||||
|
df_copy[numeric_columns] = df_copy[numeric_columns].apply(
|
||||||
|
pd.to_numeric
|
||||||
|
)
|
||||||
|
|
||||||
# Convert all columns to numeric and do math
|
# Convert all columns to numeric and do math
|
||||||
# Note that we have a few special conditions here, that we handle explicitly.
|
# Note that we have a few special conditions here, that we handle explicitly.
|
||||||
|
@ -534,24 +520,6 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
drop_tracts=drop_tracts,
|
drop_tracts=drop_tracts,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Min-max normalization:
|
|
||||||
# (
|
|
||||||
# Observed value
|
|
||||||
# - minimum of all values
|
|
||||||
# )
|
|
||||||
# divided by
|
|
||||||
# (
|
|
||||||
# Maximum of all values
|
|
||||||
# - minimum of all values
|
|
||||||
# )
|
|
||||||
min_value = df_copy[numeric_column].min(skipna=True)
|
|
||||||
|
|
||||||
max_value = df_copy[numeric_column].max(skipna=True)
|
|
||||||
|
|
||||||
df_copy[f"{numeric_column}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
|
|
||||||
df_copy[numeric_column] - min_value
|
|
||||||
) / (max_value - min_value)
|
|
||||||
|
|
||||||
# Create reversed percentiles for these fields
|
# Create reversed percentiles for these fields
|
||||||
for reverse_percentile in reverse_percentiles:
|
for reverse_percentile in reverse_percentiles:
|
||||||
# Calculate reverse percentiles
|
# Calculate reverse percentiles
|
||||||
|
|
|
@ -432,7 +432,6 @@ HAZARDOUS_WASTE_LOW_INCOME_FIELD = (
|
||||||
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?"
|
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?"
|
||||||
UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?"
|
UST_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for leaky underground storage tanks and is low income?"
|
||||||
|
|
||||||
|
|
||||||
# Health Burdens
|
# Health Burdens
|
||||||
DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income?"
|
DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income?"
|
||||||
ASTHMA_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for asthma and is low income?"
|
ASTHMA_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for asthma and is low income?"
|
||||||
|
|
|
@ -435,6 +435,11 @@ class ScoreNarwhal(Score):
|
||||||
# poverty level and has a low percent of higher ed students
|
# poverty level and has a low percent of higher ed students
|
||||||
# Source: Census's American Community Survey
|
# Source: Census's American Community Survey
|
||||||
|
|
||||||
|
eligibility_columns = [
|
||||||
|
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
|
||||||
|
field_names.UST_LOW_INCOME_FIELD,
|
||||||
|
]
|
||||||
|
|
||||||
self.df[field_names.WASTEWATER_PCTILE_THRESHOLD] = (
|
self.df[field_names.WASTEWATER_PCTILE_THRESHOLD] = (
|
||||||
self.df[
|
self.df[
|
||||||
field_names.WASTEWATER_FIELD
|
field_names.WASTEWATER_FIELD
|
||||||
|
@ -457,28 +462,17 @@ class ScoreNarwhal(Score):
|
||||||
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
|
|
||||||
[
|
|
||||||
field_names.WASTEWATER_PCTILE_THRESHOLD,
|
|
||||||
field_names.UST_PCTILE_THRESHOLD,
|
|
||||||
]
|
|
||||||
].max(axis=1)
|
|
||||||
|
|
||||||
self._increment_total_eligibility_exceeded(
|
self._increment_total_eligibility_exceeded(
|
||||||
[
|
eligibility_columns,
|
||||||
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
|
|
||||||
field_names.UST_LOW_INCOME_FIELD,
|
|
||||||
],
|
|
||||||
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
|
||||||
)
|
)
|
||||||
|
|
||||||
return self.df[
|
self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
|
||||||
[
|
eligibility_columns
|
||||||
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
|
|
||||||
field_names.UST_LOW_INCOME_FIELD,
|
|
||||||
]
|
|
||||||
].any(axis=1)
|
].any(axis=1)
|
||||||
|
|
||||||
|
return self.df[field_names.WATER_THRESHOLD_EXCEEDED]
|
||||||
|
|
||||||
def _health_factor(self) -> bool:
|
def _health_factor(self) -> bool:
|
||||||
# In Xth percentile or above for diabetes (Source: CDC Places)
|
# In Xth percentile or above for diabetes (Source: CDC Places)
|
||||||
# or
|
# or
|
||||||
|
|
|
@ -28,7 +28,6 @@ class ScoreRunner:
|
||||||
self.df = ScoreA(df=self.df).add_columns()
|
self.df = ScoreA(df=self.df).add_columns()
|
||||||
self.df = ScoreB(df=self.df).add_columns()
|
self.df = ScoreB(df=self.df).add_columns()
|
||||||
self.df = ScoreC(df=self.df).add_columns()
|
self.df = ScoreC(df=self.df).add_columns()
|
||||||
self.df = ScoreD(df=self.df).add_columns()
|
|
||||||
self.df = ScoreF(df=self.df).add_columns()
|
self.df = ScoreF(df=self.df).add_columns()
|
||||||
self.df = ScoreG(df=self.df).add_columns()
|
self.df = ScoreG(df=self.df).add_columns()
|
||||||
self.df = ScoreH(df=self.df).add_columns()
|
self.df = ScoreH(df=self.df).add_columns()
|
||||||
|
@ -38,33 +37,4 @@ class ScoreRunner:
|
||||||
self.df = ScoreM(df=self.df).add_columns()
|
self.df = ScoreM(df=self.df).add_columns()
|
||||||
self.df = ScoreNarwhal(df=self.df).add_columns()
|
self.df = ScoreNarwhal(df=self.df).add_columns()
|
||||||
|
|
||||||
# TODO do this with each score instead of in a bundle
|
|
||||||
# Create percentiles for these index scores
|
|
||||||
self.df = self._add_score_percentiles()
|
|
||||||
|
|
||||||
return self.df
|
|
||||||
|
|
||||||
def _add_score_percentiles(self) -> pd.DataFrame:
|
|
||||||
logger.info("Adding Score Percentiles")
|
|
||||||
for score_field in [
|
|
||||||
field_names.SCORE_A,
|
|
||||||
field_names.SCORE_B,
|
|
||||||
field_names.SCORE_C,
|
|
||||||
field_names.SCORE_D,
|
|
||||||
field_names.SCORE_E,
|
|
||||||
]:
|
|
||||||
self.df[
|
|
||||||
f"{score_field}{field_names.PERCENTILE_FIELD_SUFFIX}"
|
|
||||||
] = self.df[score_field].rank(pct=True)
|
|
||||||
|
|
||||||
for threshold in [0.25, 0.3, 0.35, 0.4]:
|
|
||||||
fraction_converted_to_percent = int(100 * threshold)
|
|
||||||
self.df[
|
|
||||||
f"{score_field} (top {fraction_converted_to_percent}th percentile)"
|
|
||||||
] = (
|
|
||||||
self.df[
|
|
||||||
f"{score_field}{field_names.PERCENTILE_FIELD_SUFFIX}"
|
|
||||||
]
|
|
||||||
>= 1 - threshold
|
|
||||||
)
|
|
||||||
return self.df
|
return self.df
|
||||||
|
|
Loading…
Add table
Reference in a new issue