mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 07:11:16 -07:00
Issue 970: reverse percentiles for AMI and life expectancy (#1018)
* switching to low * fixing score-etl-post * updating comments * fixing comparison * create separate field for clarity * comment fix * removing healthy food * fixing bug in score post * running black and adding comment * Update pickles and add helpful notes to README Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
This commit is contained in:
parent
24bac56d9e
commit
7fcecaee42
11 changed files with 144 additions and 100 deletions
|
@ -81,7 +81,7 @@ TILES_SCORE_COLUMNS = [
|
|||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
@ -89,7 +89,7 @@ TILES_SCORE_COLUMNS = [
|
|||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
@ -115,7 +115,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
@ -123,7 +123,7 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
@ -137,7 +137,6 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
|
|||
field_names.DIABETES_FIELD,
|
||||
field_names.ASTHMA_FIELD,
|
||||
field_names.HEART_DISEASE_FIELD,
|
||||
field_names.LIFE_EXPECTANCY_FIELD,
|
||||
field_names.TRAFFIC_FIELD,
|
||||
field_names.FEMA_RISK_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD,
|
||||
|
@ -149,11 +148,11 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
|
|||
field_names.TOTAL_POP_FIELD,
|
||||
]
|
||||
|
||||
# For every indicator above, we want to include percentile and min-max normalized variants also
|
||||
# For every indicator above, we want to include percentile also.
|
||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
|
||||
pd.core.common.flatten(
|
||||
[
|
||||
[p, f"{p} (percentile)"]
|
||||
[p, f"{p}{field_names.PERCENTILE_FIELD_SUFFIX}"]
|
||||
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
|
||||
]
|
||||
)
|
||||
|
@ -165,8 +164,15 @@ DOWNLOADABLE_SCORE_COLUMNS = [
|
|||
field_names.COUNTY_FIELD,
|
||||
field_names.STATE_FIELD,
|
||||
field_names.SCORE_G_COMMUNITIES,
|
||||
# Note: the reverse percentile fields get moved down here because
|
||||
# we put the raw value in the download along with the *reversed* percentile.
|
||||
# All other fields we put in f"{field_name}" and
|
||||
# f"{field_name}{field_names.PERCENTILE_FIELD_SUFFIX}", which doesn't work for the
|
||||
# reversed percentile fields.
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LIFE_EXPECTANCY_FIELD,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
|
||||
]
|
||||
|
|
|
@ -404,9 +404,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.POVERTY_LESS_THAN_150_FPL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||
field_names.AMI_FIELD,
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||
field_names.MEDIAN_INCOME_FIELD,
|
||||
field_names.LIFE_EXPECTANCY_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD,
|
||||
field_names.FEMA_RISK_FIELD,
|
||||
field_names.URBAN_HEURISTIC_FIELD,
|
||||
|
@ -439,7 +437,6 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
|
||||
field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||
field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009,
|
||||
field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
||||
field_names.EXTREME_HEAT_FIELD,
|
||||
field_names.HEALTHY_FOOD_FIELD,
|
||||
field_names.IMPENETRABLE_SURFACES_FIELD,
|
||||
|
@ -468,7 +465,19 @@ class ScoreETL(ExtractTransformLoad):
|
|||
ReversePercentile(
|
||||
field_name=field_names.READING_FIELD,
|
||||
low_field_name=field_names.LOW_READING_FIELD,
|
||||
)
|
||||
),
|
||||
ReversePercentile(
|
||||
field_name=field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||
low_field_name=field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||
),
|
||||
ReversePercentile(
|
||||
field_name=field_names.LIFE_EXPECTANCY_FIELD,
|
||||
low_field_name=field_names.LOW_LIFE_EXPECTANCY_FIELD,
|
||||
),
|
||||
ReversePercentile(
|
||||
field_name=field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
||||
low_field_name=field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009,
|
||||
),
|
||||
]
|
||||
|
||||
columns_to_keep = (
|
||||
|
@ -505,10 +514,6 @@ class ScoreETL(ExtractTransformLoad):
|
|||
|
||||
max_value = df_copy[numeric_column].max(skipna=True)
|
||||
|
||||
logger.info(
|
||||
f"For data set {numeric_column}, the min value is {min_value} and the max value is {max_value}."
|
||||
)
|
||||
|
||||
df_copy[f"{numeric_column}{field_names.MIN_MAX_FIELD_SUFFIX}"] = (
|
||||
df_copy[numeric_column] - min_value
|
||||
) / (max_value - min_value)
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue