undo score module

This commit is contained in:
Saran Ahluwalia 2022-01-12 18:46:07 -05:00
parent 159516df80
commit 321dadf6d5
2 changed files with 79 additions and 120 deletions

View file

@ -63,13 +63,6 @@ SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
# Column subsets
CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"]
# Substrings (prefixes/suffixes) that mark percent and percentile columns for rounding
PERCENT_PREFIXES_SUFFIXES = [
"Percent",
"Percentage",
field_names.PERCENTILE_FIELD_SUFFIX,
]
TILES_ROUND_NUM_DECIMALS = 2
# Tiles data: full field name, tile index name
TILES_SCORE_COLUMNS = {
@ -198,88 +191,91 @@ DOWNLOADABLE_SCORE_COLUMNS = [
field_names.GEOID_TRACT_FIELD,
field_names.COUNTY_FIELD,
field_names.STATE_FIELD,
field_names.THRESHOLD_COUNT,
field_names.SCORE_L_COMMUNITIES,
field_names.TOTAL_POP_FIELD,
field_names.FPL_200_SERIES,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.ENERGY_BURDEN_FIELD,
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.PM25_FIELD,
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.DIESEL_FIELD,
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TRAFFIC_FIELD,
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HOUSING_BURDEN_FIELD,
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LEAD_PAINT_FIELD,
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.MEDIAN_HOUSE_VALUE_FIELD,
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TSDF_FIELD,
field_names.SUPERFUND_LOW_INCOME_FIELD,
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.NPL_FIELD,
field_names.RMP_LOW_INCOME_FIELD,
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.RMP_FIELD,
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.WASTEWATER_FIELD,
field_names.ASTHMA_LOW_INCOME_FIELD,
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.ASTHMA_FIELD,
field_names.DIABETES_LOW_INCOME_FIELD,
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.DIABETES_FIELD,
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HEART_DISEASE_FIELD,
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LIFE_EXPECTANCY_FIELD,
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LINGUISTIC_ISO_FIELD,
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.UNEMPLOYMENT_FIELD,
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
field_names.ENERGY_BURDEN_FIELD,
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
field_names.PM25_FIELD,
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
field_names.DIESEL_FIELD,
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
field_names.TRAFFIC_FIELD,
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
field_names.HOUSING_BURDEN_FIELD,
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
field_names.LEAD_PAINT_FIELD,
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
field_names.MEDIAN_HOUSE_VALUE_FIELD,
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TSDF_FIELD,
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
field_names.NPL_FIELD,
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.SUPERFUND_LOW_INCOME_FIELD,
field_names.RMP_FIELD,
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.RMP_LOW_INCOME_FIELD,
field_names.WASTEWATER_FIELD,
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
field_names.ASTHMA_FIELD,
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.ASTHMA_LOW_INCOME_FIELD,
field_names.DIABETES_FIELD,
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.DIABETES_LOW_INCOME_FIELD,
field_names.HEART_DISEASE_FIELD,
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
field_names.LIFE_EXPECTANCY_FIELD,
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
field_names.LINGUISTIC_ISO_FIELD,
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
field_names.UNEMPLOYMENT_FIELD,
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
field_names.HIGH_SCHOOL_ED_FIELD,
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LOW_HS_EDUCATION_FIELD,
field_names.THRESHOLD_COUNT,
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
field_names.COMBINED_UNEMPLOYMENT_2010,
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
]

View file

@ -129,7 +129,7 @@ class PostScoreETL(ExtractTransformLoad):
new_df = initial_states_df.rename(
columns={
"fips": "State Code",
"state_name": field_names.STATE_FIELD,
"state_name": "State Name",
"state_abbreviation": "State Abbreviation",
}
)
@ -206,9 +206,7 @@ class PostScoreETL(ExtractTransformLoad):
tiles_score_column_titles = list(constants.TILES_SCORE_COLUMNS.keys())
# filter the columns on full score
score_tiles = score_county_state_merged_df[
tiles_score_column_titles
].copy()
score_tiles = score_county_state_merged_df[tiles_score_column_titles]
score_tiles[constants.TILES_SCORE_FLOAT_COLUMNS] = score_tiles[
constants.TILES_SCORE_FLOAT_COLUMNS
@ -240,44 +238,9 @@ class PostScoreETL(ExtractTransformLoad):
def _create_downloadable_data(
self, score_county_state_merged_df: pd.DataFrame
) -> pd.DataFrame:
df = score_county_state_merged_df[
return score_county_state_merged_df[
constants.DOWNLOADABLE_SCORE_COLUMNS
].copy()
float_columns = df.select_dtypes(include=["float64"]).columns
# scale percent/percentile float columns to 0-100 and floor them to a fixed number of decimals
percent_target_columns = []
for x in float_columns:
for col in constants.PERCENT_PREFIXES_SUFFIXES:
if col in x:
percent_target_columns.append(x)
df[percent_target_columns] = df[percent_target_columns].apply(
func=lambda series: floor_series(
series=series * 100,
number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
)
)
# # convert non-percentile float columns
# non_percentile_float_columns = [
# x
# for x in float_columns
# if x not in constants.PERCENT_PREFIXES_SUFFIXES
# ]
# df[non_percentile_float_columns] = df[
# non_percentile_float_columns
# ].apply(
# func=lambda series: floor_series(
# series=series,
# number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
# ),
# axis=0,
# )
return df
]
def transform(self) -> None:
logger.info("Transforming data sources for Score + County CSVs")
@ -334,7 +297,7 @@ class PostScoreETL(ExtractTransformLoad):
# Rename score column
downloadable_df_copy = downloadable_df.rename(
columns={
field_names.SCORE_L_COMMUNITIES: "Identified as disadvantaged (v0.1)"
field_names.SCORE_L_COMMUNITIES: "Community of focus (v0.1)"
},
inplace=False,
)