checkpoint

This commit is contained in:
Jorge Escobar 2022-01-07 16:47:42 -05:00
parent 7029a0dff5
commit a995542e70
5 changed files with 61 additions and 37 deletions

View file

@ -13,6 +13,15 @@
"score-run" "score-run"
] ]
}, },
{
"name": "Generate Score Post",
"type": "python",
"request": "launch",
"module": "data_pipeline.application",
"args": [
"generate-score-post"
]
},
{ {
"name": "Data Cleanup", "name": "Data Cleanup",
"type": "python", "type": "python",

View file

@ -129,7 +129,7 @@ class PostScoreETL(ExtractTransformLoad):
new_df = initial_states_df.rename( new_df = initial_states_df.rename(
columns={ columns={
"fips": "State Code", "fips": "State Code",
"state_name": "State Name", "state_name": field_names.STATE_FIELD,
"state_abbreviation": "State Abbreviation", "state_abbreviation": "State Abbreviation",
} }
) )
@ -242,7 +242,19 @@ class PostScoreETL(ExtractTransformLoad):
) -> pd.DataFrame: ) -> pd.DataFrame:
df = score_county_state_merged_df[constants.DOWNLOADABLE_SCORE_COLUMNS] df = score_county_state_merged_df[constants.DOWNLOADABLE_SCORE_COLUMNS]
# rename fields float_columns = df.select_dtypes(include=["float64"])
# score_tiles[constants.TILES_SCORE_FLOAT_COLUMNS] = score_tiles[
# constants.TILES_SCORE_FLOAT_COLUMNS
# ].apply(
# func=lambda series: floor_series(
# series=series,
# number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
# ),
# axis=0,
# )
# [x for x in df.columns if field_names.PERCENTILE_FIELD_SUFFIX in x]
return df return df
@ -301,7 +313,7 @@ class PostScoreETL(ExtractTransformLoad):
# Rename score column # Rename score column
downloadable_df_copy = downloadable_df.rename( downloadable_df_copy = downloadable_df.rename(
columns={ columns={
field_names.SCORE_L_COMMUNITIES: "Community of focus (v0.1)" field_names.SCORE_L_COMMUNITIES: "Identified as disadvantaged (v0.1)"
}, },
inplace=False, inplace=False,
) )

View file

@ -23,7 +23,7 @@ class CensusACSETL(ExtractTransformLoad):
self.TOTAL_UNEMPLOYED_FIELD, self.TOTAL_UNEMPLOYED_FIELD,
self.TOTAL_IN_LABOR_FORCE, self.TOTAL_IN_LABOR_FORCE,
] ]
self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)" self.UNEMPLOYED_FIELD_NAME = "Unemployment (percent)"
self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)" self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = ( self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = (

View file

@ -74,7 +74,7 @@ class CensusACS2010ETL(ExtractTransformLoad):
self.EMPLOYMENT_COLLEGE_IN_LABOR_FORCE, self.EMPLOYMENT_COLLEGE_IN_LABOR_FORCE,
] ]
self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)" self.UNEMPLOYED_FIELD_NAME = "Unemployment (percent)"
self.POVERTY_FIELDS = [ self.POVERTY_FIELDS = [
"C17002_001E", # Estimate!!Total, "C17002_001E", # Estimate!!Total,

View file

@ -6,7 +6,7 @@ TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)"
# Geographic field names # Geographic field names
GEOID_TRACT_FIELD = "GEOID10_TRACT" GEOID_TRACT_FIELD = "GEOID10_TRACT"
STATE_FIELD = "State Name" STATE_FIELD = "State/Territory"
COUNTY_FIELD = "County Name" COUNTY_FIELD = "County Name"
# Score file field names # Score file field names
@ -83,15 +83,15 @@ EXPECTED_POPULATION_LOSS_RATE_FIELD = (
) )
# Environment # Environment
DIESEL_FIELD = "Diesel particulate matter" DIESEL_FIELD = "Diesel particulate matter exposure"
PM25_FIELD = "Particulate matter (PM2.5)" PM25_FIELD = "PM2.5 in the air"
OZONE_FIELD = "Ozone" OZONE_FIELD = "Ozone"
TRAFFIC_FIELD = "Traffic proximity and volume" TRAFFIC_FIELD = "Traffic proximity and volume"
LEAD_PAINT_FIELD = "Percent pre-1960s housing (lead paint indicator)" LEAD_PAINT_FIELD = "Percent pre-1960s housing (lead paint indicator)"
WASTEWATER_FIELD = "Wastewater discharge" WASTEWATER_FIELD = "Wastewater discharge"
AGGREGATION_POLLUTION_FIELD = "Pollution Burden" AGGREGATION_POLLUTION_FIELD = "Pollution Burden"
RMP_FIELD = "Proximity to Risk Management Plan (RMP) facilities" RMP_FIELD = "Proximity to Risk Management Plan (RMP) facilities"
TSDF_FIELD = "Proximity to TSDF sites" TSDF_FIELD = "Proximity to hazardous waste sites"
NPL_FIELD = "Proximity to NPL sites" NPL_FIELD = "Proximity to NPL sites"
AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk" AIR_TOXICS_CANCER_RISK_FIELD = "Air toxics cancer risk"
RESPIRATORY_HAZARD_FIELD = "Respiratory hazard index" RESPIRATORY_HAZARD_FIELD = "Respiratory hazard index"
@ -125,7 +125,7 @@ LOW_LIFE_EXPECTANCY_FIELD = "Low life expectancy"
# Other Demographics # Other Demographics
TOTAL_POP_FIELD = "Total population" TOTAL_POP_FIELD = "Total population"
UNEMPLOYMENT_FIELD = "Unemployed civilians (percent)" UNEMPLOYMENT_FIELD = "Unemployment (percent)"
LINGUISTIC_ISO_FIELD = "Linguistic isolation (percent)" LINGUISTIC_ISO_FIELD = "Linguistic isolation (percent)"
HOUSEHOLDS_LINGUISTIC_ISO_FIELD = ( HOUSEHOLDS_LINGUISTIC_ISO_FIELD = (
"Percent of households in linguistic isolation" "Percent of households in linguistic isolation"
@ -143,16 +143,14 @@ CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 = (
"Percentage households below 100% of federal poverty line in 2009" "Percentage households below 100% of federal poverty line in 2009"
) )
CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009 = "Percent individuals age 25 or over with less than high school degree in 2009" CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009 = "Percent individuals age 25 or over with less than high school degree in 2009"
CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 = ( CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 = "Unemployment (percent) in 2009"
"Unemployed civilians (percent) in 2009"
)
CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009" CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009"
CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = ( CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = (
"Median household income as a percent of territory median income in 2009" "Median household income as a percent of territory median income in 2009"
) )
LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = "Low median household income as a percent of territory median income in 2009" LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = "Low median household income as a percent of territory median income in 2009"
# Fields from 2010 ACS (loaded for comparison with the territories) # Fields from 2010 ACS (loaded for comparison with the territories)
CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployed civilians (percent) in 2010" CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployment (percent) in 2010"
CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = ( CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
"Percent of individuals less than 100% Federal Poverty Line in 2010" "Percent of individuals less than 100% Federal Poverty Line in 2010"
) )
@ -161,7 +159,9 @@ CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
COMBINED_CENSUS_TOTAL_POPULATION_2010 = ( COMBINED_CENSUS_TOTAL_POPULATION_2010 = (
"Total population in 2009 (island areas) and 2019 (states and PR)" "Total population in 2009 (island areas) and 2019 (states and PR)"
) )
COMBINED_UNEMPLOYMENT_2010 = "Unemployed civilians (percent) in 2009 (island areas) and 2010 (states and PR)" COMBINED_UNEMPLOYMENT_2010 = (
"Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)"
)
COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = ( COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = (
"Percentage households below 100% of federal poverty line in 2009 (island areas) " "Percentage households below 100% of federal poverty line in 2009 (island areas) "
"and 2010 (states and PR)" "and 2010 (states and PR)"
@ -265,52 +265,55 @@ TRANSPORTATION_COSTS = "Transportation Costs"
# Climate Change # Climate Change
EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = ( EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile" f"Greater than or equal to the {PERCENTILE}th percentile"
f" for expected population loss rate and is low income" f" for expected population loss rate and is low income?"
) )
EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = ( EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile" f"Greater than or equal to the {PERCENTILE}th percentile"
f" for expected agriculture loss rate and is low income" f" for expected agriculture loss rate and is low income?"
) )
EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = ( EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile" f"Greater than or equal to the {PERCENTILE}th percentile"
f" for expected building loss rate and is low income" f" for expected building loss rate and is low income?"
) )
# Clean energy and efficiency # Clean energy and efficiency
PM25_EXPOSURE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for PM2.5 exposure and is low income" PM25_EXPOSURE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for PM2.5 exposure and is low income?"
ENERGY_BURDEN_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for energy burden and is low income" ENERGY_BURDEN_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for energy burden and is low income?"
# Clean transportation # Clean transportation
DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diesel particulate matter and is low income" DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD = (
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for traffic proximity and is low income" f"Greater than or equal to the {PERCENTILE}th percentile for "
"diesel particulate matter and is low income?"
)
TRAFFIC_PROXIMITY_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for traffic proximity and is low income?"
# Affordable and Sustainable Housing # Affordable and Sustainable Housing
LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = ( LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for lead paint and" f"Greater than or equal to the {PERCENTILE}th percentile for lead paint and"
f" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th " f" the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
f"percentile and is low income" f"percentile and is low income?"
) )
HOUSING_BURDEN_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for housing burden and is low income" HOUSING_BURDEN_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for housing burden and is low income?"
# Remediation and Reduction of Legacy Pollution # Remediation and Reduction of Legacy Pollution
RMP_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to RMP sites and is low income" RMP_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to RMP sites and is low income?"
SUPERFUND_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to superfund sites and is low income" SUPERFUND_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for proximity to superfund sites and is low income?"
HAZARDOUS_WASTE_LOW_INCOME_FIELD = ( HAZARDOUS_WASTE_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile" f"Greater than or equal to the {PERCENTILE}th percentile"
f" for proximity to hazardous waste facilities and is low income" f" for proximity to hazardous waste facilities and is low income?"
) )
# Critical Clean Water and Waste Infrastructure # Critical Clean Water and Waste Infrastructure
WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income" WASTEWATER_DISCHARGE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for wastewater discharge and is low income?"
# Health Burdens # Health Burdens
DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income" DIABETES_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for diabetes and is low income?"
ASTHMA_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for asthma and is low income" ASTHMA_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for asthma and is low income?"
HEART_DISEASE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for heart disease and is low income" HEART_DISEASE_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for heart disease and is low income?"
LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD = ( LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile " f"Greater than or equal to the {PERCENTILE}th percentile "
f"for low life expectancy and is low income" f"for low life expectancy and is low income?"
) )
# Workforce # Workforce
@ -362,17 +365,17 @@ ISLAND_AREAS_LOW_HS_EDUCATION_FIELD = (
EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = ( EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for summer days above 90F and " f"Greater than or equal to the {PERCENTILE}th percentile for summer days above 90F and "
f"the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th " f"the median house value is less than {MEDIAN_HOUSE_VALUE_PERCENTILE}th "
f"percentile and is low income" f"percentile and is low income?"
) )
IMPENETRABLE_SURFACES_LOW_INCOME_FIELD = ( IMPENETRABLE_SURFACES_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for impenetrable surfaces and is low " f"Greater than or equal to the {PERCENTILE}th percentile for impenetrable surfaces and is low "
f"income" f"income"
) )
AIR_TOXICS_CANCER_RISK_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for air toxics cancer risk and is low income" AIR_TOXICS_CANCER_RISK_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for air toxics cancer risk and is low income?"
RESPIRATORY_HAZARD_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for respiratory hazard index and is low income" RESPIRATORY_HAZARD_LOW_INCOME_FIELD = f"Greater than or equal to the {PERCENTILE}th percentile for respiratory hazard index and is low income?"
HEALTHY_FOOD_LOW_INCOME_FIELD = ( HEALTHY_FOOD_LOW_INCOME_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for low " f"Greater than or equal to the {PERCENTILE}th percentile for low "
f"access to healthy food and is low income" f"access to healthy food and is low income?"
) )
LOW_READING_LOW_HS_EDUCATION_FIELD = ( LOW_READING_LOW_HS_EDUCATION_FIELD = (
f"Greater than or equal to the {PERCENTILE}th percentile for low 3rd grade reading proficiency" f"Greater than or equal to the {PERCENTILE}th percentile for low 3rd grade reading proficiency"
@ -381,6 +384,6 @@ LOW_READING_LOW_HS_EDUCATION_FIELD = (
THRESHOLD_COUNT = "Total threshold criteria exceeded" THRESHOLD_COUNT = "Total threshold criteria exceeded"
FPL_200_SERIES = "Is low income" FPL_200_SERIES = "Is low income?"
# End of names for individual factors being exceeded # End of names for individual factors being exceeded
#### ####