Issue 1141: Definition M (#1151)

This commit is contained in:
Lucas Merrill Brown 2022-01-18 14:56:55 -05:00 committed by GitHub
commit 18f299c5f8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 1000 additions and 143 deletions

View file

@@ -90,8 +90,8 @@ DATASET_LIST = [
"class_name": "HudRecapETL",
},
{
"name": "epa_rsei_aggregate",
"module_dir": "epa_rsei_aggregate",
"name": "epa_rsei",
"module_dir": "epa_rsei",
"class_name": "EPARiskScreeningEnvironmentalIndicatorsETL",
},
{

View file

@@ -120,16 +120,16 @@ TILES_SCORE_COLUMNS = {
+ field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS",
field_names.WASTEWATER_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
field_names.L_WATER: "L_WTR",
field_names.L_WORKFORCE: "L_WKFC",
field_names.L_CLIMATE: "L_CLT",
field_names.L_ENERGY: "L_ENY",
field_names.L_TRANSPORTATION: "L_TRN",
field_names.L_HOUSING: "L_HSG",
field_names.L_POLLUTION: "L_PLN",
field_names.L_HEALTH: "L_HLTH",
field_names.SCORE_L_COMMUNITIES: "SL_C",
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX: "SL_PFS",
field_names.M_WATER: "M_WTR",
field_names.M_WORKFORCE: "M_WKFC",
field_names.M_CLIMATE: "M_CLT",
field_names.M_ENERGY: "M_ENY",
field_names.M_TRANSPORTATION: "M_TRN",
field_names.M_HOUSING: "M_HSG",
field_names.M_POLLUTION: "M_PLN",
field_names.M_HEALTH: "M_HLTH",
field_names.SCORE_M_COMMUNITIES: "SM_C",
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
@@ -151,8 +151,8 @@ TILES_SCORE_COLUMNS = {
field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE",
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE",
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE",
field_names.LOW_HS_EDUCATION_FIELD: "LHE",
field_names.FPL_200_SERIES: "FPL200S",
field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD: "LHE",
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "FPL200S",
field_names.THRESHOLD_COUNT: "TC",
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE",
@@ -191,10 +191,10 @@ TILES_SCORE_FLOAT_COLUMNS = [
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.LOW_HS_EDUCATION_FIELD,
field_names.LOW_HS_EDUCATION_LOW_COLLEGE_ATTENDANCE_FIELD,
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX,
]
# Finally we augment with the GEOID10, county, and state
@@ -203,9 +203,9 @@ DOWNLOADABLE_SCORE_COLUMNS = [
field_names.COUNTY_FIELD,
field_names.STATE_FIELD,
field_names.THRESHOLD_COUNT,
field_names.SCORE_L_COMMUNITIES,
field_names.SCORE_M_COMMUNITIES,
field_names.TOTAL_POP_FIELD,
field_names.FPL_200_SERIES,
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,

View file

@@ -33,12 +33,12 @@ class GeoScoreETL(ExtractTransformLoad):
self.DATA_PATH / "census" / "geojson" / "us.json"
)
# Import the shortened name for Score L percentile ("SL_PFS") that's used on the
# Import the shortened name for Score M percentile ("SM_PFS") that's used on the
# tiles.
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX
]
self.TARGET_SCORE_RENAME_TO = "L_SCORE"
self.TARGET_SCORE_RENAME_TO = "M_SCORE"
# Import the shortened name for tract ("GTF") that's used on the tiles.
self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[

View file

@@ -323,7 +323,7 @@ class PostScoreETL(ExtractTransformLoad):
# Rename score column
downloadable_df_copy = downloadable_df.rename(
columns={
field_names.SCORE_L_COMMUNITIES: "Identified as disadvantaged (v0.1)"
field_names.SCORE_M_COMMUNITIES: "Identified as disadvantaged (v0.1)"
},
inplace=False,
)

File diff suppressed because one or more lines are too long

View file

@@ -42,7 +42,7 @@ class CDCSVIIndex(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Extracting 43 MB CDC SVI INDEX")
logger.info("Downloading 43 MB CDC SVI INDEX")
self.df = pd.read_csv(
filepath_or_buffer=self.CDC_SVI_INDEX_URL,
dtype={self.CDC_SVI_INDEX_TRACTS_FIPS_CODE: "string"},

View file

@@ -22,9 +22,7 @@ class EPARiskScreeningEnvironmentalIndicatorsETL(ExtractTransformLoad):
def __init__(self):
self.AGGREGATED_RSEI_SCORE_FILE_URL = "http://abt-rsei.s3.amazonaws.com/microdata2019/census_agg/CensusMicroTracts2019_2019_aggregated.zip"
self.OUTPUT_PATH: Path = (
self.DATA_PATH / "dataset" / "epa_rsei_aggregated"
)
self.OUTPUT_PATH: Path = self.DATA_PATH / "dataset" / "epa_rsei"
self.EPA_RSEI_SCORE_THRESHOLD_CUTOFF = 0.75
self.TRACT_INPUT_COLUMN_NAME = "GEOID10"
self.NUMBER_FACILITIES_INPUT_FIELD = "NUMFACS"
@@ -74,12 +72,12 @@ class EPARiskScreeningEnvironmentalIndicatorsETL(ExtractTransformLoad):
unzip_file_from_url(
file_url=self.AGGREGATED_RSEI_SCORE_FILE_URL,
download_path=self.TMP_PATH,
unzipped_file_path=self.TMP_PATH / "epa_rsei_aggregated",
unzipped_file_path=self.TMP_PATH / "epa_rsei",
)
self.df = pd.read_csv(
filepath_or_buffer=self.TMP_PATH
/ "epa_rsei_aggregated"
/ "epa_rsei"
/ "CensusMicroTracts2019_2019_aggregated.csv",
# The following need to remain as strings for all of their digits, not get
# converted to numbers.

View file

@@ -33,7 +33,7 @@ class MarylandEJScreenETL(ExtractTransformLoad):
self.df: pd.DataFrame
def extract(self) -> None:
logger.info("Downloading Maryland EJSCREEN Data")
logger.info("Downloading 207MB Maryland EJSCREEN Data")
super().extract(
self.MARYLAND_EJSCREEN_URL,
self.TMP_PATH,