Revised Columns on Download File + PDF (#701)

* Revised Columns on Download File + PDF

* finishing ticket
This commit is contained in:
Jorge Escobar 2021-09-17 13:11:23 -04:00 committed by GitHub
parent b6789c4d0d
commit cd33f323c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 3524 additions and 15 deletions

View file

@ -6,6 +6,7 @@ from data_pipeline.config import settings
# Base Paths
DATA_PATH = Path(settings.APP_ROOT) / "data"
TMP_PATH = DATA_PATH / "tmp"
FILES_PATH = Path(settings.APP_ROOT) / "files"
# Remote Paths
CENSUS_COUNTIES_ZIP_URL = "https://www2.census.gov/geo/docs/maps-data/data/gazetteer/Gaz_counties_national.zip"
@ -42,6 +43,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv"
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
)
@ -77,7 +79,6 @@ TILES_SCORE_COLUMNS = [
"Particulate matter (PM2.5) (percentile)",
"Median household income (% of AMI) (percentile)",
"Percent of individuals < 200% Federal Poverty Line (percentile)",
"Percent individuals age 25 or over with less than high school degree (percentile)",
]
# columns to round floats to 2 decimals
@ -113,18 +114,21 @@ TILES_SCORE_FLOAT_COLUMNS = [
TILES_ROUND_NUM_DECIMALS = 2
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
"Area Median Income (State or metropolitan)",
"Percent of individuals < 100% Federal Poverty Line",
"Percent individuals age 25 or over with less than high school degree",
"Linguistic isolation (percent)",
"Poverty (Less than 200% of federal poverty line)",
"Unemployed civilians (percent)",
"Housing burden (percent)",
"Respiratory hazard index",
"Diesel particulate matter",
"Particulate matter (PM2.5)",
"Diagnosed diabetes among adults aged >=18 years",
"Current asthma among adults aged >=18 years",
"Coronary heart disease among adults aged >=18 years",
"Life expectancy (years)",
"Traffic proximity and volume",
"Proximity to RMP sites",
"FEMA Risk Index Expected Annual Loss Score",
"Energy burden",
"Housing burden (percent)",
"Wastewater discharge",
"Percent pre-1960s housing (lead paint indicator)",
"Diesel particulate matter",
"Particulate matter (PM2.5)",
"Total population",
]
@ -132,7 +136,7 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
pd.core.common.flatten(
[
[p, f"{p} (percentile)", f"{p} (min-max normalized)"]
[p, f"{p} (percentile)"]
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
]
)
@ -143,7 +147,8 @@ DOWNLOADABLE_SCORE_COLUMNS = [
"GEOID10",
"County Name",
"State Name",
"Score D (percentile)",
"Score D (top 25th percentile)",
"Score G (communities)",
"Median household income (% of AMI)",
"Median household income (% of state median household income) (percentile)",
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
]

View file

@ -230,14 +230,18 @@ class PostScoreETL(ExtractTransformLoad):
) -> None:
logger.info("Saving Full Score CSV with County Information")
score_csv_path.parent.mkdir(parents=True, exist_ok=True)
score_county_state_merged.to_csv(score_csv_path, index=False)
score_county_state_merged.to_csv(
score_csv_path,
index=False,
encoding="utf-8-sig", # Windows compat: writes a UTF-8 BOM so Excel detects the encoding; see https://stackoverflow.com/a/43684587
)
def _load_tile_csv(
self, score_tiles_df: pd.DataFrame, tile_score_path: Path
) -> None:
logger.info("Saving Tile Score CSV")
tile_score_path.parent.mkdir(parents=True, exist_ok=True)
score_tiles_df.to_csv(tile_score_path, index=False)
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
def _load_downloadable_zip(
self, downloadable_df: pd.DataFrame, downloadable_info_path: Path
@ -248,6 +252,13 @@ class PostScoreETL(ExtractTransformLoad):
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH
pdf_path = constants.SCORE_DOWNLOADABLE_PDF_FILE_PATH
# Rename the Score G column to its user-facing name before writing the downloadable files
downloadable_df.rename(
columns={"Score G (communities)": "Community of focus (v0.1)"},
inplace=True,
)
logger.info("Writing downloadable csv")
downloadable_df.to_csv(csv_path, index=False)
@ -256,7 +267,7 @@ class PostScoreETL(ExtractTransformLoad):
downloadable_df.to_excel(excel_path, index=False)
logger.info("Compressing files")
files_to_compress = [csv_path, excel_path]
files_to_compress = [csv_path, excel_path, pdf_path]
with zipfile.ZipFile(zip_path, "w") as zf:
for f in files_to_compress:
zf.write(f, arcname=Path(f).name, compress_type=compression)

File diff suppressed because one or more lines are too long