Revised Columns on Download File + PDF (#701)

* Revised Columns on Download File + PDF

* finishing ticket
This commit is contained in:
Jorge Escobar 2021-09-17 13:11:23 -04:00 committed by GitHub
parent b6789c4d0d
commit cd33f323c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 3524 additions and 15 deletions

View file

@@ -6,6 +6,7 @@ from data_pipeline.config import settings
# Base Paths # Base Paths
DATA_PATH = Path(settings.APP_ROOT) / "data" DATA_PATH = Path(settings.APP_ROOT) / "data"
TMP_PATH = DATA_PATH / "tmp" TMP_PATH = DATA_PATH / "tmp"
FILES_PATH = Path(settings.APP_ROOT) / "files"
# Remote Paths # Remote Paths
CENSUS_COUNTIES_ZIP_URL = "https://www2.census.gov/geo/docs/maps-data/data/gazetteer/Gaz_counties_national.zip" CENSUS_COUNTIES_ZIP_URL = "https://www2.census.gov/geo/docs/maps-data/data/gazetteer/Gaz_counties_national.zip"
@@ -42,6 +43,7 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable" SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable"
SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv" SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv"
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx" SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx"
SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / "Draft_Communities_List.pdf"
SCORE_DOWNLOADABLE_ZIP_FILE_PATH = ( SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip" SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip"
) )
@@ -77,7 +79,6 @@ TILES_SCORE_COLUMNS = [
"Particulate matter (PM2.5) (percentile)", "Particulate matter (PM2.5) (percentile)",
"Median household income (% of AMI) (percentile)", "Median household income (% of AMI) (percentile)",
"Percent of individuals < 200% Federal Poverty Line (percentile)", "Percent of individuals < 200% Federal Poverty Line (percentile)",
"Percent individuals age 25 or over with less than high school degree (percentile)",
] ]
# columns to round floats to 2 decimals # columns to round floats to 2 decimals
@@ -113,18 +114,21 @@ TILES_SCORE_FLOAT_COLUMNS = [
TILES_ROUND_NUM_DECIMALS = 2 TILES_ROUND_NUM_DECIMALS = 2
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
"Area Median Income (State or metropolitan)",
"Percent of individuals < 100% Federal Poverty Line",
"Percent individuals age 25 or over with less than high school degree", "Percent individuals age 25 or over with less than high school degree",
"Linguistic isolation (percent)", "Diagnosed diabetes among adults aged >=18 years",
"Poverty (Less than 200% of federal poverty line)", "Current asthma among adults aged >=18 years",
"Unemployed civilians (percent)", "Coronary heart disease among adults aged >=18 years",
"Housing burden (percent)", "Life expectancy (years)",
"Respiratory hazard index",
"Diesel particulate matter",
"Particulate matter (PM2.5)",
"Traffic proximity and volume", "Traffic proximity and volume",
"Proximity to RMP sites", "FEMA Risk Index Expected Annual Loss Score",
"Energy burden",
"Housing burden (percent)",
"Wastewater discharge", "Wastewater discharge",
"Percent pre-1960s housing (lead paint indicator)", "Percent pre-1960s housing (lead paint indicator)",
"Diesel particulate matter",
"Particulate matter (PM2.5)",
"Total population", "Total population",
] ]
@@ -132,7 +136,7 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list( DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
pd.core.common.flatten( pd.core.common.flatten(
[ [
[p, f"{p} (percentile)", f"{p} (min-max normalized)"] [p, f"{p} (percentile)"]
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
] ]
) )
@@ -143,7 +147,8 @@ DOWNLOADABLE_SCORE_COLUMNS = [
"GEOID10", "GEOID10",
"County Name", "County Name",
"State Name", "State Name",
"Score D (percentile)", "Score G (communities)",
"Score D (top 25th percentile)", "Median household income (% of AMI)",
"Median household income (% of state median household income) (percentile)",
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL, *DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
] ]

View file

@@ -230,14 +230,18 @@ class PostScoreETL(ExtractTransformLoad):
) -> None: ) -> None:
logger.info("Saving Full Score CSV with County Information") logger.info("Saving Full Score CSV with County Information")
score_csv_path.parent.mkdir(parents=True, exist_ok=True) score_csv_path.parent.mkdir(parents=True, exist_ok=True)
score_county_state_merged.to_csv(score_csv_path, index=False) score_county_state_merged.to_csv(
score_csv_path,
index=False,
encoding="utf-8-sig", # windows compat https://stackoverflow.com/a/43684587
)
def _load_tile_csv( def _load_tile_csv(
self, score_tiles_df: pd.DataFrame, tile_score_path: Path self, score_tiles_df: pd.DataFrame, tile_score_path: Path
) -> None: ) -> None:
logger.info("Saving Tile Score CSV") logger.info("Saving Tile Score CSV")
tile_score_path.parent.mkdir(parents=True, exist_ok=True) tile_score_path.parent.mkdir(parents=True, exist_ok=True)
score_tiles_df.to_csv(tile_score_path, index=False) score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
def _load_downloadable_zip( def _load_downloadable_zip(
self, downloadable_df: pd.DataFrame, downloadable_info_path: Path self, downloadable_df: pd.DataFrame, downloadable_info_path: Path
@@ -248,6 +252,13 @@ class PostScoreETL(ExtractTransformLoad):
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH
pdf_path = constants.SCORE_DOWNLOADABLE_PDF_FILE_PATH
# Rename score column
downloadable_df.rename(
columns={"Score G (communities)": "Community of focus (v0.1)"},
inplace=True,
)
logger.info("Writing downloadable csv") logger.info("Writing downloadable csv")
downloadable_df.to_csv(csv_path, index=False) downloadable_df.to_csv(csv_path, index=False)
@@ -256,7 +267,7 @@ class PostScoreETL(ExtractTransformLoad):
downloadable_df.to_excel(excel_path, index=False) downloadable_df.to_excel(excel_path, index=False)
logger.info("Compressing files") logger.info("Compressing files")
files_to_compress = [csv_path, excel_path] files_to_compress = [csv_path, excel_path, pdf_path]
with zipfile.ZipFile(zip_path, "w") as zf: with zipfile.ZipFile(zip_path, "w") as zf:
for f in files_to_compress: for f in files_to_compress:
zf.write(f, arcname=Path(f).name, compress_type=compression) zf.write(f, arcname=Path(f).name, compress_type=compression)

File diff suppressed because one or more lines are too long