mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-03 10:34:18 -07:00
Update Side Panel Tile Data (#866)
* Update Side Panel Tile Data * Update Side Panel Tile Data * Correct indicator names to match csv * Replace Score with Rate * Comment out FEMA Loss Rate to troubleshoot * Removes all "FEMA Loss Rate" array elements * Revert FEMA to Score * Remove expected loss rate * Remove RMP and NPL from BASIC array * Attempt to make shape mismatch align - update README typo * Add Score L indicators to TILE_SCORE_FLOAT_COLUMNS * removing cbg references * completes the ticket * Update side panel fields * Update index file writing to create parent dir * Updates from linting * fixing missing field_names for island territories 90th percentile fields * Update downloadable fields and fix field name * Update file fields and tests * Update ordering of fields and leave TODO * Update pickle after re-ordering of file * fixing bugs in etl_score_geo * Repeating index for diesel fix * passing tests * adding pytest.ini Co-authored-by: Vim USDS <vimal.k.shah@omb.eop.gov> Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov> Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
83eb7b0982
commit
9709d08ca3
13 changed files with 328 additions and 141 deletions
|
@ -1,7 +1,6 @@
|
|||
from pathlib import Path
|
||||
import datetime
|
||||
|
||||
import pandas as pd
|
||||
from data_pipeline.config import settings
|
||||
|
||||
from data_pipeline.score import field_names
|
||||
|
@ -38,6 +37,9 @@ FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH = (
|
|||
# Score Tile CSV source path
|
||||
DATA_SCORE_CSV_TILES_PATH = DATA_SCORE_CSV_DIR / "tiles"
|
||||
DATA_SCORE_CSV_TILES_FILE_PATH = DATA_SCORE_CSV_TILES_PATH / "usa.csv"
|
||||
DATA_SCORE_JSON_INDEX_FILE_PATH = (
|
||||
DATA_SCORE_CSV_TILES_PATH / "tile_indexes.json"
|
||||
)
|
||||
|
||||
## Tile path
|
||||
DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
|
||||
|
@ -60,119 +62,214 @@ SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
|
|||
|
||||
# Column subsets
|
||||
CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"]
|
||||
TILES_SCORE_COLUMNS = [
|
||||
field_names.GEOID_TRACT_FIELD,
|
||||
field_names.STATE_FIELD,
|
||||
field_names.COUNTY_FIELD,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
field_names.SCORE_D + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_D + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_E + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_E + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_G_COMMUNITIES,
|
||||
field_names.SCORE_G,
|
||||
field_names.SCORE_L_COMMUNITIES,
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
||||
TILES_ROUND_NUM_DECIMALS = 2
|
||||
# Tiles data: full field name, tile index name
|
||||
TILES_SCORE_COLUMNS = {
|
||||
field_names.GEOID_TRACT_FIELD: "GTF",
|
||||
field_names.STATE_FIELD: "SF",
|
||||
field_names.COUNTY_FIELD: "CF",
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "DF_PFS",
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "AF_PFS",
|
||||
field_names.HEART_DISEASE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "HDF_PFS",
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "DSF_PFS",
|
||||
field_names.ENERGY_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EBF_PFS",
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EALR_PFS",
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EBLR_PFS",
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EPLR_PFS",
|
||||
field_names.HOUSING_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "HBF_PFS",
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LLEF_PFS",
|
||||
field_names.LINGUISTIC_ISO_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LIF_PFS",
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LMI_PFS",
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "MHVF_PFS",
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "PM25F_PFS",
|
||||
field_names.HIGH_SCHOOL_ED_FIELD: "HSEF",
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "P100_PFS",
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "P200_PFS",
|
||||
field_names.LEAD_PAINT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LPF_PFS",
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "NPL_PFS",
|
||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "RMP_PFS",
|
||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "TSDF_PFS",
|
||||
field_names.TOTAL_POP_FIELD: "TPF",
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "TF_PFS",
|
||||
field_names.UNEMPLOYMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS",
|
||||
field_names.WASTEWATER_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
|
||||
field_names.L_WATER: "L_WTR",
|
||||
field_names.L_WORKFORCE: "L_WKFC",
|
||||
field_names.L_CLIMATE: "L_CLT",
|
||||
field_names.L_ENERGY: "L_ENY",
|
||||
field_names.L_TRANSPORTATION: "L_TRN",
|
||||
field_names.L_HOUSING: "L_HSG",
|
||||
field_names.L_POLLUTION: "L_PLN",
|
||||
field_names.L_HEALTH: "L_HLTH",
|
||||
field_names.SCORE_L_COMMUNITIES: "SL_C",
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX: "SL_PFS",
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
|
||||
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD: "PM25LI",
|
||||
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD: "EBLI",
|
||||
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD: "DPMLI",
|
||||
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD: "TPLI",
|
||||
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD: "LPMHVLI",
|
||||
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD: "HBLI",
|
||||
field_names.RMP_LOW_INCOME_FIELD: "RMPLI",
|
||||
field_names.SUPERFUND_LOW_INCOME_FIELD: "SFLI",
|
||||
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD: "HWLI",
|
||||
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD: "WDLI",
|
||||
field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
|
||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD: "HDLI",
|
||||
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD: "LLELI",
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD: "LILHSE",
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE",
|
||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE",
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE",
|
||||
field_names.FPL_200_SERIES: "FPL200S",
|
||||
field_names.THRESHOLD_COUNT: "TC",
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE",
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
|
||||
}
|
||||
|
||||
# columns to round floats to 2 decimals
|
||||
# TODO refactor to use much smaller subset of fields we DON'T want to round
|
||||
TILES_SCORE_FLOAT_COLUMNS = [
|
||||
field_names.SCORE_D + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_D + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_E + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_E + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||
field_names.LINGUISTIC_ISO_FIELD,
|
||||
field_names.UNEMPLOYMENT_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
TILES_ROUND_NUM_DECIMALS = 2
|
||||
|
||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
|
||||
field_names.AMI_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||
field_names.DIABETES_FIELD,
|
||||
field_names.ASTHMA_FIELD,
|
||||
field_names.HEART_DISEASE_FIELD,
|
||||
field_names.TRAFFIC_FIELD,
|
||||
field_names.FEMA_RISK_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.WASTEWATER_FIELD,
|
||||
field_names.LEAD_PAINT_FIELD,
|
||||
field_names.DIESEL_FIELD,
|
||||
field_names.PM25_FIELD,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
]
|
||||
|
||||
# For every indicator above, we want to include percentile also.
|
||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
|
||||
pd.core.common.flatten(
|
||||
[
|
||||
[p, f"{p}{field_names.PERCENTILE_FIELD_SUFFIX}"]
|
||||
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
# Finally we augment with the GEOID10, county, and state
|
||||
DOWNLOADABLE_SCORE_COLUMNS = [
|
||||
field_names.GEOID_TRACT_FIELD,
|
||||
field_names.COUNTY_FIELD,
|
||||
field_names.STATE_FIELD,
|
||||
field_names.SCORE_G_COMMUNITIES,
|
||||
# Note: the reverse percentile fields get moved down here because
|
||||
# we put the raw value in the download along with the *reversed* percentile.
|
||||
# All other fields we put in f"{field_name}" and
|
||||
# f"{field_name}{field_names.PERCENTILE_FIELD_SUFFIX}", which doesn't work for the
|
||||
# reversed percentile fields.
|
||||
field_names.SCORE_L_COMMUNITIES,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
field_names.FPL_200_SERIES,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
|
||||
field_names.PM25_FIELD,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
|
||||
field_names.DIESEL_FIELD,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
|
||||
field_names.TRAFFIC_FIELD,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
|
||||
field_names.LEAD_PAINT_FIELD,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TSDF_FIELD,
|
||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
|
||||
field_names.NPL_FIELD,
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SUPERFUND_LOW_INCOME_FIELD,
|
||||
field_names.RMP_FIELD,
|
||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.RMP_LOW_INCOME_FIELD,
|
||||
field_names.WASTEWATER_FIELD,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
|
||||
field_names.ASTHMA_FIELD,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||
field_names.DIABETES_FIELD,
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||
field_names.HEART_DISEASE_FIELD,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||
field_names.LIFE_EXPECTANCY_FIELD,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LIFE_EXPECTANCY_FIELD,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
|
||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LINGUISTIC_ISO_FIELD,
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.UNEMPLOYMENT_FIELD,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.THRESHOLD_COUNT,
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.COMBINED_UNEMPLOYMENT_2010,
|
||||
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
|
||||
field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
]
|
||||
|
|
|
@ -3,10 +3,12 @@ import pandas as pd
|
|||
import geopandas as gpd
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
from data_pipeline.etl.score import constants
|
||||
from data_pipeline.etl.sources.census.etl_utils import (
|
||||
check_census_data_source,
|
||||
)
|
||||
from data_pipeline.etl.score.etl_utils import check_score_data_source
|
||||
from data_pipeline.score import field_names
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
@ -31,9 +33,19 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
self.DATA_PATH / "census" / "geojson" / "us.json"
|
||||
)
|
||||
|
||||
self.TARGET_SCORE_NAME = "Definition L (percentile)"
|
||||
# Import the shortened name for Score L percentile ("SL_PFS") that's used on the
|
||||
# tiles.
|
||||
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
self.TARGET_SCORE_RENAME_TO = "L_SCORE"
|
||||
|
||||
# Import the shortened name for tract ("GTF") that's used on the tiles.
|
||||
self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||
field_names.GEOID_TRACT_FIELD
|
||||
]
|
||||
self.GEOMETRY_FIELD_NAME = "geometry"
|
||||
|
||||
self.NUMBER_OF_BUCKETS = 10
|
||||
|
||||
self.geojson_usa_df: gpd.GeoDataFrame
|
||||
|
@ -57,45 +69,52 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
logger.info("Reading US GeoJSON (~6 minutes)")
|
||||
self.geojson_usa_df = gpd.read_file(
|
||||
self.CENSUS_USA_GEOJSON,
|
||||
dtype={"GEOID10": "string"},
|
||||
usecols=["GEOID10", "geometry"],
|
||||
dtype={self.GEOID_FIELD_NAME: "string"},
|
||||
usecols=[self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME],
|
||||
low_memory=False,
|
||||
)
|
||||
self.geojson_usa_df.head()
|
||||
|
||||
logger.info("Reading score CSV")
|
||||
self.score_usa_df = pd.read_csv(
|
||||
self.TILE_SCORE_CSV,
|
||||
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
||||
dtype={self.TRACT_SHORT_FIELD: "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
# rename GEOID10_TRACT to GEOID10 on score to allow merging with Census GeoJSON
|
||||
# Rename GEOID10_TRACT to GEOID10 on score to allow merging with Census GeoJSON
|
||||
self.score_usa_df.rename(
|
||||
columns={self.GEOID_TRACT_FIELD_NAME: "GEOID10"},
|
||||
columns={self.TRACT_SHORT_FIELD: self.GEOID_FIELD_NAME},
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
logger.info("Pruning Census GeoJSON")
|
||||
fields = ["GEOID10", "geometry"]
|
||||
fields = [self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME]
|
||||
self.geojson_usa_df = self.geojson_usa_df[fields]
|
||||
|
||||
logger.info("Merging and compressing score CSV with USA GeoJSON")
|
||||
self.geojson_score_usa_high = self.score_usa_df.merge(
|
||||
self.geojson_usa_df, on="GEOID10", how="left"
|
||||
self.geojson_usa_df, on=self.GEOID_FIELD_NAME, how="left"
|
||||
)
|
||||
|
||||
self.geojson_score_usa_high = gpd.GeoDataFrame(
|
||||
self.geojson_score_usa_high, crs="EPSG:4326"
|
||||
)
|
||||
|
||||
logger.info(f"Columns: {self.geojson_score_usa_high.columns}")
|
||||
|
||||
usa_simplified = self.geojson_score_usa_high[
|
||||
["GEOID10", self.TARGET_SCORE_NAME, "geometry"]
|
||||
[
|
||||
self.GEOID_FIELD_NAME,
|
||||
self.TARGET_SCORE_SHORT_FIELD,
|
||||
self.GEOMETRY_FIELD_NAME,
|
||||
]
|
||||
].reset_index(drop=True)
|
||||
|
||||
usa_simplified.rename(
|
||||
columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
|
||||
columns={
|
||||
self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO
|
||||
},
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
|
@ -104,7 +123,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
|
||||
usa_tracts = gpd.GeoDataFrame(
|
||||
usa_tracts,
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, self.GEOMETRY_FIELD_NAME],
|
||||
crs="EPSG:4326",
|
||||
)
|
||||
|
||||
|
@ -122,7 +141,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
|
||||
self.geojson_score_usa_low = gpd.GeoDataFrame(
|
||||
compressed,
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, self.GEOMETRY_FIELD_NAME],
|
||||
crs="EPSG:4326",
|
||||
)
|
||||
|
||||
|
@ -135,7 +154,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
) -> gpd.GeoDataFrame:
|
||||
# The tract identifier is the first 11 digits of the GEOID
|
||||
block_group_df["tract"] = block_group_df.apply(
|
||||
lambda row: row["GEOID10"][0:11], axis=1
|
||||
lambda row: row[self.GEOID_FIELD_NAME][0:11], axis=1
|
||||
)
|
||||
state_tracts = block_group_df.dissolve(by="tract", aggfunc="mean")
|
||||
return state_tracts
|
||||
|
@ -160,7 +179,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
[
|
||||
self.TARGET_SCORE_RENAME_TO,
|
||||
f"{self.TARGET_SCORE_RENAME_TO}_bucket",
|
||||
"geometry",
|
||||
self.GEOMETRY_FIELD_NAME,
|
||||
]
|
||||
].reset_index(drop=True)
|
||||
state_dissolve = state_attr.dissolve(
|
||||
|
@ -173,11 +192,13 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
) -> gpd.GeoDataFrame:
|
||||
compressed = []
|
||||
for i in range(num_buckets):
|
||||
for j in range(len(state_bucketed_df["geometry"][i].geoms)):
|
||||
for j in range(
|
||||
len(state_bucketed_df[self.GEOMETRY_FIELD_NAME][i].geoms)
|
||||
):
|
||||
compressed.append(
|
||||
[
|
||||
state_bucketed_df[self.TARGET_SCORE_RENAME_TO][i],
|
||||
state_bucketed_df["geometry"][i].geoms[j],
|
||||
state_bucketed_df[self.GEOMETRY_FIELD_NAME][i].geoms[j],
|
||||
]
|
||||
)
|
||||
return compressed
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
from pathlib import Path
|
||||
import json
|
||||
import pandas as pd
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
from data_pipeline.utils import get_module_logger, zip_files
|
||||
from data_pipeline.score import field_names
|
||||
|
@ -198,16 +200,37 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
self, score_county_state_merged_df: pd.DataFrame
|
||||
) -> pd.DataFrame:
|
||||
logger.info("Rounding Decimals")
|
||||
score_tiles = score_county_state_merged_df[
|
||||
constants.TILES_SCORE_COLUMNS
|
||||
]
|
||||
|
||||
# grab all the keys from tiles score columns
|
||||
tiles_score_column_titles = list(constants.TILES_SCORE_COLUMNS.keys())
|
||||
|
||||
# filter the columns on full score
|
||||
score_tiles = score_county_state_merged_df[tiles_score_column_titles]
|
||||
|
||||
# round decimals
|
||||
decimals = pd.Series(
|
||||
[constants.TILES_ROUND_NUM_DECIMALS]
|
||||
* len(constants.TILES_SCORE_FLOAT_COLUMNS),
|
||||
index=constants.TILES_SCORE_FLOAT_COLUMNS,
|
||||
)
|
||||
score_tiles = score_tiles.round(decimals)
|
||||
|
||||
return score_tiles.round(decimals)
|
||||
# create indexes
|
||||
score_tiles = score_tiles.rename(
|
||||
columns=constants.TILES_SCORE_COLUMNS,
|
||||
inplace=False,
|
||||
)
|
||||
|
||||
# write the json map to disk
|
||||
inverse_tiles_columns = {
|
||||
v: k for k, v in constants.TILES_SCORE_COLUMNS.items()
|
||||
} # reverse dict
|
||||
index_file_path = constants.DATA_SCORE_JSON_INDEX_FILE_PATH
|
||||
index_file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(index_file_path, "w", encoding="utf-8") as fp:
|
||||
json.dump(inverse_tiles_columns, fp)
|
||||
|
||||
return score_tiles
|
||||
|
||||
def _create_downloadable_data(
|
||||
self, score_county_state_merged_df: pd.DataFrame
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue