mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 02:51:17 -07:00
Update Side Panel Tile Data (#866)
* Update Side Panel Tile Data * Update Side Panel Tile Data * Correct indicator names to match csv * Replace Score with Rate * Comment out FEMA Loss Rate to troubleshoot * Removes all "FEMA Loss Rate" array elements * Revert FEMA to Score * Remove expected loss rate * Remove RMP and NPL from BASIC array * Attempt to make shape mismatch align - update README typo * Add Score L indicators to TILE_SCORE_FLOAT_COLUMNS * removing cbg references * completes the ticket * Update side panel fields * Update index file writing to create parent dir * Updates from linting * fixing missing field_names for island territories 90th percentile fields * Update downloadable fields and fix field name * Update file fields and tests * Update ordering of fields and leave TODO * Update pickle after re-ordering of file * fixing bugs in etl_score_geo * Repeating index for diesel fix * passing tests * adding pytest.ini Co-authored-by: Vim USDS <vimal.k.shah@omb.eop.gov> Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov> Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
83eb7b0982
commit
9709d08ca3
13 changed files with 328 additions and 141 deletions
|
@ -1,7 +1,6 @@
|
|||
from pathlib import Path
|
||||
import datetime
|
||||
|
||||
import pandas as pd
|
||||
from data_pipeline.config import settings
|
||||
|
||||
from data_pipeline.score import field_names
|
||||
|
@ -38,6 +37,9 @@ FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH = (
|
|||
# Score Tile CSV source path
|
||||
DATA_SCORE_CSV_TILES_PATH = DATA_SCORE_CSV_DIR / "tiles"
|
||||
DATA_SCORE_CSV_TILES_FILE_PATH = DATA_SCORE_CSV_TILES_PATH / "usa.csv"
|
||||
DATA_SCORE_JSON_INDEX_FILE_PATH = (
|
||||
DATA_SCORE_CSV_TILES_PATH / "tile_indexes.json"
|
||||
)
|
||||
|
||||
## Tile path
|
||||
DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
|
||||
|
@ -60,119 +62,214 @@ SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
|
|||
|
||||
# Column subsets
|
||||
CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"]
|
||||
TILES_SCORE_COLUMNS = [
|
||||
field_names.GEOID_TRACT_FIELD,
|
||||
field_names.STATE_FIELD,
|
||||
field_names.COUNTY_FIELD,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
field_names.SCORE_D + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_D + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_E + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_E + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_G_COMMUNITIES,
|
||||
field_names.SCORE_G,
|
||||
field_names.SCORE_L_COMMUNITIES,
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
|
||||
TILES_ROUND_NUM_DECIMALS = 2
|
||||
# Tiles data: maps each full field name to the shortened name used for it on the tiles
|
||||
TILES_SCORE_COLUMNS = {
|
||||
field_names.GEOID_TRACT_FIELD: "GTF",
|
||||
field_names.STATE_FIELD: "SF",
|
||||
field_names.COUNTY_FIELD: "CF",
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "DF_PFS",
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "AF_PFS",
|
||||
field_names.HEART_DISEASE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "HDF_PFS",
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "DSF_PFS",
|
||||
field_names.ENERGY_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EBF_PFS",
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EALR_PFS",
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EBLR_PFS",
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "EPLR_PFS",
|
||||
field_names.HOUSING_BURDEN_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "HBF_PFS",
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LLEF_PFS",
|
||||
field_names.LINGUISTIC_ISO_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LIF_PFS",
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LMI_PFS",
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "MHVF_PFS",
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "PM25F_PFS",
|
||||
field_names.HIGH_SCHOOL_ED_FIELD: "HSEF",
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "P100_PFS",
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "P200_PFS",
|
||||
field_names.LEAD_PAINT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "LPF_PFS",
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "NPL_PFS",
|
||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "RMP_PFS",
|
||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "TSDF_PFS",
|
||||
field_names.TOTAL_POP_FIELD: "TPF",
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "TF_PFS",
|
||||
field_names.UNEMPLOYMENT_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS",
|
||||
field_names.WASTEWATER_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
|
||||
field_names.L_WATER: "L_WTR",
|
||||
field_names.L_WORKFORCE: "L_WKFC",
|
||||
field_names.L_CLIMATE: "L_CLT",
|
||||
field_names.L_ENERGY: "L_ENY",
|
||||
field_names.L_TRANSPORTATION: "L_TRN",
|
||||
field_names.L_HOUSING: "L_HSG",
|
||||
field_names.L_POLLUTION: "L_PLN",
|
||||
field_names.L_HEALTH: "L_HLTH",
|
||||
field_names.SCORE_L_COMMUNITIES: "SL_C",
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX: "SL_PFS",
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
|
||||
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD: "PM25LI",
|
||||
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD: "EBLI",
|
||||
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD: "DPMLI",
|
||||
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD: "TPLI",
|
||||
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD: "LPMHVLI",
|
||||
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD: "HBLI",
|
||||
field_names.RMP_LOW_INCOME_FIELD: "RMPLI",
|
||||
field_names.SUPERFUND_LOW_INCOME_FIELD: "SFLI",
|
||||
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD: "HWLI",
|
||||
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD: "WDLI",
|
||||
field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
|
||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD: "HDLI",
|
||||
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD: "LLELI",
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD: "LILHSE",
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE",
|
||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE",
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE",
|
||||
field_names.FPL_200_SERIES: "FPL200S",
|
||||
field_names.THRESHOLD_COUNT: "TC",
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE",
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
|
||||
}
|
||||
|
||||
# columns to round floats to 2 decimals
|
||||
# TODO refactor to use much smaller subset of fields we DON'T want to round
|
||||
TILES_SCORE_FLOAT_COLUMNS = [
|
||||
field_names.SCORE_D + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_D + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_E + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_E + field_names.TOP_25_PERCENTILE_SUFFIX,
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||
field_names.LINGUISTIC_ISO_FIELD,
|
||||
field_names.UNEMPLOYMENT_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
]
|
||||
TILES_ROUND_NUM_DECIMALS = 2
|
||||
|
||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
|
||||
field_names.AMI_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||
field_names.DIABETES_FIELD,
|
||||
field_names.ASTHMA_FIELD,
|
||||
field_names.HEART_DISEASE_FIELD,
|
||||
field_names.TRAFFIC_FIELD,
|
||||
field_names.FEMA_RISK_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.WASTEWATER_FIELD,
|
||||
field_names.LEAD_PAINT_FIELD,
|
||||
field_names.DIESEL_FIELD,
|
||||
field_names.PM25_FIELD,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
]
|
||||
|
||||
# For every indicator above, we want to include percentile also.
|
||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
|
||||
pd.core.common.flatten(
|
||||
[
|
||||
[p, f"{p}{field_names.PERCENTILE_FIELD_SUFFIX}"]
|
||||
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
# Finally we augment with the GEOID10, county, and state
|
||||
DOWNLOADABLE_SCORE_COLUMNS = [
|
||||
field_names.GEOID_TRACT_FIELD,
|
||||
field_names.COUNTY_FIELD,
|
||||
field_names.STATE_FIELD,
|
||||
field_names.SCORE_G_COMMUNITIES,
|
||||
# Note: the reverse percentile fields get moved down here because
|
||||
# we put the raw value in the download along with the *reversed* percentile.
|
||||
# All other fields we put in f"{field_name}" and
|
||||
# f"{field_name}{field_names.PERCENTILE_FIELD_SUFFIX}", which doesn't work for the
|
||||
# reversed percentile fields.
|
||||
field_names.SCORE_L_COMMUNITIES,
|
||||
field_names.TOTAL_POP_FIELD,
|
||||
field_names.FPL_200_SERIES,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD,
|
||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
|
||||
field_names.PM25_FIELD,
|
||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
|
||||
field_names.DIESEL_FIELD,
|
||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
|
||||
field_names.TRAFFIC_FIELD,
|
||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
|
||||
field_names.LEAD_PAINT_FIELD,
|
||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
||||
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.TSDF_FIELD,
|
||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
|
||||
field_names.NPL_FIELD,
|
||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.SUPERFUND_LOW_INCOME_FIELD,
|
||||
field_names.RMP_FIELD,
|
||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.RMP_LOW_INCOME_FIELD,
|
||||
field_names.WASTEWATER_FIELD,
|
||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
|
||||
field_names.ASTHMA_FIELD,
|
||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||
field_names.DIABETES_FIELD,
|
||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||
field_names.HEART_DISEASE_FIELD,
|
||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||
field_names.LIFE_EXPECTANCY_FIELD,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LIFE_EXPECTANCY_FIELD,
|
||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
|
||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.LINGUISTIC_ISO_FIELD,
|
||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.UNEMPLOYMENT_FIELD,
|
||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
|
||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||
field_names.THRESHOLD_COUNT,
|
||||
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.COMBINED_UNEMPLOYMENT_2010,
|
||||
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
|
||||
field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||
]
|
||||
|
|
|
@ -3,10 +3,12 @@ import pandas as pd
|
|||
import geopandas as gpd
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
from data_pipeline.etl.score import constants
|
||||
from data_pipeline.etl.sources.census.etl_utils import (
|
||||
check_census_data_source,
|
||||
)
|
||||
from data_pipeline.etl.score.etl_utils import check_score_data_source
|
||||
from data_pipeline.score import field_names
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
@ -31,9 +33,19 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
self.DATA_PATH / "census" / "geojson" / "us.json"
|
||||
)
|
||||
|
||||
self.TARGET_SCORE_NAME = "Definition L (percentile)"
|
||||
# Import the shortened name for Score L percentile ("SL_PFS") that's used on the
|
||||
# tiles.
|
||||
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
|
||||
]
|
||||
self.TARGET_SCORE_RENAME_TO = "L_SCORE"
|
||||
|
||||
# Import the shortened name for tract ("GTF") that's used on the tiles.
|
||||
self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||
field_names.GEOID_TRACT_FIELD
|
||||
]
|
||||
self.GEOMETRY_FIELD_NAME = "geometry"
|
||||
|
||||
self.NUMBER_OF_BUCKETS = 10
|
||||
|
||||
self.geojson_usa_df: gpd.GeoDataFrame
|
||||
|
@ -57,45 +69,52 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
logger.info("Reading US GeoJSON (~6 minutes)")
|
||||
self.geojson_usa_df = gpd.read_file(
|
||||
self.CENSUS_USA_GEOJSON,
|
||||
dtype={"GEOID10": "string"},
|
||||
usecols=["GEOID10", "geometry"],
|
||||
dtype={self.GEOID_FIELD_NAME: "string"},
|
||||
usecols=[self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME],
|
||||
low_memory=False,
|
||||
)
|
||||
self.geojson_usa_df.head()
|
||||
|
||||
logger.info("Reading score CSV")
|
||||
self.score_usa_df = pd.read_csv(
|
||||
self.TILE_SCORE_CSV,
|
||||
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
||||
dtype={self.TRACT_SHORT_FIELD: "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
# rename GEOID10_TRACT to GEOID10 on score to allow merging with Census GeoJSON
|
||||
# Rename the shortened tract column ("GTF") to the GEOID field name on score to allow merging with Census GeoJSON
|
||||
self.score_usa_df.rename(
|
||||
columns={self.GEOID_TRACT_FIELD_NAME: "GEOID10"},
|
||||
columns={self.TRACT_SHORT_FIELD: self.GEOID_FIELD_NAME},
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
logger.info("Pruning Census GeoJSON")
|
||||
fields = ["GEOID10", "geometry"]
|
||||
fields = [self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME]
|
||||
self.geojson_usa_df = self.geojson_usa_df[fields]
|
||||
|
||||
logger.info("Merging and compressing score CSV with USA GeoJSON")
|
||||
self.geojson_score_usa_high = self.score_usa_df.merge(
|
||||
self.geojson_usa_df, on="GEOID10", how="left"
|
||||
self.geojson_usa_df, on=self.GEOID_FIELD_NAME, how="left"
|
||||
)
|
||||
|
||||
self.geojson_score_usa_high = gpd.GeoDataFrame(
|
||||
self.geojson_score_usa_high, crs="EPSG:4326"
|
||||
)
|
||||
|
||||
logger.info(f"Columns: {self.geojson_score_usa_high.columns}")
|
||||
|
||||
usa_simplified = self.geojson_score_usa_high[
|
||||
["GEOID10", self.TARGET_SCORE_NAME, "geometry"]
|
||||
[
|
||||
self.GEOID_FIELD_NAME,
|
||||
self.TARGET_SCORE_SHORT_FIELD,
|
||||
self.GEOMETRY_FIELD_NAME,
|
||||
]
|
||||
].reset_index(drop=True)
|
||||
|
||||
usa_simplified.rename(
|
||||
columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
|
||||
columns={
|
||||
self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO
|
||||
},
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
|
@ -104,7 +123,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
|
||||
usa_tracts = gpd.GeoDataFrame(
|
||||
usa_tracts,
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, self.GEOMETRY_FIELD_NAME],
|
||||
crs="EPSG:4326",
|
||||
)
|
||||
|
||||
|
@ -122,7 +141,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
|
||||
self.geojson_score_usa_low = gpd.GeoDataFrame(
|
||||
compressed,
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
|
||||
columns=[self.TARGET_SCORE_RENAME_TO, self.GEOMETRY_FIELD_NAME],
|
||||
crs="EPSG:4326",
|
||||
)
|
||||
|
||||
|
@ -135,7 +154,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
) -> gpd.GeoDataFrame:
|
||||
# The tract identifier is the first 11 digits of the GEOID
|
||||
block_group_df["tract"] = block_group_df.apply(
|
||||
lambda row: row["GEOID10"][0:11], axis=1
|
||||
lambda row: row[self.GEOID_FIELD_NAME][0:11], axis=1
|
||||
)
|
||||
state_tracts = block_group_df.dissolve(by="tract", aggfunc="mean")
|
||||
return state_tracts
|
||||
|
@ -160,7 +179,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
[
|
||||
self.TARGET_SCORE_RENAME_TO,
|
||||
f"{self.TARGET_SCORE_RENAME_TO}_bucket",
|
||||
"geometry",
|
||||
self.GEOMETRY_FIELD_NAME,
|
||||
]
|
||||
].reset_index(drop=True)
|
||||
state_dissolve = state_attr.dissolve(
|
||||
|
@ -173,11 +192,13 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
) -> gpd.GeoDataFrame:
|
||||
compressed = []
|
||||
for i in range(num_buckets):
|
||||
for j in range(len(state_bucketed_df["geometry"][i].geoms)):
|
||||
for j in range(
|
||||
len(state_bucketed_df[self.GEOMETRY_FIELD_NAME][i].geoms)
|
||||
):
|
||||
compressed.append(
|
||||
[
|
||||
state_bucketed_df[self.TARGET_SCORE_RENAME_TO][i],
|
||||
state_bucketed_df["geometry"][i].geoms[j],
|
||||
state_bucketed_df[self.GEOMETRY_FIELD_NAME][i].geoms[j],
|
||||
]
|
||||
)
|
||||
return compressed
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
from pathlib import Path
|
||||
import json
|
||||
import pandas as pd
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
from data_pipeline.utils import get_module_logger, zip_files
|
||||
from data_pipeline.score import field_names
|
||||
|
@ -198,16 +200,37 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
self, score_county_state_merged_df: pd.DataFrame
|
||||
) -> pd.DataFrame:
|
||||
logger.info("Rounding Decimals")
|
||||
score_tiles = score_county_state_merged_df[
|
||||
constants.TILES_SCORE_COLUMNS
|
||||
]
|
||||
|
||||
# grab all the keys from tiles score columns
|
||||
tiles_score_column_titles = list(constants.TILES_SCORE_COLUMNS.keys())
|
||||
|
||||
# keep only the tile columns from the full score data frame
|
||||
score_tiles = score_county_state_merged_df[tiles_score_column_titles]
|
||||
|
||||
# round decimals
|
||||
decimals = pd.Series(
|
||||
[constants.TILES_ROUND_NUM_DECIMALS]
|
||||
* len(constants.TILES_SCORE_FLOAT_COLUMNS),
|
||||
index=constants.TILES_SCORE_FLOAT_COLUMNS,
|
||||
)
|
||||
score_tiles = score_tiles.round(decimals)
|
||||
|
||||
return score_tiles.round(decimals)
|
||||
# rename columns from full field names to their shortened tile index names
|
||||
score_tiles = score_tiles.rename(
|
||||
columns=constants.TILES_SCORE_COLUMNS,
|
||||
inplace=False,
|
||||
)
|
||||
|
||||
# write the json map to disk
|
||||
inverse_tiles_columns = {
|
||||
v: k for k, v in constants.TILES_SCORE_COLUMNS.items()
|
||||
} # reverse dict
|
||||
index_file_path = constants.DATA_SCORE_JSON_INDEX_FILE_PATH
|
||||
index_file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(index_file_path, "w", encoding="utf-8") as fp:
|
||||
json.dump(inverse_tiles_columns, fp)
|
||||
|
||||
return score_tiles
|
||||
|
||||
def _create_downloadable_data(
|
||||
self, score_county_state_merged_df: pd.DataFrame
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -136,7 +136,7 @@ class CensusETL(ExtractTransformLoad):
|
|||
|
||||
def transform(self) -> None:
|
||||
"""Download all census shape files from the Census FTP and extract the geojson
|
||||
to generate national and by state Census Block Group CSVs and GeoJSONs
|
||||
to generate national and by state Census tract CSVs and GeoJSONs
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
@ -225,7 +225,7 @@ class CensusETL(ExtractTransformLoad):
|
|||
logger.info("Writing national geojson file")
|
||||
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
|
||||
|
||||
logger.info("Census block groups downloading complete")
|
||||
logger.info("Census tract downloading complete")
|
||||
|
||||
def load(self) -> None:
|
||||
"""Create state CSVs, National CSV, and National GeoJSON
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue