mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 12:21:16 -07:00
Update Side Panel Tile Data (#866)
* Update Side Panel Tile Data * Update Side Panel Tile Data * Correct indicator names to match csv * Replace Score with Rate * Comment out FEMA Loss Rate to troubleshoot * Removes all "FEMA Loss Rate" array elements * Revert FEMA to Score * Remove expected loss rate * Remove RMP and NPL from BASIC array * Attempt to make shape mismatch align - update README typo * Add Score L indicators to TILE_SCORE_FLOAT_COLUMNS * removing cbg references * completes the ticket * Update side panel fields * Update index file writing to create parent dir * Updates from linting * fixing missing field_names for island territories 90th percentile fields * Update downloadable fields and fix field name * Update file fields and tests * Update ordering of fields and leave TODO * Update pickle after re-ordering of file * fixing bugs in etl_score_geo * Repeating index for diesel fix * passing tests * adding pytest.ini Co-authored-by: Vim USDS <vimal.k.shah@omb.eop.gov> Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov> Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
83eb7b0982
commit
9709d08ca3
13 changed files with 328 additions and 141 deletions
|
@ -159,7 +159,7 @@ We use Docker to install the necessary libraries in a container that can be run
|
||||||
|
|
||||||
To build the docker container the first time, make sure you're in the root directory of the repository and run `docker-compose build --no-cache`.
|
To build the docker container the first time, make sure you're in the root directory of the repository and run `docker-compose build --no-cache`.
|
||||||
|
|
||||||
Once completed, run `docker-compose up`. Docker will spin up 3 containers: the client container, the static server container and the data container. Once all data is generated, you can see the application using a browser and navigating to `htto://localhost:8000`.
|
Once completed, run `docker-compose up`. Docker will spin up 3 containers: the client container, the static server container and the data container. Once all data is generated, you can see the application using a browser and navigating to `http://localhost:8000`.
|
||||||
|
|
||||||
If you want to run specific data tasks, you can open a terminal window, navigate to the root folder for this repository and then execute any command for the application using this format:
|
If you want to run specific data tasks, you can open a terminal window, navigate to the root folder for this repository and then execute any command for the application using this format:
|
||||||
|
|
||||||
|
@ -322,7 +322,7 @@ score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10_TRACT": "string"}
|
||||||
score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False)
|
score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False)
|
||||||
```
|
```
|
||||||
|
|
||||||
Now you can move on to updating inidvidual pickles for the tests. Note that it is helpful to do them in this order:
|
Now you can move on to updating individual pickles for the tests. Note that it is helpful to do them in this order:
|
||||||
|
|
||||||
We have four pickle files that correspond to expected files:
|
We have four pickle files that correspond to expected files:
|
||||||
- `score_data_expected.pkl`: Initial score without counties
|
- `score_data_expected.pkl`: Initial score without counties
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
from data_pipeline.config import settings
|
from data_pipeline.config import settings
|
||||||
|
|
||||||
from data_pipeline.score import field_names
|
from data_pipeline.score import field_names
|
||||||
|
@ -38,6 +37,9 @@ FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH = (
|
||||||
# Score Tile CSV source path
|
# Score Tile CSV source path
|
||||||
DATA_SCORE_CSV_TILES_PATH = DATA_SCORE_CSV_DIR / "tiles"
|
DATA_SCORE_CSV_TILES_PATH = DATA_SCORE_CSV_DIR / "tiles"
|
||||||
DATA_SCORE_CSV_TILES_FILE_PATH = DATA_SCORE_CSV_TILES_PATH / "usa.csv"
|
DATA_SCORE_CSV_TILES_FILE_PATH = DATA_SCORE_CSV_TILES_PATH / "usa.csv"
|
||||||
|
DATA_SCORE_JSON_INDEX_FILE_PATH = (
|
||||||
|
DATA_SCORE_CSV_TILES_PATH / "tile_indexes.json"
|
||||||
|
)
|
||||||
|
|
||||||
## Tile path
|
## Tile path
|
||||||
DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
|
DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles"
|
||||||
|
@ -60,119 +62,214 @@ SCORE_DOWNLOADABLE_ZIP_FILE_PATH = (
|
||||||
|
|
||||||
# Column subsets
|
# Column subsets
|
||||||
CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"]
|
CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"]
|
||||||
TILES_SCORE_COLUMNS = [
|
|
||||||
field_names.GEOID_TRACT_FIELD,
|
TILES_ROUND_NUM_DECIMALS = 2
|
||||||
field_names.STATE_FIELD,
|
# Tiles data: full field name, tile index name
|
||||||
field_names.COUNTY_FIELD,
|
TILES_SCORE_COLUMNS = {
|
||||||
field_names.TOTAL_POP_FIELD,
|
field_names.GEOID_TRACT_FIELD: "GTF",
|
||||||
field_names.SCORE_D + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.STATE_FIELD: "SF",
|
||||||
field_names.SCORE_D + field_names.TOP_25_PERCENTILE_SUFFIX,
|
field_names.COUNTY_FIELD: "CF",
|
||||||
field_names.SCORE_E + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "DF_PFS",
|
||||||
field_names.SCORE_E + field_names.TOP_25_PERCENTILE_SUFFIX,
|
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "AF_PFS",
|
||||||
field_names.SCORE_G_COMMUNITIES,
|
field_names.HEART_DISEASE_FIELD
|
||||||
field_names.SCORE_G,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "HDF_PFS",
|
||||||
field_names.SCORE_L_COMMUNITIES,
|
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "DSF_PFS",
|
||||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.ENERGY_BURDEN_FIELD
|
||||||
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "EBF_PFS",
|
||||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "EALR_PFS",
|
||||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "EBLR_PFS",
|
||||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "EPLR_PFS",
|
||||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.HOUSING_BURDEN_FIELD
|
||||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "HBF_PFS",
|
||||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.LOW_LIFE_EXPECTANCY_FIELD
|
||||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "LLEF_PFS",
|
||||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.LINGUISTIC_ISO_FIELD
|
||||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "LIF_PFS",
|
||||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "LMI_PFS",
|
||||||
|
field_names.MEDIAN_HOUSE_VALUE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "MHVF_PFS",
|
||||||
|
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "PM25F_PFS",
|
||||||
|
field_names.HIGH_SCHOOL_ED_FIELD: "HSEF",
|
||||||
|
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "P100_PFS",
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "P200_PFS",
|
||||||
]
|
field_names.LEAD_PAINT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "LPF_PFS",
|
||||||
|
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "NPL_PFS",
|
||||||
|
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "RMP_PFS",
|
||||||
|
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "TSDF_PFS",
|
||||||
|
field_names.TOTAL_POP_FIELD: "TPF",
|
||||||
|
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "TF_PFS",
|
||||||
|
field_names.UNEMPLOYMENT_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "UF_PFS",
|
||||||
|
field_names.WASTEWATER_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
|
||||||
|
field_names.L_WATER: "L_WTR",
|
||||||
|
field_names.L_WORKFORCE: "L_WKFC",
|
||||||
|
field_names.L_CLIMATE: "L_CLT",
|
||||||
|
field_names.L_ENERGY: "L_ENY",
|
||||||
|
field_names.L_TRANSPORTATION: "L_TRN",
|
||||||
|
field_names.L_HOUSING: "L_HSG",
|
||||||
|
field_names.L_POLLUTION: "L_PLN",
|
||||||
|
field_names.L_HEALTH: "L_HLTH",
|
||||||
|
field_names.SCORE_L_COMMUNITIES: "SL_C",
|
||||||
|
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX: "SL_PFS",
|
||||||
|
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
|
||||||
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
|
||||||
|
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
|
||||||
|
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD: "PM25LI",
|
||||||
|
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD: "EBLI",
|
||||||
|
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD: "DPMLI",
|
||||||
|
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD: "TPLI",
|
||||||
|
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD: "LPMHVLI",
|
||||||
|
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD: "HBLI",
|
||||||
|
field_names.RMP_LOW_INCOME_FIELD: "RMPLI",
|
||||||
|
field_names.SUPERFUND_LOW_INCOME_FIELD: "SFLI",
|
||||||
|
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD: "HWLI",
|
||||||
|
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD: "WDLI",
|
||||||
|
field_names.DIABETES_LOW_INCOME_FIELD: "DLI",
|
||||||
|
field_names.ASTHMA_LOW_INCOME_FIELD: "ALI",
|
||||||
|
field_names.HEART_DISEASE_LOW_INCOME_FIELD: "HDLI",
|
||||||
|
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD: "LLELI",
|
||||||
|
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD: "LILHSE",
|
||||||
|
field_names.POVERTY_LOW_HS_EDUCATION_FIELD: "PLHSE",
|
||||||
|
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "LMILHSE",
|
||||||
|
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "ULHSE",
|
||||||
|
field_names.FPL_200_SERIES: "FPL200S",
|
||||||
|
field_names.THRESHOLD_COUNT: "TC",
|
||||||
|
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
||||||
|
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD: "ISPLHSE",
|
||||||
|
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD: "IALMILHSE",
|
||||||
|
}
|
||||||
|
|
||||||
# columns to round floats to 2 decimals
|
# columns to round floats to 2 decimals
|
||||||
|
# TODO refactor to use much smaller subset of fields we DON'T want to round
|
||||||
TILES_SCORE_FLOAT_COLUMNS = [
|
TILES_SCORE_FLOAT_COLUMNS = [
|
||||||
field_names.SCORE_D + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.SCORE_D + field_names.TOP_25_PERCENTILE_SUFFIX,
|
|
||||||
field_names.SCORE_E + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.SCORE_E + field_names.TOP_25_PERCENTILE_SUFFIX,
|
|
||||||
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.POVERTY_FIELD,
|
|
||||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
|
||||||
field_names.LINGUISTIC_ISO_FIELD,
|
|
||||||
field_names.UNEMPLOYMENT_FIELD,
|
|
||||||
field_names.HOUSING_BURDEN_FIELD,
|
|
||||||
field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
]
|
]
|
||||||
TILES_ROUND_NUM_DECIMALS = 2
|
|
||||||
|
|
||||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [
|
|
||||||
field_names.AMI_FIELD,
|
|
||||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
|
|
||||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
|
||||||
field_names.DIABETES_FIELD,
|
|
||||||
field_names.ASTHMA_FIELD,
|
|
||||||
field_names.HEART_DISEASE_FIELD,
|
|
||||||
field_names.TRAFFIC_FIELD,
|
|
||||||
field_names.FEMA_RISK_FIELD,
|
|
||||||
field_names.ENERGY_BURDEN_FIELD,
|
|
||||||
field_names.HOUSING_BURDEN_FIELD,
|
|
||||||
field_names.WASTEWATER_FIELD,
|
|
||||||
field_names.LEAD_PAINT_FIELD,
|
|
||||||
field_names.DIESEL_FIELD,
|
|
||||||
field_names.PM25_FIELD,
|
|
||||||
field_names.TOTAL_POP_FIELD,
|
|
||||||
]
|
|
||||||
|
|
||||||
# For every indicator above, we want to include percentile also.
|
|
||||||
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list(
|
|
||||||
pd.core.common.flatten(
|
|
||||||
[
|
|
||||||
[p, f"{p}{field_names.PERCENTILE_FIELD_SUFFIX}"]
|
|
||||||
for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Finally we augment with the GEOID10, county, and state
|
# Finally we augment with the GEOID10, county, and state
|
||||||
DOWNLOADABLE_SCORE_COLUMNS = [
|
DOWNLOADABLE_SCORE_COLUMNS = [
|
||||||
field_names.GEOID_TRACT_FIELD,
|
field_names.GEOID_TRACT_FIELD,
|
||||||
field_names.COUNTY_FIELD,
|
field_names.COUNTY_FIELD,
|
||||||
field_names.STATE_FIELD,
|
field_names.STATE_FIELD,
|
||||||
field_names.SCORE_G_COMMUNITIES,
|
field_names.SCORE_L_COMMUNITIES,
|
||||||
# Note: the reverse percentile fields get moved down here because
|
field_names.TOTAL_POP_FIELD,
|
||||||
# we put the raw value in the download along with the *reversed* percentile.
|
field_names.FPL_200_SERIES,
|
||||||
# All other fields we put in f"{field_name}" and
|
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||||
# f"{field_name}{field_names.PERCENTILE_FIELD_SUFFIX}", which doesn't work for the
|
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
||||||
# reversed percentile fields.
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
|
||||||
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
|
||||||
|
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
||||||
|
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
|
||||||
|
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
|
||||||
|
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
|
||||||
|
field_names.ENERGY_BURDEN_FIELD,
|
||||||
|
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
|
||||||
|
field_names.PM25_FIELD,
|
||||||
|
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
|
||||||
|
field_names.DIESEL_FIELD,
|
||||||
|
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
|
||||||
|
field_names.TRAFFIC_FIELD,
|
||||||
|
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
|
||||||
|
field_names.HOUSING_BURDEN_FIELD,
|
||||||
|
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
|
||||||
|
field_names.LEAD_PAINT_FIELD,
|
||||||
|
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
|
||||||
|
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
||||||
|
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.TSDF_FIELD,
|
||||||
|
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
|
||||||
|
field_names.NPL_FIELD,
|
||||||
|
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.SUPERFUND_LOW_INCOME_FIELD,
|
||||||
|
field_names.RMP_FIELD,
|
||||||
|
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.RMP_LOW_INCOME_FIELD,
|
||||||
|
field_names.WASTEWATER_FIELD,
|
||||||
|
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD,
|
||||||
|
field_names.ASTHMA_FIELD,
|
||||||
|
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.ASTHMA_LOW_INCOME_FIELD,
|
||||||
|
field_names.DIABETES_FIELD,
|
||||||
|
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.DIABETES_LOW_INCOME_FIELD,
|
||||||
|
field_names.HEART_DISEASE_FIELD,
|
||||||
|
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.HEART_DISEASE_LOW_INCOME_FIELD,
|
||||||
|
field_names.LIFE_EXPECTANCY_FIELD,
|
||||||
|
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
||||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.LIFE_EXPECTANCY_FIELD,
|
field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.LINGUISTIC_ISO_FIELD,
|
||||||
*DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL,
|
field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.UNEMPLOYMENT_FIELD,
|
||||||
|
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
|
||||||
|
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
||||||
|
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.HIGH_SCHOOL_ED_FIELD,
|
||||||
|
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
|
field_names.THRESHOLD_COUNT,
|
||||||
|
field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.COMBINED_UNEMPLOYMENT_2010,
|
||||||
|
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
|
||||||
|
field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
||||||
|
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||||
]
|
]
|
||||||
|
|
|
@ -3,10 +3,12 @@ import pandas as pd
|
||||||
import geopandas as gpd
|
import geopandas as gpd
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
|
from data_pipeline.etl.score import constants
|
||||||
from data_pipeline.etl.sources.census.etl_utils import (
|
from data_pipeline.etl.sources.census.etl_utils import (
|
||||||
check_census_data_source,
|
check_census_data_source,
|
||||||
)
|
)
|
||||||
from data_pipeline.etl.score.etl_utils import check_score_data_source
|
from data_pipeline.etl.score.etl_utils import check_score_data_source
|
||||||
|
from data_pipeline.score import field_names
|
||||||
from data_pipeline.utils import get_module_logger
|
from data_pipeline.utils import get_module_logger
|
||||||
|
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
@ -31,9 +33,19 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
self.DATA_PATH / "census" / "geojson" / "us.json"
|
self.DATA_PATH / "census" / "geojson" / "us.json"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.TARGET_SCORE_NAME = "Definition L (percentile)"
|
# Import the shortened name for Score L percentile ("SL_PFS") that's used on the
|
||||||
|
# tiles.
|
||||||
|
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||||
|
field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
|
||||||
|
]
|
||||||
self.TARGET_SCORE_RENAME_TO = "L_SCORE"
|
self.TARGET_SCORE_RENAME_TO = "L_SCORE"
|
||||||
|
|
||||||
|
# Import the shortened name for tract ("GTF") that's used on the tiles.
|
||||||
|
self.TRACT_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
|
||||||
|
field_names.GEOID_TRACT_FIELD
|
||||||
|
]
|
||||||
|
self.GEOMETRY_FIELD_NAME = "geometry"
|
||||||
|
|
||||||
self.NUMBER_OF_BUCKETS = 10
|
self.NUMBER_OF_BUCKETS = 10
|
||||||
|
|
||||||
self.geojson_usa_df: gpd.GeoDataFrame
|
self.geojson_usa_df: gpd.GeoDataFrame
|
||||||
|
@ -57,45 +69,52 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
logger.info("Reading US GeoJSON (~6 minutes)")
|
logger.info("Reading US GeoJSON (~6 minutes)")
|
||||||
self.geojson_usa_df = gpd.read_file(
|
self.geojson_usa_df = gpd.read_file(
|
||||||
self.CENSUS_USA_GEOJSON,
|
self.CENSUS_USA_GEOJSON,
|
||||||
dtype={"GEOID10": "string"},
|
dtype={self.GEOID_FIELD_NAME: "string"},
|
||||||
usecols=["GEOID10", "geometry"],
|
usecols=[self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME],
|
||||||
low_memory=False,
|
low_memory=False,
|
||||||
)
|
)
|
||||||
self.geojson_usa_df.head()
|
|
||||||
|
|
||||||
logger.info("Reading score CSV")
|
logger.info("Reading score CSV")
|
||||||
self.score_usa_df = pd.read_csv(
|
self.score_usa_df = pd.read_csv(
|
||||||
self.TILE_SCORE_CSV,
|
self.TILE_SCORE_CSV,
|
||||||
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
dtype={self.TRACT_SHORT_FIELD: "string"},
|
||||||
low_memory=False,
|
low_memory=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
def transform(self) -> None:
|
def transform(self) -> None:
|
||||||
# rename GEOID10_TRACT to GEOID10 on score to allow merging with Census GeoJSON
|
# Rename the shortened tract column ("GTF") to GEOID10 on score to allow merging with Census GeoJSON
|
||||||
self.score_usa_df.rename(
|
self.score_usa_df.rename(
|
||||||
columns={self.GEOID_TRACT_FIELD_NAME: "GEOID10"},
|
columns={self.TRACT_SHORT_FIELD: self.GEOID_FIELD_NAME},
|
||||||
inplace=True,
|
inplace=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Pruning Census GeoJSON")
|
logger.info("Pruning Census GeoJSON")
|
||||||
fields = ["GEOID10", "geometry"]
|
fields = [self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME]
|
||||||
self.geojson_usa_df = self.geojson_usa_df[fields]
|
self.geojson_usa_df = self.geojson_usa_df[fields]
|
||||||
|
|
||||||
logger.info("Merging and compressing score CSV with USA GeoJSON")
|
logger.info("Merging and compressing score CSV with USA GeoJSON")
|
||||||
self.geojson_score_usa_high = self.score_usa_df.merge(
|
self.geojson_score_usa_high = self.score_usa_df.merge(
|
||||||
self.geojson_usa_df, on="GEOID10", how="left"
|
self.geojson_usa_df, on=self.GEOID_FIELD_NAME, how="left"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.geojson_score_usa_high = gpd.GeoDataFrame(
|
self.geojson_score_usa_high = gpd.GeoDataFrame(
|
||||||
self.geojson_score_usa_high, crs="EPSG:4326"
|
self.geojson_score_usa_high, crs="EPSG:4326"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.info(f"Columns: {self.geojson_score_usa_high.columns}")
|
||||||
|
|
||||||
usa_simplified = self.geojson_score_usa_high[
|
usa_simplified = self.geojson_score_usa_high[
|
||||||
["GEOID10", self.TARGET_SCORE_NAME, "geometry"]
|
[
|
||||||
|
self.GEOID_FIELD_NAME,
|
||||||
|
self.TARGET_SCORE_SHORT_FIELD,
|
||||||
|
self.GEOMETRY_FIELD_NAME,
|
||||||
|
]
|
||||||
].reset_index(drop=True)
|
].reset_index(drop=True)
|
||||||
|
|
||||||
usa_simplified.rename(
|
usa_simplified.rename(
|
||||||
columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
|
columns={
|
||||||
|
self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO
|
||||||
|
},
|
||||||
inplace=True,
|
inplace=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -104,7 +123,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
usa_tracts = gpd.GeoDataFrame(
|
usa_tracts = gpd.GeoDataFrame(
|
||||||
usa_tracts,
|
usa_tracts,
|
||||||
columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
|
columns=[self.TARGET_SCORE_RENAME_TO, self.GEOMETRY_FIELD_NAME],
|
||||||
crs="EPSG:4326",
|
crs="EPSG:4326",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -122,7 +141,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
self.geojson_score_usa_low = gpd.GeoDataFrame(
|
self.geojson_score_usa_low = gpd.GeoDataFrame(
|
||||||
compressed,
|
compressed,
|
||||||
columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
|
columns=[self.TARGET_SCORE_RENAME_TO, self.GEOMETRY_FIELD_NAME],
|
||||||
crs="EPSG:4326",
|
crs="EPSG:4326",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -135,7 +154,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
) -> gpd.GeoDataFrame:
|
) -> gpd.GeoDataFrame:
|
||||||
# The tract identifier is the first 11 digits of the GEOID
|
# The tract identifier is the first 11 digits of the GEOID
|
||||||
block_group_df["tract"] = block_group_df.apply(
|
block_group_df["tract"] = block_group_df.apply(
|
||||||
lambda row: row["GEOID10"][0:11], axis=1
|
lambda row: row[self.GEOID_FIELD_NAME][0:11], axis=1
|
||||||
)
|
)
|
||||||
state_tracts = block_group_df.dissolve(by="tract", aggfunc="mean")
|
state_tracts = block_group_df.dissolve(by="tract", aggfunc="mean")
|
||||||
return state_tracts
|
return state_tracts
|
||||||
|
@ -160,7 +179,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
[
|
[
|
||||||
self.TARGET_SCORE_RENAME_TO,
|
self.TARGET_SCORE_RENAME_TO,
|
||||||
f"{self.TARGET_SCORE_RENAME_TO}_bucket",
|
f"{self.TARGET_SCORE_RENAME_TO}_bucket",
|
||||||
"geometry",
|
self.GEOMETRY_FIELD_NAME,
|
||||||
]
|
]
|
||||||
].reset_index(drop=True)
|
].reset_index(drop=True)
|
||||||
state_dissolve = state_attr.dissolve(
|
state_dissolve = state_attr.dissolve(
|
||||||
|
@ -173,11 +192,13 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
) -> gpd.GeoDataFrame:
|
) -> gpd.GeoDataFrame:
|
||||||
compressed = []
|
compressed = []
|
||||||
for i in range(num_buckets):
|
for i in range(num_buckets):
|
||||||
for j in range(len(state_bucketed_df["geometry"][i].geoms)):
|
for j in range(
|
||||||
|
len(state_bucketed_df[self.GEOMETRY_FIELD_NAME][i].geoms)
|
||||||
|
):
|
||||||
compressed.append(
|
compressed.append(
|
||||||
[
|
[
|
||||||
state_bucketed_df[self.TARGET_SCORE_RENAME_TO][i],
|
state_bucketed_df[self.TARGET_SCORE_RENAME_TO][i],
|
||||||
state_bucketed_df["geometry"][i].geoms[j],
|
state_bucketed_df[self.GEOMETRY_FIELD_NAME][i].geoms[j],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
return compressed
|
return compressed
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
from data_pipeline.utils import get_module_logger, zip_files
|
from data_pipeline.utils import get_module_logger, zip_files
|
||||||
from data_pipeline.score import field_names
|
from data_pipeline.score import field_names
|
||||||
|
@ -198,16 +200,37 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
self, score_county_state_merged_df: pd.DataFrame
|
self, score_county_state_merged_df: pd.DataFrame
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
logger.info("Rounding Decimals")
|
logger.info("Rounding Decimals")
|
||||||
score_tiles = score_county_state_merged_df[
|
|
||||||
constants.TILES_SCORE_COLUMNS
|
# grab all the keys from tiles score columns
|
||||||
]
|
tiles_score_column_titles = list(constants.TILES_SCORE_COLUMNS.keys())
|
||||||
|
|
||||||
|
# filter the columns on full score
|
||||||
|
score_tiles = score_county_state_merged_df[tiles_score_column_titles]
|
||||||
|
|
||||||
|
# round decimals
|
||||||
decimals = pd.Series(
|
decimals = pd.Series(
|
||||||
[constants.TILES_ROUND_NUM_DECIMALS]
|
[constants.TILES_ROUND_NUM_DECIMALS]
|
||||||
* len(constants.TILES_SCORE_FLOAT_COLUMNS),
|
* len(constants.TILES_SCORE_FLOAT_COLUMNS),
|
||||||
index=constants.TILES_SCORE_FLOAT_COLUMNS,
|
index=constants.TILES_SCORE_FLOAT_COLUMNS,
|
||||||
)
|
)
|
||||||
|
score_tiles = score_tiles.round(decimals)
|
||||||
|
|
||||||
return score_tiles.round(decimals)
|
# create indexes
|
||||||
|
score_tiles = score_tiles.rename(
|
||||||
|
columns=constants.TILES_SCORE_COLUMNS,
|
||||||
|
inplace=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# write the json map to disk
|
||||||
|
inverse_tiles_columns = {
|
||||||
|
v: k for k, v in constants.TILES_SCORE_COLUMNS.items()
|
||||||
|
} # reverse dict
|
||||||
|
index_file_path = constants.DATA_SCORE_JSON_INDEX_FILE_PATH
|
||||||
|
index_file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with open(index_file_path, "w", encoding="utf-8") as fp:
|
||||||
|
json.dump(inverse_tiles_columns, fp)
|
||||||
|
|
||||||
|
return score_tiles
|
||||||
|
|
||||||
def _create_downloadable_data(
|
def _create_downloadable_data(
|
||||||
self, score_county_state_merged_df: pd.DataFrame
|
self, score_county_state_merged_df: pd.DataFrame
|
||||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -136,7 +136,7 @@ class CensusETL(ExtractTransformLoad):
|
||||||
|
|
||||||
def transform(self) -> None:
|
def transform(self) -> None:
|
||||||
"""Download all census shape files from the Census FTP and extract the geojson
|
"""Download all census shape files from the Census FTP and extract the geojson
|
||||||
to generate national and by state Census Block Group CSVs and GeoJSONs
|
to generate national and by state Census tract CSVs and GeoJSONs
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
None
|
None
|
||||||
|
@ -225,7 +225,7 @@ class CensusETL(ExtractTransformLoad):
|
||||||
logger.info("Writing national geojson file")
|
logger.info("Writing national geojson file")
|
||||||
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
|
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
|
||||||
|
|
||||||
logger.info("Census block groups downloading complete")
|
logger.info("Census tract downloading complete")
|
||||||
|
|
||||||
def load(self) -> None:
|
def load(self) -> None:
|
||||||
"""Create state CSVs, National CSV, and National GeoJSON
|
"""Create state CSVs, National CSV, and National GeoJSON
|
||||||
|
|
|
@ -291,14 +291,27 @@ POVERTY_LOW_HS_EDUCATION_FIELD = (
|
||||||
" and has low HS education"
|
" and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
LOW_READING_LOW_HS_EDUCATION_FIELD = (
|
LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
||||||
f"At or above the {PERCENTILE}th percentile for low 3rd grade reading proficiency"
|
f"At or above the {PERCENTILE}th percentile for low median household income as a "
|
||||||
" and has low HS education"
|
f"percent of area median income and has low HS education"
|
||||||
)
|
)
|
||||||
|
|
||||||
LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
# Workforce for island areas
|
||||||
f"At or below the {PERCENTILE}th percentile for low median household income as a "
|
ISLAND_AREAS_SUFFIX = " in 2009 (island areas)"
|
||||||
f"percent of area median income and has low HS education"
|
ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for unemployment"
|
||||||
|
f" and has low HS education{ISLAND_AREAS_SUFFIX}"
|
||||||
|
)
|
||||||
|
|
||||||
|
ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for households at or below 100% federal poverty level"
|
||||||
|
f" and has low HS education{ISLAND_AREAS_SUFFIX}"
|
||||||
|
)
|
||||||
|
|
||||||
|
ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for low median household income as a "
|
||||||
|
f"percent of area median income"
|
||||||
|
f" and has low HS education{ISLAND_AREAS_SUFFIX}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Not currently used in a factor
|
# Not currently used in a factor
|
||||||
|
@ -317,6 +330,10 @@ HEALTHY_FOOD_LOW_INCOME_FIELD = (
|
||||||
f"At or above the {PERCENTILE}th percentile for low "
|
f"At or above the {PERCENTILE}th percentile for low "
|
||||||
f"access to healthy food and is low income"
|
f"access to healthy food and is low income"
|
||||||
)
|
)
|
||||||
|
LOW_READING_LOW_HS_EDUCATION_FIELD = (
|
||||||
|
f"At or above the {PERCENTILE}th percentile for low 3rd grade reading proficiency"
|
||||||
|
" and has low HS education"
|
||||||
|
)
|
||||||
|
|
||||||
THRESHOLD_COUNT = "Total threshold criteria exceeded"
|
THRESHOLD_COUNT = "Total threshold criteria exceeded"
|
||||||
|
|
||||||
|
|
|
@ -117,7 +117,7 @@ class ScoreL(Score):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.df[field_names.THRESHOLD_COUNT] += self.df[columns_for_subset].sum(
|
self.df[field_names.THRESHOLD_COUNT] += self.df[columns_for_subset].sum(
|
||||||
axis=1
|
axis=1, skipna=True
|
||||||
)
|
)
|
||||||
|
|
||||||
def add_columns(self) -> pd.DataFrame:
|
def add_columns(self) -> pd.DataFrame:
|
||||||
|
@ -162,7 +162,7 @@ class ScoreL(Score):
|
||||||
non_workforce_factors
|
non_workforce_factors
|
||||||
].any(axis=1)
|
].any(axis=1)
|
||||||
|
|
||||||
self.df["Definition L (percentile)"] = self.df[
|
self.df[field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX] = self.df[
|
||||||
field_names.SCORE_L_COMMUNITIES
|
field_names.SCORE_L_COMMUNITIES
|
||||||
].astype(int)
|
].astype(int)
|
||||||
|
|
||||||
|
@ -586,12 +586,16 @@ class ScoreL(Score):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Now, calculate workforce criteria for island territories.
|
# Now, calculate workforce criteria for island territories.
|
||||||
|
island_areas_workforce_eligibility_columns = [
|
||||||
|
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
||||||
|
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
||||||
|
]
|
||||||
|
|
||||||
# F a couple of values, create a combined field and criteria field.
|
|
||||||
# First, combine unemployment.
|
# First, combine unemployment.
|
||||||
(
|
(
|
||||||
self.df,
|
self.df,
|
||||||
unemployment_island_areas_criteria_field_name,
|
island_areas_unemployment_criteria_field_name,
|
||||||
) = self._combine_island_areas_with_states_and_set_thresholds(
|
) = self._combine_island_areas_with_states_and_set_thresholds(
|
||||||
df=self.df,
|
df=self.df,
|
||||||
column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
|
column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
|
||||||
|
@ -603,7 +607,7 @@ class ScoreL(Score):
|
||||||
# Next, combine poverty.
|
# Next, combine poverty.
|
||||||
(
|
(
|
||||||
self.df,
|
self.df,
|
||||||
poverty_island_areas_criteria_field_name,
|
island_areas_poverty_criteria_field_name,
|
||||||
) = self._combine_island_areas_with_states_and_set_thresholds(
|
) = self._combine_island_areas_with_states_and_set_thresholds(
|
||||||
df=self.df,
|
df=self.df,
|
||||||
column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
|
column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
|
||||||
|
@ -614,12 +618,12 @@ class ScoreL(Score):
|
||||||
|
|
||||||
# Also check whether low area median income is 90th percentile or higher
|
# Also check whether low area median income is 90th percentile or higher
|
||||||
# within the islands.
|
# within the islands.
|
||||||
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name = (
|
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name = (
|
||||||
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds "
|
||||||
f"{field_names.PERCENTILE}th percentile"
|
f"{field_names.PERCENTILE}th percentile"
|
||||||
)
|
)
|
||||||
self.df[
|
self.df[
|
||||||
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
|
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name
|
||||||
] = (
|
] = (
|
||||||
self.df[
|
self.df[
|
||||||
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
||||||
|
@ -628,17 +632,40 @@ class ScoreL(Score):
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
workforce_combined_criteria_for_island_areas = (
|
island_areas_high_scool_achievement_rate_threshold = (
|
||||||
self.df[unemployment_island_areas_criteria_field_name]
|
|
||||||
| self.df[poverty_island_areas_criteria_field_name]
|
|
||||||
| self.df[
|
|
||||||
low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name
|
|
||||||
]
|
|
||||||
) & (
|
|
||||||
self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
|
self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
|
||||||
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
>= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.df[
|
||||||
|
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD
|
||||||
|
] = (
|
||||||
|
self.df[island_areas_unemployment_criteria_field_name]
|
||||||
|
& island_areas_high_scool_achievement_rate_threshold
|
||||||
|
)
|
||||||
|
|
||||||
|
self.df[field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD] = (
|
||||||
|
self.df[island_areas_poverty_criteria_field_name]
|
||||||
|
& island_areas_high_scool_achievement_rate_threshold
|
||||||
|
)
|
||||||
|
|
||||||
|
self.df[
|
||||||
|
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD
|
||||||
|
] = (
|
||||||
|
self.df[
|
||||||
|
island_areas_low_median_income_as_a_percent_of_ami_criteria_field_name
|
||||||
|
]
|
||||||
|
& island_areas_high_scool_achievement_rate_threshold
|
||||||
|
)
|
||||||
|
|
||||||
|
workforce_combined_criteria_for_island_areas = self.df[
|
||||||
|
island_areas_workforce_eligibility_columns
|
||||||
|
].any(axis="columns")
|
||||||
|
|
||||||
|
self._increment_total_eligibility_exceeded(
|
||||||
|
island_areas_workforce_eligibility_columns
|
||||||
|
)
|
||||||
|
|
||||||
percent_of_island_tracts_highlighted = (
|
percent_of_island_tracts_highlighted = (
|
||||||
100
|
100
|
||||||
* workforce_combined_criteria_for_island_areas.sum()
|
* workforce_combined_criteria_for_island_areas.sum()
|
||||||
|
|
2
data/data-pipeline/pytest.ini
Normal file
2
data/data-pipeline/pytest.ini
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[pytest]
|
||||||
|
norecursedirs = .git data
|
Loading…
Add table
Add a link
Reference in a new issue