mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-31 07:51:16 -07:00
YAML Config for Downloadable Assets (#1252)
* starting yaml config load work
* working version for downloadable file
* yaml file update
* checkpoint
* sort if needed
* refactoring
* moving config
* checkpoint
* old files
* skipping downloadable tests for now
* more modularization
* more refactor, new excel yml
* pylint
* completed tabs
* Update excel.yml
* removing obsolete tests
* addressing PR feedback
* addressing changes
* confirmed change in yaml breaks tests
* safety bump
* PR review
* adding tests back
* pylint
* Incorporating latest score fields from Emma
* incorporating newest fields from Emma
* passing tests
* adding shapefile aws sync
* missing test
* passing tests
This commit is contained in:
parent
ec139b113f
commit
6425beb9f4
12 changed files with 725 additions and 170 deletions
1
.github/workflows/combine-tilefy.yml
vendored
1
.github/workflows/combine-tilefy.yml
vendored
|
@ -68,3 +68,4 @@ jobs:
|
||||||
aws s3 rm s3://justice40-data/data-pipeline/data/score/tiles --recursive
|
aws s3 rm s3://justice40-data/data-pipeline/data/score/tiles --recursive
|
||||||
aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline/data/score/tiles --recursive --acl public-read
|
aws s3 cp ./data_pipeline/data/score/tiles/ s3://justice40-data/data-pipeline/data/score/tiles --recursive --acl public-read
|
||||||
aws s3 sync ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline/data/score/geojson --acl public-read --delete
|
aws s3 sync ./data_pipeline/data/score/geojson/ s3://justice40-data/data-pipeline/data/score/geojson --acl public-read --delete
|
||||||
|
aws s3 sync ./data_pipeline/data/score/shapefile/ s3://justice40-data/data-pipeline/data/score/shapefile --acl public-read --delete
|
||||||
|
|
250
data/data-pipeline/data_pipeline/content/config/csv.yml
Normal file
250
data/data-pipeline/data_pipeline/content/config/csv.yml
Normal file
|
@ -0,0 +1,250 @@
|
||||||
|
---
|
||||||
|
global_config:
|
||||||
|
sort_by_label: Census tract ID
|
||||||
|
rounding_num:
|
||||||
|
float: 2
|
||||||
|
loss_rate_percentage: 4
|
||||||
|
fields:
|
||||||
|
- score_name: GEOID10_TRACT
|
||||||
|
label: Census tract ID
|
||||||
|
format: string
|
||||||
|
- score_name: County Name
|
||||||
|
label: County Name
|
||||||
|
format: string
|
||||||
|
- score_name: State/Territory
|
||||||
|
label: State/Territory
|
||||||
|
format: string
|
||||||
|
- score_name: Total threshold criteria exceeded
|
||||||
|
label: Total threshold criteria exceeded
|
||||||
|
format: int64
|
||||||
|
- score_name: Definition M (communities)
|
||||||
|
label: Definition M (communities)
|
||||||
|
format: bool
|
||||||
|
- score_name: Total population
|
||||||
|
label: Total population
|
||||||
|
format: float
|
||||||
|
- score_name: Is low income and has a low percent of higher ed students?
|
||||||
|
label: Is low income and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Expected agricultural loss rate (Natural Hazards Risk Index)
|
||||||
|
label: Expected agricultural loss rate (Natural Hazards Risk Index)
|
||||||
|
format: loss_rate_percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
label: Expected building loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Expected building loss rate (Natural Hazards Risk Index)
|
||||||
|
label: Expected building loss rate (Natural Hazards Risk Index)
|
||||||
|
format: loss_rate_percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
label: Expected population loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Expected population loss rate (Natural Hazards Risk Index)
|
||||||
|
label: Expected population loss rate (Natural Hazards Risk Index)
|
||||||
|
format: loss_rate_percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Energy burden (percentile)
|
||||||
|
label: Energy burden (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Energy burden
|
||||||
|
label: Energy burden
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: PM2.5 in the air (percentile)
|
||||||
|
label: PM2.5 in the air (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: PM2.5 in the air
|
||||||
|
label: PM2.5 in the air
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Diesel particulate matter exposure (percentile)
|
||||||
|
label: Diesel particulate matter exposure (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Diesel particulate matter exposure
|
||||||
|
label: Diesel particulate matter exposure
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Traffic proximity and volume (percentile)
|
||||||
|
label: Traffic proximity and volume (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Traffic proximity and volume
|
||||||
|
label: Traffic proximity and volume
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Housing burden (percent) (percentile)
|
||||||
|
label: Housing burden (percent) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Housing burden (percent)
|
||||||
|
label: Housing burden (percent)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Percent pre-1960s housing (lead paint indicator) (percentile)
|
||||||
|
label: Percent pre-1960s housing (lead paint indicator) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent pre-1960s housing (lead paint indicator)
|
||||||
|
label: Percent pre-1960s housing (lead paint indicator)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Median value ($) of owner-occupied housing units (percentile)
|
||||||
|
label: Median value ($) of owner-occupied housing units (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Median value ($) of owner-occupied housing units
|
||||||
|
label: Median value ($) of owner-occupied housing units
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Proximity to hazardous waste sites (percentile)
|
||||||
|
label: Proximity to hazardous waste sites (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Proximity to hazardous waste sites
|
||||||
|
label: Proximity to hazardous waste sites
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Proximity to NPL sites (percentile)
|
||||||
|
label: Proximity to NPL sites (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Proximity to NPL sites
|
||||||
|
label: Proximity to NPL sites
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Proximity to Risk Management Plan (RMP) facilities (percentile)
|
||||||
|
label: Proximity to Risk Management Plan (RMP) facilities (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Proximity to Risk Management Plan (RMP) facilities
|
||||||
|
label: Proximity to Risk Management Plan (RMP) facilities
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Wastewater discharge (percentile)
|
||||||
|
label: Wastewater discharge (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Wastewater discharge
|
||||||
|
label: Wastewater discharge
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Current asthma among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
label: Current asthma among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Current asthma among adults aged greater than or equal to 18 years
|
||||||
|
label: Current asthma among adults aged greater than or equal to 18 years
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years
|
||||||
|
label: Diagnosed diabetes among adults aged greater than or equal to 18 years
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Coronary heart disease among adults aged greater than or equal to 18 years
|
||||||
|
label: Coronary heart disease among adults aged greater than or equal to 18 years
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Low life expectancy (percentile)
|
||||||
|
label: Low life expectancy (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Life expectancy (years)
|
||||||
|
label: Life expectancy (years)
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Low median household income as a percent of area median income (percentile)
|
||||||
|
label: Low median household income as a percent of area median income (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Median household income as a percent of area median income
|
||||||
|
label: Median household income as a percent of area median income
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Linguistic isolation (percent) (percentile)
|
||||||
|
label: Linguistic isolation (percent) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Linguistic isolation (percent)
|
||||||
|
label: Linguistic isolation (percent)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Unemployment (percent) (percentile)
|
||||||
|
label: Unemployment (percent) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Unemployment (percent)
|
||||||
|
label: Unemployment (percent)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Percent of individuals below 200% Federal Poverty Line (percentile)
|
||||||
|
label: Percent of individuals below 200% Federal Poverty Line (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent of individuals < 100% Federal Poverty Line (percentile)
|
||||||
|
label: Percent of individuals < 100% Federal Poverty Line (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent of individuals below 200% Federal Poverty Line
|
||||||
|
label: Percent of individuals below 200% Federal Poverty Line
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent of individuals < 100% Federal Poverty Line
|
||||||
|
label: Percent of individuals < 100% Federal Poverty Line
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent individuals age 25 or over with less than high school degree (percentile)
|
||||||
|
label: Percent individuals age 25 or over with less than high school degree (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent individuals age 25 or over with less than high school degree
|
||||||
|
label: Percent individuals age 25 or over with less than high school degree
|
||||||
|
format: percentage
|
||||||
|
- score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
label: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
|
||||||
|
label: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
|
||||||
|
format: bool
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
|
||||||
|
label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
|
||||||
|
format: bool
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
|
||||||
|
label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
|
||||||
|
format: bool
|
255
data/data-pipeline/data_pipeline/content/config/excel.yml
Normal file
255
data/data-pipeline/data_pipeline/content/config/excel.yml
Normal file
|
@ -0,0 +1,255 @@
|
||||||
|
---
|
||||||
|
global_config:
|
||||||
|
sort_by_label: Census tract ID
|
||||||
|
rounding_num:
|
||||||
|
float: 2
|
||||||
|
loss_rate_percentage: 4
|
||||||
|
excel_config:
|
||||||
|
default_column_width: 30
|
||||||
|
sheets:
|
||||||
|
- main:
|
||||||
|
label: "Data"
|
||||||
|
fields:
|
||||||
|
- score_name: GEOID10_TRACT
|
||||||
|
label: Census tract ID
|
||||||
|
format: string
|
||||||
|
- score_name: County Name
|
||||||
|
label: County Name
|
||||||
|
format: string
|
||||||
|
- score_name: State/Territory
|
||||||
|
label: State/Territory
|
||||||
|
format: string
|
||||||
|
- score_name: Total threshold criteria exceeded
|
||||||
|
label: Total threshold criteria exceeded
|
||||||
|
format: int64
|
||||||
|
- score_name: Definition M (communities)
|
||||||
|
label: Definition M (communities)
|
||||||
|
format: bool
|
||||||
|
- score_name: Total population
|
||||||
|
label: Total population
|
||||||
|
format: float
|
||||||
|
- score_name: Is low income and has a low percent of higher ed students?
|
||||||
|
label: Is low income and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
label: Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Expected agricultural loss rate (Natural Hazards Risk Index)
|
||||||
|
label: Expected agricultural loss rate (Natural Hazards Risk Index)
|
||||||
|
format: loss_rate_percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Expected building loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
label: Expected building loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Expected building loss rate (Natural Hazards Risk Index)
|
||||||
|
label: Expected building loss rate (Natural Hazards Risk Index)
|
||||||
|
format: loss_rate_percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Expected population loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
label: Expected population loss rate (Natural Hazards Risk Index) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Expected population loss rate (Natural Hazards Risk Index)
|
||||||
|
label: Expected population loss rate (Natural Hazards Risk Index)
|
||||||
|
format: loss_rate_percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Energy burden (percentile)
|
||||||
|
label: Energy burden (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Energy burden
|
||||||
|
label: Energy burden
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: PM2.5 in the air (percentile)
|
||||||
|
label: PM2.5 in the air (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: PM2.5 in the air
|
||||||
|
label: PM2.5 in the air
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Diesel particulate matter exposure (percentile)
|
||||||
|
label: Diesel particulate matter exposure (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Diesel particulate matter exposure
|
||||||
|
label: Diesel particulate matter exposure
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Traffic proximity and volume (percentile)
|
||||||
|
label: Traffic proximity and volume (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Traffic proximity and volume
|
||||||
|
label: Traffic proximity and volume
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Housing burden (percent) (percentile)
|
||||||
|
label: Housing burden (percent) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Housing burden (percent)
|
||||||
|
label: Housing burden (percent)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Percent pre-1960s housing (lead paint indicator) (percentile)
|
||||||
|
label: Percent pre-1960s housing (lead paint indicator) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent pre-1960s housing (lead paint indicator)
|
||||||
|
label: Percent pre-1960s housing (lead paint indicator)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Median value ($) of owner-occupied housing units (percentile)
|
||||||
|
label: Median value ($) of owner-occupied housing units (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Median value ($) of owner-occupied housing units
|
||||||
|
label: Median value ($) of owner-occupied housing units
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Proximity to hazardous waste sites (percentile)
|
||||||
|
label: Proximity to hazardous waste sites (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Proximity to hazardous waste sites
|
||||||
|
label: Proximity to hazardous waste sites
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Proximity to NPL sites (percentile)
|
||||||
|
label: Proximity to NPL sites (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Proximity to NPL sites
|
||||||
|
label: Proximity to NPL sites
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Proximity to Risk Management Plan (RMP) facilities (percentile)
|
||||||
|
label: Proximity to Risk Management Plan (RMP) facilities (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Proximity to Risk Management Plan (RMP) facilities
|
||||||
|
label: Proximity to Risk Management Plan (RMP) facilities
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Wastewater discharge (percentile)
|
||||||
|
label: Wastewater discharge (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Wastewater discharge
|
||||||
|
label: Wastewater discharge
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Current asthma among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
label: Current asthma among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Current asthma among adults aged greater than or equal to 18 years
|
||||||
|
label: Current asthma among adults aged greater than or equal to 18 years
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
label: Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Diagnosed diabetes among adults aged greater than or equal to 18 years
|
||||||
|
label: Diagnosed diabetes among adults aged greater than or equal to 18 years
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
label: Coronary heart disease among adults aged greater than or equal to 18 years (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Coronary heart disease among adults aged greater than or equal to 18 years
|
||||||
|
label: Coronary heart disease among adults aged greater than or equal to 18 years
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Low life expectancy (percentile)
|
||||||
|
label: Low life expectancy (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Life expectancy (years)
|
||||||
|
label: Life expectancy (years)
|
||||||
|
format: float
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Low median household income as a percent of area median income (percentile)
|
||||||
|
label: Low median household income as a percent of area median income (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Median household income as a percent of area median income
|
||||||
|
label: Median household income as a percent of area median income
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Linguistic isolation (percent) (percentile)
|
||||||
|
label: Linguistic isolation (percent) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Linguistic isolation (percent)
|
||||||
|
label: Linguistic isolation (percent)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Unemployment (percent) (percentile)
|
||||||
|
label: Unemployment (percent) (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Unemployment (percent)
|
||||||
|
label: Unemployment (percent)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?
|
||||||
|
format: bool
|
||||||
|
- score_name: Percent of individuals below 200% Federal Poverty Line (percentile)
|
||||||
|
label: Percent of individuals below 200% Federal Poverty Line (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent of individuals < 100% Federal Poverty Line (percentile)
|
||||||
|
label: Percent of individuals < 100% Federal Poverty Line (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent of individuals below 200% Federal Poverty Line
|
||||||
|
label: Percent of individuals below 200% Federal Poverty Line
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent of individuals < 100% Federal Poverty Line
|
||||||
|
label: Percent of individuals < 100% Federal Poverty Line
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent individuals age 25 or over with less than high school degree (percentile)
|
||||||
|
label: Percent individuals age 25 or over with less than high school degree (percentile)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percent individuals age 25 or over with less than high school degree
|
||||||
|
label: Percent individuals age 25 or over with less than high school degree
|
||||||
|
format: percentage
|
||||||
|
- score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
label: Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR)
|
||||||
|
format: percentage
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
|
||||||
|
label: Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?
|
||||||
|
format: bool
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
|
||||||
|
label: Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?
|
||||||
|
format: bool
|
||||||
|
- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
|
||||||
|
label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
|
||||||
|
format: bool
|
|
@ -39,6 +39,7 @@ class ExtractTransformLoad:
|
||||||
# Directories
|
# Directories
|
||||||
DATA_PATH: pathlib.Path = APP_ROOT / "data"
|
DATA_PATH: pathlib.Path = APP_ROOT / "data"
|
||||||
TMP_PATH: pathlib.Path = DATA_PATH / "tmp"
|
TMP_PATH: pathlib.Path = DATA_PATH / "tmp"
|
||||||
|
CONTENT_CONFIG: pathlib.Path = APP_ROOT / "content" / "config"
|
||||||
|
|
||||||
# Parameters
|
# Parameters
|
||||||
GEOID_FIELD_NAME: str = "GEOID10"
|
GEOID_FIELD_NAME: str = "GEOID10"
|
||||||
|
|
|
@ -272,104 +272,3 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
||||||
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX,
|
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX,
|
||||||
field_names.COLLEGE_ATTENDANCE_FIELD,
|
field_names.COLLEGE_ATTENDANCE_FIELD,
|
||||||
]
|
]
|
||||||
|
|
||||||
# Finally we augment with the GEOID10, county, and state
|
|
||||||
DOWNLOADABLE_SCORE_COLUMNS = [
|
|
||||||
field_names.GEOID_TRACT_FIELD,
|
|
||||||
field_names.COUNTY_FIELD,
|
|
||||||
field_names.STATE_FIELD,
|
|
||||||
field_names.THRESHOLD_COUNT,
|
|
||||||
field_names.SCORE_M_COMMUNITIES,
|
|
||||||
field_names.TOTAL_POP_FIELD,
|
|
||||||
field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES,
|
|
||||||
field_names.COLLEGE_ATTENDANCE_FIELD,
|
|
||||||
field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD,
|
|
||||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
|
|
||||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
|
|
||||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
|
|
||||||
field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.ENERGY_BURDEN_FIELD,
|
|
||||||
field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.PM25_FIELD,
|
|
||||||
field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.DIESEL_FIELD,
|
|
||||||
field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.TRAFFIC_FIELD,
|
|
||||||
field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.HOUSING_BURDEN_FIELD,
|
|
||||||
field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.LEAD_PAINT_FIELD,
|
|
||||||
field_names.MEDIAN_HOUSE_VALUE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.MEDIAN_HOUSE_VALUE_FIELD,
|
|
||||||
field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.TSDF_FIELD,
|
|
||||||
field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.NPL_FIELD,
|
|
||||||
field_names.RMP_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.RMP_FIELD,
|
|
||||||
field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.WASTEWATER_FIELD,
|
|
||||||
field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.ASTHMA_FIELD,
|
|
||||||
field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.DIABETES_FIELD,
|
|
||||||
field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.HEART_DISEASE_FIELD,
|
|
||||||
field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.LIFE_EXPECTANCY_FIELD,
|
|
||||||
field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
|
|
||||||
field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.LINGUISTIC_ISO_FIELD,
|
|
||||||
field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.UNEMPLOYMENT_FIELD,
|
|
||||||
field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD,
|
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
|
||||||
field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
|
|
||||||
field_names.HIGH_SCHOOL_ED_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.HIGH_SCHOOL_ED_FIELD,
|
|
||||||
field_names.COMBINED_UNEMPLOYMENT_2010,
|
|
||||||
field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
|
|
||||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
|
|
||||||
field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
|
|
||||||
field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
|
|
||||||
field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009
|
|
||||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009
|
|
||||||
+ field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
|
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX,
|
|
||||||
]
|
|
||||||
|
|
|
@ -6,7 +6,13 @@ import pandas as pd
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
from data_pipeline.etl.score.etl_utils import floor_series
|
from data_pipeline.etl.score.etl_utils import floor_series
|
||||||
from data_pipeline.utils import get_module_logger, zip_files
|
from data_pipeline.utils import (
|
||||||
|
get_module_logger,
|
||||||
|
zip_files,
|
||||||
|
load_yaml_dict_from_file,
|
||||||
|
column_list_from_yaml_object_fields,
|
||||||
|
load_dict_from_yaml_object_fields,
|
||||||
|
)
|
||||||
from data_pipeline.score import field_names
|
from data_pipeline.score import field_names
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,6 +46,23 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
self.output_score_tiles_df: pd.DataFrame
|
self.output_score_tiles_df: pd.DataFrame
|
||||||
self.output_downloadable_df: pd.DataFrame
|
self.output_downloadable_df: pd.DataFrame
|
||||||
|
|
||||||
|
# Define some constants for the YAML file
|
||||||
|
# TODO: Implement this as a marshmallow schema.
|
||||||
|
# TODO: Ticket: https://github.com/usds/justice40-tool/issues/1327
|
||||||
|
self.yaml_fields_type_percentage_label = "percentage"
|
||||||
|
self.yaml_fields_type_loss_rate_percentage_label = (
|
||||||
|
"loss_rate_percentage"
|
||||||
|
)
|
||||||
|
self.yaml_fields_type_float_label = "float"
|
||||||
|
self.yaml_fields_type_string_label = "string"
|
||||||
|
self.yaml_fields_type_boolean_label = "bool"
|
||||||
|
self.yaml_fields_type_integer_label = "int64"
|
||||||
|
self.yaml_excel_sheet_label = "label"
|
||||||
|
self.yaml_global_config_rounding_num = "rounding_num"
|
||||||
|
self.yaml_global_config_rounding_num_float = "float"
|
||||||
|
self.yaml_global_config_sort_by_label = "sort_by_label"
|
||||||
|
# End YAML definition constants
|
||||||
|
|
||||||
def _extract_counties(self, county_path: Path) -> pd.DataFrame:
|
def _extract_counties(self, county_path: Path) -> pd.DataFrame:
|
||||||
logger.info("Reading Counties CSV")
|
logger.info("Reading Counties CSV")
|
||||||
return pd.read_csv(
|
return pd.read_csv(
|
||||||
|
@ -300,18 +323,27 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
return score_tiles
|
return score_tiles
|
||||||
|
|
||||||
def _create_downloadable_data(
|
def _create_downloadable_data(
|
||||||
self, score_county_state_merged_df: pd.DataFrame
|
self, score_df: pd.DataFrame, fields_object: dict, config_object: dict
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
df = score_county_state_merged_df[
|
|
||||||
constants.DOWNLOADABLE_SCORE_COLUMNS
|
df = score_df[
|
||||||
|
column_list_from_yaml_object_fields(
|
||||||
|
yaml_object=fields_object,
|
||||||
|
target_field="score_name",
|
||||||
|
)
|
||||||
].copy(deep=True)
|
].copy(deep=True)
|
||||||
|
|
||||||
df_of_float_columns = df.select_dtypes(include=["float64"])
|
column_type_dict = load_dict_from_yaml_object_fields(
|
||||||
|
yaml_object=fields_object,
|
||||||
|
object_key="score_name",
|
||||||
|
object_value="format",
|
||||||
|
)
|
||||||
|
|
||||||
for column in df_of_float_columns.columns:
|
for column in df.columns:
|
||||||
# TODO: create a schema for fields to make it more explicit and safe which
|
if (
|
||||||
# fields are percentages.
|
column_type_dict[column]
|
||||||
if any(x in column for x in constants.PERCENT_PREFIXES_SUFFIXES):
|
== self.yaml_fields_type_percentage_label
|
||||||
|
):
|
||||||
# Convert percentages from fractions between 0 and 1 to an integer
|
# Convert percentages from fractions between 0 and 1 to an integer
|
||||||
# from 0 to 100.
|
# from 0 to 100.
|
||||||
df_100 = df[column] * 100
|
df_100 = df[column] * 100
|
||||||
|
@ -320,26 +352,67 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
).astype("Int64")
|
).astype("Int64")
|
||||||
df[column] = df_int
|
df[column] = df_int
|
||||||
|
|
||||||
elif column in constants.FEMA_ROUND_NUM_COLUMNS:
|
elif (
|
||||||
|
column_type_dict[column]
|
||||||
|
== self.yaml_fields_type_loss_rate_percentage_label
|
||||||
|
):
|
||||||
# Convert loss rates by multiplying by 100 (they are percents)
|
# Convert loss rates by multiplying by 100 (they are percents)
|
||||||
# and then rounding appropriately.
|
# and then rounding appropriately.
|
||||||
df_100 = df[column] * 100
|
df_100 = df[column] * 100
|
||||||
df[column] = floor_series(
|
df[column] = floor_series(
|
||||||
series=df_100.astype(float64),
|
series=df_100.astype(float64),
|
||||||
number_of_decimals=constants.TILES_FEMA_ROUND_NUM_DECIMALS,
|
number_of_decimals=config_object[
|
||||||
|
self.yaml_global_config_rounding_num
|
||||||
|
][self.yaml_fields_type_loss_rate_percentage_label],
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
elif column_type_dict[column] == self.yaml_fields_type_float_label:
|
||||||
# Round all other floats.
|
# Round the floats.
|
||||||
df[column] = floor_series(
|
df[column] = floor_series(
|
||||||
series=df[column].astype(float64),
|
series=df[column].astype(float64),
|
||||||
number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
|
number_of_decimals=config_object[
|
||||||
|
self.yaml_global_config_rounding_num
|
||||||
|
][self.yaml_global_config_rounding_num_float],
|
||||||
)
|
)
|
||||||
|
|
||||||
# sort by tract id
|
elif column_type_dict[column] == self.yaml_fields_type_string_label:
|
||||||
df_sorted = df.sort_values(self.GEOID_TRACT_FIELD_NAME)
|
pass
|
||||||
|
|
||||||
return df_sorted
|
elif (
|
||||||
|
column_type_dict[column] == self.yaml_fields_type_boolean_label
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
|
elif (
|
||||||
|
column_type_dict[column] == self.yaml_fields_type_integer_label
|
||||||
|
):
|
||||||
|
pass
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Unrecognized type: `{column_type_dict[column]}`"
|
||||||
|
)
|
||||||
|
|
||||||
|
# rename fields
|
||||||
|
column_rename_dict = load_dict_from_yaml_object_fields(
|
||||||
|
yaml_object=fields_object,
|
||||||
|
object_key="score_name",
|
||||||
|
object_value="label",
|
||||||
|
)
|
||||||
|
renamed_df = df.rename(
|
||||||
|
columns=column_rename_dict,
|
||||||
|
inplace=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# sort if needed
|
||||||
|
if config_object.get(self.yaml_global_config_sort_by_label):
|
||||||
|
final_df = renamed_df.sort_values(
|
||||||
|
config_object[self.yaml_global_config_sort_by_label]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
final_df = renamed_df
|
||||||
|
|
||||||
|
return final_df
|
||||||
|
|
||||||
def transform(self) -> None:
|
def transform(self) -> None:
|
||||||
logger.info("Transforming data sources for Score + County CSVs")
|
logger.info("Transforming data sources for Score + County CSVs")
|
||||||
|
@ -358,14 +431,11 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
self.output_score_tiles_df = self._create_tile_data(
|
self.output_score_tiles_df = self._create_tile_data(
|
||||||
output_score_county_state_merged_df
|
output_score_county_state_merged_df
|
||||||
)
|
)
|
||||||
self.output_downloadable_df = self._create_downloadable_data(
|
|
||||||
output_score_county_state_merged_df
|
|
||||||
)
|
|
||||||
self.output_score_county_state_merged_df = (
|
self.output_score_county_state_merged_df = (
|
||||||
output_score_county_state_merged_df
|
output_score_county_state_merged_df
|
||||||
)
|
)
|
||||||
|
|
||||||
def _load_score_csv(
|
def _load_score_csv_full(
|
||||||
self, score_county_state_merged: pd.DataFrame, score_csv_path: Path
|
self, score_county_state_merged: pd.DataFrame, score_csv_path: Path
|
||||||
) -> None:
|
) -> None:
|
||||||
logger.info("Saving Full Score CSV with County Information")
|
logger.info("Saving Full Score CSV with County Information")
|
||||||
|
@ -379,8 +449,16 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
def _load_excel_from_df(
|
def _load_excel_from_df(
|
||||||
self, excel_df: pd.DataFrame, excel_path: Path
|
self, excel_df: pd.DataFrame, excel_path: Path
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
||||||
|
# open excel yaml config
|
||||||
|
excel_csv_config = load_yaml_dict_from_file(
|
||||||
|
self.CONTENT_CONFIG / "excel.yml"
|
||||||
|
)
|
||||||
|
|
||||||
# Define Excel Columns Column Width
|
# Define Excel Columns Column Width
|
||||||
num_excel_cols_width = 30
|
num_excel_cols_width = excel_csv_config["global_config"][
|
||||||
|
"excel_config"
|
||||||
|
]["default_column_width"]
|
||||||
|
|
||||||
# Create a Pandas Excel writer using XlsxWriter as the engine.
|
# Create a Pandas Excel writer using XlsxWriter as the engine.
|
||||||
with pd.ExcelWriter( # pylint: disable=abstract-class-instantiated
|
with pd.ExcelWriter( # pylint: disable=abstract-class-instantiated
|
||||||
|
@ -389,13 +467,23 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
engine="xlsxwriter",
|
engine="xlsxwriter",
|
||||||
) as writer:
|
) as writer:
|
||||||
|
|
||||||
|
for sheet in excel_csv_config["sheets"]:
|
||||||
|
excel_df = self._create_downloadable_data(
|
||||||
|
score_df=self.output_score_county_state_merged_df,
|
||||||
|
fields_object=sheet["fields"],
|
||||||
|
config_object=excel_csv_config["global_config"],
|
||||||
|
)
|
||||||
# Convert the dataframe to an XlsxWriter Excel object. We also turn off the
|
# Convert the dataframe to an XlsxWriter Excel object. We also turn off the
|
||||||
# index column at the left of the output dataframe.
|
# index column at the left of the output dataframe.
|
||||||
excel_df.to_excel(writer, sheet_name="Data", index=False)
|
excel_df.to_excel(
|
||||||
|
writer,
|
||||||
|
sheet_name=sheet[self.yaml_excel_sheet_label],
|
||||||
|
index=False,
|
||||||
|
)
|
||||||
|
|
||||||
# Get the xlsxwriter workbook and worksheet objects.
|
# Get the xlsxwriter workbook and worksheet objects.
|
||||||
workbook = writer.book
|
workbook = writer.book
|
||||||
worksheet = writer.sheets["Data"]
|
worksheet = writer.sheets[sheet[self.yaml_excel_sheet_label]]
|
||||||
|
|
||||||
# set header format
|
# set header format
|
||||||
header_format = workbook.add_format(
|
header_format = workbook.add_format(
|
||||||
|
@ -418,33 +506,33 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
tile_score_path.parent.mkdir(parents=True, exist_ok=True)
|
tile_score_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
|
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
|
||||||
|
|
||||||
def _load_downloadable_zip(
|
def _load_downloadable_zip(self, downloadable_info_path: Path) -> None:
|
||||||
self, downloadable_df: pd.DataFrame, downloadable_info_path: Path
|
|
||||||
) -> None:
|
|
||||||
logger.info("Saving Downloadable CSV")
|
logger.info("Saving Downloadable CSV")
|
||||||
|
|
||||||
downloadable_info_path.mkdir(parents=True, exist_ok=True)
|
downloadable_info_path.mkdir(parents=True, exist_ok=True)
|
||||||
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
|
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
|
||||||
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
|
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
|
||||||
zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH
|
zip_path = constants.SCORE_DOWNLOADABLE_ZIP_FILE_PATH
|
||||||
|
# TODO: reinstate when PDF is added back
|
||||||
# pdf_path = constants.SCORE_DOWNLOADABLE_PDF_FILE_PATH
|
# pdf_path = constants.SCORE_DOWNLOADABLE_PDF_FILE_PATH
|
||||||
|
|
||||||
# Rename score column
|
|
||||||
downloadable_df_copy = downloadable_df.rename(
|
|
||||||
columns={
|
|
||||||
DISADVANTAGED_COMMUNITIES_FIELD: "Identified as disadvantaged (v0.1)"
|
|
||||||
},
|
|
||||||
inplace=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info("Writing downloadable excel")
|
logger.info("Writing downloadable excel")
|
||||||
self._load_excel_from_df(downloadable_df_copy, excel_path)
|
self._load_excel_from_df(
|
||||||
|
excel_df=self.output_score_county_state_merged_df,
|
||||||
|
excel_path=excel_path,
|
||||||
|
)
|
||||||
|
|
||||||
logger.info("Writing downloadable csv")
|
logger.info("Writing downloadable csv")
|
||||||
downloadable_df_copy[self.GEOID_TRACT_FIELD_NAME] = (
|
# open yaml config
|
||||||
'"' + downloadable_df_copy[self.GEOID_TRACT_FIELD_NAME] + '"'
|
downloadable_csv_config = load_yaml_dict_from_file(
|
||||||
|
self.CONTENT_CONFIG / "csv.yml"
|
||||||
)
|
)
|
||||||
downloadable_df_copy.to_csv(csv_path, index=False)
|
downloadable_df = self._create_downloadable_data(
|
||||||
|
score_df=self.output_score_county_state_merged_df,
|
||||||
|
fields_object=downloadable_csv_config["fields"],
|
||||||
|
config_object=downloadable_csv_config["global_config"],
|
||||||
|
)
|
||||||
|
downloadable_df.to_csv(csv_path, index=False)
|
||||||
|
|
||||||
logger.info("Compressing files")
|
logger.info("Compressing files")
|
||||||
files_to_compress = [
|
files_to_compress = [
|
||||||
|
@ -454,13 +542,11 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
zip_files(zip_path, files_to_compress)
|
zip_files(zip_path, files_to_compress)
|
||||||
|
|
||||||
def load(self) -> None:
|
def load(self) -> None:
|
||||||
self._load_score_csv(
|
self._load_score_csv_full(
|
||||||
self.output_score_county_state_merged_df,
|
self.output_score_county_state_merged_df,
|
||||||
constants.FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH,
|
constants.FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH,
|
||||||
)
|
)
|
||||||
self._load_tile_csv(
|
self._load_tile_csv(
|
||||||
self.output_score_tiles_df, constants.DATA_SCORE_CSV_TILES_FILE_PATH
|
self.output_score_tiles_df, constants.DATA_SCORE_CSV_TILES_FILE_PATH
|
||||||
)
|
)
|
||||||
self._load_downloadable_zip(
|
self._load_downloadable_zip(constants.SCORE_DOWNLOADABLE_DIR)
|
||||||
self.output_downloadable_df, constants.SCORE_DOWNLOADABLE_DIR
|
|
||||||
)
|
|
||||||
|
|
|
@ -41,6 +41,11 @@ def etl(monkeypatch, root):
|
||||||
etl = PostScoreETL()
|
etl = PostScoreETL()
|
||||||
monkeypatch.setattr(etl, "DATA_PATH", root)
|
monkeypatch.setattr(etl, "DATA_PATH", root)
|
||||||
monkeypatch.setattr(etl, "TMP_PATH", tmp_path)
|
monkeypatch.setattr(etl, "TMP_PATH", tmp_path)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
etl,
|
||||||
|
"CONTENT_CONFIG",
|
||||||
|
Path.cwd() / "data_pipeline" / "content" / "config",
|
||||||
|
)
|
||||||
|
|
||||||
return etl
|
return etl
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -7,6 +7,7 @@ import pandas.api.types as ptypes
|
||||||
import pandas.testing as pdt
|
import pandas.testing as pdt
|
||||||
|
|
||||||
from data_pipeline.etl.score import constants
|
from data_pipeline.etl.score import constants
|
||||||
|
from data_pipeline.utils import load_yaml_dict_from_file
|
||||||
|
|
||||||
# See conftest.py for all fixtures used in these tests
|
# See conftest.py for all fixtures used in these tests
|
||||||
|
|
||||||
|
@ -92,8 +93,13 @@ def test_create_tile_data(etl, score_data_expected, tile_data_expected):
|
||||||
def test_create_downloadable_data(
|
def test_create_downloadable_data(
|
||||||
etl, score_data_expected, downloadable_data_expected
|
etl, score_data_expected, downloadable_data_expected
|
||||||
):
|
):
|
||||||
|
downloadable_csv_config = load_yaml_dict_from_file(
|
||||||
|
etl.CONTENT_CONFIG / "csv.yml"
|
||||||
|
)
|
||||||
output_downloadable_df_actual = etl._create_downloadable_data(
|
output_downloadable_df_actual = etl._create_downloadable_data(
|
||||||
score_data_expected
|
score_data_expected,
|
||||||
|
fields_object=downloadable_csv_config["fields"],
|
||||||
|
config_object=downloadable_csv_config["global_config"],
|
||||||
)
|
)
|
||||||
pdt.assert_frame_equal(
|
pdt.assert_frame_equal(
|
||||||
output_downloadable_df_actual,
|
output_downloadable_df_actual,
|
||||||
|
@ -101,9 +107,9 @@ def test_create_downloadable_data(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_load_score_csv(etl, score_data_expected):
|
def test_load_score_csv_full(etl, score_data_expected):
|
||||||
reload(constants)
|
reload(constants)
|
||||||
etl._load_score_csv(
|
etl._load_score_csv_full(
|
||||||
score_data_expected,
|
score_data_expected,
|
||||||
constants.FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH,
|
constants.FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH,
|
||||||
)
|
)
|
||||||
|
@ -112,26 +118,25 @@ def test_load_score_csv(etl, score_data_expected):
|
||||||
|
|
||||||
def test_load_tile_csv(etl, tile_data_expected):
|
def test_load_tile_csv(etl, tile_data_expected):
|
||||||
reload(constants)
|
reload(constants)
|
||||||
etl._load_score_csv(
|
etl._load_score_csv_full(
|
||||||
tile_data_expected, constants.DATA_SCORE_CSV_TILES_FILE_PATH
|
tile_data_expected, constants.DATA_SCORE_CSV_TILES_FILE_PATH
|
||||||
)
|
)
|
||||||
assert constants.DATA_SCORE_CSV_TILES_FILE_PATH.is_file()
|
assert constants.DATA_SCORE_CSV_TILES_FILE_PATH.is_file()
|
||||||
|
|
||||||
|
|
||||||
def test_load_downloadable_zip(etl, monkeypatch, downloadable_data_expected):
|
def test_load_downloadable_zip(etl, monkeypatch, score_data_expected):
|
||||||
reload(constants)
|
reload(constants)
|
||||||
STATIC_FILES_PATH = (
|
static_files_path = (
|
||||||
Path.cwd() / "data_pipeline" / "files"
|
Path.cwd() / "data_pipeline" / "files"
|
||||||
) # need to monkeypatch to real dir
|
) # need to monkeypatch to real dir
|
||||||
monkeypatch.setattr(constants, "FILES_PATH", STATIC_FILES_PATH)
|
monkeypatch.setattr(constants, "FILES_PATH", static_files_path)
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
constants,
|
constants,
|
||||||
"SCORE_DOWNLOADABLE_PDF_FILE_PATH",
|
"SCORE_DOWNLOADABLE_PDF_FILE_PATH",
|
||||||
STATIC_FILES_PATH / constants.SCORE_DOWNLOADABLE_PDF_FILE_NAME,
|
static_files_path / constants.SCORE_DOWNLOADABLE_PDF_FILE_NAME,
|
||||||
)
|
|
||||||
etl._load_downloadable_zip(
|
|
||||||
downloadable_data_expected, constants.SCORE_DOWNLOADABLE_DIR
|
|
||||||
)
|
)
|
||||||
|
etl.output_score_county_state_merged_df = score_data_expected
|
||||||
|
etl._load_downloadable_zip(constants.SCORE_DOWNLOADABLE_DIR)
|
||||||
assert constants.SCORE_DOWNLOADABLE_DIR.is_dir()
|
assert constants.SCORE_DOWNLOADABLE_DIR.is_dir()
|
||||||
assert constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH.is_file()
|
assert constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH.is_file()
|
||||||
assert constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH.is_file()
|
assert constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH.is_file()
|
||||||
|
|
|
@ -9,6 +9,7 @@ import zipfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import urllib3
|
import urllib3
|
||||||
import requests
|
import requests
|
||||||
|
import yaml
|
||||||
|
|
||||||
from data_pipeline.config import settings
|
from data_pipeline.config import settings
|
||||||
|
|
||||||
|
@ -322,6 +323,57 @@ def zip_directory(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def load_yaml_dict_from_file(yaml_file_path: Path) -> dict:
|
||||||
|
"""Load a YAML file specified in path into a Python dictionary.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
yaml_file_path (Path): the path to the YAML file
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: the parsed YAML object as a Python dictionary
|
||||||
|
"""
|
||||||
|
with open(yaml_file_path, encoding="UTF-8") as file:
|
||||||
|
yaml_dict = yaml.load(file, Loader=yaml.FullLoader)
|
||||||
|
return yaml_dict
|
||||||
|
|
||||||
|
|
||||||
|
def column_list_from_yaml_object_fields(
|
||||||
|
yaml_object: dict, target_field: str
|
||||||
|
) -> list:
|
||||||
|
"""Creates a list of the columns from a YAML score configuration file fields list.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
yaml_object (dict): raw dictionary returned from reading the YAML score configuration file
|
||||||
|
target_field (str): the dict field to extract
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list: a list of all the fields that match the target field
|
||||||
|
"""
|
||||||
|
yaml_list = []
|
||||||
|
for field in yaml_object:
|
||||||
|
yaml_list.append(field[target_field])
|
||||||
|
return yaml_list
|
||||||
|
|
||||||
|
|
||||||
|
def load_dict_from_yaml_object_fields(
|
||||||
|
yaml_object: dict, object_key: str, object_value: str
|
||||||
|
) -> dict:
|
||||||
|
"""Creates a dictionary with a configurable key and value from a YAML score configuration file fields list.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
yaml_object (dict): raw dictionary returned from reading the YAML score configuration file
|
||||||
|
object_key (str): key for the dictionary
|
||||||
|
object_value (str): value for the dictionary
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: a dict with the specified keys and values
|
||||||
|
"""
|
||||||
|
yaml_dict = {}
|
||||||
|
for field in yaml_object:
|
||||||
|
yaml_dict[field[object_key]] = field[object_value]
|
||||||
|
return yaml_dict
|
||||||
|
|
||||||
|
|
||||||
def get_excel_column_name(index: int) -> str:
|
def get_excel_column_name(index: int) -> str:
|
||||||
"""Map a numeric index to the appropriate column in Excel. E.g., column #95 is "CR".
|
"""Map a numeric index to the appropriate column in Excel. E.g., column #95 is "CR".
|
||||||
Only works for the first 1000 columns.
|
Only works for the first 1000 columns.
|
||||||
|
|
|
@ -25,6 +25,7 @@ us = "^2.0.2"
|
||||||
xlsxwriter = "^2.0.0"
|
xlsxwriter = "^2.0.0"
|
||||||
ipdb = "^0.13.9"
|
ipdb = "^0.13.9"
|
||||||
pylint = "^2.11.1"
|
pylint = "^2.11.1"
|
||||||
|
PyYAML = "^6.0"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
black = {version = "^21.6b0", allow-prereleases = true}
|
black = {version = "^21.6b0", allow-prereleases = true}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue