From 7fcecaee428fe76c92024363ae34af92a7808b8d Mon Sep 17 00:00:00 2001 From: Lucas Merrill Brown Date: Fri, 10 Dec 2021 10:16:22 -0500 Subject: [PATCH] Issue 970: reverse percentiles for AMI and life expectancy (#1018) * switching to low * fixing score-etl-post * updating comments * fixing comparison * create separate field for clarity * comment fix * removing healthy food * fixing bug in score post * running black and adding comment * Update pickles and add a helpful notes to README Co-authored-by: Shelby Switzer --- data/data-pipeline/README.md | 4 +- .../data_pipeline/etl/score/constants.py | 22 ++-- .../data_pipeline/etl/score/etl_score.py | 21 ++-- .../tests/sample_data/score_data_initial.csv | 6 +- .../snapshots/downloadable_data_expected.pkl | Bin 3809 -> 3831 bytes .../tests/snapshots/score_data_expected.pkl | Bin 14202 -> 43082 bytes .../snapshots/score_transformed_expected.pkl | Bin 13825 -> 42699 bytes .../tests/snapshots/tile_data_expected.pkl | Bin 3243 -> 3267 bytes ...pare_two_score_files_for_differences.ipynb | 61 ++++++++--- .../data_pipeline/score/field_names.py | 30 ++++-- .../data_pipeline/score/score_l.py | 100 +++++++++--------- 11 files changed, 144 insertions(+), 100 deletions(-) diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index 716f8ce9..15c1a959 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -309,7 +309,7 @@ If you update the score in any way, it is necessary to create new pickles so tha It starts with the `data_pipeline/etl/score/tests/sample_data/score_data_initial.csv`, which is the first two rows of the `score/full/usa.csv`. -To update this file, run a full score generation and then update the file as follows: +To update this file, run a full score generation, then open a Python shell from the `data-pipeline` directory (e.g. 
`poetry run python3`), and then update the file with the following commands: ``` import pickle from pathlib import Path @@ -322,6 +322,8 @@ score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10_TRACT": "string"} score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False) ``` +Now you can move on to updating individual pickles for the tests. Note that it is helpful to do them in this order: + We have four pickle files that correspond to expected files: - `score_data_expected.pkl`: Initial score without counties - `score_transformed_expected.pkl`: Intermediate score with `etl._extract_score` and `etl. _transform_score` applied. There's no file for this intermediate process, so we need to capture the pickle mid-process. diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index d3338a82..17b218d2 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -81,7 +81,7 @@ TILES_SCORE_COLUMNS = [ field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, @@ -89,7 +89,7 @@ TILES_SCORE_COLUMNS = [ field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD + 
field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.POVERTY_LESS_THAN_200_FPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, @@ -115,7 +115,7 @@ TILES_SCORE_FLOAT_COLUMNS = [ field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.FEMA_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, @@ -123,7 +123,7 @@ TILES_SCORE_FLOAT_COLUMNS = [ field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD + field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.POVERTY_LESS_THAN_200_FPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, @@ -137,7 +137,6 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [ field_names.DIABETES_FIELD, field_names.ASTHMA_FIELD, field_names.HEART_DISEASE_FIELD, - field_names.LIFE_EXPECTANCY_FIELD, field_names.TRAFFIC_FIELD, field_names.FEMA_RISK_FIELD, field_names.ENERGY_BURDEN_FIELD, @@ -149,11 +148,11 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC = [ field_names.TOTAL_POP_FIELD, ] -# For every indicator above, we want to include percentile and min-max normalized variants also +# For every indicator above, we want to include percentile also. 
DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list( pd.core.common.flatten( [ - [p, f"{p} (percentile)"] + [p, f"{p}{field_names.PERCENTILE_FIELD_SUFFIX}"] for p in DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_BASIC ] ) @@ -165,8 +164,15 @@ DOWNLOADABLE_SCORE_COLUMNS = [ field_names.COUNTY_FIELD, field_names.STATE_FIELD, field_names.SCORE_G_COMMUNITIES, + # Note: the reverse percentile fields get moved down here because + # we put the raw value in the download along with the *reversed* percentile. + # All other fields we put in f"{field_name}" and + # f"{field_name}{field_names.PERCENTILE_FIELD_SUFFIX}", which doesn't work for the + # reversed percentile fields. field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD, - field_names.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD + field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.LIFE_EXPECTANCY_FIELD, + field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, *DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL, ] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 38ce2ace..ff8c6aaf 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -404,9 +404,7 @@ class ScoreETL(ExtractTransformLoad): field_names.POVERTY_LESS_THAN_150_FPL_FIELD, field_names.POVERTY_LESS_THAN_200_FPL_FIELD, field_names.AMI_FIELD, - field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD, field_names.MEDIAN_INCOME_FIELD, - field_names.LIFE_EXPECTANCY_FIELD, field_names.ENERGY_BURDEN_FIELD, field_names.FEMA_RISK_FIELD, field_names.URBAN_HEURISTIC_FIELD, @@ -439,7 +437,6 @@ class ScoreETL(ExtractTransformLoad): field_names.CENSUS_UNEMPLOYMENT_FIELD_2010, field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010, field_names.CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009, - field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009, 
field_names.EXTREME_HEAT_FIELD, field_names.HEALTHY_FOOD_FIELD, field_names.IMPENETRABLE_SURFACES_FIELD, @@ -468,7 +465,19 @@ class ScoreETL(ExtractTransformLoad): ReversePercentile( field_name=field_names.READING_FIELD, low_field_name=field_names.LOW_READING_FIELD, - ) + ), + ReversePercentile( + field_name=field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD, + low_field_name=field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD, + ), + ReversePercentile( + field_name=field_names.LIFE_EXPECTANCY_FIELD, + low_field_name=field_names.LOW_LIFE_EXPECTANCY_FIELD, + ), + ReversePercentile( + field_name=field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009, + low_field_name=field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009, + ), ] columns_to_keep = ( @@ -505,10 +514,6 @@ class ScoreETL(ExtractTransformLoad): max_value = df_copy[numeric_column].max(skipna=True) - logger.info( - f"For data set {numeric_column}, the min value is {min_value} and the max value is {max_value}." 
- ) - df_copy[f"{numeric_column}{field_names.MIN_MAX_FIELD_SUFFIX}"] = ( df_copy[numeric_column] - min_value ) / (max_value - min_value) diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index d04ba002..c986e88d 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10_TRACT,Persistent Poverty Census Tract,Housing burden (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged >=18 years,Coronary heart disease among adults aged >=18 years,Cancer (excluding skin cancer) among adults aged >=18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged >=18 years,Physical health not good for >=14 days among adults aged >=18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals < 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income (% of AMI),Median household income in the past 12 months,Life expectancy (years),Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed civilians 
(percent),Housing + Transportation Costs % Income for the Regional Typical Household,Median value ($) of owner-occupied housing units,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Housing burden (percent) (percentile),Housing burden (percent) (min-max normalized),Total population (percentile),Total population (min-max normalized),Median household income (% of state median household income) (percentile),Median household income (% of state median household income) (min-max normalized),Current asthma among adults aged >=18 years (percentile),Current asthma among adults aged >=18 years (min-max normalized),Coronary heart disease among adults aged >=18 years (percentile),Coronary heart disease among adults aged >=18 years (min-max normalized),Cancer (excluding skin cancer) among adults aged >=18 years (percentile),Cancer (excluding skin cancer) among adults aged >=18 years (min-max normalized),Current lack of health insurance among adults aged 18-64 years (percentile),Current lack of health insurance among adults aged 18-64 years (min-max normalized),Diagnosed diabetes among adults aged >=18 years (percentile),Diagnosed diabetes among adults aged >=18 years (min-max normalized),Physical health not good for >=14 days among adults aged >=18 years (percentile),Physical health not good for >=14 days among adults aged >=18 years (min-max normalized),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 100% Federal Poverty Line (min-max normalized),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (min-max normalized),Percent of individuals < 200% Federal Poverty Line (percentile),Percent of individuals < 200% Federal Poverty Line (min-max normalized),Area Median Income (State or metropolitan) (percentile),Area Median Income (State or metropolitan) 
(min-max normalized),Median household income (% of AMI) (percentile),Median household income (% of AMI) (min-max normalized),Median household income in the past 12 months (percentile),Median household income in the past 12 months (min-max normalized),Life expectancy (years) (percentile),Life expectancy (years) (min-max normalized),Energy burden (percentile),Energy burden (min-max normalized),FEMA Risk Index Expected Annual Loss Score (percentile),FEMA Risk Index Expected Annual Loss Score (min-max normalized),Urban Heuristic Flag (percentile),Urban Heuristic Flag (min-max normalized),Air toxics cancer risk (percentile),Air toxics cancer risk (min-max normalized),Respiratory hazard index (percentile),Respiratory hazard index (min-max normalized),Diesel particulate matter (percentile),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (percentile),Particulate matter (PM2.5) (min-max normalized),Ozone (percentile),Ozone (min-max normalized),Traffic proximity and volume (percentile),Traffic proximity and volume (min-max normalized),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to Risk Management Plan (RMP) facilities (min-max normalized),Proximity to TSDF sites (percentile),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (percentile),Proximity to NPL sites (min-max normalized),Wastewater discharge (percentile),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (percentile),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (percentile),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (percentile),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (percentile),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (percentile),Percent of households in linguistic isolation (min-max normalized),Poverty (Less 
than 200% of federal poverty line) (percentile),Poverty (Less than 200% of federal poverty line) (min-max normalized),Percent individuals age 25 or over with less than high school degree (percentile),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (percentile),Unemployed civilians (percent) (min-max normalized),Housing + Transportation Costs % Income for the Regional Typical Household (percentile),Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized),Median value ($) of owner-occupied housing units (percentile),Median value ($) of owner-occupied housing units (min-max normalized),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected building loss rate (Natural Hazards Risk Index) (min-max normalized),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (min-max normalized),Expected population loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (min-max normalized),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution Burden,Population Characteristics,Score C,Score D,Score E,"Low AMI, Low HS graduation",Meets socioeconomic criteria,Meets burden criteria,Score F (communities),Score G (communities),Score G,Score G (percentile),Score H (communities),Score H,Score I (communities),Score I,Score I (percentile),NMTC (communities),Score K (communities),Climate Factor (Definition L),Energy Factor (Definition L),Transportation Factor (Definition L),Housing Factor (Definition L),Pollution Factor (Definition L),Water Factor (Definition L),Health Factor (Definition L),Workforce Factor (Definition L),Definition L (communities),Any Non-Workforce Factor (Definition L),Definition L (percentile),Score A (percentile),Score A (top 25th percentile),Score A (top 
30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th percentile),Score E (top 35th percentile),Score E (top 40th percentile) -01073001100,True,0.3555555555555555,4897.0,0.7327449738800064,11.6,8.0,6.6,15.3,18.5,15.6,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,0.6445941476491375,37030.0,70.3,0.05,37.016020762747445,1.0,51.1530304943,0.735568574566,0.63998588,10.3951975342,39.404630719,253.995131498,2.03499777769,0.708723792992,0.134193041308,1.0556674669,0.205868653936,0.0308352052277,0.185011231366,0.0,0.0,0.407205697528,0.0911016949153,0.0092071611253196,38.0,85500.0,0.0199399484122346,0.0100179814652433,0.0003727376748275,0.7759834149819756,0.2585858585858585,0.6522319452040524,0.0696873532467162,0.2579768630616088,0.1567394572745385,0.8778835393887099,0.4078947368421052,0.7543109840773348,0.2112676056338028,0.4831470520823502,0.3,0.5996548516266466,0.2028753993610224,0.9542880640681668,0.4214285714285714,0.7464709927060441,0.3172043010752688,0.6168263717723972,0.150375939849624,0.7284793525682426,0.318796992481203,0.6286362018742833,0.3744360902255639,0.2971496201383377,0.2232322025800286,0.1757741606170599,0.1364573208439614,0.159739042112665,0.1395180645004889,0.0288201562824969,0.3398058252427184,0.8503016535630522,0.1111111111111111,0.8804885569871426,0.3701602076274744,0.5990586969676596,1.0,0.9853145934585578,0.0300384933188293,0.9694864048338367,0.1696726613313424,0.7707211349008275,0.1053235274194042,0.9307172437981412,0.513075533
2417333,0.2444726227893863,0.2834163202031902,0.4679683805148301,0.0081194347824311,0.9043675287131808,0.1192079731904979,0.437416985702008,0.0016327034632494,0.7570383102846576,0.0172289593763426,0.924320063066614,9.412822248180045e-06,0.4488762142875745,0.205868653936,0.1217933692736411,0.0793595720750642,0.6929577831601427,0.185011231366,0.1279192436010707,0.0,0.1287405266036409,0.0,0.665989530432065,0.407205697528,0.4562465817642003,0.0911016949153,0.0304587252654599,0.0092071611253196,0.0933347766081238,0.2372881355932203,0.1107833945360314,0.0379401628742081,0.7913088977138564,0.0640255252302748,0.6494845360824743,0.0687832451226217,0.8598534256706785,0.0093618689816499,0.5611180560981327,0.3038554467503745,0.274954028134698,0.3142234653449515,0.694403820410807,0.7281133967159299,0.716876871280889,0.2945887467398247,0.2111838590774037,0.1532200824308956,0.4113194992089544,False,True,True,True,True,1,1,True,1,True,1,1,True,True,False,False,False,False,False,False,False,False,False,False,0,0.5707151600385447,False,False,False,False,0.5743417454488632,False,False,False,False,0.420866651834208,False,False,False,False,0.5840820897465948,False,False,False,False,0.3836627861551683,False,False,False,False 
-01073001400,True,0.2601092896174863,1906.0,0.7136694633528574,11.0,9.4,7.2,18.4,20.4,16.6,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,0.6278134628440127,36066.0,71.0,0.07,47.948511946777465,1.0,54.6950518653,0.76056054321,0.9098084377,10.498270137,39.3676601307,3015.87969265,1.81382525188,3.24085850684,0.214095348703,0.365101735929,0.628839590444,0.0582371458552,0.178908709339,0.0245098039215686,0.0165289256198,0.425498426023,0.148840688108,0.1150121065375302,44.0,67800.0,0.0995560141720271,0.0108783755900761,0.0018653260326052,0.5663862883099391,0.1891703924490809,0.0873245305622835,0.0271235644860611,0.2380414312617702,0.1524802086740484,0.8045192865135969,0.3684210526315788,0.8943994283480067,0.2507042253521127,0.6245163203947635,0.33,0.7302382332717638,0.2523961661341853,0.9743093661943348,0.4666666666666666,0.8111121597389815,0.3440860215053763,0.8631193240981165,0.2816032887975334,0.7965704042494299,0.3679342240493319,0.7848791996731208,0.4835560123329907,0.2971496201383377,0.2232322025800286,0.1615528584392014,0.1326629858757091,0.1480323983644555,0.1356231464796244,0.0390929078646344,0.3567961165048544,0.9660879441830278,0.1666666666666666,0.9676082963519111,0.4794851194677746,0.5990586969676596,1.0,0.9898725863654276,0.0324014302096176,0.9773807960068304,0.1759137053158734,0.9050177328254302,0.1497290027189099,0.9346157477978174,0.5213510451938572,0.2429887631355303,0.2826912370192468,0.943152489696642,0.0964083008791804,0.8841706383311196,0.1062519253676046,0.761049040289606,0.0074660410167418,0.8677110190900331,0.027487565893985,0.8909234242304798,3.255416928678718e-06,0.8349740865171759,0.628839590444,0.5187970414355288,0.1498830619032175,0.6589707529234055,0.178908709339,0.5804662877639023,0.0245098039215686,0.5016472641091752,0.0165289256198,0.6940255749147068,0.425498426023,0.6771362346016616,0.148840688108,0.9015492846415304,0.1150121065375302,0.2296362947527941,0.288135593220339,0.050687553715134,0.0290456994515583,0.9832161640
01288,0.3197166420251072,0.6757017645468937,0.0746906926653988,0.9859745848181268,0.0468739884070143,0.6855809047581842,0.469949864514998,0.6007989306039736,0.5860780273742789,0.8477656416916828,0.8321713526379463,0.8373694489891917,0.5934384789891263,0.4969272521601087,0.1806062834078359,0.683912734046348,False,True,True,True,True,1,1,True,1,True,1,1,True,True,True,True,True,False,False,False,True,True,True,True,1,0.7019884365966091,False,True,True,True,0.7102898663958122,False,True,True,True,0.9565888835084873,True,True,True,True,0.6881136547126078,False,False,True,True,0.7675469437716488,True,True,True,True +GEOID10_TRACT,Persistent Poverty Census Tract,Housing burden (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged >=18 years,Coronary heart disease among adults aged >=18 years,Cancer (excluding skin cancer) among adults aged >=18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged >=18 years,Physical health not good for >=14 days among adults aged >=18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals < 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed 
civilians (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployed civilians (percent) in 2009,Unemployed civilians (percent) in 2010,Percent of individuals < 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Housing burden (percent) (value urban only),Housing burden (percent) (percentile urban only),Housing burden (percent) (value rural only),Housing burden (percent) (percentile rural only),Housing burden (percent) (percentile urban/rural),Housing burden (percent) (min-max normalized),Total population (percentile),Total population (value urban only),Total population (percentile urban only),Total population (value rural only),Total population (percentile rural only),Total population (percentile urban/rural),Total population (min-max normalized),Median household income (% of state median household income) (percentile),Median household income (% of state median household income) (value urban only),Median household income (% of state median household income) (percentile urban only),Median household income (% of state median household income) (value rural only),Median household income (% of state median household income) (percentile rural only),Median household income (% of state median household income) (percentile urban/rural),Median household income (% of state median 
household income) (min-max normalized),Current asthma among adults aged >=18 years (percentile),Current asthma among adults aged >=18 years (value urban only),Current asthma among adults aged >=18 years (percentile urban only),Current asthma among adults aged >=18 years (value rural only),Current asthma among adults aged >=18 years (percentile rural only),Current asthma among adults aged >=18 years (percentile urban/rural),Current asthma among adults aged >=18 years (min-max normalized),Coronary heart disease among adults aged >=18 years (percentile),Coronary heart disease among adults aged >=18 years (value urban only),Coronary heart disease among adults aged >=18 years (percentile urban only),Coronary heart disease among adults aged >=18 years (value rural only),Coronary heart disease among adults aged >=18 years (percentile rural only),Coronary heart disease among adults aged >=18 years (percentile urban/rural),Coronary heart disease among adults aged >=18 years (min-max normalized),Cancer (excluding skin cancer) among adults aged >=18 years (percentile),Cancer (excluding skin cancer) among adults aged >=18 years (value urban only),Cancer (excluding skin cancer) among adults aged >=18 years (percentile urban only),Cancer (excluding skin cancer) among adults aged >=18 years (value rural only),Cancer (excluding skin cancer) among adults aged >=18 years (percentile rural only),Cancer (excluding skin cancer) among adults aged >=18 years (percentile urban/rural),Cancer (excluding skin cancer) among adults aged >=18 years (min-max normalized),Current lack of health insurance among adults aged 18-64 years (percentile),Current lack of health insurance among adults aged 18-64 years (value urban only),Current lack of health insurance among adults aged 18-64 years (percentile urban only),Current lack of health insurance among adults aged 18-64 years (value rural only),Current lack of health insurance among adults aged 18-64 years (percentile rural only),Current lack of 
health insurance among adults aged 18-64 years (percentile urban/rural),Current lack of health insurance among adults aged 18-64 years (min-max normalized),Diagnosed diabetes among adults aged >=18 years (percentile),Diagnosed diabetes among adults aged >=18 years (value urban only),Diagnosed diabetes among adults aged >=18 years (percentile urban only),Diagnosed diabetes among adults aged >=18 years (value rural only),Diagnosed diabetes among adults aged >=18 years (percentile rural only),Diagnosed diabetes among adults aged >=18 years (percentile urban/rural),Diagnosed diabetes among adults aged >=18 years (min-max normalized),Physical health not good for >=14 days among adults aged >=18 years (percentile),Physical health not good for >=14 days among adults aged >=18 years (value urban only),Physical health not good for >=14 days among adults aged >=18 years (percentile urban only),Physical health not good for >=14 days among adults aged >=18 years (value rural only),Physical health not good for >=14 days among adults aged >=18 years (percentile rural only),Physical health not good for >=14 days among adults aged >=18 years (percentile urban/rural),Physical health not good for >=14 days among adults aged >=18 years (min-max normalized),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 100% Federal Poverty Line (value urban only),Percent of individuals < 100% Federal Poverty Line (percentile urban only),Percent of individuals < 100% Federal Poverty Line (value rural only),Percent of individuals < 100% Federal Poverty Line (percentile rural only),Percent of individuals < 100% Federal Poverty Line (percentile urban/rural),Percent of individuals < 100% Federal Poverty Line (min-max normalized),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (value urban only),Percent of individuals < 150% Federal Poverty Line (percentile urban only),Percent of individuals < 150% 
Federal Poverty Line (value rural only),Percent of individuals < 150% Federal Poverty Line (percentile rural only),Percent of individuals < 150% Federal Poverty Line (percentile urban/rural),Percent of individuals < 150% Federal Poverty Line (min-max normalized),Percent of individuals < 200% Federal Poverty Line (percentile),Percent of individuals < 200% Federal Poverty Line (value urban only),Percent of individuals < 200% Federal Poverty Line (percentile urban only),Percent of individuals < 200% Federal Poverty Line (value rural only),Percent of individuals < 200% Federal Poverty Line (percentile rural only),Percent of individuals < 200% Federal Poverty Line (percentile urban/rural),Percent of individuals < 200% Federal Poverty Line (min-max normalized),Area Median Income (State or metropolitan) (percentile),Area Median Income (State or metropolitan) (value urban only),Area Median Income (State or metropolitan) (percentile urban only),Area Median Income (State or metropolitan) (value rural only),Area Median Income (State or metropolitan) (percentile rural only),Area Median Income (State or metropolitan) (percentile urban/rural),Area Median Income (State or metropolitan) (min-max normalized),Median household income in the past 12 months (percentile),Median household income in the past 12 months (value urban only),Median household income in the past 12 months (percentile urban only),Median household income in the past 12 months (value rural only),Median household income in the past 12 months (percentile rural only),Median household income in the past 12 months (percentile urban/rural),Median household income in the past 12 months (min-max normalized),Energy burden (percentile),Energy burden (value urban only),Energy burden (percentile urban only),Energy burden (value rural only),Energy burden (percentile rural only),Energy burden (percentile urban/rural),Energy burden (min-max normalized),FEMA Risk Index Expected Annual Loss Score (percentile),FEMA Risk Index 
Expected Annual Loss Score (value urban only),FEMA Risk Index Expected Annual Loss Score (percentile urban only),FEMA Risk Index Expected Annual Loss Score (value rural only),FEMA Risk Index Expected Annual Loss Score (percentile rural only),FEMA Risk Index Expected Annual Loss Score (percentile urban/rural),FEMA Risk Index Expected Annual Loss Score (min-max normalized),Urban Heuristic Flag (percentile),Urban Heuristic Flag (value urban only),Urban Heuristic Flag (percentile urban only),Urban Heuristic Flag (value rural only),Urban Heuristic Flag (percentile rural only),Urban Heuristic Flag (percentile urban/rural),Urban Heuristic Flag (min-max normalized),Air toxics cancer risk (percentile),Air toxics cancer risk (value urban only),Air toxics cancer risk (percentile urban only),Air toxics cancer risk (value rural only),Air toxics cancer risk (percentile rural only),Air toxics cancer risk (percentile urban/rural),Air toxics cancer risk (min-max normalized),Respiratory hazard index (percentile),Respiratory hazard index (value urban only),Respiratory hazard index (percentile urban only),Respiratory hazard index (value rural only),Respiratory hazard index (percentile rural only),Respiratory hazard index (percentile urban/rural),Respiratory hazard index (min-max normalized),Diesel particulate matter (percentile),Diesel particulate matter (value urban only),Diesel particulate matter (percentile urban only),Diesel particulate matter (value rural only),Diesel particulate matter (percentile rural only),Diesel particulate matter (percentile urban/rural),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (percentile),Particulate matter (PM2.5) (value urban only),Particulate matter (PM2.5) (percentile urban only),Particulate matter (PM2.5) (value rural only),Particulate matter (PM2.5) (percentile rural only),Particulate matter (PM2.5) (percentile urban/rural),Particulate matter (PM2.5) (min-max normalized),Ozone (percentile),Ozone (value urban 
only),Ozone (percentile urban only),Ozone (value rural only),Ozone (percentile rural only),Ozone (percentile urban/rural),Ozone (min-max normalized),Traffic proximity and volume (percentile),Traffic proximity and volume (value urban only),Traffic proximity and volume (percentile urban only),Traffic proximity and volume (value rural only),Traffic proximity and volume (percentile rural only),Traffic proximity and volume (percentile urban/rural),Traffic proximity and volume (min-max normalized),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to Risk Management Plan (RMP) facilities (value urban only),Proximity to Risk Management Plan (RMP) facilities (percentile urban only),Proximity to Risk Management Plan (RMP) facilities (value rural only),Proximity to Risk Management Plan (RMP) facilities (percentile rural only),Proximity to Risk Management Plan (RMP) facilities (percentile urban/rural),Proximity to Risk Management Plan (RMP) facilities (min-max normalized),Proximity to TSDF sites (percentile),Proximity to TSDF sites (value urban only),Proximity to TSDF sites (percentile urban only),Proximity to TSDF sites (value rural only),Proximity to TSDF sites (percentile rural only),Proximity to TSDF sites (percentile urban/rural),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (percentile),Proximity to NPL sites (value urban only),Proximity to NPL sites (percentile urban only),Proximity to NPL sites (value rural only),Proximity to NPL sites (percentile rural only),Proximity to NPL sites (percentile urban/rural),Proximity to NPL sites (min-max normalized),Wastewater discharge (percentile),Wastewater discharge (value urban only),Wastewater discharge (percentile urban only),Wastewater discharge (value rural only),Wastewater discharge (percentile rural only),Wastewater discharge (percentile urban/rural),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (percentile),Percent pre-1960s housing 
(lead paint indicator) (value urban only),Percent pre-1960s housing (lead paint indicator) (percentile urban only),Percent pre-1960s housing (lead paint indicator) (value rural only),Percent pre-1960s housing (lead paint indicator) (percentile rural only),Percent pre-1960s housing (lead paint indicator) (percentile urban/rural),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (percentile),Individuals under 5 years old (value urban only),Individuals under 5 years old (percentile urban only),Individuals under 5 years old (value rural only),Individuals under 5 years old (percentile rural only),Individuals under 5 years old (percentile urban/rural),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (percentile),Individuals over 64 years old (value urban only),Individuals over 64 years old (percentile urban only),Individuals over 64 years old (value rural only),Individuals over 64 years old (percentile rural only),Individuals over 64 years old (percentile urban/rural),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (percentile),Linguistic isolation (percent) (value urban only),Linguistic isolation (percent) (percentile urban only),Linguistic isolation (percent) (value rural only),Linguistic isolation (percent) (percentile rural only),Linguistic isolation (percent) (percentile urban/rural),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (percentile),Percent of households in linguistic isolation (value urban only),Percent of households in linguistic isolation (percentile urban only),Percent of households in linguistic isolation (value rural only),Percent of households in linguistic isolation (percentile rural only),Percent of households in linguistic isolation (percentile urban/rural),Percent of households in linguistic isolation (min-max normalized),Poverty (Less than 200% of federal poverty line) 
(percentile),Poverty (Less than 200% of federal poverty line) (value urban only),Poverty (Less than 200% of federal poverty line) (percentile urban only),Poverty (Less than 200% of federal poverty line) (value rural only),Poverty (Less than 200% of federal poverty line) (percentile rural only),Poverty (Less than 200% of federal poverty line) (percentile urban/rural),Poverty (Less than 200% of federal poverty line) (min-max normalized),Percent individuals age 25 or over with less than high school degree (percentile),Percent individuals age 25 or over with less than high school degree (value urban only),Percent individuals age 25 or over with less than high school degree (percentile urban only),Percent individuals age 25 or over with less than high school degree (value rural only),Percent individuals age 25 or over with less than high school degree (percentile rural only),Percent individuals age 25 or over with less than high school degree (percentile urban/rural),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (percentile),Unemployed civilians (percent) (value urban only),Unemployed civilians (percent) (percentile urban only),Unemployed civilians (percent) (value rural only),Unemployed civilians (percent) (percentile rural only),Unemployed civilians (percent) (percentile urban/rural),Unemployed civilians (percent) (min-max normalized),Median value ($) of owner-occupied housing units (percentile),Median value ($) of owner-occupied housing units (value urban only),Median value ($) of owner-occupied housing units (percentile urban only),Median value ($) of owner-occupied housing units (value rural only),Median value ($) of owner-occupied housing units (percentile rural only),Median value ($) of owner-occupied housing units (percentile urban/rural),Median value ($) of owner-occupied housing units (min-max normalized),Percent enrollment in college or graduate school (percentile),Percent enrollment 
in college or graduate school (value urban only),Percent enrollment in college or graduate school (percentile urban only),Percent enrollment in college or graduate school (value rural only),Percent enrollment in college or graduate school (percentile rural only),Percent enrollment in college or graduate school (percentile urban/rural),Percent enrollment in college or graduate school (min-max normalized),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected building loss rate (Natural Hazards Risk Index) (value urban only),Expected building loss rate (Natural Hazards Risk Index) (percentile urban only),Expected building loss rate (Natural Hazards Risk Index) (value rural only),Expected building loss rate (Natural Hazards Risk Index) (percentile rural only),Expected building loss rate (Natural Hazards Risk Index) (percentile urban/rural),Expected building loss rate (Natural Hazards Risk Index) (min-max normalized),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (value urban only),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile urban only),Expected agricultural loss rate (Natural Hazards Risk Index) (value rural only),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile rural only),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile urban/rural),Expected agricultural loss rate (Natural Hazards Risk Index) (min-max normalized),Expected population loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (value urban only),Expected population loss rate (Natural Hazards Risk Index) (percentile urban only),Expected population loss rate (Natural Hazards Risk Index) (value rural only),Expected population loss rate (Natural Hazards Risk Index) (percentile rural only),Expected population loss rate (Natural Hazards Risk Index) (percentile 
urban/rural),Expected population loss rate (Natural Hazards Risk Index) (min-max normalized),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (value urban only),Percent individuals age 25 or over with less than high school degree in 2009 (percentile urban only),Percent individuals age 25 or over with less than high school degree in 2009 (value rural only),Percent individuals age 25 or over with less than high school degree in 2009 (percentile rural only),Percent individuals age 25 or over with less than high school degree in 2009 (percentile urban/rural),Percent individuals age 25 or over with less than high school degree in 2009 (min-max normalized),Percentage households below 100% of federal poverty line in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (value urban only),Percentage households below 100% of federal poverty line in 2009 (percentile urban only),Percentage households below 100% of federal poverty line in 2009 (value rural only),Percentage households below 100% of federal poverty line in 2009 (percentile rural only),Percentage households below 100% of federal poverty line in 2009 (percentile urban/rural),Percentage households below 100% of federal poverty line in 2009 (min-max normalized),Unemployed civilians (percent) in 2009 (percentile),Unemployed civilians (percent) in 2009 (value urban only),Unemployed civilians (percent) in 2009 (percentile urban only),Unemployed civilians (percent) in 2009 (value rural only),Unemployed civilians (percent) in 2009 (percentile rural only),Unemployed civilians (percent) in 2009 (percentile urban/rural),Unemployed civilians (percent) in 2009 (min-max normalized),Unemployed civilians (percent) in 2010 (percentile),Unemployed civilians (percent) in 2010 (value urban only),Unemployed civilians (percent) in 2010 (percentile urban only),Unemployed civilians (percent) 
in 2010 (value rural only),Unemployed civilians (percent) in 2010 (percentile rural only),Unemployed civilians (percent) in 2010 (percentile urban/rural),Unemployed civilians (percent) in 2010 (min-max normalized),Percent of individuals < 100% Federal Poverty Line in 2010 (percentile),Percent of individuals < 100% Federal Poverty Line in 2010 (value urban only),Percent of individuals < 100% Federal Poverty Line in 2010 (percentile urban only),Percent of individuals < 100% Federal Poverty Line in 2010 (value rural only),Percent of individuals < 100% Federal Poverty Line in 2010 (percentile rural only),Percent of individuals < 100% Federal Poverty Line in 2010 (percentile urban/rural),Percent of individuals < 100% Federal Poverty Line in 2010 (min-max normalized),Total population in 2009 (percentile),Total population in 2009 (value urban only),Total population in 2009 (percentile urban only),Total population in 2009 (value rural only),Total population in 2009 (percentile rural only),Total population in 2009 (percentile urban/rural),Total population in 2009 (min-max normalized),Summer days above 90F (percentile),Summer days above 90F (value urban only),Summer days above 90F (percentile urban only),Summer days above 90F (value rural only),Summer days above 90F (percentile rural only),Summer days above 90F (percentile urban/rural),Summer days above 90F (min-max normalized),Percent low access to healthy food (percentile),Percent low access to healthy food (value urban only),Percent low access to healthy food (percentile urban only),Percent low access to healthy food (value rural only),Percent low access to healthy food (percentile rural only),Percent low access to healthy food (percentile urban/rural),Percent low access to healthy food (min-max normalized),Percent impenetrable surface areas (percentile),Percent impenetrable surface areas (value urban only),Percent impenetrable surface areas (percentile urban only),Percent impenetrable surface areas (value rural 
only),Percent impenetrable surface areas (percentile rural only),Percent impenetrable surface areas (percentile urban/rural),Percent impenetrable surface areas (min-max normalized),Low third grade reading proficiency (percentile),Third grade reading proficiency (value urban only),Low third grade reading proficiency (percentile urban only),Third grade reading proficiency (value rural only),Low third grade reading proficiency (percentile rural only),Low third grade reading proficiency (percentile urban/rural),Low median household income as a percent of area median income (percentile),Median household income as a percent of area median income (value urban only),Low median household income as a percent of area median income (percentile urban only),Median household income as a percent of area median income (value rural only),Low median household income as a percent of area median income (percentile rural only),Low median household income as a percent of area median income (percentile urban/rural),Low life expectancy (percentile),Life expectancy (years) (value urban only),Low life expectancy (percentile urban only),Life expectancy (years) (value rural only),Low life expectancy (percentile rural only),Low life expectancy (percentile urban/rural),Low median household income as a percent of territory median income in 2009 (percentile),Median household income as a percent of territory median income in 2009 (value urban only),Low median household income as a percent of territory median income in 2009 (percentile urban only),Median household income as a percent of territory median income in 2009 (value rural only),Low median household income as a percent of territory median income in 2009 (percentile rural only),Low median household income as a percent of territory median income in 2009 (percentile urban/rural),Total population in 2009 (island areas) and 2019 (states and PR),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution 
Burden,Population Characteristics,Score C,Score D,Score E,"Low AMI, Low HS graduation",Meets socioeconomic criteria,Meets burden criteria,Score F (communities),Score G (communities),Score G,Score G (percentile),Score H (communities),Score H,Score I (communities),Score I,Score I (percentile),NMTC (communities),Score K (communities),Total threshold criteria exceeded,Is low income,At or above the 90th percentile for expected population loss rate and is low income,At or above the 90th percentile for expected agriculture loss rate and is low income,At or above the 90th percentile for expected building loss rate and is low income,Climate Factor (Definition L),At or above the 90th percentile for PM2.5 exposure and is low income,At or above the 90th percentile for energy burden and is low income,Energy Factor (Definition L),At or above the 90th percentile for diesel particulate matter and is low income,At or above the 90th percentile for traffic proximity and is low income,Transportation Factor (Definition L),At or above the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income,At or above the 90th percentile for housing burden and is low income,Housing Factor (Definition L),At or above the 90th percentile for proximity to RMP sites and is low income,At or above the 90th percentile for proximity to superfund sites and is low income,At or above the 90th percentile for proximity to hazardous waste facilities and is low income,Pollution Factor (Definition L),At or above the 90th percentile for wastewater discharge and is low income,Water Factor (Definition L),At or above the 90th percentile for diabetes and is low income,At or above the 90th percentile for asthma and is low income,At or above the 90th percentile for heart disease and is low income,At or above the 90th percentile for low life expectancy and is low income,Health Factor (Definition L),At or above the 90th percentile for households in linguistic isolation and has 
low HS education,At or above the 90th percentile for households at or below 100% federal poverty level and has low HS education,At or below the 90th percentile for low median household income as a percent of area median income and has low HS education,At or above the 90th percentile for unemployment and has low HS education,Unemployed civilians (percent) in 2009 (island areas) and 2010 (states and PR),Unemployed civilians (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Workforce Factor (Definition L),Definition L (communities),Any Non-Workforce Factor (Definition L),Definition L (percentile),Score A (percentile),Score A (top 25th percentile),Score A (top 30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th percentile),Score E (top 35th percentile),Score E (top 40th percentile) 
+01073001100,True,0.2752043596730245,4897.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,51.1530304943,0.735568574566,0.63998588,10.3951975342,39.404630719,253.995131498,2.03499777769,0.708723792992,0.134193041308,1.0556674669,0.205868653936,0.0308352052277,0.185011231366,0.0,0.0,0.407205697528,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.0004047858,0.0052243632,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2752043596730245,0.5700318443928049,,,0.5700318443928049,0.2752043596730245,0.6512411994432508,4897.0,0.6248587667340021,,,0.6248587667340021,0.0696873532467162,0.2608335861859708,0.7327449738800064,0.2830143458759886,,,0.2830143458759886,0.1567394572745385,0.8509696039125366,11.2,0.835724928163468,,,0.835724928163468,0.3795180722891565,0.7264920810941454,7.2,0.7844637979353648,,,0.7844637979353648,0.1875,0.4789587420739856,6.7,0.5678012699989358,,,0.5678012699989358,0.303921568627451,0.6191105803406409,16.6,0.6018127638440527,,,0.6018127638440527,0.2094488188976378,0.965388552418323,19.3,0.9599666536592288,,,0.9599666536592288,0.4275862068965517,0.697012994398476,15.1,0.7279967363155841,,,0.7279967363155841,0.3440233236151603,0.62043226838371,0.150375939849624,0.6145691950863328,,,0.6145691950863328,0.150375939849624,0.7319580582099551,0.318796992481203,0.7209861695730607,,,0.7209861695730607,0.318796992481203,0.6304939233619051,0.3744360902255639,0.6277982991151962,,,0.6277982991151962,0.3744360902255639,0.305080182775983,57447.0,0.2657396925099514,,,0.2657396925099514,0.2232322025800286,0.1531195524384805,37030.0,0.1572054644434553,,,0.1572054644434553,0.1395180645004889,0.864954517474865,0.049,0.9005173954895489,,,0.9005173954895489,0.0037065052950075,0.6038301323911519,18.7674524286,0.6383816255626487,,,0.6383816255626487,0.1876745242859999,0.597220498
8211937,1.0,0.5000085573944445,,,0.5000085573944445,1.0,0.9847958474680182,51.1530304943,0.9819551446670092,,,0.9819551446670092,0.0300384933188293,0.968365553602812,0.735568574566,0.962352336928608,,,0.962352336928608,0.1696726613313424,0.7632321085543794,0.63998588,0.70386920047937,,,0.70386920047937,0.1053235274194042,0.928945948563286,10.3951975342,0.914819538763832,,,0.914819538763832,0.5130755332417333,0.2507228532296667,39.404630719,0.2360646695853011,,,0.2360646695853011,0.2834163202031902,0.4608804184722397,253.995131498,0.3487028727629068,,,0.3487028727629068,0.0081194347824311,0.9022445642626452,2.03499777769,0.8805423357414318,,,0.8805423357414318,0.1192079731904979,0.4244672369292306,0.708723792992,0.2870715924264731,,,0.2870715924264731,0.0016327034632494,0.753733057661383,0.134193041308,0.7050707022289178,,,0.7050707022289178,0.0172289593763426,0.9223869977156124,1.0556674669,0.9078447963253958,,,0.9078447963253958,9.412822248180045e-06,0.4454669531492817,0.205868653936,0.4334577327353032,,,0.4334577327353032,0.205868653936,0.1202416183565086,0.0308352052277,0.108484267470127,,,0.108484267470127,0.0793595720750642,0.695774381427278,0.185011231366,0.7452494265073442,,,0.7452494265073442,0.185011231366,0.1272208772721677,0.0,0.0997988895955446,,,0.0997988895955446,0.0,0.1284712368751773,0.0,0.0986150580340329,,,0.0986150580340329,0.0,0.668247726382076,0.407205697528,0.6576848015886603,,,0.6576848015886603,0.407205697528,0.4189274017467249,0.0821917808219178,0.4443872826422471,,,0.4443872826422471,0.0821917808219178,0.0297871177547618,0.0092071611253196,0.0285699569046924,,,0.0285699569046924,0.0092071611253196,0.1124511830921203,85500.0,0.1074913548122025,,,0.1074913548122025,0.0379401628742081,0.7459685121012851,0.0890751899397432,0.6989610478672336,,,0.6989610478672336,0.0890751899397432,0.7846412062513758,0.0004047858,0.82138883658833,,,0.82138883658833,0.01597287904858,0.5120452999319701,0.0052243632,0.5028314457028648,,,0.5028314457028648,0.0095124
530398832,0.6143028498159407,2.8039e-06,0.6689315237296548,,,0.6689315237296548,0.0013204531501148,,,,,,,,,,,,,,,,,,,,,,0.9349594607528132,0.1536983669548511,0.9315960352498582,,,0.9315960352498582,0.1536983669548511,0.8950599559730369,0.3189099613330878,0.8870754376621368,,,0.8870754376621368,0.3189099613330878,,,,,,,,0.7537922665342821,62.666668,0.7585193484443679,,,0.7585193484443679,0.4423529401799308,0.8019598155467721,0.068036923,0.7905320987865615,,,0.7905320987865615,0.068036923,0.4126953421856217,0.171,0.2701910981682835,,,0.2701910981682835,0.1775700879261655,0.990724418702258,58.143433,0.9890918535191758,,,0.9890918535191758,0.8209741753282674,0.6445941476491375,0.7947837521917007,,,0.7947837521917007,0.97046998263836,70.3,0.9652438793658,,,0.9652438793658,,,,,,,4897.0,0.5435875640644005,0.2799472837363994,0.3784219111378495,0.3144122923519848,0.6896597619436307,0.7261571216484003,0.7139913350801438,0.3464171017449171,0.2473388089694474,0.1547617998296523,0.3781718392172476,False,True,True,True,True,1,1,True,1,True,1,1,True,True,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,False,0.3189099613330878,False,False,False,False,False,0,0.5532898204078324,False,False,False,False,0.5490856986899564,False,False,False,False,0.5480683083104825,False,False,False,False,0.5438440021080796,False,False,False,False,0.3320495668977446,False,False,False,False 
+01073001400,True,0.1823529411764705,1906.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,54.6950518653,0.76056054321,0.9098084377,10.498270137,39.3676601307,3015.87969265,1.81382525188,3.24085850684,0.214095348703,0.365101735929,0.628839590444,0.0582371458552,0.178908709339,0.0245098039215686,0.0165289256198,0.425498426023,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.0008951111,0.0067284885,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.1823529411764705,0.2730613650055943,,,0.2730613650055943,0.1823529411764705,0.0867961243766976,1906.0,0.0702656897319135,,,0.0702656897319135,0.0271235644860611,0.2409731608418613,0.7136694633528574,0.263412880473859,,,0.263412880473859,0.1524802086740484,0.8385794307486707,11.1,0.8242488204618823,,,0.8242488204618823,0.3734939759036144,0.9217563763541756,9.1,0.934238532761006,,,0.934238532761006,0.2391304347826086,0.6048579715089994,7.3,0.6840345524850119,,,0.6840345524850119,0.3333333333333333,0.7894025988796952,21.4,0.765821774451027,,,0.765821774451027,0.2850393700787401,0.9878088657624612,22.4,0.9854641881585016,,,0.9854641881585016,0.4988505747126436,0.8447283118655634,17.4,0.8547216999538827,,,0.8547216999538827,0.411078717201166,0.8689250707460116,0.2816032887975334,0.8593419809294734,,,0.8593419809294734,0.2816032887975334,0.8013233263612626,0.3679342240493319,0.7879907224465252,,,0.7879907224465252,0.3679342240493319,0.7892247330790578,0.4835560123329907,0.7755862898376428,,,0.7755862898376428,0.4835560123329907,0.305080182775983,57447.0,0.2657396925099514,,,0.2657396925099514,0.2232322025800286,0.1410656612748476,36066.0,0.1454103468346629,,,0.1454103468346629,0.1356231464796244,0.970802270706518,0.07,0.9745685506050604,,,0.9745685506050604,0.0052950075642965,0.5282998116553705,17.3011023381,0.565061869961834,,,0
.565061869961834,0.173011023381,0.5972204988211937,1.0,0.5000085573944445,,,0.5000085573944445,1.0,0.9895233034972276,54.6950518653,0.98758774182503,,,0.98758774182503,0.0324014302096176,0.9765534529502324,0.76056054321,0.9720766991953432,,,0.9720766991953432,0.1759137053158734,0.9016362174902248,0.9098084377,0.8766135935627462,,,0.8766135935627462,0.1497290027189099,0.9329441224077584,10.498270137,0.9196283911889414,,,0.9196283911889414,0.5213510451938572,0.2492010569566841,39.3676601307,0.2346340790789065,,,0.2346340790789065,0.2826912370192468,0.941127799514774,3015.87969265,0.9280004126050854,,,0.9280004126050854,0.0964083008791804,0.8815556546533155,1.81382525188,0.8555140890882322,,,0.8555140890882322,0.1062519253676046,0.7527465845056148,3.24085850684,0.6879172801040846,,,0.6879172801040846,0.0074660410167418,0.8655558708666099,0.214095348703,0.8377272571643783,,,0.8377272571643783,0.027487565893985,0.8882148073250197,0.365101735929,0.8688263024295902,,,0.8688263024295902,3.255416928678718e-06,0.8313806570181485,0.628839590444,0.7904954291779368,,,0.7904954291779368,0.628839590444,0.5164051837137336,0.0582371458552,0.4798079227582429,,,0.4798079227582429,0.1498830619032175,0.6618964608586371,0.178908709339,0.715890026363543,,,0.715890026363543,0.178908709339,0.5796575164471435,0.0245098039215686,0.5161060212798873,,,0.5161060212798873,0.0245098039215686,0.5008648531776597,0.0165289256198,0.424872462081008,,,0.424872462081008,0.0165289256198,0.6964500479723247,0.425498426023,0.6838429143698428,,,0.6838429143698428,0.425498426023,0.751664847161572,0.1742543171114599,0.7508831498439483,,,0.7508831498439483,0.1742543171114599,0.9067171316918536,0.1150121065375302,0.9060831344539256,,,0.9060831344539256,0.1150121065375302,0.0519655139795408,67800.0,0.0554747149655904,,,0.0554747149655904,0.0290456994515583,0.6434691260334525,0.0771549125979505,0.5807502400219449,,,0.5807502400219449,0.0771549125979505,0.9166162227602904,0.0008951111,0.9302158396521918,,,0.93021583
96521918,0.0353246959378835,0.5755532434271079,0.0067284885,0.5614035087719298,,,0.5614035087719298,0.0122511928214689,0.558933421571466,2.3791e-06,0.6145407246401615,,,0.6145407246401615,0.0011202645354936,,,,,,,,,,,,,,,,,,,,,,0.6917513228236646,0.0804953560371517,0.6841770738494838,,,0.6841770738494838,0.0804953560371517,0.8737301229199994,0.2950894905920146,0.8632982287353754,,,0.8632982287353754,0.2950894905920146,,,,,,,,0.7501654807214959,61.666668,0.7551581487546324,,,0.7551581487546324,0.435294116816609,0.8647617479139218,0.087159691,0.8519741908983552,,,0.8519741908983552,0.087159691,0.6268497920495212,0.34900002,0.5359536815260283,,,0.5359536815260283,0.3624091475885,0.9537899773356836,93.77919,0.9454938966323262,,,0.9454938966323262,0.8355751594025679,0.6278134628440127,0.8102169783752192,,,0.8102169783752192,0.959938777375042,71.0,0.9533526317781056,,,0.9533526317781056,,,,,,,1906.0,0.7240574475669483,0.523497018864787,0.6395630962236926,0.5859863870065047,0.8438907148737418,0.8318309921361502,0.8358508997153473,0.6127747416150986,0.5121883191018196,0.1843255189540058,0.6553216288775894,False,True,True,True,True,1,1,True,1,True,1,1,True,True,9,True,False,False,True,True,True,True,True,True,True,True,False,False,False,False,False,False,False,False,False,True,False,True,True,True,False,False,False,True,0.0804953560371517,False,0.2950894905920146,False,False,True,True,True,1,0.7460439723787516,False,True,True,True,0.7500818777292576,True,True,True,True,0.9394564656693726,True,True,True,True,0.6605181011067418,False,False,True,True,0.7347062877528682,False,True,True,True diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index be501bd1db85081371ac70a95e88a85c84aba782..1aa07d41fbe2b5f75f28d4390230baa219bc5dcf 100644 GIT binary patch delta 656 zcmaDT`(2i$fo1BOjVxKr^@o?+EXsC$WzS!GUzTIlT900IXB9a7FM6%glp 
zZ~t-b{kDtS-`RtNAMm_!fYI`Oui5k4F5AQCZ^jSLx7Z)HFEjgk<|OMS`;J$Kj!wDt z*gpJ~!Ds7pukBX^EOct++G@W;db-AU-(&Um2NzU6*mC}s{mIG91&euK*q=QE1U|+N zX<%UHpkq0yqwL8$`_6+)!W8Gcw}*(UIe;B%tmd%RB5byb^=o?#yQA$(uD*ikKO+N` zS8@n;Qx3|pe{O%mDa(qz{;j?06DGME`|}+pmaSn!a712rszCi+`?~EMJUe9H z*)J5*I$VCl$f54>H!e|MLx*3R?oFH=^w>TpZX@H>=MU_?c$l=?#a7zC3EUyq`g)`N zsric}wyu3`ucBJ?=Dfgr`zI$9_J5jl(LN*`1diJ;{mjKWY2y?764{eku1~MpYfrl~ zk@?97`|BqjuD)C!d)@qn%z0yfYtFa)=aHxEvy*a6O5~r|+oqno z)ZTQ*UcI1q_Uc)$?5FVwtUg@z#C})x#;IzuZ|$W%>hDXy7>FO(#Rb zPpLSF{>pWh{jcJ%Z2GKQZ+5=5Pn&#w;o4ho?WcvTJ@Nh86MIdedBIGs7MpWeilwk}EemaNK6v9K+qp3;?NqCPDxJ delta 616 zcmew^`%sppfo1BojVxKr^|Pbuk~cP8u>Y5!=crO~&Yl4X9`L+zfYCdyC|s{+xoi)k zzZpL~-(r8*zRc|FnUk!S?CnAht>D`G*gl|fW7e8^ukE8NT`v>_ZMR=hc2D1o_ptq% zO;7Kr7~i#Tn16A$gMm8O0SpXk z4muCBE#`~AvFEH1vYx;1CBy&*1&6a|KtRdi7weak0m?7zpG%$Uc|Gq9ME~SKi2ehC z4hmaOe`0T*Yd@uWV&EUzbwC-r3KZ2?8ta-vsWEYkj@ZzSrFH z!fMmk_L+IBoDE;Svlq)b2t?QH&+ik;U%2#wee8iv+PAN~uum{mQ@$qu$XP+F)5LMVm~iH#5Z}@ z9sAcB!IPzTyt1FhC$Rc()f4+&)f=a($-cFp**ZOL|H>EkqKY&3KmYp1e)gu5A>pS~ z97KQRI?Mi7arp7IwmtvETl<6)k3HgFytNm&<=8HI|B3x}|L09i&!RTxuof{fDfv!T z$=p+z0TV?LJpcdz diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index 1416ef23d5c941cef16a203dd56591770f8b0ed7..30ce72173bd800ea4e2bdcd7b6dd23e62353363a 100644 GIT binary patch literal 43082 zcmeI5cU%<5`}h%iY-oZVT|0sxMO17<5dO-P^t6{O@01v6?zlnPZA`*Cpyr8OC=(NxDjzq4Sq%wX!^1 zwq<&HZr=EGgF06!AEP4E6Z?l#TOwrgF=QrHcXR8y&87TxdaY8Ooo+JLNumFui(uh_ zyGNweOPq}@P&Bb@>#xttCGj@eImVhgy+(`MZ!*@%&C@G1YLhO*=&3g3ICxkim^GsyLnE!Am3>>C^hT4a$a&TXbCvPy=Ds?FNv6ca#DV|e5B*;p!k(Es61%=mKwvbUF3Y!W_#263 zCgTb+jI}ZhN^G7Q`>nc8rIb_cigUxYR-r1GMYX$*qJBjL6pg_7ipGj&R?hLFouMEU zZFCum_KHqaBNUyjn-EAFRh>2iS7s(-h@ww|n|neP_qSEAdw9G_k!f9B754=9w^v<2 zph=NuU0qf8glZD1vvFN`ock=3!r$ng5m*s*MG!mYZdA-8r&X!q=2nICal^miPZiET z`bXA(l$=@0{Ij`S#cx~<6|&S{AZNmc*oHqG=K^kll9ji8IC)FAA)=N$rOEu+27hb^ 
z?f~ut?gH)s{tVm?JODfdJPbSnJPJGpJPteoJPAAnJPrI6_#5zd;2Gdq;2*$z;5p!V z;054C;3Z%I@G|fU@G9^c@H+4Y@D}hk@D8vDSPZ-iECt>NJ^+>h9|9i%9|NBN{{sFE z{0I0H_#F5G_%HA!@D=bi@D1=kAeBt^Mj&oqN!W!4OLJ$*vPw0@5}wWyF{*`|Tk%Qa zz+Wr#QR-0Ya{{e>EMM}qwgkOyw_&L4(mj+#e>#Q*)1QuEzyC&kTEh3X@Vz;F_X=CS zjQRwGnahQkx5o#=_wFF)58ngBo(11NI|Cg-W4k~7?Ns%1NEaPA)i?VBIjBww-57tnG@;tU;D5}vnM?{y*)Cln$>LPjV<@kk-eXV z4tQ8QY zVzl~tqc0*CtwwJ?+x_|8KnRm(&E*)-5BseU<@j z+Vy8)>imhQMc%TM)R`yx1?7KQ|GlS2`nl1c7gMA%cIM#^Tc?|&x1xWqr$1|&v>z)aa=v>A0Xlg0p9W~u1JU}p@>e4L zeQON7jOv)4M5mM#pp@u#gH@;mos1aWtLBu4h&#Rom99CL()>Q!UeqEa=I%q$`uOEf zqI(IN3FlXwL3J;rEr|H`COVn%SN$LBfuqYCOq6rpKfPT08elTy+8xbmO;y}5SJ zv!~JU+wG2d_B)5Vgf5&jw)hd6duaWTHY*>S+h;ZY_xkhFt2(hbdj z-~OOXv_7uj9~UYTeO!6& z{`R+|ua{QwD@H#q@7XIg{Jzcp_dzjfC~sVhQf3?;vOHfrU%2wz{dw!p%4%w6oktc@ zc;>ff%Ck1XVlm)!3bISc>GJdk^9;cv*oAO2UjWX!T7Xxi_$=j7HuO#93F z_+j_K{?!JxK8hBde>dyQO7nI%tMQ&&A6MS+sn@W**A5}ir^A%ZMjS$?SFdZjb4wX& z-$7e`*5fha9-nj>Ex%SfzZ`YAvmj$*SMmP9m2Z0NWMT7>YP4l*n`1qGG@zH;ZZ23f zxCp5}+*;*gNuka0&j!z#39EC8QTn8@YhS&0M>Jo!^6QH0R@+_uB)U0z-M66~j-#L5 zZ+2FwSYfK?(|FXHf^!q|I`nUZz3hMlU4!?%wL|hH^jD3mWq#Yl+sBnJta`3-`I@Py=cyq@-M`i+HcG8BwQ;G<`Gr?h)NEP$IV3x4Y&NFqJyCyh%FH zK1M#h8=W?_Dn;jYCqmb4D?{A#m!y`D)KBK$L+#hk&;0I;`0*K6e)7AK#pBj5K*1;b z4c^{iCEC~h`(FZQJVZZverE_*mD`*jt$lWAwnx2(=+w<&`KsT={mGU0%+JUjzptp@ zUt^nO{FC@=zrN&|bF-&iN1bx#t=P2ufzA2XnHLME9@}>f<*jBBenvizI}Yr!(|rDGZ-2y*(n;a|cWure>z{~D@w`%kzR|uPys%U}U%2wz^Ua?88`#(B}Uaz0N*R!TY zpPQTI{5LzaW1CC;_Mq)Avii3Sye3*7zkG$hr*9JK$7b9`z6GOKCdb~hIlpdns?UE1 z>)b^bdN-f@&K>dNd9FP7eEraxyuux4ucA|ns^)%Guhi!JeO~XJ;RU(Z(dI#8oAs$F zetgE2=Uxw3+=W~>ed?EJgo6ut_1z1&xS-5c@!5m%mjy&?La zgJ9#YQ?l*4H2AD`#$*H{)ltMdvZkKYO!i{J8QT{V`=#vFa=I{?y0C zo%XLnZz4wz|MzAInr)iMeUIaENM7gaGO|JI*wx~b3^4|shcrkL~boA5L z@AU7~WG?!1L8$J-4MoV1cxvd$JGX7F|IEG^;#rh^7Zs0-AC!#Gleqrle&@u(#g8%$nz_92?B zSdo09SbRL=%5(LVt+{qQb$vPVZ~XYy7f;Js_-vqa4*;Enm@YjrjhZD=&7v z@NMTq$l;|=JzAHd_5P=753f>+xYsA^89w~*bW|yt^J5KFxR3aF&6RJ^wP44zLDx~o zCC{&3yLJuje1HDhZ@;^bd`crP5Sz;o_xk4+RPds9{C$)hdUI5Iq_{u1@?3p0XXfo$ 
zro4)hb3Tph)S%er`f8o5Ma$0)%tsxvPp8f4C_cV$S6pLynMk0$jWqd!;Onw9_4Bb)2XFJtrD3@Cbp zT8_9q>c%JH_B$%?l6<$i-n=P0>Oe#CHkuI9W7Y)U611X~N7W4v^3jV)f8|d)Scqy3 zZZX%R=Vf#w?qH3Cam8ql-+*6ITVFt3s#cwqbrA1QV@7_kwc$PF^J;gqPF2d#{4Y9P z-_oZ9eQ|VD%@JQdKszUg4zG6Q9y$+{em(YxFWyi0qC_)L1PYn8P z!&y}D`1PU7@6JU;g}>fL-=m6{%pP{?6&u5b2j zG(wej?OEHqXy)`U&&P*dMDluv5AT{*f_U!+5R>tdjoJ2Q~nD02=}u0pA6_2lNCs1~vgU1vUdV2jZKE^rt1z3)l+S z8b|=!0NVoF0owyR06PLZ0lk5pfj&TAU>Bet&>z?p7yt|e1_6VC-GCv$?!X?vp1@FG zFJNzAA7Eb~-0E_JA;>KZeh&vm03(6@fl_IB*1TB=AFEI&c(F2Fw7;ftf%Om<7xRDu7DhXy6#23YY^_ z12w>0;733$PzTfl4ZyL$alrAwJm3W2MBpUgWZ=iZDZr_~Pk_^a(}6R9Gl8>!vw?Gf zbAkT{{1j*e&I5i1oDcjQxB$2ixCm$hE(R_EE(Lx8Tn1bYTmf7OTm}3RxEiTZlfYBJ)4*SWzX5*-o&lZ({sGJfo&%l-UI1PM zUIG>XF9WXtuL7?DuLExYZvt-t3xT(RcYsB}V&GliJzxp26nG!_09Xbr2R;No0zL*l z0saa63-~wiAK+8qGvIUJ3*f)Nm%#r4UjbhO-vIvuTAt~mo=~Gc?!c9jcM!0Ok0@w!F z7T6Bh9@qic5!ean4eSi`0r~>F0R4dez^=dmU?4CE7!2$N3;}ir_5k(-h5~y5djtCb z`vUs`5ikrG4vYXs0{a7_fYHDKz!+dGa3C-a7!OPUCISZmlYq&mcX8~se=K$vd{}1>n&Pa1C%Ra2@a~;CkTKz;A%x z0yhA^18xLv0&WIw0e%ns0r(?uEAS?;5Lf~%2mT3s2K*n;(MK}lYFVDhw0`)P_leBN zscqgh?;Nx>&4fDLTt%~1H@DiBKY$kfyr-*iwdk|e$grv){S%1q2JQv^0^A2Y2)qY$ z*mdr->&%bMdkiGAvhWlOjhcwm7}WYalOo?(Gh8LhkmblsigVPH=J*kMclU%ExR~Y9 zeZ7gA2hh}dx`R@kNscFV{uweIp0V&wIyKKCmii4p_pi8WtZw<6p7oK5XM&h)9_LP| zPR&R#+f{Es)S%e@fdT2MgTo_JO~%$q*hr;LPpb7ql4dNa)#nkBq*`at5vf`kp7LO9 z8ly4j@Kg&T!=TM1)r5C0sg+~#&L*QrszxtU5xJUNgG#1X;+Yi2J_%%|Ql=&pxDct( zs4@wqTCT|<3GWVsCX3MNWqOjx;g`W0eIpH8EoF;Lr&r|22w9E>*DTA#Gct4pp0klj z^y?eglgJ}w)a;euNR3vbmTB_{1^z)#WGZzec9Te{-jOo3oYWHDZ54s#SUgZl=zlr8MxA5A5mJy_?18uKks=Y_&#*+k!onLF!4| zGP});Oj6|OlydB8s7tNU6WJO~CXuDlQV!`xWa5c5B3goQq^Y=3HshpHj#XwF@MIvO z4}qtfbReS1Oj3)hup~jeQjPP?PN5Jd3Iz$* z>JWJQ7~Ek7le%1`R)%L8;lz|pkZGw-K=oK-v;Im_N8%ns&8w1Aod$OXnI6x}!jqhm zYECboR+9_uz(kVgEC!Xuw|5 z5+UZELg2xJvz;2B2un{gZ*PV3G}~Z~i&CdC4@1^tl%K^Ws;gQCB^|bbQ1O(c+5n@V zcRY!6K(D|)qth0b&w@UZ3nEwpiQCZMQiXN5!h;nN6hh$KrL7*P#AAet(Wp>nD+ug2 zJkO5EB(t?7_Fk(rHJOvE(&XVZlw+T$@HncoZb|@T{aBgGfCtp}ovE&@8HZEbPa~Hb 
za+SCs)kMnQ2DMU;BgjOARBJUV6`cmuP>25}v4PmSY%LyLsXR1WV>0$+`T{l6j2<3U zRBylujVn!*>G3e9B4X&g)>-u-?1(;0QCYT@%5Sr*)Z)FE;uU)oLTcf7R|J7d3moiF zlQ9g`QDL-7n+#H=8D}0a1@fUnMMwMbbWku5PYLX0X(O)NI@jzB;pSSX!cDn5>k0!r zQw%vdR9B|=A{jVgiO_&(ld-L(KC00&xtva44eW*T@ZLv*dnhBM%*iFyc$}7HsBktJ zw0MlcJ3~CG<2-Dgs!-x#kV-a^z(3Jxg~u>FlqpHI98a|DB{ZbV@Q#hht=M-_8ZAc< z=3@&ygvBefNP?sXal8-CBfRM&1e}rsq^iY3nN~>;**WH;6F6>RGNMC_z;O`MMU^UQ zH1HLcGk07Ay*tsURe4xCKv){ujI|Q1BBB*&=^pA(1*{^0(rxNW7sT!G6E2gZRQu(~ z#uI9dHbVr{ zbA}_SC{Lb5I$1Y+m~+MqR=AutgQcZ^r5eM;ILsk6yde#9;%t`_+GwX#@Ps(dDMdmf zKxNEg1*_6#v9!gzq>)St#5kpnHx*(kaS4Ny5{Z>k#giH-l~O2_BCar#6XBMAG2rFd{OH)jx8^%Pxzr(6{!=2=UYtx(*7~&F@ zJZUl9C5nYo#g%5Wg6?v&S=vXu6{j$1_P$Hh^QIn3s<xCJqgcw~T+u=%DF!&B&CwhXx35GgvG40hNuHQTI8xrqt*lr(VFW8I&Kki|5?Ps+F#Q@iqJ=k> zf_H-Otr**z`uC&5 z013`Uo&a^=hzJFMUlq$Sc2u$CI*L@uIH)a0kkq9dRLtidVsQkF+XS zWrC!YF;IgV<5Pb-g?OFXN%&btj#vHcN%FWg$ez5AUnNw+c9O(OScQTm7Gk@$tG!aZ zzHLN53d(U~Q}~g`V_r7$?ECEOqO2zI6lHC1C7_x~oaP|F+mO!oo@M2BpYM9t-C9Kgt;mJW(0wj2@v_Rl+D% zC@O0dOIe&NTF69ufHT^7bD%v-Q^ix>oIv{sit$9VtDvw@$i&pMTq#LCt3-fggBTBY z5iG*%#&#@m=8<@g7ds0{@i@_6NLI*)BFb3~k)oVcfbDbM_L+Adj01gz?C14g8@`zX z9Pf4Fm*MeU7k)`0&)HSXa)Y>HmbjlZI~bpJ=9lO7Sl=YL%_VhG1;?9_j+f{0bd2L# zgreZIK`fV7&LEbm*vhw!i7a!~UV5~6qwZsOUJNJ10V2XY@eC0uA{0DnRk7UbKvk@A z-5oTMi9wJ^L2-xEQ+BK;8rgF=kFc1-d5TDKI8QlqRV>G`9L`h3sw!4Fmc#9rW1qyZ z9L`ga*WteQXKryk9w;Ww}8RV6l?Cez%@P zq!67D#|eXFS`{p|Br=S*R_z7nRe^h76BJc;5=lbDe!!a+s6 z0q*PQ3`S0jBONc#6XqDlwFq@brww8Ssd5IfRK-@lZA{3c9M{Dg^<+!xD6J;cIywoJ zlLoJsh=?iB7OsNnXGAg?sX5he?0nGIrvin%P{j9Unn zD(13)Q>xg9>dcX*bX~krk78bt;NZ&{IMI)DUJp+eBss55C>ttmAS;xvF_5J)*>!DZ z#{UH8b@FCQEThkf_i=DCW>DpH@nlVEh1zw7vec%zw*5>NRb-}9QRmI1 z0gSBFPG{u!IXR@cF`#2FF-rRA`IS0Il zg_BL`mV<^$o%J%hdGagMwKWKhT5qKxtaN#M4PjM7zFmJYc`9>lJ-nHl0ChP15-3i# zrc_=ZPlmoz6jK8giMztQ2vr!7SC5vBo746MStfSY&x}qLQNk}! 
z3Kh9lz?9<2n0`{rR@BJeKRQ@BD@4vz&QgG!wVoL}dP~*de7ph!IDKD|%cxS$#*1B} z6Y;93oaJsw@#0j@QXnZ_90lyNXG!s*H1NjDnc1dv?_t}7`c%b9*Xs51WK%^lkpw=v^th5vD8_}aYSjgjN&&jG^H z6!ew~9*_QV`lbnIlTR@1%kA4E8LQb@=n&l1&s#)^> z5=~)z=Oa*n*Ku7fi7oshPL3Btq?F)sDf4uIw3^sS>OiF|=Q&y_t8i~AE#+ne6NVmA zO52Z#@Ow{%Vq&j^y_nc5$8DkISr#TH_6nOTWx17!iM?`Gm9h#mF_Bo9y}M^(Vy`rB zOakKZi)!`uUp31KN|4B#X?b_ty&bHF-A$xc$#$uuRkBL=akMl$B5_pl9R9cV<5%^I zy`Y!l@&ve5$UG{850dCL0;Fi-=^sN}XB|6GE-;i81y>r%Qasev9$>~t{3iTy_)Y1Y zw<}lldwlHUBk=NN-k#$tqK_Q_G1V+5h^l5ucadZW^Ql)39LxHwI;A?$$YDhV)7_V_>ftt!|O9-n%|g?S0TZF1_cC$65jNhDR_*< zKO_yt{jDawE(Hmc=wS*EqQSG9Fosbb zDRZI#&W*YGNPV}B#?O+WudM9SJ%#O{POXOI* zfELwM{t73WD>Q2m+6_{6T8}f<3)M+D2^FqgQtS+{&J(>Fld!HtsduS(4;=B`5blAa zq&qnQp>T|$;~qemjEW@led0>oU6pEU?;nMR7!CG1m)o91y_4)7`NCKU& zx%Kf0_Xx04xH^wW)TsSL)ih(PsW{-nhdgkY8_qcv^@1}9!dtJ&#jBLDjuUY)_V!@* zair~bk@DSa%7a-E{AS^w?V1JK1ROw7}bcPK7{|6h4WKPS3f$fBNG-nK{oq-@NZL-#hc2$-Q2o4^)u^ z{Dlz1N)aT4x4vWDfbxZc@YW61joiUess1aIvQQIgWkIuhuaw3#BGnDr(u0a(XYKwwCH>B*RW~$tWtW|`w<&- z`A{RT*s2PcMDD)NgEHBl528TJk$=hNVVB^!&PsfOjzH^?SNt@2ad>W^7CI|xV%NH! zGo4&G2~TF-O~qv$>HDT6tOI)zkDB{+++{}fL$C94!q zKMH(rS^Ys&5~I7t?C%YAtcB9ZXPO3UTA;qw3-m^kpnKKsH`G&i_>16hW-F#R3qCWS zzu|L2`V#qf5mPIb6AX~l_{>Z=eG8NbiZ^XjQXqL?%ZR___n>8RK{4T^5lFb?qJ|6) zIL53}A9*+fUizvkj}%#S81sv$zLM4p6t`&edV>yFH*9i7`uQZ(+D#|1DL0|IN~K+N zs1jrguV!1eGXpD_dEW+7q@3?IkMppnPw=OBmOiV-6(!%K?`kPreq}OLIb?708 zY&_zXDDnu5otsO=?o5DvV&%72^Et4rJKmE5Ef8t&_I{^GKg2HiEL7V~gL#j?VPf1Y zgj;AQs0TcR#)hF!d(wtr(g1&@Q+^Vd!goI2mY;=SdfdI{7btyj$u@sm8|_DB;*zoi7px;Fu-rt6hxavm^Z&pUh)z6`^gep&Ep90XZDJ%V(@ z-Mv@zIS@qsoo=T6VgcmZPjOp4IJxj}7kqrOT$gvwK@>Ih>aWWSa3?D%^v3leh~Lyq zVO@I$pMUtcB~E?fFUIk)B=}gDKl>rSbG;-J{HIyPHBVe)So>X%Vt1YDZ6p{?HnxOThGTX z--Za!cQXzQi!E}bji=dPS7&ZSn!AbWINKTdNE&ehBU-rAEBvWQU(x>>#8YTq{ zTHY$&WW`DzNMZ_I;e}=5OyJ!;OuZXNm(AUp@vf1bN)Lf+&9rY7r$oguld*r_KCN1 z=fHCEf<>p@6l861Q;g7OgM@nE=~9Vd&`_6T9@{elSF5I;ZqXfugo3^t*(+1<_lwS( zni-y{MK1tOMky=q$=v_keDjHNHJy{SAS(WDr0#e(xZu~QMR-1d_?TesmDvx&PotM? 
z-0NX6Ureg*bs3!MrRPls7sHW+#Ywe>JQ#R>kr3cG3qCSs4T+J9prH1C-3Q<6khaS6 z-rmTEpdW15h3FlVV49R^d$eE*tm5q5+jA;m66{&?3e|v%A8_U*bD-dOMkXnmK#xbtaQT~Y5T{%`X1O#4vlfN&9$^E(q~FXcm3jtUr?0g?&gz46 zw%z3oyC-0|roVo#+(WQ2k*HB^Vu7uZyF2~gMr_C`FbtX^aiEal5_Q978kCkq`?eOn zfK*a>OUV)&UelX}|LA%FUa}_5XNUV?!ID}T_L&7Pn`Ls>ukHdPJ*DN6?&om6-jn*w_T-!^?y1Ck1r$-1=M&rf`!X}5T z&^moxq_T%tZKmHQWP9*HNJMxLHGrf}at`yRhKGiP(Y$FvAykrCNH{H=w3TE<^$j`c zNAeE|Bhdo=NREC1+*$8nl56NWdx0)oL?q!PjYtlrTunr$9X4~d9jb6P z#WZI=rY9-vD{oSZi((hWOd9KC3(1^QBOhK*^!<-Q_!2)H{%(>B(5&UTS;s4*k z{%z+x|B18eKl^6*dryiabIVbRTq#HVy94M!ycmix(nKfYWzfG)+aRs@d_+xftX`Kj zfvuJ|3l2KzP4h#Ed7DJ#_EY`B0%CY$bS_s7#pIDtUcNFCFHk^x3l%X%^rcv-`f>4E Myigg2W=eei105(D=>Px# diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index a91b146483374e9d44e89e8dcaa7910d30675b1f..7cf90e944b7a76b621e1ba831c8e65334931a3d2 100644 GIT binary patch literal 42699 zcmeI5cU%<5`}nbS$JiuSrp}b<{Nx%f>6fLI<&Y+0-AI{U7ES-X*wpPK(^Qj}v1WQsw$@~9TUA`C z){|PbOr`V7k*Q_bq*iA#)(=-{JT31a@NTtP#bb`^UQwDF{p5> zDHE`R(zT>qqsFxvomT!uvmSLhQ^@^(&S3TdP=xnT&3AGxl z5A9=`H`lzihOycdQ$j+*;D7Ol{x1$;&&(T*U0=^Xz`u8pzkfi0zdvr5-tp6xEvy}N~bGgdjxEd;D zslPzZgbm?o18xv*fs*Cn{u~d}Z3wSZozi6fY=b|x19t#-0(SxT0Dl7R2Oa<(0v-k) z0UiY&10DyS0Gophf|*uXzgS9lCQL-=yk^p z!)2H5p)C5-B{YcsbP4_KSL)LSzITA{t>C+7=<;RM$3N6uF4(+%NDzGQ1#*7yJs|X1 z(CxFc&=EAg*VA85)jEfC(E-zavM-Cc;p>J~-y zD?+RC;5&Ed@1ZMihYqe^-Dgg-Z&5|k?zZ{mThZF4-bW@J4GjIT_}Nu_DlJtDNr_FYpNeIF6pxukmA%Lg=}pZ0t^`|yHq zLVqpp`C#)Px6sW$FI|&U>ovMHyJh~3E%(roy`O{(dRRAfd&ZrD{=SFMh^xB4^RjHO$Ibamx3P3e25(YOp%Pkp-*wEB9}&m$JEMsGgZ{a$#%7L;7K z;Wq7eCs1_aY4^$ZjzhcL+?uqcKFgTzlMVOAJ+1f@HS$qR+0f`J^7UzQaCEaVwIE!tXw^q!~2M%}%L ztol> zXZJimTll6J&3%&d&7)UaMeF0rZ{ODOgnQvu)N;+{WrL2dKw;CCj+nOfcU1S}%#7g6 zXROADQL~-~m9;sC7VY1^?a;N~MeF0r|B~|Ijk=AGqFc&i+a}IBg0}P-wxUzNa^yAe z*qrqjA6Sh)SJCL6;rkz;gJ=J4gvLA&t&b~zCBo0A_TbB?p6N++N@*cViSC%JLZ#?r 
z_}ISg(;g!3_!d~U=3Gjv`)GS{>)@EX4@K+amp_T_#cL*=UvUQ2zmT>t{F|HTWacle z{vA;a^;M3a<$E?P%<;K{hWD+bURiusv_7u9UPpb-qcr++5nV9ceSPq3F$!oEIN((H zeZ(D~=N619tRG&CW_&Se%XRHIJ0xxWTg?q4npJbd;*Tq#mdT#$VGy>ij|xblxe|IM0I^AhSfc~xuA zp;yt|2ZxWDt`?)=5B&e?ccvV1_g}Mq39UJQV=-Ddz1E?<)5PoJ%5(MAI3AV|q$))r zzql9VuD@@!e~c?5mn1bUMa{OIN$S)4zG(Zn^4$IHuZdqRt>#;Tepue8Z)(_mtNrhT z64Fr7tOTXZIy`K7fq1@f<+=OwcAu2HYi6HE7EvkyU9A9F$Q>`a-z{<$ z{+oFq_tv6ck%iy?uYT#cWk=AA-)_&%ZFrdWSJmT(-3KFU4sCZ7Ek6Ht)|r*&?QT`Z zdv1MPdBew^Blcc9ggl;(P_`U(2%TQNuEowR<)~9-~cPz1((l;o{_CqwaZJo4jL=aiuI zDdX3^dgqR4zHsH&mDI1fyVgl`bL_frLOLHuKUKfkP2HxLq1iu`$5&`r??0Qk6(I|` z{^NeP$XUqMM=m~M($0L0w*H=Zy!*z-R>yB;lNts3sve`@;B_mKmW%t7E5CKy_HN3< zU!Zm^{Fjt3S%Zj=?jO4H>Ji%f=e^nw(PP9tKJ@o3=u=e1opgIVRigj*l_9g7PIf6GfTC9^=?oh8b7Z5 zlbS~tPJXr*8PZ>rC3zn~UoTyLwPMdhl(o2U?Ut+7$G5|Bo39Cqeuy@9ekcF)9P#}h zSHA8=z07l55$g1^mAve`A~g2b{Wl8h{f-X5D0*3N_blQbKW}Vu=AY5M&!Am*JQAnY zIV0LWu6$s~ik=^KJcka4Pk6Pm`8jm7o$qIjr`n>rlUTmh86evZa1pdsPUT~w<=J_qQO0P1Xm#L@&B>rm)mCh zR-n0;`g^305#K*>5VA zWa;OS?5wfnxEl9F{mGSYlJVu61_wu=UPqp`+hzI?dG~92+SIlTo!6ZRS+}hmanD~8 z+dNV~S#S?^TE8Ik+cVfI zIzL+b?9iOX4IZLXH%AnxeiQd6SKgx_BX{Dy;(>pTZ=UgY!Y=~{kY~=#nQP?eb>@_yW9aQB_&^Iu#0Bd&b%_3ytcOWc9}x5gOyevcz4>dp3bCnN5m#C}Vsg!$dI zI)7|=Z+h1fwwh6c< zS|7iBrM{im6vznqbUx!2L=q2pWjcNafCr3VFWV*Kpk%@%<53o_oC^`tReXmYw<;)!EZ7A!NlyIzCmeKP*t@ zZ<#;qC$vS^c)nluX3_X@ss!bnkB{Muvn_!%yD1 zZFT);&c$Gl;_SPqWWtc4N%%a8>p$*yuKeYi?T*XVr6G-B-<`c9l10mN_5J_vlm~?K zcl*jQt^CU$q6LQ*Y&zfJ9^zhq+nC*G>bkQJ(HzB!q!T6L;~7_;tFL^`wd1MlE0ABa z$G1LzT5ffHudQ1@_lk@PRDZR6MaMPb`**Iq*!9ByIv;`$FMZm$T^U;McdG8lYGsIf zeX@b!{r68tm7%#m)K-Ogi;ve_`9?hocgz@i9d%jq{OYx9*U-*)7p(o}+xy77EaC#O zxg2q?e{MmAFX|4tkCH-ej!BOY_a|4Lt1mx4Z_hI2Rg{$Taa`9%C05s0>t!uoes*vH z>YRN#ZEhFw@r5fNTO;uLs=ddNW{P*mT>*ut?&rbVYV3H7(ptX$M4SJ}>iTWaNZr#f zzkZBzhAtjECG3&t_`sDv@=gDB;zy{>sM}+1d?aqaz49)}_bS($H)Tg1 zXhPmblY)ECp5#-CRWc?x=hTpqHLu)5 zCqFu$G5Ej(bPH`=x6S7gn)QwB_>~pKD8AW=p}%f8iwYmVK6F`LgyPywY})_q5!9pj zdj89$Cy-xI&W7hfmyv0@u`&6g#A-a_jt{|~PQGf~aT~>7C|t4hRw>#oKj+hC(QWj4 
z+_9aT+{)0dQTfK>^KPSlt!7knZ~O=aA13GdWZy=kRB6|qb-0W2XMS;hNa#f*Z*chV zt{J6>_iix0ONVajSY$`qL2D2-q0d z1lSb#Ht-#w2e28iIj{w=C9o9`-$bN8ZGfJ@w!n5k0@xnd0oW1P3D_Cf1=tnn1?&d& z2KoTI1AT#hz#c$7zFGI3v&PXSK@e*yjq{0(>pcoz6OumE@tcpi8GcoBFBSO~lfyaK!myav1uyaBukyag-* z-Ui+Q76VIwcY*hSrNA=aec%INIj{ow5cmlA82AME2k=keU%ri*$)jrvpv)&SN7)&kZBx&!L~>jK{b)&tfDHUKsRHUc&VHUTyT zz72c_=mBg7Yz}MzYzb@yYz=G!^aQpAwgVEt_P`Fnj=)a9&cH6fu0StfH=sAr2iP6x z3-kl_0Qv(1fPuguU{7E$uotj5un#Z<*caFj*dI6mI1q?{p};U;I4}Yj35)_p0|x

<^d-GCj+Mdrvg6& zP6JK{egvEWoC%x-%m>Z}&H>H^&IA4*@MEA6I3M^4Z~^dB;6mUc;9{T&_!)2sa4GO} z;47=| zMWmXH?Go|a2Bl6tTDL7Pdc39npI zE63v9Ovc8k8of+KX zrp+T1_y;|asnn6!O(LcGMaa~0QcHM|6XhyHCe<|EIHj79(?53;DLv3)p$bpApqhlM zR_PVEnL2}((!f(bppS2_o))8fL@H(3YK;!J1$!!k)RVYnHk%odsL0bP<=E3ums+DI zvNf7aB1@yC9MY4>#FIlrv;^WvQ*ong#!00dugo;y2`)r`0#BjnOhl8Jq!w3UNrE9t zHO@C1g@PR@6ewJicbJxx5s*ExkTWTCy5L6Ukb12qSEEwuWon!zzCu}s!>Ct~L@rJr zA|Q~!Zq+MrqBMw7liKV&NMV!FJ31;pj7V1M#u0S?A)+Scl5#x`NtjxVy+RDp=yXI1 zHFL;hd?!ttfipUWG-z>0QOb#El`Pw2Y!;@}5_-)aED1Ib-7Bd49}dw zi7A^T(^8#)>aoU_kxEiW;vPfI6_Qh(26qOT9?vzx6L%7=ek8mS;{*MIaf53Oour}S z(FS+QtSoFpt`^svqr|-xPjVv0Yg7hmdQ)H`^Rr%KZl8I+QV!j}Rk#^m$?=KZh%A|0 ziJgh9F&Ue)3Z$AQKT% zt<|VhbQ(}Y9sZld24d^7wRm);^3ZIJ$=HYK3)Ji>dU#Y(y#XgQt~5cW$HSb8h@taZ zSEUbONAzck%Cfaoew$^b7VpaxuiT>$QVS1pMG&a8z`+hN8ACxG6~-!QlR>I96U+mq zKt5Ee=wdsbb_xdIX>NTjZNzm~%{3cCxVaXpa0~9vy3zoT6hlr9)s^YJNCr+=BE&!1 zWb9z6k7~3`E~nF11AC!7y!X-I9?A$Qb8<;F9;am)Dx6IQEgoa=&Jd64I1k&UDwKE_ zq>_y!@K1DF;V}#kWlB;l$CC&93JvKpykjGBEB9TLM#~X|`Pc#vVMCNzBtg=HINk^6 z5nl8W0#3<6Qq|(2Osk}a>>Tsa2^_aD8PPdL;5dlsqDmDt8u$pynLDn5-koUFsyr<1 zFDwmh###wh5zz{?bPu(w0#=a#={EJC3*vV83YW=Is(o{06A86OnO!^X^MmsDNf=rXE>6I^5jW`gLSiqIY-Q3 zh09?xSXv^TY77(OP`lLdhBVZHvt3STqa9Mg6XH0B6bX$0&X~mtmeXdjv<-1dBbgM4 zaY!9+D#SQ(34@ao2~MfvNsSbz6bhw?E6ijC*JWn1w5GbW@l5I@I;EC3h2ks&l4IAg zI9ZhBtTvuR8s@A{p(?iSWyUk(WCaUchTm6?rVh0R6|2H0|{<(J`!Pj`Mvp-9+N%yO5wVwSkCG&>kScH@`l^;k##2d6kb>n0?{<264a zSs}lPC}+7sigH$g9#XAgyyqiSg4chYg`O|tc+f{!j>m`o!qP%s6s>^e7HJi*iUdfv ziSebca2Z~2b`g9ekK@knHUxS68DK+P$f4p@u-zlA3RamwX=M!5pvL&r&qg6$r*;*7 zmXYICUt5wqZVj|0FXUGVm9U*8u@Y9HAc=+8uI*u~6t8ca(vO02+}Hws#Sq0cW=WU;P_rW;ON63C&|F!3vIl%E=SAH2D-*xAg6!M%+#Vj|7 zD`tuNO0$FUSvP)pUXKk(gxg$FCslB~8DW2U9#6;EuSF;d4jaUBxpM}wRK+^qHYT#n zReR~t;*Gku&3Q4L5c`V=^Tab)q=-=Pq*cXouN_sf%Js6-L?#A-A_c`APEXmXI?>3M z!+C_o9L`fjlEZn*nX6(sj^%KkB2}tlm18;Fb~*M*49np>1$iCrV|(Tn$KwHF(mXEj zC03@2Bj^SZFlRzyWo+l!Q5maXZ#xRI-5w-XlGpE5ClM(`C&Y2WV3`(0Pkfb7m?tXC z2#F^#*;haNywx#NSgv=%6qcUo#L6d89BC3Wa)sNeh&R9k?47~LiE)Je<$1y!W4{)m 
z?&z>VtROjO5KC38^KD~79%a8S-l!*8Qb$>3Lan`%P&t_}+%-CRvSXBMGz(>nOATj* z-t~sF)Q|pe{K910d#=&Xn}I$e^Ih4`-UWzB^JGmgu`)vWAh9x5DD0_>Rj{|cRx;5E z5-Z8;_W^KE-s~_5)51BuJi`9+Jh6$fUyD$*95#p*2ImZ7sj^JVbjZV;%O#daI_%KJ zA9YJlmO8Va8+H53^F`f$ErL|i+Jv&fX#-iIbd7;5l}WB^Gc*3jJFb&ATVfe~4!n${d#%$_lmX3}vZJb8Y*XEUL^*hoa7#NrM<$?S3^2Cyy#~+yPBIS(NM= z%h=ZzTwo+C)UGm;r8LErEoZW)vimp?AKu(?%{d3WhlP_(>6U{Ar_OpA-8}h~>Dn5E zMy>y(A*^)y?;66YhWu~+#pJ2Xwe|33ZamcC@JpaL*_z_KKAsGH*LkhE=IfVMuNrch z!K@Tr8)0s-YgS4>t{x%$|&I%D20ezD_~0TWXwRRWh-lB?;q`~ zoE0KRDrYG`j#|%*9sQ(ga6DcC{+zxq$z@b2N8`n=(SdkXR?c#_qEXDN^rFOCBC z*|Vg0Q5tyT<;ZMPy7#bc!hO!NKd|E!s+RZf@y0b7Mihti3H41E7|9BstBhnRwd`12 z(sHI>4|T?$yt!lj;x=Ynt@J<63?Hi(yfJb-{n=kwx>75<2a8s~a<8-sSVaP)+r;?W zSGWwXH#_jYXOc5edI<>e_|aEDRLF_8RkPe7K{ZR>PogP|@4N*H@H(!CC9#EH#L4ku zu#^%!E@hq$kX92rN$seVUWfkrxrKQ}AV8YN_N@?3M5q|HfP)ux-YQw8``cTZ9g#Sycn<$t`|+###a__Mad|x4Dr6p&!3Rn78Ua!? z@$`>juCtCEC>I#Yih?T*Whox+Y7a2uBYqS91pKCS&fAqM`#nDP@fLXbGH=iE5z)sE zfS78Q6GT|>_IOz2UDBNHFhXi~5LL_5H{}13-RwBIx(TdE{bx;J)sp$2 zI*Q4oP~k&nr4FypkZFEnI$nhY{~72X5JGs-Yo_2a7XOf#jQg8Ay)Fe2#{3m-GJ2$F zZN!CBQ%}>q3OnOTSQSR4_}u5D|FM2V4zlUK9h%gIy8KUy;mT zQMf4O->~@D?(~QflVVwTfL=AGO*~2B1zvQlM#%B|*s*pQw${8E7NDu}yV(=bgcsg@ z;Yb-furEp6aEA?BWHW$REW zm1GJnCc{_ZASoL4ysOJ_9MB2RKvnBk08y*Lpa;=8!;D1Eo}z z$t=2MJ=*n`qQLT6eCy(ph6c`3W!73^r5Hht{M^HGo)m5qV_uz1z+vf9s$x#l1XoR> zBc;G+Z**HOT0lSe0dVtkzs2$KwUGsHXB)IN4mGS%c7Skg~Jt zI8$|@IteGC!nI3^odMQ)qE}-Q)|DXjE*0;ABfcBLJ#dtCC&xbojxluH0|=8*k!Ze8 z?8MzwskSID;U(^qO3}N*U`o5f@``4`LrjT5lIA-_xo*m=(@%7XI0$ zS+HHuEX#HwvtYZRS@g~{j@@0|%`L%L3lE}Ny(!MkSW|}^WQwa!{XybCs?lmmO1H6Q zx;`%#pFLCSuV!hfN{w~X@tTz~dO_CFc>PAa$Rj=bm#OfMNSB^z Y$W`IxJ@sU|3akj^4<9jA4*&oF delta 2983 zcmb_edpMM78=n`$vPvaH4kMlAlw;Z4Gss~b8i(mH$uVMyan_1)thSF&Jd@2*blTWT zA+fC(hYd+4C50Tun4xj{T5N5qof*?!*SFt4`^WctpZoef_wjkJ`+2YXdOJELQp4~m zQfC)TEAXBY*k(kP*lJA1FdPp18*rHD!HKZ67esxwqKguWc##!hlInswnvz@bL}3|3 z+h8D@x0HrsAl8N>$bKUMMF`}3fiwt&Clq}nEE}D~Z>6)oz~Pj`8U^b)bk(nlxOkNX z7iR+UuSW6UgHGu_zvo;q7GV={sq98E6NsIUgjh!m5_ZHuVh!Wz);Aq 
zl?fTmux%vNqF7}TWD{0d7S~Te`Ng^uZ`AsLbZps}DWezuz|Xalb;g0G_ArZofj$XT zzi7e}xel;;Y*zQ(mn;a*StuIe4+3LX(vPx>qvE@L3R;{+*(akKriDY1rkd zd-EVTuX=m;@9l8?S_fEfV!^2cwpu5@n}+O~Pd%sgX5jR#w}RCNua*F$pD(Rb8RbFz z?DX-A^tlOeF?nLE=<*Ek%KLp&@z24^v#C_>{wR=BE55mLfe#DDBV8HL3~^@jkJ|Cw zkT~lsn1C>P(!-{O zUN~ca`!(auAk;o1^wOoCF0pX)>2L#WDDTlU$>8_OeYpqgtTn%0Sc4Z<-+XUmDbV{f(eBPYqG^Y6L=c#~YdS^T0^Vbz79Y z0)1To!h;TyrzlDzBLFRPQ72Bq&xZ-<3yMhf2^~heJL=Z^5hIu-ZHV1y*UFF zl~r3pnl6CW8pC{kehJ*GpisY%o&iD^-CgC5-3*W$Ya@GVeg(?=VvT(n51=6Pd+&y_ zaZu7OXyDCH!mLzFZsNi{i1m3p?zXiV-p(hTjA)wz-?mW)pYbV>?TPWz2pxv?f2akF zmk$GTqWjbE*Ihs~OH3rGb;0%>g(k^5-OywmzgVQw4V|O$KN|h^3jRa?##!4{e;S4* zyOk$PQoy2p@2FXMEl37U>M~|0!GgPBL2#M`UXI=Ckc@hGW&3@Khw(3vo~g*X{IMTC z?%tb`P92Ap{+B;4V^YDad0y)-BN^oD6oN~4j02@#m+RQVha^QCPW+WzFmAJQui8)q z!ScJV%P^*Zeqze8H!^J!l%Dw067G*$y@^xtpL6Z7x1Mf?+h~5UxQ2ZF*S-p)5!%=EI zj>==;F1x%ks=O8K$PZ-_xes9Zi?M)GA0{1i<(*~q5nFtBINVBAAqt?HTGN&LEfXG* z49+Ak8~)3m7|RWvpF9KhoU?ZAd&c10>VsObrd&|a4L)9~&#PI@`{Ka+1NRlT~MQv)*cZ~JQxcLGIf*``>Z z$B=w1OfWLLp>H5z*4e8LW-iDn*8Wxoku3UoPS|DGpEASQRDT|x4&}=pai0Kxm9qNO zxEavg^lrr;0k056~F;9drGqkaLG?S z`RRNpKAe-MmOTS+$S>N++tQ(%p16GQ(>3Vw&gi>2Hv;n1{6qG$V=!S?qTwC&6qxk8 z=Smd^q2u_i))(iVz!}%hoAoaIV-qWQ@Rv z5(Xvyjx!ImXJwvjxcC|}2{)Us&2r&4dXwZY9k0PxmDF^)uN$WA!z-dbuz|8xrD)X` z9bjRiy>P8_2y)xbnAQ|EgYTC8649%NPANGOHZKdNiwoj5DTePDVelMIDPF95gQ#6euSV)@Qm^c zkETUL#rVa9M1&J;BBEoW2^$EG;Q)*nAf0#J2zu`$ZMw>T7Fasm01e@V!n#O+{7=l5slH5kSthYMAT@ci&)kw zXn<&gIUoYb6nQx+A)d7pMn61RG!srim9!$npxdFK zXj^nAgMbEOl_l|B5?Y?< zjHHq-AZnrqov{*nuWj|=4!-boWOA!G@o?nNC&ugL;#adOTFEvU^#A3+h H$K*c&mrDQx diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index a0e40c2af851270573575d03b976e24482c8c9cb..23802eeef04fb7751625cccf3aef8780e78e9cdd 100644 GIT binary patch delta 391 zcmZ22d03LAfo1BBjVvuptPG5d42+ZeS;QtUW?EzK@vur{o%=(3`PSEgJLH~1>8tiJ z{fEA4#5}hzwB@?KxZtUM8W=pYhbX^g&j1E5CigK*)cM 
zeLY!?MbkbAtmy^R4R4@scnhb&-mrZIw_6kJ!6#7vzuer%62vN^;FDjjkdv8~s*qYy zkeXbQn3r6+`3I*bqrEpsCO0)DGcivgBfqpbH6uSKMIkdUIX^d5A+cB?QK2BUC^sxmuE?fo1A~jVvuptc;9|42+ZeS;QtUW?Ez4f9R`5O#dT$k#+75t3;mJ2knq+ zeI0nieo28X*Y(9O>r<41bCF{?g0f9I6-#|iNU)Y1S*gmpP z1A}+=AQNo4-rH+}q&42y&zcDWFYOt?;DvoP*zz~_V5@h`gYuU^?Ysx2-@*06%rgdq z*OS#)G~w=*2fBWT+*_#o5Hv*3aj4lxpk_URxp{LROAxDsice-*szPc-L27bIVqS8k zf<|R(Vo|Z?<}aL{jLQ1HsVSL>c?uc%rNyZk`8g>HnR&_ixv2^ostWmO3XZ;>nwx95 G-5CK@WuaUE diff --git a/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb b/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb index d6aa454d..ef781baa 100644 --- a/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/compare_two_score_files_for_differences.ipynb @@ -8,7 +8,10 @@ "outputs": [], "source": [ "import IPython\n", + "import os\n", "import pandas as pd\n", + "import pathlib\n", + "import sys\n", "\n", "module_path = os.path.abspath(os.path.join(\"../..\"))\n", "if module_path not in sys.path:\n", @@ -28,12 +31,8 @@ "outputs": [], "source": [ "# Load\n", - "path_to_score_file_1 = (\n", - " DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa1.csv\"\n", - ")\n", - "path_to_score_file_2 = (\n", - " DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa2.csv\"\n", - ")\n", + "path_to_score_file_1 = DATA_DIR / \"compare_two_score_csvs/usa (pre 970).csv\"\n", + "path_to_score_file_2 = DATA_DIR / \"compare_two_score_csvs/usa (post 970).csv\"\n", "\n", "score_1_df = pd.read_csv(\n", " path_to_score_file_1,\n", @@ -55,7 +54,7 @@ "metadata": {}, "outputs": [], "source": [ - "# List columns in one but not the other \n", + "# List columns in one but not the other\n", "score_2_df.columns.difference(score_1_df.columns)" ] }, @@ -68,11 +67,16 @@ "source": [ "# List rows in one but not the other\n", "\n", - "if len(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) != 
len(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]):\n", + "if len(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) != len(\n", + " score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]\n", + "):\n", " print(\"Different lengths!\")\n", "\n", "print(\"Difference in tract IDs:\")\n", - "print(set(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]) ^ set(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME]))\n" + "print(\n", + " set(score_2_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME])\n", + " ^ set(score_1_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME])\n", + ")" ] }, { @@ -82,8 +86,13 @@ "metadata": {}, "outputs": [], "source": [ - "# Join \n", - "merged_df = score_1_df.merge(score_2_df, how=\"outer\", on=ExtractTransformLoad.GEOID_TRACT_FIELD_NAME, suffixes=('_1', '_2'))\n", + "# Join\n", + "merged_df = score_1_df.merge(\n", + " score_2_df,\n", + " how=\"outer\",\n", + " on=ExtractTransformLoad.GEOID_TRACT_FIELD_NAME,\n", + " suffixes=(\"_1\", \"_2\"),\n", + ")\n", "merged_df" ] }, @@ -94,14 +103,32 @@ "metadata": {}, "outputs": [], "source": [ - "# Check each duplicate column: \n", + "# Check each duplicate column:\n", + "# Remove the suffix \"_1\"\n", "duplicate_columns = [x[:-2] for x in merged_df.columns if \"_1\" in x]\n", "\n", - "for duplicate_column in duplicate_columns:\n", - " print(f\"Checking duplicate column {duplicate_column}\")\n", - " if not merged_df[f\"{duplicate_column}_1\"].equals(merged_df[f\"{duplicate_column}_2\"]):\n", - " print(merged_df[f\"{duplicate_column}_1\"].compare(merged_df[f\"{duplicate_column}_2\"]))\n", - " raise ValueError(f\"Error! 
Different values in {duplicate_column}\")" + "columns_to_exclude_from_duplicates_check = [\n", + " \"Total threshold criteria exceeded\"\n", + "]\n", + "\n", + "columns_to_check = [column for column in duplicate_columns if column not in columns_to_exclude_from_duplicates_check]\n", + "\n", + "any_errors_found = False\n", + "for column_to_check in columns_to_check:\n", + " print(f\"Checking duplicate column {column_to_check}\")\n", + " if not merged_df[f\"{column_to_check}_1\"].equals(\n", + " merged_df[f\"{column_to_check}_2\"]\n", + " ):\n", + " print(f\"Error! Different values in {column_to_check}\")\n", + " print(\n", + " merged_df[f\"{column_to_check}_1\"].compare(\n", + " merged_df[f\"{column_to_check}_2\"]\n", + " )\n", + " )\n", + " any_errors_found = True\n", + "\n", + "if any_errors_found:\n", + " raise ValueError(f\"Error! Different values in one or more columns.\")" ] } ], diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 9289cecc..76d11038 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -60,11 +60,15 @@ MEDIAN_INCOME_FIELD = "Median household income in the past 12 months" MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD = ( "Median household income (% of state median household income)" ) -MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = "Median household income (% of AMI)" PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract" AMI_FIELD = "Area Median Income (State or metropolitan)" - COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school" +MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = ( + "Median household income as a percent of area median income" +) +LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = ( + "Low median household income as a percent of area median income" +) # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" @@ -105,7 +109,6 @@ ENERGY_BURDEN_FIELD = "Energy 
burden" DIABETES_FIELD = "Diagnosed diabetes among adults aged >=18 years" ASTHMA_FIELD = "Current asthma among adults aged >=18 years" HEART_DISEASE_FIELD = "Coronary heart disease among adults aged >=18 years" -LIFE_EXPECTANCY_FIELD = "Life expectancy (years)" CANCER_FIELD = "Cancer (excluding skin cancer) among adults aged >=18 years" HEALTH_INSURANCE_FIELD = ( "Current lack of health insurance among adults aged 18-64 years" @@ -113,6 +116,8 @@ HEALTH_INSURANCE_FIELD = ( PHYS_HEALTH_NOT_GOOD_FIELD = ( "Physical health not good for >=14 days among adults aged >=18 years" ) +LIFE_EXPECTANCY_FIELD = "Life expectancy (years)" +LOW_LIFE_EXPECTANCY_FIELD = "Low life expectancy" # Other Demographics TOTAL_POP_FIELD = "Total population" @@ -130,9 +135,6 @@ OVER_64_FIELD = "Individuals over 64 years old" # Fields from 2010 decennial census (generally only loaded for the territories) CENSUS_DECENNIAL_MEDIAN_INCOME_2009 = "Median household income in 2009 ($)" -CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = ( - "Median household income as a percent of territory median income in 2009" -) CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 = ( "Percentage households below 100% of federal poverty line in 2009" ) @@ -141,7 +143,10 @@ CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 = ( "Unemployed civilians (percent) in 2009" ) CENSUS_DECENNIAL_TOTAL_POPULATION_FIELD_2009 = "Total population in 2009" - +CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = ( + "Median household income as a percent of territory median income in 2009" +) +LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 = "Low median household income as a percent of territory median income in 2009" # Fields from 2010 ACS (loaded for comparison with the territories) CENSUS_UNEMPLOYMENT_FIELD_2010 = "Unemployed civilians (percent) in 2010" CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010 = ( @@ -265,7 +270,10 @@ ASTHMA_LOW_INCOME_FIELD = ( ) HEART_DISEASE_LOW_INCOME_FIELD = f"At or above the 
{PERCENTILE}th percentile for heart disease and is low income" -LIFE_EXPECTANCY_LOW_INCOME_FIELD = f"At or above the {PERCENTILE}th percentile for life expectancy and is low income" +LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD = ( + f"At or above the {PERCENTILE}th percentile " + f"for low life expectancy and is low income" +) # Workforce UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD = ( @@ -288,9 +296,9 @@ LOW_READING_LOW_HS_EDUCATION_FIELD = ( " and has low HS education" ) -MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = ( - f"At or below the {PERCENTILE}th percentile for median income" - " and has low HS education" +LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD = ( + f"At or below the {PERCENTILE}th percentile for low median household income as a " + f"percent of area median income and has low HS education" ) # Not currently used in a factor diff --git a/data/data-pipeline/data_pipeline/score/score_l.py b/data/data-pipeline/data_pipeline/score/score_l.py index b9e46051..d11267bf 100644 --- a/data/data-pipeline/data_pipeline/score/score_l.py +++ b/data/data-pipeline/data_pipeline/score/score_l.py @@ -44,6 +44,8 @@ class ScoreL(Score): robustness over 1-year ACS. """ # Create the combined field. + # TODO: move this combined field percentile calculation to `etl_score`, + # since most other percentile logic is there. # There should only be one entry in either 2009 or 2019 fields, not one in both. # But just to be safe, we take the mean and ignore null values so if there # *were* entries in both, this result would make sense. @@ -169,7 +171,7 @@ class ScoreL(Score): def _climate_factor(self) -> bool: # In Xth percentile or above for FEMA’s Risk Index (Source: FEMA # AND - # Low income: In 60th percentile or above for percent of block group population + # Low income: In Nth percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. 
Source: Census's American Community Survey] @@ -225,7 +227,7 @@ class ScoreL(Score): def _energy_factor(self) -> bool: # In Xth percentile or above for DOE’s energy cost burden score (Source: LEAD Score) # AND - # Low income: In 60th percentile or above for percent of block group population + # Low income: In Nth percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] @@ -268,7 +270,7 @@ class ScoreL(Score): # or # In Xth percentile or above traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data # AND - # Low income: In 60th percentile or above for percent of block group population + # Low income: In Nth percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] @@ -315,7 +317,7 @@ class ScoreL(Score): # or # In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset # AND - # Low income: In 60th percentile or above for percent of block group population + # Low income: In Nth percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] @@ -363,7 +365,7 @@ class ScoreL(Score): def _pollution_factor(self) -> bool: # Proximity to Risk Management Plan sites is > X # AND - # Low income: In 60th percentile or above for percent of block group population + # Low income: In Nth percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. 
Source: Census's American Community Survey] @@ -410,7 +412,7 @@ class ScoreL(Score): def _water_factor(self) -> bool: # In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model) # AND - # Low income: In 60th percentile or above for percent of block group population + # Low income: In Nth percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] @@ -441,7 +443,7 @@ class ScoreL(Score): # or # In Xth percentile or above for low life expectancy (Source: CDC Places) # AND - # Low income: In 60th percentile or above for percent of block group population + # Low income: In Nth percentile or above for percent of block group population # of households where household income is less than or equal to twice the federal # poverty level. Source: Census's American Community Survey] @@ -449,8 +451,7 @@ class ScoreL(Score): field_names.DIABETES_LOW_INCOME_FIELD, field_names.ASTHMA_LOW_INCOME_FIELD, field_names.HEART_DISEASE_LOW_INCOME_FIELD, - field_names.HEALTHY_FOOD_LOW_INCOME_FIELD, - field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD, + field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD, ] diabetes_threshold = ( @@ -475,24 +476,14 @@ class ScoreL(Score): >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) - healthy_food_threshold = ( + low_life_expectancy_threshold = ( self.df[ - field_names.HEALTHY_FOOD_FIELD + field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX ] >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) - life_expectancy_threshold = ( - self.df[ - field_names.LIFE_EXPECTANCY_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX - ] - # Note: a high life expectancy is good, so take 1 minus the threshold to invert it, - # and then look for life expenctancies lower than that (not greater than). 
- <= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD - ) - self.df[field_names.DIABETES_LOW_INCOME_FIELD] = ( diabetes_threshold & self.df[field_names.FPL_200_SERIES] ) @@ -502,11 +493,8 @@ class ScoreL(Score): self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = ( heart_disease_threshold & self.df[field_names.FPL_200_SERIES] ) - self.df[field_names.LIFE_EXPECTANCY_LOW_INCOME_FIELD] = ( - life_expectancy_threshold & self.df[field_names.FPL_200_SERIES] - ) - self.df[field_names.HEALTHY_FOOD_LOW_INCOME_FIELD] = ( - healthy_food_threshold & self.df[field_names.FPL_200_SERIES] + self.df[field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD] = ( + low_life_expectancy_threshold & self.df[field_names.FPL_200_SERIES] ) self._increment_total_eligibility_exceeded(health_eligibility_columns) @@ -514,23 +502,25 @@ class ScoreL(Score): return self.df[health_eligibility_columns].any(axis="columns") def _workforce_factor(self) -> bool: - # Where unemployment is above X% + # Where unemployment is above Xth percentile # or - # Where median income is less than Y% of the area median income + # Where median income as a percent of area median income is above Xth percentile # or - # Where the percent of households at or below 100% of the federal poverty level is greater than Z% + # Where the percent of households at or below 100% of the federal poverty level + # is above Xth percentile # or - # Where linguistic isolation is greater than Y% + # Where linguistic isolation is above Xth percentile # AND - # Where the high school degree achievement rates for adults 25 years and older is less than 95% - # (necessary to screen out university block groups) + # Where the high school degree achievement rates for adults 25 years and older + # is less than Y% + # (necessary to screen out university tracts) # Workforce criteria for states fields. 
workforce_eligibility_columns = [ field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD, field_names.POVERTY_LOW_HS_EDUCATION_FIELD, field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD, - field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD, + field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD, ] high_scool_achievement_rate_threshold = ( @@ -546,14 +536,12 @@ class ScoreL(Score): >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) - median_income_threshold = ( + low_median_income_threshold = ( self.df[ - field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD + field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD + field_names.PERCENTILE_FIELD_SUFFIX ] - # Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it. - # and then look for median income lower than that (not greater than). - <= 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) linguistic_isolation_threshold = ( @@ -581,8 +569,8 @@ class ScoreL(Score): poverty_threshold & high_scool_achievement_rate_threshold ) - self.df[field_names.MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = ( - median_income_threshold & high_scool_achievement_rate_threshold + self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = ( + low_median_income_threshold & high_scool_achievement_rate_threshold ) self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = ( @@ -624,23 +612,31 @@ class ScoreL(Score): threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD, ) + # Also check whether low area median income is 90th percentile or higher + # within the islands. 
+ low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name = ( + f"{field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009} exceeds " + f"{field_names.PERCENTILE}th percentile" + ) + self.df[ + low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name + ] = ( + self.df[ + field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + workforce_combined_criteria_for_island_areas = ( self.df[unemployment_island_areas_criteria_field_name] | self.df[poverty_island_areas_criteria_field_name] - # Also check whether area median income is 10th percentile or lower - # within the islands. - | ( - self.df[ - field_names.CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 - + field_names.PERCENTILE_FIELD_SUFFIX - ] - # Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it. - # and then look for median income lower than that (not greater than). - < 1 - self.ENVIRONMENTAL_BURDEN_THRESHOLD - ) + | self.df[ + low_median_income_as_a_percent_of_ami_island_areas_criteria_field_name + ] ) & ( self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009] - > self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD + >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD ) percent_of_island_tracts_highlighted = (