diff --git a/client/src/data/constants.tsx b/client/src/data/constants.tsx index 0af137f6..f238e50f 100644 --- a/client/src/data/constants.tsx +++ b/client/src/data/constants.tsx @@ -1,6 +1,6 @@ -import {LngLatBoundsLike} from 'maplibre-gl'; -import {isMobile as isMobileReactDeviceDetect} from 'react-device-detect'; +import { LngLatBoundsLike } from 'maplibre-gl'; +import { isMobile as isMobileReactDeviceDetect } from 'react-device-detect'; const XYZ_SUFFIX = '{z}/{x}/{y}.pbf'; export const featureURLForTilesetName = (tilesetName: string): string => { diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index bc975401..0f266bdc 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -305,12 +305,86 @@ In a bit more detail: #### Updating Pickles -If you update the input our output to various methods, it is necessary to create new pickles so that data is validated correctly. To do this: +If you update the score in any way, it is necessary to create new pickles so that data is validated correctly. -1. Drop a breakpoint just before the dataframe will otherwise be written to / read from disk. If you're using VSCode, use one of the named run targets within `data-pipeline` such as `Score Full Run` , and put a breakpoint in the margin just before the actionable step. More on using breakpoints in VSCode [here](https://code.visualstudio.com/docs/editor/debugging#_breakpoints). If you are not using VSCode, you can put the line `breakpoint()` in your code and it will stop where you have placed the line in whatever calling context you are using. -1. In your editor/terminal, run `df.to_pickle("data_pipeline/etl/score/tests/snapshots/YOUR_OUT_PATH_HERE.pkl", protocol=4)` to write the pickle to the appropriate location on disk. -1. Be sure to do this for all inputs/outputs that have changed as a result of your modification. It is often necessary to do this several times for cascading operations. -1. 
To inspect your pickle, open a python interpreter, then run `pickle.load( open( "data_pipeline/etl/score/tests/snapshots/YOUR_OUT_PATH_HERE.pkl", "rb" ) )` to get file contents. +It starts with the `data_pipeline/etl/score/tests/sample_data/score_data_initial.csv`, which is the first two rows of the `score/full/usa.csv`. + +To update this file, run a full score generation and then update the file as follows: +``` +import pickle +from pathlib import Path +import pandas as pd +data_path = Path.cwd() + +# score data expected +score_csv_path = data_path / "data_pipeline" / "data" / "score" / "csv" / "full" / "usa.csv" +score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10": "string"}, low_memory=False)[:2] +score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False) +``` + +We have four pickle files that correspond to expected files: +- `score_data_expected.pkl`: Initial score without counties +- `score_transformed_expected.pkl`: Intermediate score with `etl._extract_score` and `etl._transform_score` applied. There's no file for this intermediate process, so we need to capture the pickle mid-process. 
+- `tile_data_expected.pkl`: Score with columns to be baked in tiles +- `downloadable_data_expected.pkl`: Downloadable csv + +To update the pickles, let's go one by one: + +For the `score_transformed_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L58), before the `pdt.assert_frame_equal` and run: +`pytest data_pipeline/etl/score/tests/test_score_post.py::test_transform_score` + +Once on the breakpoint, capture the df to a pickle as follows: + +``` +import pickle +from pathlib import Path +data_path = Path.cwd() +score_transformed_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "score_transformed_expected.pkl", protocol=4) +``` + +Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_transform_score` + +For the `score_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L78), before the `pdt.assert_frame_equal` and run: +`pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_score_data` + +Once on the breakpoint, capture the df to a pickle as follows: + +``` +import pickle +from pathlib import Path +data_path = Path.cwd() +score_data_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "score_data_expected.pkl", protocol=4) +``` + +Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_score_data` + +For the `tile_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L86), before the `pdt.assert_frame_equal` and run: +`pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_tile_data` + +Once on the 
breakpoint, capture the df to a pickle as follows: + +``` +import pickle +from pathlib import Path +data_path = Path.cwd() +output_tiles_df_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "tile_data_expected.pkl", protocol=4) +``` + +Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_tile_data` + +For the `downloadable_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L98), before the `pdt.assert_frame_equal` and run: +`pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_downloadable_data` + +Once on the breakpoint, capture the df to a pickle as follows: + +``` +import pickle +from pathlib import Path +data_path = Path.cwd() +output_downloadable_df_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tests" / "snapshots" / "downloadable_data_expected.pkl", protocol=4) +``` + +Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_downloadable_data` #### Future Enchancements diff --git a/data/data-pipeline/data_pipeline/application.py b/data/data-pipeline/data_pipeline/application.py index 3ec6629d..10f038f2 100644 --- a/data/data-pipeline/data_pipeline/application.py +++ b/data/data-pipeline/data_pipeline/application.py @@ -262,6 +262,7 @@ def data_full_run(check: bool, data_source: str): score_generate() logger.info("*** Running Post Score scripts") + downloadable_cleanup() score_post(data_source) logger.info("*** Combining Score with Census Geojson") diff --git a/data/data-pipeline/data_pipeline/etl/runner.py b/data/data-pipeline/data_pipeline/etl/runner.py index 3e9fb7f0..59179b0c 100644 --- a/data/data-pipeline/data_pipeline/etl/runner.py +++ b/data/data-pipeline/data_pipeline/etl/runner.py @@ -83,9 +83,6 @@ def score_generate() -> None: 
score_gen.transform() score_gen.load() - # Post Score Processing - score_post() - def score_post(data_source: str = "local") -> None: """Posts the score files to the local directory diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 1b52f618..4f8a6d01 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -69,6 +69,8 @@ TILES_SCORE_COLUMNS = [ "Score E (top 25th percentile)", "Score G (communities)", "Score G", + "Definition L (communities)", + "Definition L (percentile)", "Poverty (Less than 200% of federal poverty line) (percentile)", "Percent individuals age 25 or over with less than high school degree (percentile)", "Linguistic isolation (percent) (percentile)", @@ -95,6 +97,7 @@ TILES_SCORE_FLOAT_COLUMNS = [ "Score D (top 25th percentile)", "Score E (percentile)", "Score E (top 25th percentile)", + "Definition L (percentile)", "Poverty (Less than 200% of federal poverty line)", "Percent individuals age 25 or over with less than high school degree", "Linguistic isolation (percent)", diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py index 1711fa1a..ecb2b46e 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py @@ -31,8 +31,8 @@ class GeoScoreETL(ExtractTransformLoad): self.DATA_PATH / "census" / "geojson" / "us.json" ) - self.TARGET_SCORE_NAME = "Score G" - self.TARGET_SCORE_RENAME_TO = "G_SCORE" + self.TARGET_SCORE_NAME = "Definition L (percentile)" + self.TARGET_SCORE_RENAME_TO = "L_SCORE" self.NUMBER_OF_BUCKETS = 10 diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index 463c1943..1e7af807 100644 --- 
a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10,Housing burden (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged >=18 years,Coronary heart disease among adults aged >=18 years,Cancer (excluding skin cancer) among adults aged >=18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged >=18 years,Physical health not good for >=14 days among adults aged >=18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals < 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income (% of AMI),Median household income in the past 12 months,Life expectancy (years),Energy burden,FEMA Risk Index Expected Annual Loss Score,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to RMP sites,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed civilians (percent),Housing + Transportation Costs % Income for the Regional Typical Household,GEOID10 (percentile),Housing burden (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged >=18 years (percentile),Coronary heart disease among adults aged >=18 years (percentile),Cancer (excluding skin cancer) among adults aged >=18 years (percentile),Current 
lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged >=18 years (percentile),Physical health not good for >=14 days among adults aged >=18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals < 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income (% of AMI) (percentile),Median household income in the past 12 months (percentile),Life expectancy (years) (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter (percentile),Particulate matter (PM2.5) (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to RMP sites (percentile),Proximity to TSDF sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployed civilians (percent) (percentile),Housing + Transportation Costs % Income for the Regional Typical Household (percentile),Housing burden (percent) (min-max normalized),Total population (min-max normalized),Median household income (% of state median household income) (min-max normalized),Current asthma among adults aged >=18 years (min-max normalized),Coronary heart disease among adults aged >=18 years (min-max normalized),Cancer (excluding skin cancer) among adults aged >=18 years (min-max normalized),Current lack of health insurance among adults 
aged 18-64 years (min-max normalized),Diagnosed diabetes among adults aged >=18 years (min-max normalized),Physical health not good for >=14 days among adults aged >=18 years (min-max normalized),Percent of individuals < 100% Federal Poverty Line (min-max normalized),Percent of individuals < 150% Federal Poverty Line (min-max normalized),Percent of individuals < 200% Federal Poverty Line (min-max normalized),Area Median Income (State or metropolitan) (min-max normalized),Median household income (% of AMI) (min-max normalized),Median household income in the past 12 months (min-max normalized),Life expectancy (years) (min-max normalized),Energy burden (min-max normalized),FEMA Risk Index Expected Annual Loss Score (min-max normalized),Air toxics cancer risk (min-max normalized),Respiratory hazard index (min-max normalized),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (min-max normalized),Ozone (min-max normalized),Traffic proximity and volume (min-max normalized),Proximity to RMP sites (min-max normalized),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (min-max normalized),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (min-max normalized),Poverty (Less than 200% of federal poverty line) (min-max normalized),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (min-max normalized),Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution Burden,Population Characteristics,Score C,Score D,Score E,Score A (percentile),Score A (top 25th percentile),Score A 
(top 30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th percentile),Score E (top 35th percentile),Score E (top 40th percentile),Poverty (Less than 200% of federal poverty line) (top 25th percentile),Poverty (Less than 200% of federal poverty line) (top 30th percentile),Poverty (Less than 200% of federal poverty line) (top 35th percentile),Poverty (Less than 200% of federal poverty line) (top 40th percentile),"Low AMI, Low HS graduation",Meets socioeconomic criteria,Meets burden criteria,Score F (communities),Score G (communities),Score G,Score G (percentile),Score I (communities),Score I,Score I (percentile),Score H (communities),Score H,NMTC (communities),Score K (communities) 
-010010201001,0.15,692.0,0.7064864650941903,9.9,6.7,6.7,12.3,10.9,12.8,0.1863013698630137,0.3082191780821918,0.3821917808219178,52649.0,0.6781325381298791,35703.0,73.1,0.03,18.03609814156384,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,91.0159000855,0.0852006888915,0.0655778245369,0.0709415490545,0.0,0.29,0.0491329479769,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,55.0,4.53854358136474e-06,0.15696208303882897,0.12089201345236528,0.2540730285250378,0.5476983764902142,0.5368504002322592,0.4917349849538477,0.4465204908778381,0.5253319139719538,0.4797764044995599,0.7110645998610756,0.7030604504076335,0.634843527218221,0.11922677359534743,0.23460877501286445,0.15726423186721916,0.09969335127536855,0.4633221856296583,0.4144230149893722,0.9797143208291796,0.9829416396964773,0.34627219635208273,0.9086451463612172,0.28414902233020944,0.3410837232734089,0.13480504509083976,0.13460988594536452,0.5500810137382961,0.18238709002315753,0.5188510118774764,0.4494787435381899,0.25320991408459015,0.2596066814778244,0.7027453899325112,0.46606500161119757,0.7623733167523703,0.3628393561824028,0.5794871072813119,0.10909090909090909,0.013340530536705737,0.13782009152761684,0.29605263157894735,0.17464788732394368,0.30500000000000005,0.1549520766773163,0.24047619047619045,0.24193548387096772,0.1863013698630137,0.3082191780821918,0.3821917808219178,0.16592212135690396,0.13716567578211192,0.13415649166471383,0.40776699029126207,0.05555555555555555,0.18036098141563842,0.028853697167088285,0.18277886087526787,0.045859591901569303,0.5883290826337872,0.3121515260630353,0.0024222132770710053,0.004621252164336263,0.00015416214761450488,0.007893014211979786,0.0,0.29,0.09433526011570838,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,0.2711864406779661,0.6142191591817839,0.3553155211005275,0.5747020343519587,0.3207651130335348,0.3041468093350269,0.640467674807096,0.5283607196497396,0.4477335736927467,0.23656483320764937,0.12511596962298183,
0.4015692878125249,0.6357808408182161,False,False,False,True,0.6315486105122701,False,False,False,True,0.5104500914524833,False,False,False,False,0.4426779344086705,False,False,False,False,0.35171603111639604,False,False,False,False,False,False,False,False,False,False,True,False,True,1,1,True,1,1,True,1,True,True -010010201002,0.15,1153.0,1.5632420452746556,9.9,6.7,6.7,12.3,10.9,12.8,0.1551860649247822,0.1955661124307205,0.2129849564528899,52649.0,1.5005033333966458,79000.0,73.1,0.03,18.03609814156384,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,2.61874365577,0.0737963352265,0.0604962870646,0.0643436665275,0.0,0.094623655914,0.0416305290546,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,57.0,9.07708716272948e-06,0.15696208303882897,0.42875102685480615,0.8522662275360714,0.5476983764902142,0.5368504002322592,0.4917349849538477,0.4465204908778381,0.5253319139719538,0.4797764044995599,0.645061784813366,0.5006443534530033,0.349989032281651,0.11922677359534743,0.8592240550695651,0.7098233128945732,0.09969335127536855,0.4633221856296583,0.4144230149893722,0.9797143208291796,0.9829416396964773,0.34627219635208273,0.9086451463612172,0.28414902233020944,0.09634507767787849,0.11004706512415299,0.1228504127842856,0.5178479846414291,0.18238709002315753,0.28270163797524656,0.3660890561105236,0.5188963977252613,0.2596066814778244,0.25592171848974055,0.2701365660159849,0.2207635715031339,0.3696173450745396,0.6379947997334159,0.10909090909090909,0.022227791486736582,0.3122290170974229,0.29605263157894735,0.17464788732394368,0.30500000000000005,0.1549520766773163,0.24047619047619045,0.24193548387096772,0.1551860649247822,0.1955661124307205,0.2129849564528899,0.16592212135690396,0.31058082624893096,0.30909245177816747,0.40776699029126207,0.05555555555555555,0.18036098141563842,0.028853697167088285,0.18277886087526787,0.045859591901569303,0.5883290826337872,0.3121515260630353,6.96928300032502e-05,0.004002684465613169,0.0001422163300237
9553,0.007158928457599425,0.0,0.094623655914,0.07993061578488315,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,0.2824858757062147,0.24545006875955938,0.05963631310728093,0.350886800163363,0.38153071177120307,0.2431668381096544,0.5996779005411742,0.4808408797306676,0.36620875596728303,0.17608814038438173,0.07182643137875756,0.2554172494220624,0.21102603786087423,False,False,False,False,0.2509565067420677,False,False,False,False,0.2850458170133389,False,False,False,False,0.16238982635453447,False,False,False,False,0.11055942342080659,False,False,False,False,False,False,False,False,False,False,True,False,False,0,0,False,0,0,False,0,False,False +GEOID10,Persistent Poverty Census Tract,Housing burden (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged >=18 years,Coronary heart disease among adults aged >=18 years,Cancer (excluding skin cancer) among adults aged >=18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged >=18 years,Physical health not good for >=14 days among adults aged >=18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals < 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income (% of AMI),Median household income in the past 12 months,Life expectancy (years),Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% 
of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed civilians (percent),Housing + Transportation Costs % Income for the Regional Typical Household,Housing burden (percent) (percentile),Housing burden (percent) (min-max normalized),Total population (percentile),Total population (min-max normalized),Median household income (% of state median household income) (percentile),Median household income (% of state median household income) (min-max normalized),Current asthma among adults aged >=18 years (percentile),Current asthma among adults aged >=18 years (min-max normalized),Coronary heart disease among adults aged >=18 years (percentile),Coronary heart disease among adults aged >=18 years (min-max normalized),Cancer (excluding skin cancer) among adults aged >=18 years (percentile),Cancer (excluding skin cancer) among adults aged >=18 years (min-max normalized),Current lack of health insurance among adults aged 18-64 years (percentile),Current lack of health insurance among adults aged 18-64 years (min-max normalized),Diagnosed diabetes among adults aged >=18 years (percentile),Diagnosed diabetes among adults aged >=18 years (min-max normalized),Physical health not good for >=14 days among adults aged >=18 years (percentile),Physical health not good for >=14 days among adults aged >=18 years (min-max normalized),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 100% Federal Poverty Line (min-max normalized),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (min-max normalized),Percent of individuals < 200% Federal Poverty Line (percentile),Percent of individuals < 200% Federal Poverty Line (min-max normalized),Area Median Income (State or metropolitan) (percentile),Area Median Income (State or metropolitan) (min-max normalized),Median household income (% of AMI) (percentile),Median household income (% of AMI) 
(min-max normalized),Median household income in the past 12 months (percentile),Median household income in the past 12 months (min-max normalized),Life expectancy (years) (percentile),Life expectancy (years) (min-max normalized),Energy burden (percentile),Energy burden (min-max normalized),FEMA Risk Index Expected Annual Loss Score (percentile),FEMA Risk Index Expected Annual Loss Score (min-max normalized),Urban Heuristic Flag (percentile),Urban Heuristic Flag (min-max normalized),Air toxics cancer risk (percentile),Air toxics cancer risk (min-max normalized),Respiratory hazard index (percentile),Respiratory hazard index (min-max normalized),Diesel particulate matter (percentile),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (percentile),Particulate matter (PM2.5) (min-max normalized),Ozone (percentile),Ozone (min-max normalized),Traffic proximity and volume (percentile),Traffic proximity and volume (min-max normalized),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to Risk Management Plan (RMP) facilities (min-max normalized),Proximity to TSDF sites (percentile),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (percentile),Proximity to NPL sites (min-max normalized),Wastewater discharge (percentile),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (percentile),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (percentile),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (percentile),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (percentile),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (percentile),Percent of households in linguistic isolation (min-max normalized),Poverty (Less than 200% of federal poverty line) (percentile),Poverty (Less than 200% of federal poverty line) (min-max 
normalized),Percent individuals age 25 or over with less than high school degree (percentile),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (percentile),Unemployed civilians (percent) (min-max normalized),Housing + Transportation Costs % Income for the Regional Typical Household (percentile),Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution Burden,Population Characteristics,Score C,Score D,Score E,"Low AMI, Low HS graduation",Meets socioeconomic criteria,Meets burden criteria,Score F (communities),Score G (communities),Score G,Score G (percentile),Score H (communities),Score H,Score I (communities),Score I,Score I (percentile),NMTC (communities),Score K (communities),Climate Factor (Definition L),Energy Factor (Definition L),Transportation Factor (Definition L),Housing Factor (Definition L),Pollution Factor (Definition L),Water Factor (Definition L),Health Factor (Definition L),Workforce Factor (Definition L),Definition L (communities),Any Non-Workforce Factor (Definition L),Definition L (percentile),Score A (percentile),Score A (top 25th percentile),Score A (top 30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th percentile),Score E (top 35th percentile),Score E (top 40th percentile) 
+010010201001,False,0.15,692.0,0.7064864650941903,9.9,6.7,6.7,12.3,10.9,12.8,0.1863013698630137,0.3082191780821918,0.3821917808219178,52649.0,0.6781325381298791,35703.0,73.1,0.03,18.03609814156384,1.0,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,91.0159000855,0.0852006888915,0.0655778245369,0.0709415490545,0.0,0.29,0.0491329479769,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,55.0,0.1569620830388289,0.109090909090909,0.1208920134523652,0.0133405305367057,0.2540730285250378,0.1378200915276168,0.5476983764902142,0.2960526315789473,0.5368504002322592,0.1746478873239436,0.4917349849538477,0.305,0.4465204908778381,0.1549520766773163,0.5253319139719538,0.2404761904761904,0.4797764044995599,0.2419354838709677,0.7110645998610756,0.1863013698630137,0.7030604504076335,0.3082191780821918,0.634843527218221,0.3821917808219178,0.1191862722261045,0.1659221213569039,0.2346087750128644,0.1371656757821119,0.1572642318672191,0.1341564916647138,0.0996933512753685,0.407766990291262,0.4633221856296583,0.0555555555555555,0.4144230149893722,0.1803609814156384,0.5963434959162439,1.0,0.9797143208291796,0.0288536971670882,0.9829416396964772,0.1827788608752678,0.3462721963520827,0.0458595919015693,0.9086451463612172,0.5883290826337872,0.2841490223302094,0.3121515260630353,0.3410837232734089,0.002422213277071,0.1348050450908397,0.0046212521643362,0.1346098859453645,0.0001541621476145,0.5500810137382961,0.0078930142119797,0.1823870900231575,0.0,0.5188510118774764,0.29,0.4494787435381899,0.0943352601157083,0.2532099140845901,0.0953757225434,0.2596066814778244,0.0,0.7027453899325112,0.04,0.4660650016111975,0.293352601156,0.7623733167523703,0.195011337868,0.3628393561824028,0.028125,0.5794871072813119,0.2711864406779661,0.6142191591817839,0.3553155211005275,0.5747020343519587,0.3207651130335348,0.3041468093350269,0.640467674807096,0.5283607196497396,0.4477335736927467,0.2365648332076493,0.1251159696229818,0.4015692878125249,False,False,True,False,True,1,1,
True,1,True,1,1,True,True,False,True,False,False,False,False,True,False,True,True,1,0.6357808408182161,False,False,False,True,0.6315486105122701,False,False,False,True,0.5104500914524833,False,False,False,False,0.4426779344086705,False,False,False,False,0.351716031116396,False,False,False,False +010010201002,False,0.15,1153.0,1.5632420452746556,9.9,6.7,6.7,12.3,10.9,12.8,0.1551860649247822,0.1955661124307205,0.2129849564528899,52649.0,1.5005033333966458,79000.0,73.1,0.03,18.03609814156384,1.0,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,2.61874365577,0.0737963352265,0.0604962870646,0.0643436665275,0.0,0.094623655914,0.0416305290546,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,57.0,0.1569620830388289,0.109090909090909,0.4287510268548061,0.0222277914867365,0.8522662275360714,0.3122290170974229,0.5476983764902142,0.2960526315789473,0.5368504002322592,0.1746478873239436,0.4917349849538477,0.305,0.4465204908778381,0.1549520766773163,0.5253319139719538,0.2404761904761904,0.4797764044995599,0.2419354838709677,0.645061784813366,0.1551860649247822,0.5006443534530033,0.1955661124307205,0.349989032281651,0.2129849564528899,0.1191862722261045,0.1659221213569039,0.8592240550695651,0.3105808262489309,0.7098233128945732,0.3090924517781674,0.0996933512753685,0.407766990291262,0.4633221856296583,0.0555555555555555,0.4144230149893722,0.1803609814156384,0.5963434959162439,1.0,0.9797143208291796,0.0288536971670882,0.9829416396964772,0.1827788608752678,0.3462721963520827,0.0458595919015693,0.9086451463612172,0.5883290826337872,0.2841490223302094,0.3121515260630353,0.0963450776778784,6.96928300032502e-05,0.1100470651241529,0.0040026844656131,0.1228504127842856,0.0001422163300237,0.5178479846414291,0.0071589284575994,0.1823870900231575,0.0,0.2827016379752465,0.094623655914,0.3660890561105236,0.0799306157848831,0.5188963977252613,0.150043365134,0.2596066814778244,0.0,0.2559217184897405,0.0,0.2701365660159849,0.182133564614,0.2207635715031
339,0.039119804401,0.3696173450745396,0.0287878787878787,0.6379947997334159,0.2824858757062147,0.2454500687595593,0.0596363131072809,0.350886800163363,0.381530711771203,0.2431668381096544,0.5996779005411742,0.4808408797306676,0.366208755967283,0.1760881403843817,0.0718264313787575,0.2554172494220624,False,False,True,False,False,0,0,False,0,False,0,0,False,False,False,False,False,False,False,False,False,False,False,False,0,0.2110260378608742,False,False,False,False,0.2509565067420677,False,False,False,False,0.2850458170133389,False,False,False,False,0.1623898263545344,False,False,False,False,0.1105594234208065,False,False,False,False diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 23a6752f..2b3bab06 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index 9475f269..f8fb284d 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index 43d8f79f..68ca05cf 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ diff --git 
a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 597f73d7..198bdc2b 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py b/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py index 3bc33b37..9ef74d71 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py @@ -107,7 +107,7 @@ def check_census_data_source( # check if census data is found locally if not os.path.isfile(census_data_path / "geojson" / "us.json"): logger.info( - "No local census data found. Please use '-d aws` to fetch from AWS" + "No local census data found. Please use '-s aws` to fetch from AWS" ) sys.exit()