diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index 5a0e7fba..63dba798 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -283,7 +283,7 @@ In a bit more detail: If you update the input or output to various methods, it is necessary to create new pickles so that data is validated correctly. To do this: 1. Drop a breakpoint just before the dataframe will otherwise be written to / read from disk. If you're using VSCode, use one of the named run targets within `data-pipeline` such as `Score Full Run` , and put a breakpoint in the margin just before the actionable step. More on using breakpoints in VSCode [here](https://code.visualstudio.com/docs/editor/debugging#_breakpoints). If you are not using VSCode, you can put the line `breakpoint()` in your code and it will stop where you have placed the line in whatever calling context you are using. -1. In your editor/terminal, run `df.to_pickle("data_pipeline/etl/score/tests/snapshots/YOUR_OUT_PATH_HERE.pkl")` to write the pickle to the appropriate location on disk. +1. In your editor/terminal, run `df.to_pickle("data_pipeline/etl/score/tests/snapshots/YOUR_OUT_PATH_HERE.pkl", protocol=4)` to write the pickle to the appropriate location on disk. 1. Be sure to do this for all inputs/outputs that have changed as a result of your modification. It is often necessary to do this several times for cascading operations. 1. To inspect your pickle, open a python interpreter, then run `pickle.load( open( "data_pipeline/etl/score/tests/snapshots/YOUR_OUT_PATH_HERE.pkl", "rb" ) )` to get file contents. 
diff --git a/data/data-pipeline/data_pipeline/etl/base.py b/data/data-pipeline/data_pipeline/etl/base.py index 8350e28a..ccd69fc0 100644 --- a/data/data-pipeline/data_pipeline/etl/base.py +++ b/data/data-pipeline/data_pipeline/etl/base.py @@ -19,6 +19,7 @@ class ExtractTransformLoad: DATA_PATH: Path = settings.APP_ROOT / "data" TMP_PATH: Path = DATA_PATH / "tmp" + FILES_PATH: Path = settings.APP_ROOT / "files" GEOID_FIELD_NAME: str = "GEOID10" GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT" # TODO: investigate. Census says there are only 217,740 CBGs in the US. diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index b4b429cb..7f5485d3 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -43,7 +43,8 @@ DATA_SCORE_TILES_DIR = DATA_SCORE_DIR / "tiles" SCORE_DOWNLOADABLE_DIR = DATA_SCORE_DIR / "downloadable" SCORE_DOWNLOADABLE_CSV_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.csv" SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = SCORE_DOWNLOADABLE_DIR / "usa.xlsx" -SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / "Draft_Communities_List.pdf" +SCORE_DOWNLOADABLE_PDF_FILE_NAME = "Draft_Communities_List.pdf" +SCORE_DOWNLOADABLE_PDF_FILE_PATH = FILES_PATH / SCORE_DOWNLOADABLE_PDF_FILE_NAME SCORE_DOWNLOADABLE_ZIP_FILE_PATH = ( SCORE_DOWNLOADABLE_DIR / "Screening_Tool_Data.zip" ) diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py b/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py index c1a1c4af..63081c29 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py +++ b/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py @@ -41,6 +41,7 @@ def etl(monkeypatch, root): etl = PostScoreETL() monkeypatch.setattr(etl, "DATA_PATH", root) monkeypatch.setattr(etl, "TMP_PATH", tmp_path) + return etl @@ -65,6 +66,11 @@ def score_data_initial(sample_data_dir): return sample_data_dir / 
"score_data_initial.csv" +@pytest.fixture() +def score_pdf_initial(sample_data_dir): + return sample_data_dir / "Draft_Communities_List.pdf" + + @pytest.fixture() def counties_transformed_expected(): return pd.DataFrame.from_dict( diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index 8f7259a3..463c1943 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10,Housing burden (percent),Total population,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to RMP sites,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed civilians (percent),Housing + Transportation Costs % Income for the Regional Typical Household,GEOID10 (percentile),Housing burden (percent) (percentile),Total population (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter (percentile),Particulate matter (PM2.5) (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to RMP sites (percentile),Proximity to TSDF sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in 
linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployed civilians (percent) (percentile),Housing + Transportation Costs % Income for the Regional Typical Household (percentile),Housing burden (percent) (min-max normalized),Total population (min-max normalized),Air toxics cancer risk (min-max normalized),Respiratory hazard index (min-max normalized),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (min-max normalized),Ozone (min-max normalized),Traffic proximity and volume (min-max normalized),Proximity to RMP sites (min-max normalized),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (min-max normalized),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (min-max normalized),Poverty (Less than 200% of federal poverty line) (min-max normalized),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (min-max normalized),Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution Burden,Population Characteristics,Score C,Score D,Score E,Score A (percentile),Score A (top 25th percentile),Score A (top 30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 
40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th percentile),Score E (top 35th percentile),Score E (top 40th percentile),Poverty (Less than 200% of federal poverty line) (top 25th percentile),Poverty (Less than 200% of federal poverty line) (top 30th percentile),Poverty (Less than 200% of federal poverty line) (top 35th percentile),Poverty (Less than 200% of federal poverty line) (top 40th percentile) -010010201001,0.15,692,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,91.0159000855,0.0852006888915,0.0655778245369,0.0709415490545,0.0,0.29,0.0491329479769,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,55.0,4.53858477849437e-06,0.15696279879978475,0.12089201345236528,0.9797143208291796,0.9829416396964773,0.34627219635208273,0.9086451463612172,0.28414902233020944,0.3410837232734089,0.13480504509083976,0.13460988594536452,0.5500810137382961,0.18238709002315753,0.5188510118774764,0.4494787435381899,0.25320991408459015,0.2596066814778244,0.7027453899325112,0.46606500161119757,0.7623733167523703,0.3628393561824028,0.5794871072813119,0.10909090909090909,0.013340530536705737,0.028853697167088285,0.18277886087526787,0.045859591901569303,0.5883290826337872,0.3121515260630353,0.0024222132770710053,0.004621252164336263,0.00015416214761450488,0.007893014211979786,0.0,0.29,0.09433526011570838,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,0.2711864406779661,0.6142191591817839,0.3553155211005275,0.5747020343519587,0.3207651130335348,0.3041468093350269,0.640467674807096,0.5283607196497396,0.4477335736927467,0.23656483320764937,0.12511596962298183,0.4015694309647159,0.6357808408182161,False,False,False,True,0.6315486105122701,False,False,False,True,0.5104500914524833,False,False,False,False,0.44267994354000534,False,False,False,False,0.3517176274094212
,False,False,False,False,False,False,False,False -010010201002,0.15,1153,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,2.61874365577,0.0737963352265,0.0604962870646,0.0643436665275,0.0,0.094623655914,0.0416305290546,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,57.0,9.07716955698874e-06,0.15696279879978475,0.42875102685480615,0.9797143208291796,0.9829416396964773,0.34627219635208273,0.9086451463612172,0.28414902233020944,0.09634507767787849,0.11004706512415299,0.1228504127842856,0.5178479846414291,0.18238709002315753,0.28270163797524656,0.3660890561105236,0.5188963977252613,0.2596066814778244,0.25592171848974055,0.2701365660159849,0.2207635715031339,0.3696173450745396,0.6379947997334159,0.10909090909090909,0.022227791486736582,0.028853697167088285,0.18277886087526787,0.045859591901569303,0.5883290826337872,0.3121515260630353,6.96928300032502e-05,0.004002684465613169,0.00014221633002379553,0.007158928457599425,0.0,0.094623655914,0.07993061578488315,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,0.2824858757062147,0.24545006875955938,0.05963631310728093,0.350886800163363,0.38153071177120307,0.2431668381096544,0.5996779005411742,0.4808408797306676,0.36620875596728303,0.17608814038438173,0.07182643137875756,0.2554173925742535,0.21102603786087423,False,False,False,False,0.2509565067420677,False,False,False,False,0.2850458170133389,False,False,False,False,0.16239056337452856,False,False,False,False,0.11055992520412285,False,False,False,False,False,False,False,False \ No newline at end of file +GEOID10,Housing burden (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged >=18 years,Coronary heart disease among adults aged >=18 years,Cancer (excluding skin cancer) among adults aged >=18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged >=18 years,Physical health not good for >=14 
days among adults aged >=18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals < 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income (% of AMI),Median household income in the past 12 months,Life expectancy (years),Energy burden,FEMA Risk Index Expected Annual Loss Score,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to RMP sites,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed civilians (percent),Housing + Transportation Costs % Income for the Regional Typical Household,GEOID10 (percentile),Housing burden (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged >=18 years (percentile),Coronary heart disease among adults aged >=18 years (percentile),Cancer (excluding skin cancer) among adults aged >=18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged >=18 years (percentile),Physical health not good for >=14 days among adults aged >=18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals < 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income (% of AMI) (percentile),Median household income in the past 12 months (percentile),Life expectancy (years) 
(percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter (percentile),Particulate matter (PM2.5) (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to RMP sites (percentile),Proximity to TSDF sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployed civilians (percent) (percentile),Housing + Transportation Costs % Income for the Regional Typical Household (percentile),Housing burden (percent) (min-max normalized),Total population (min-max normalized),Median household income (% of state median household income) (min-max normalized),Current asthma among adults aged >=18 years (min-max normalized),Coronary heart disease among adults aged >=18 years (min-max normalized),Cancer (excluding skin cancer) among adults aged >=18 years (min-max normalized),Current lack of health insurance among adults aged 18-64 years (min-max normalized),Diagnosed diabetes among adults aged >=18 years (min-max normalized),Physical health not good for >=14 days among adults aged >=18 years (min-max normalized),Percent of individuals < 100% Federal Poverty Line (min-max normalized),Percent of individuals < 150% Federal Poverty Line (min-max normalized),Percent of individuals < 200% Federal Poverty Line (min-max normalized),Area Median Income (State or metropolitan) (min-max normalized),Median household income (% of AMI) (min-max normalized),Median household income in the past 12 months (min-max 
normalized),Life expectancy (years) (min-max normalized),Energy burden (min-max normalized),FEMA Risk Index Expected Annual Loss Score (min-max normalized),Air toxics cancer risk (min-max normalized),Respiratory hazard index (min-max normalized),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (min-max normalized),Ozone (min-max normalized),Traffic proximity and volume (min-max normalized),Proximity to RMP sites (min-max normalized),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (min-max normalized),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (min-max normalized),Poverty (Less than 200% of federal poverty line) (min-max normalized),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (min-max normalized),Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution Burden,Population Characteristics,Score C,Score D,Score E,Score A (percentile),Score A (top 25th percentile),Score A (top 30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th 
percentile),Score E (top 35th percentile),Score E (top 40th percentile),Poverty (Less than 200% of federal poverty line) (top 25th percentile),Poverty (Less than 200% of federal poverty line) (top 30th percentile),Poverty (Less than 200% of federal poverty line) (top 35th percentile),Poverty (Less than 200% of federal poverty line) (top 40th percentile),"Low AMI, Low HS graduation",Meets socioeconomic criteria,Meets burden criteria,Score F (communities),Score G (communities),Score G,Score G (percentile),Score I (communities),Score I,Score I (percentile),Score H (communities),Score H,NMTC (communities),Score K (communities) +010010201001,0.15,692.0,0.7064864650941903,9.9,6.7,6.7,12.3,10.9,12.8,0.1863013698630137,0.3082191780821918,0.3821917808219178,52649.0,0.6781325381298791,35703.0,73.1,0.03,18.03609814156384,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,91.0159000855,0.0852006888915,0.0655778245369,0.0709415490545,0.0,0.29,0.0491329479769,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,55.0,4.53854358136474e-06,0.15696208303882897,0.12089201345236528,0.2540730285250378,0.5476983764902142,0.5368504002322592,0.4917349849538477,0.4465204908778381,0.5253319139719538,0.4797764044995599,0.7110645998610756,0.7030604504076335,0.634843527218221,0.11922677359534743,0.23460877501286445,0.15726423186721916,0.09969335127536855,0.4633221856296583,0.4144230149893722,0.9797143208291796,0.9829416396964773,0.34627219635208273,0.9086451463612172,0.28414902233020944,0.3410837232734089,0.13480504509083976,0.13460988594536452,0.5500810137382961,0.18238709002315753,0.5188510118774764,0.4494787435381899,0.25320991408459015,0.2596066814778244,0.7027453899325112,0.46606500161119757,0.7623733167523703,0.3628393561824028,0.5794871072813119,0.10909090909090909,0.013340530536705737,0.13782009152761684,0.29605263157894735,0.17464788732394368,0.30500000000000005,0.1549520766773163,0.24047619047619045,0.24193548387096772,0.1863013698630137,0.308219178082191
8,0.3821917808219178,0.16592212135690396,0.13716567578211192,0.13415649166471383,0.40776699029126207,0.05555555555555555,0.18036098141563842,0.028853697167088285,0.18277886087526787,0.045859591901569303,0.5883290826337872,0.3121515260630353,0.0024222132770710053,0.004621252164336263,0.00015416214761450488,0.007893014211979786,0.0,0.29,0.09433526011570838,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,0.2711864406779661,0.6142191591817839,0.3553155211005275,0.5747020343519587,0.3207651130335348,0.3041468093350269,0.640467674807096,0.5283607196497396,0.4477335736927467,0.23656483320764937,0.12511596962298183,0.4015692878125249,0.6357808408182161,False,False,False,True,0.6315486105122701,False,False,False,True,0.5104500914524833,False,False,False,False,0.4426779344086705,False,False,False,False,0.35171603111639604,False,False,False,False,False,False,False,False,False,False,True,False,True,1,1,True,1,1,True,1,True,True +010010201002,0.15,1153.0,1.5632420452746556,9.9,6.7,6.7,12.3,10.9,12.8,0.1551860649247822,0.1955661124307205,0.2129849564528899,52649.0,1.5005033333966458,79000.0,73.1,0.03,18.03609814156384,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,2.61874365577,0.0737963352265,0.0604962870646,0.0643436665275,0.0,0.094623655914,0.0416305290546,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,57.0,9.07708716272948e-06,0.15696208303882897,0.42875102685480615,0.8522662275360714,0.5476983764902142,0.5368504002322592,0.4917349849538477,0.4465204908778381,0.5253319139719538,0.4797764044995599,0.645061784813366,0.5006443534530033,0.349989032281651,0.11922677359534743,0.8592240550695651,0.7098233128945732,0.09969335127536855,0.4633221856296583,0.4144230149893722,0.9797143208291796,0.9829416396964773,0.34627219635208273,0.9086451463612172,0.28414902233020944,0.09634507767787849,0.11004706512415299,0.1228504127842856,0.5178479846414291,0.18238709002315753,0.28270163797524656,0.3660890561105236,0.5188963977252613,
0.2596066814778244,0.25592171848974055,0.2701365660159849,0.2207635715031339,0.3696173450745396,0.6379947997334159,0.10909090909090909,0.022227791486736582,0.3122290170974229,0.29605263157894735,0.17464788732394368,0.30500000000000005,0.1549520766773163,0.24047619047619045,0.24193548387096772,0.1551860649247822,0.1955661124307205,0.2129849564528899,0.16592212135690396,0.31058082624893096,0.30909245177816747,0.40776699029126207,0.05555555555555555,0.18036098141563842,0.028853697167088285,0.18277886087526787,0.045859591901569303,0.5883290826337872,0.3121515260630353,6.96928300032502e-05,0.004002684465613169,0.00014221633002379553,0.007158928457599425,0.0,0.094623655914,0.07993061578488315,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,0.2824858757062147,0.24545006875955938,0.05963631310728093,0.350886800163363,0.38153071177120307,0.2431668381096544,0.5996779005411742,0.4808408797306676,0.36620875596728303,0.17608814038438173,0.07182643137875756,0.2554172494220624,0.21102603786087423,False,False,False,False,0.2509565067420677,False,False,False,False,0.2850458170133389,False,False,False,False,0.16238982635453447,False,False,False,False,0.11055942342080659,False,False,False,False,False,False,False,False,False,False,True,False,False,0,0,False,0,0,False,0,False,False diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index ba72a170..23a6752f 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index 0d5f227a..9475f269 100644 Binary files 
a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index df253dbf..43d8f79f 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 0c264604..597f73d7 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py index 24e3a293..84d560e7 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py @@ -2,9 +2,10 @@ ## Above disables warning about access to underscore-prefixed methods from importlib import reload - +from pathlib import Path import pandas.api.types as ptypes import pandas.testing as pdt + from data_pipeline.etl.score import constants # See conftest.py for all fixtures used in these tests @@ -117,8 +118,17 @@ def test_load_tile_csv(etl, tile_data_expected): assert constants.DATA_SCORE_CSV_TILES_FILE_PATH.is_file() -def test_load_downloadable_zip(etl, downloadable_data_expected): +def test_load_downloadable_zip(etl, monkeypatch, downloadable_data_expected): reload(constants) + STATIC_FILES_PATH = ( + Path.cwd() / 
"data_pipeline" / "files" + ) # need to monkeypatch to real dir + monkeypatch.setattr(constants, "FILES_PATH", STATIC_FILES_PATH) + monkeypatch.setattr( + constants, + "SCORE_DOWNLOADABLE_PDF_FILE_PATH", + STATIC_FILES_PATH / constants.SCORE_DOWNLOADABLE_PDF_FILE_NAME, + ) etl._load_downloadable_zip( downloadable_data_expected, constants.SCORE_DOWNLOADABLE_DIR ) diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv index 7923f888..715ab55e 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/input.csv @@ -1,6 +1,6 @@ -TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL -40300,05007040300,10.492015,Very Low,15.3494 -20100,05001020100,14.705854,Relatively Low,36.725828 -40500,15007040500,10.234981,Very Low,13.997993 -21010,15001021010,21.537231,Relatively Moderate,59.488033 -21101,15001021101,19.434585,Relatively Low,53.392265 +TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE +40300,05007040300,10.492015,Very Low,15.3494,11.5 +20100,05001020100,14.705854,Relatively Low,36.725828,12.5 +40500,15007040500,10.234981,Very Low,13.997993,13.5 +21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5 +21101,15001021101,19.434585,Relatively Low,53.392265,15.5 diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv index 3bebd416..67ac5d00 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/output.csv @@ -1,11 +1,11 @@ -GEOID10,GEOID10_TRACT,TRACT,RISK_SCORE,RISK_RATNG,RISK_NPCTL -050070403001,05007040300,40300,10.492015,Very Low,15.3494 
-050070403002,05007040300,40300,10.492015,Very Low,15.3494 -050010201001,05001020100,20100,14.705854,Relatively Low,36.725828 -050010201002,05001020100,20100,14.705854,Relatively Low,36.725828 -150070405001,15007040500,40500,10.234981,Very Low,13.997993 -150070405002,15007040500,40500,10.234981,Very Low,13.997993 -150010210101,15001021010,21010,21.537231,Relatively Moderate,59.488033 -150010210102,15001021010,21010,21.537231,Relatively Moderate,59.488033 -150010211011,15001021101,21101,19.434585,Relatively Low,53.392265 -150010211012,15001021101,21101,19.434585,Relatively Low,53.392265 +GEOID10,FEMA Risk Index Expected Annual Loss Score +050070403001,11.5 +050070403002,11.5 +050010201001,12.5 +050010201002,12.5 +150070405001,13.5 +150070405002,13.5 +150010210101,14.5 +150010210102,14.5 +150010211011,15.5 +150010211012,15.5 diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/transform.csv b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/transform.csv new file mode 100644 index 00000000..662cde19 --- /dev/null +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/data/transform.csv @@ -0,0 +1,11 @@ +GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score +050070403001,05007040300,11.5 +050070403002,05007040300,11.5 +050010201001,05001020100,12.5 +050010201002,05001020100,12.5 +150070405001,15007040500,13.5 +150070405002,15007040500,13.5 +150010210101,15001021010,14.5 +150010210102,15001021010,14.5 +150010211011,15001021101,15.5 +150010211012,15001021101,15.5 diff --git a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py index 0cb5da2c..85110591 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/national_risk_index/test_etl.py @@ -73,13 +73,13 @@ class TestNationalRiskIndexETL: 
TRACT_COL = etl.GEOID_TRACT_FIELD_NAME BLOCK_COL = etl.GEOID_FIELD_NAME expected = pd.read_csv( - DATA_DIR / "output.csv", + DATA_DIR / "transform.csv", dtype={BLOCK_COL: "string", TRACT_COL: "string"}, ) # execution etl.transform() # validation - assert etl.df.shape == (10, 6) + assert etl.df.shape == (10, 3) assert etl.df.equals(expected) def test_load(self, mock_etl): @@ -90,21 +90,23 @@ class TestNationalRiskIndexETL: self.OUTPUT_DIR - The content of the file that's written matches the data in self.df """ - # setup + # setup - input variables etl = NationalRiskIndexETL() - output_path = etl.OUTPUT_DIR / "usa.csv" TRACT_COL = etl.GEOID_TRACT_FIELD_NAME BLOCK_COL = etl.GEOID_FIELD_NAME - expected = pd.read_csv( - DATA_DIR / "output.csv", - dtype={BLOCK_COL: str, TRACT_COL: str}, + output_path = etl.OUTPUT_DIR / "usa.csv" + # setup - mock transform step + df_transform = pd.read_csv( + DATA_DIR / "transform.csv", + dtype={BLOCK_COL: "string", TRACT_COL: "string"}, ) - etl.df = expected + etl.df = df_transform + # setup - load expected output + expected = pd.read_csv(DATA_DIR / "output.csv", dtype={BLOCK_COL: str}) # execution etl.load() - output = pd.read_csv( - output_path, dtype={BLOCK_COL: str, TRACT_COL: str} - ) + output = pd.read_csv(output_path, dtype={BLOCK_COL: str}) # validation assert output_path.exists() + assert output.shape == (10, 2) assert output.equals(expected) diff --git a/data/data-pipeline/poetry.lock b/data/data-pipeline/poetry.lock index f94fdd96..77cc834e 100644 --- a/data/data-pipeline/poetry.lock +++ b/data/data-pipeline/poetry.lock @@ -997,7 +997,7 @@ version = "3.0.9" description = "A Python library to read/write Excel 2010 xlsx/xlsm files" category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.6," [package.dependencies] et-xmlfile = "*" @@ -2040,8 +2040,6 @@ lxml = [ {file = "lxml-4.6.3-cp27-cp27m-win_amd64.whl", hash = "sha256:8157dadbb09a34a6bd95a50690595e1fa0af1a99445e2744110e3dca7831c4ee"}, {file = 
"lxml-4.6.3-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7728e05c35412ba36d3e9795ae8995e3c86958179c9770e65558ec3fdfd3724f"}, {file = "lxml-4.6.3-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:4bff24dfeea62f2e56f5bab929b4428ae6caba2d1eea0c2d6eb618e30a71e6d4"}, - {file = "lxml-4.6.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:64812391546a18896adaa86c77c59a4998f33c24788cadc35789e55b727a37f4"}, - {file = "lxml-4.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c1a40c06fd5ba37ad39caa0b3144eb3772e813b5fb5b084198a985431c2f1e8d"}, {file = "lxml-4.6.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:74f7d8d439b18fa4c385f3f5dfd11144bb87c1da034a466c5b5577d23a1d9b51"}, {file = "lxml-4.6.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:f90ba11136bfdd25cae3951af8da2e95121c9b9b93727b1b896e3fa105b2f586"}, {file = "lxml-4.6.3-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:4c61b3a0db43a1607d6264166b230438f85bfed02e8cff20c22e564d0faff354"}, @@ -2083,22 +2081,12 @@ lxml = [ {file = "lxml-4.6.3.tar.gz", hash = "sha256:39b78571b3b30645ac77b95f7c69d1bffc4cf8c3b157c435a34da72e78c82468"}, ] markupsafe = [ - {file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d8446c54dc28c01e5a2dbac5a25f071f6653e6e40f3a8818e8b45d790fe6ef53"}, - {file = "MarkupSafe-2.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:36bc903cbb393720fad60fc28c10de6acf10dc6cc883f3e24ee4012371399a38"}, - {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d7d807855b419fc2ed3e631034685db6079889a1f01d5d9dac950f764da3dad"}, - {file = "MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:add36cb2dbb8b736611303cd3bfcee00afd96471b09cda130da3581cbdc56a6d"}, - {file = 
"MarkupSafe-2.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:168cd0a3642de83558a5153c8bd34f175a9a6e7f6dc6384b9655d2697312a646"}, - {file = "MarkupSafe-2.0.1-cp310-cp310-win32.whl", hash = "sha256:99df47edb6bda1249d3e80fdabb1dab8c08ef3975f69aed437cb69d0a5de1e28"}, - {file = "MarkupSafe-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:e0f138900af21926a02425cf736db95be9f4af72ba1bb21453432a07f6082134"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:0446679737af14f45767963a1a9ef7620189912317d095f2d9ffa183a4d25d2b"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:f826e31d18b516f653fe296d967d700fddad5901ae07c622bb3705955e1faa94"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:fa130dd50c57d53368c9d59395cb5526eda596d3ffe36666cd81a44d56e48872"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:905fec760bd2fa1388bb5b489ee8ee5f7291d692638ea5f67982d968366bef9f"}, - {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf5d821ffabf0ef3533c39c518f3357b171a1651c1ff6827325e4489b0e46c3c"}, - {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0d4b31cc67ab36e3392bbf3862cfbadac3db12bdd8b02a2731f509ed5b829724"}, - {file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:baa1a4e8f868845af802979fcdbf0bb11f94f1cb7ced4c4b8a351bb60d108145"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-win32.whl", hash = 
"sha256:6c4ca60fa24e85fe25b912b01e62cb969d69a23a5d5867682dd3e80b5b02581d"}, {file = "MarkupSafe-2.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b2f4bf27480f5e5e8ce285a8c8fd176c0b03e93dcc6646477d4630e83440c6a9"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0717a7390a68be14b8c793ba258e075c6f4ca819f15edfc2a3a027c823718567"}, @@ -2107,21 +2095,14 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:d7f9850398e85aba693bb640262d3611788b1f29a79f0c93c565694658f4071f"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:6a7fae0dd14cf60ad5ff42baa2e95727c3d81ded453457771d02b7d2b3f9c0c2"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b7f2d075102dc8c794cbde1947378051c4e5180d52d276987b8d28a3bd58c17d"}, - {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9936f0b261d4df76ad22f8fee3ae83b60d7c3e871292cd42f40b81b70afae85"}, - {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a7d351cbd8cfeb19ca00de495e224dea7e7d919659c2841bbb7f420ad03e2d6"}, - {file = "MarkupSafe-2.0.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:60bf42e36abfaf9aff1f50f52644b336d4f0a3fd6d8a60ca0d054ac9f713a864"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-win32.whl", hash = "sha256:a30e67a65b53ea0a5e62fe23682cfe22712e01f453b95233b25502f7c61cb415"}, {file = "MarkupSafe-2.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:611d1ad9a4288cf3e3c16014564df047fe08410e628f89805e475368bd304914"}, - {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5bb28c636d87e840583ee3adeb78172efc47c8b26127267f54a9c0ec251d41a9"}, {file = "MarkupSafe-2.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:be98f628055368795d818ebf93da628541e10b75b41c559fdf36d104c5787066"}, {file = 
"MarkupSafe-2.0.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1d609f577dc6e1aa17d746f8bd3c31aa4d258f4070d61b2aa5c4166c1539de35"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7d91275b0245b1da4d4cfa07e0faedd5b0812efc15b702576d103293e252af1b"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:01a9b8ea66f1658938f65b93a85ebe8bc016e6769611be228d797c9d998dd298"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:47ab1e7b91c098ab893b828deafa1203de86d0bc6ab587b160f78fe6c4011f75"}, {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:97383d78eb34da7e1fa37dd273c20ad4320929af65d156e35a5e2d89566d9dfb"}, - {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fcf051089389abe060c9cd7caa212c707e58153afa2c649f00346ce6d260f1b"}, - {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5855f8438a7d1d458206a2466bf82b0f104a3724bf96a1c781ab731e4201731a"}, - {file = "MarkupSafe-2.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3dd007d54ee88b46be476e293f48c85048603f5f516008bee124ddd891398ed6"}, {file = "MarkupSafe-2.0.1-cp38-cp38-win32.whl", hash = "sha256:023cb26ec21ece8dc3907c0e8320058b2e0cb3c55cf9564da612bc325bed5e64"}, {file = "MarkupSafe-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:984d76483eb32f1bcb536dc27e4ad56bba4baa70be32fa87152832cdd9db0833"}, {file = "MarkupSafe-2.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2ef54abee730b502252bcdf31b10dacb0a416229b72c18b19e24a4509f273d26"}, @@ -2131,9 +2112,6 @@ markupsafe = [ {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:4efca8f86c54b22348a5467704e3fec767b2db12fc39c6d963168ab1d3fc9135"}, {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2010_x86_64.whl", hash = 
"sha256:ab3ef638ace319fa26553db0624c4699e31a28bb2a835c5faca8f8acf6a5a902"}, {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f8ba0e8349a38d3001fae7eadded3f6606f0da5d748ee53cc1dab1d6527b9509"}, - {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c47adbc92fc1bb2b3274c4b3a43ae0e4573d9fbff4f54cd484555edbf030baf1"}, - {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:37205cac2a79194e3750b0af2a5720d95f786a55ce7df90c3af697bfa100eaac"}, - {file = "MarkupSafe-2.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1f2ade76b9903f39aa442b4aadd2177decb66525062db244b35d71d0ee8599b6"}, {file = "MarkupSafe-2.0.1-cp39-cp39-win32.whl", hash = "sha256:10f82115e21dc0dfec9ab5c0223652f7197feb168c940f3ef61563fc2d6beb74"}, {file = "MarkupSafe-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:693ce3f9e70a6cf7d2fb9e6c9d8b204b6b39897a2c4a1aa65728d5ac97dcc1d8"}, {file = "MarkupSafe-2.0.1.tar.gz", hash = "sha256:594c67807fb16238b30c44bdf74f36c02cdf22d1c8cda91ef8a0ed8dabf5620a"}, diff --git a/data/data-pipeline/requirements.txt b/data/data-pipeline/requirements.txt index c0a51849..a12d0449 100644 --- a/data/data-pipeline/requirements.txt +++ b/data/data-pipeline/requirements.txt @@ -1,8 +1,12 @@ appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7" and platform_system == "Darwin" argcomplete==1.12.3; python_version < "3.8.0" and python_version >= "3.7" argon2-cffi==21.1.0; python_version >= "3.6" +astroid==2.8.0; python_version >= "3.6" and python_version < "4.0" +atomicwrites==1.4.0; python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.4.0" attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= 
"3.5.0" and python_version >= "3.6" backcall==0.2.0; python_version >= "3.7" +backports.entry-points-selectable==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" +black==21.9b0; python_full_version >= "3.6.2" bleach==4.1.0; python_version >= "3.7" censusdata==1.15; python_version >= "2.7" certifi==2021.5.30; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.7" @@ -11,21 +15,29 @@ charset-normalizer==2.0.6; python_full_version >= "3.6.0" and python_version >= click-plugins==1.1.1; python_version >= "3.6" click==8.0.1; python_version >= "3.6" cligj==0.7.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.6" -colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" and sys_platform == "win32" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0" and sys_platform == "win32" +colorama==0.4.4; platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.6.2" and sys_platform == "win32" and python_version < "4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.6" and python_full_version >= "3.5.0") and (python_version >= "3.7" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.7" and python_full_version >= "3.5.0") +configparser==5.0.2; python_version >= "3.6" cycler==0.10.0; python_version >= "3.7" debugpy==1.4.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" decorator==5.1.0; python_version >= "3.7" defusedxml==0.7.1; python_version >= "3.7" and python_full_version < "3.0.0" or 
python_full_version >= "3.5.0" and python_version >= "3.7" +distlib==0.3.2; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" +dparse==0.5.1; python_version >= "3.5" dynaconf==3.1.7; python_version >= "3.7" entrypoints==0.3; python_full_version >= "3.6.1" and python_version >= "3.7" +et-xmlfile==1.1.0; python_version >= "3.6" +filelock==3.0.12; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" fiona==1.8.20; python_version >= "3.6" +flake8==3.9.2; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") geopandas==0.9.0; python_version >= "3.6" idna==3.2; python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.5" -importlib-metadata==4.8.1; python_version == "3.7" +importlib-metadata==4.8.1; python_version == "3.7" and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.5.0" and python_version < "3.8" and python_version >= "3.6") and python_full_version >= "3.6.2" +iniconfig==1.1.1; python_version >= "3.6" ipykernel==6.4.1; python_version >= "3.7" ipython-genutils==0.2.0; python_version >= "3.7" ipython==7.27.0; python_version >= "3.7" ipywidgets==7.6.5 +isort==5.9.3; python_full_version >= "3.6.1" and python_version < "4.0" and python_version >= "3.6" jedi==0.18.0; python_version >= "3.7" jellyfish==0.6.1 jinja2==3.0.1; python_version >= "3.7" @@ -42,35 +54,49 @@ jupyter==1.0.0 jupyterlab-pygments==0.1.2; python_version >= "3.7" jupyterlab-widgets==1.0.2; python_version >= "3.6" kiwisolver==1.3.2; python_version >= "3.7" +lazy-object-proxy==1.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.6" and python_version < "4.0" and python_full_version >= "3.6.0" +liccheck==0.6.2; python_version >= "2.7" lxml==4.6.3; python_version >= "2.7" and python_full_version < 
"3.0.0" or python_full_version >= "3.5.0" markupsafe==2.0.1; python_version >= "3.7" matplotlib-inline==0.1.3; python_version >= "3.7" matplotlib==3.4.3; python_version >= "3.7" +mccabe==0.6.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.6" and python_version < "4.0" and python_full_version >= "3.5.0" mistune==0.8.4; python_version >= "3.7" munch==2.5.0; python_version >= "3.6" +mypy-extensions==0.4.3; python_full_version >= "3.6.2" and python_version >= "3.5" +mypy==0.910; python_version >= "3.5" nbclient==0.5.4; python_full_version >= "3.6.1" and python_version >= "3.7" nbconvert==6.1.0; python_version >= "3.7" nbformat==5.1.3; python_full_version >= "3.6.1" and python_version >= "3.7" nest-asyncio==1.5.1; python_full_version >= "3.6.1" and python_version >= "3.7" notebook==6.4.4; python_version >= "3.6" numpy==1.21.1; python_version >= "3.7" -packaging==21.0; python_version >= "3.7" +openpyxl==3.0.7; python_version >= "3.6" +packaging==21.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" pandas==1.3.3; python_full_version >= "3.7.1" pandocfilters==1.5.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.7" parso==0.8.2; python_version >= "3.7" +pathspec==0.9.0; python_full_version >= "3.6.2" pexpect==4.8.0; sys_platform != "win32" and python_version >= "3.7" pickleshare==0.7.5; python_version >= "3.7" pillow==8.3.2; python_version >= "3.7" +platformdirs==2.3.0; python_version >= "3.6" and python_full_version >= "3.6.2" and python_version < "4.0" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6") +pluggy==1.0.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" prometheus-client==0.11.0; python_version >= "3.6" and 
python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" prompt-toolkit==3.0.20; python_full_version >= "3.6.2" and python_version >= "3.7" ptyprocess==0.7.0; sys_platform != "win32" and python_version >= "3.7" and os_name != "nt" -py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.6" and python_full_version >= "3.4.0" +py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implementation_name == "pypy" or python_full_version >= "3.5.0" and python_version >= "3.6" and implementation_name == "pypy" +pycodestyle==2.7.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" +pyflakes==2.3.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" pygments==2.10.0; python_version >= "3.7" +pylint==2.11.1; python_version >= "3.6" and python_version < "4.0" pypandoc==1.6.4 pyparsing==2.4.7; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.7" pyproj==3.2.1; python_version >= "3.7" pyrsistent==0.18.0; python_version >= "3.6" +pytest-mock==3.6.1; python_version >= "3.6" +pytest==6.2.5; python_version >= "3.6" python-dateutil==2.8.2; python_full_version >= "3.7.1" and python_version >= "3.7" pytz==2021.1; python_full_version >= "3.7.1" and python_version >= "2.7" pywin32==301; sys_platform == "win32" and platform_python_implementation != "PyPy" and python_version >= "3.6" @@ -79,21 +105,29 @@ pyyaml==5.4.1; python_version >= "2.7" and python_full_version < "3.0.0" or pyth pyzmq==22.3.0; python_full_version >= "3.6.1" and python_version >= "3.7" qtconsole==5.1.1; python_version >= "3.6" qtpy==1.11.1; python_version >= "3.6" and 
python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.6" +regex==2021.8.28; python_full_version >= "3.6.2" requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0") +safety==1.10.3; python_version >= "3.5" +semantic-version==2.8.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "2.7" send2trash==1.8.0; python_version >= "3.6" shapely==1.7.1; python_version >= "3.6" -six==1.16.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.7" +six==1.16.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" terminado==0.12.1; python_version >= "3.6" testpath==0.5.0; python_version >= "3.7" +toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_full_version >= "3.5.0" and python_version >= "3.6" and python_version < "4.0" +tomli==1.2.1; python_version >= "3.6" and python_full_version >= "3.6.2" tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.7" +tox==3.24.4; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") tqdm==4.62.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.4.0") traitlets==5.1.0; python_full_version >= "3.6.1" and python_version >= "3.7" types-requests==2.25.8 typing-extensions==3.10.0.2; python_version < "3.8" and python_version >= "3.6" urllib3==1.26.6; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "2.7" us==2.0.2 +virtualenv==20.8.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" wcwidth==0.2.5; python_full_version >= "3.6.2" and python_version >= "3.7" webencodings==0.5.1; 
python_version >= "3.7" widgetsnbextension==3.5.1 +wrapt==1.12.1; python_version >= "3.6" and python_version < "4.0" xlsxwriter==2.0.0 zipp==3.5.0; python_version < "3.8" and python_version >= "3.6" diff --git a/data/data-pipeline/tox.ini b/data/data-pipeline/tox.ini index 50dbfc17..381139bb 100644 --- a/data/data-pipeline/tox.ini +++ b/data/data-pipeline/tox.ini @@ -1,7 +1,7 @@ [tox] # required because we use pyproject.toml isolated_build = true -envlist = py37, py38, py39, lint, checkdeps +envlist = py37, py38, py39, lint, checkdeps, pytest # only checks python versions installed locally skip_missing_interpreters = true @@ -18,3 +18,8 @@ commands = black data_pipeline deps = -rrequirements.txt commands = safety check liccheck + +[testenv:pytest] +# Run tests +deps = pytest +commands = pytest \ No newline at end of file diff --git a/infrastructure/functions/detect-changes-for-worker/package-lock.json b/infrastructure/functions/detect-changes-for-worker/package-lock.json index 237cf583..c47b8543 100644 --- a/infrastructure/functions/detect-changes-for-worker/package-lock.json +++ b/infrastructure/functions/detect-changes-for-worker/package-lock.json @@ -97,9 +97,9 @@ "dev": true }, "ansi-regex": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.0.tgz", - "integrity": "sha512-bY6fj56OUQ0hU1KjFNDQuJFezqKdrAyFdIevADiqrWHwSlbmBNMHp5ak2f40Pm8JTFyM2mqxkG6ngkHO11f/lg==", + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", + "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", "dev": true }, "ansi-styles": {