diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index 0f266bdc..716f8ce9 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -318,7 +318,7 @@ data_path = Path.cwd() # score data expected score_csv_path = data_path / "data_pipeline" / "data" / "score" / "csv" / "full" / "usa.csv" -score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10": "string"}, low_memory=False)[:2] +score_initial_df = pd.read_csv(score_csv_path, dtype={"GEOID10_TRACT": "string"}, low_memory=False)[:2] score_initial_df.to_csv(data_path / "data_pipeline" / "etl" / "score" / "tests" / "sample_data" /"score_data_initial.csv", index=False) ``` diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 4f8a6d01..11a29b11 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -59,7 +59,7 @@ SCORE_DOWNLOADABLE_ZIP_FILE_PATH = ( # Column subsets CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"] TILES_SCORE_COLUMNS = [ - "GEOID10", + "GEOID10_TRACT", "State Name", "County Name", "Total population", @@ -155,7 +155,7 @@ DOWNLOADABLE_SCORE_INDICATOR_COLUMNS_FULL = list( # Finally we augment with the GEOID10, county, and state DOWNLOADABLE_SCORE_COLUMNS = [ - "GEOID10", + "GEOID10_TRACT", "County Name", "State Name", "Score G (communities)", diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index 66c9e4d7..075dd072 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -23,7 +23,7 @@ class PostScoreETL(ExtractTransformLoad): self.input_counties_df: pd.DataFrame self.input_states_df: pd.DataFrame self.input_score_df: pd.DataFrame - self.input_national_cbg_df: pd.DataFrame + self.input_national_tract_df: pd.DataFrame self.output_score_county_state_merged_df: pd.DataFrame self.output_score_tiles_df: pd.DataFrame @@ -50,7 +50,9 @@ class PostScoreETL(ExtractTransformLoad): def _extract_score(self, score_path: Path) -> pd.DataFrame: logger.info("Reading Score CSV") - df = pd.read_csv(score_path, dtype={"GEOID10": "string"}) + df = pd.read_csv( + score_path, dtype={self.GEOID_TRACT_FIELD_NAME: "string"} + ) # Convert total population to an int: df["Total population"] = df["Total population"].astype( @@ -59,12 +61,14 @@ class PostScoreETL(ExtractTransformLoad): return df - def _extract_national_cbg(self, national_cbg_path: Path) -> pd.DataFrame: - logger.info("Reading national CBG") + def _extract_national_tract( + self, national_tract_path: Path + ) -> pd.DataFrame: + logger.info("Reading national tract file") return pd.read_csv( - national_cbg_path, - names=["GEOID10"], - dtype={"GEOID10": "string"}, + national_tract_path, + names=[self.GEOID_TRACT_FIELD_NAME], + dtype={self.GEOID_TRACT_FIELD_NAME: "string"}, low_memory=False, header=None, ) @@ -91,7 +95,7 @@ class PostScoreETL(ExtractTransformLoad): self.input_score_df = self._extract_score( constants.DATA_SCORE_CSV_FULL_FILE_PATH ) - self.input_national_cbg_df = self._extract_national_cbg( + self.input_national_tract_df = self._extract_national_tract( constants.DATA_CENSUS_CSV_FILE_PATH ) @@ -130,21 +134,22 @@ class PostScoreETL(ExtractTransformLoad): def _transform_score(self, initial_score_df: pd.DataFrame) -> pd.DataFrame: """ - Necessary modifications to the score dataframe + Add the GEOID field to the score dataframe to do the merge with counties """ - # Add the tract level column - new_df = initial_score_df.copy() - new_df["GEOID"] = initial_score_df.GEOID10.str[:5] - return new_df + # add GEOID column for counties + initial_score_df["GEOID"] = initial_score_df[ + self.GEOID_TRACT_FIELD_NAME + ].str[:5] + + return initial_score_df def _create_score_data( self, - national_cbg_df: pd.DataFrame, + national_tract_df: pd.DataFrame, counties_df: pd.DataFrame, states_df: pd.DataFrame, score_df: pd.DataFrame, ) -> pd.DataFrame: - # merge state with counties logger.info("Merging state with county info") county_state_merged = counties_df.merge( @@ -153,15 +158,19 @@ class PostScoreETL(ExtractTransformLoad): # merge state + county with score score_county_state_merged = score_df.merge( - county_state_merged, on="GEOID", how="left" + county_state_merged, + on="GEOID", # GEOID is the county ID + how="left", ) - # check if there are census cbgs without score - logger.info("Removing CBG rows without score") + # check if there are census tracts without score + logger.info("Removing tract rows without score") - # merge census cbgs with score - merged_df = national_cbg_df.merge( - score_county_state_merged, on="GEOID10", how="left" + # merge census tracts with score + merged_df = national_tract_df.merge( + score_county_state_merged, + on=self.GEOID_TRACT_FIELD_NAME, + how="left", ) # recast population to integer @@ -169,14 +178,14 @@ class PostScoreETL(ExtractTransformLoad): merged_df["Total population"].fillna(0.0).astype(int) ) - # list the null score cbgs - null_cbg_df = merged_df[merged_df["Score E (percentile)"].isnull()] + # list the null score tracts + null_tract_df = merged_df[merged_df["Score E (percentile)"].isnull()] # subtract data sets # this follows the XOR pattern outlined here: # https://stackoverflow.com/a/37313953 de_duplicated_df = pd.concat( - [merged_df, null_cbg_df, null_cbg_df] + [merged_df, null_tract_df, null_tract_df] ).drop_duplicates(keep=False) # set the score to the new df @@ -212,7 +221,7 @@ class PostScoreETL(ExtractTransformLoad): transformed_score = self._transform_score(self.input_score_df) output_score_county_state_merged_df = self._create_score_data( - self.input_national_cbg_df, + self.input_national_tract_df, transformed_counties, transformed_states, transformed_score, diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py b/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py index 63081c29..b7554271 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py +++ b/data/data-pipeline/data_pipeline/etl/score/tests/conftest.py @@ -75,11 +75,9 @@ def score_pdf_initial(sample_data_dir): def counties_transformed_expected(): return pd.DataFrame.from_dict( data={ - "State Abbreviation": pd.Series(["AL", "AL"], dtype="string"), - "GEOID": pd.Series(["01001", "01003"], dtype="string"), - "County Name": pd.Series( - ["AutaugaCounty", "BaldwinCounty"], dtype="object" - ), + "State Abbreviation": pd.Series(["AL"], dtype="string"), + "GEOID": pd.Series(["01073"], dtype="string"), + "County Name": pd.Series(["Jefferson County"], dtype="object"), }, ) @@ -105,11 +103,11 @@ def score_transformed_expected(): @pytest.fixture() -def national_cbg_df(): +def national_tract_df(): return pd.DataFrame.from_dict( data={ - "GEOID10": pd.Series( - ["010010201001", "010010201002"], dtype="string" + "GEOID10_TRACT": pd.Series( + ["01073001100", "01073001400"], dtype="string" ), }, ) diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/county_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/county_data_initial.csv index 05463b44..0cd54c4e 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/county_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/county_data_initial.csv @@ -1,3 +1,2 @@ USPS GEOID ANSICODE NAME POP10 HU10 ALAND AWATER ALAND_SQMI AWATER_SQMI INTPTLAT INTPTLONG -AL 01001 00161526 AutaugaCounty 54571 22135 1539582278 25775735 594.436 9.952 32.536382 -86.644490 -AL 01003 00161527 BaldwinCounty 182265 104061 4117521611 1133190229 1589.784 437.527 30.659218 -87.746067 \ No newline at end of file +AL 01073 00161562 Jefferson County 658466 300552 2878192209 32474487 1111.276 12.538 33.553444 -86.896536 diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index 1e7af807..d04ba002 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10,Persistent Poverty Census Tract,Housing burden (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged >=18 years,Coronary heart disease among adults aged >=18 years,Cancer (excluding skin cancer) among adults aged >=18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged >=18 years,Physical health not good for >=14 days among adults aged >=18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals < 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income (% of AMI),Median household income in the past 12 months,Life expectancy (years),Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed civilians (percent),Housing + Transportation Costs % Income for the Regional Typical Household,Housing burden (percent) (percentile),Housing burden (percent) (min-max normalized),Total population (percentile),Total population (min-max normalized),Median household income (% of state median household income) (percentile),Median household income (% of state median household income) (min-max normalized),Current asthma among adults aged >=18 years (percentile),Current asthma among adults aged >=18 years (min-max normalized),Coronary heart disease among adults aged >=18 years (percentile),Coronary heart disease among adults aged >=18 years (min-max normalized),Cancer (excluding skin cancer) among adults aged >=18 years (percentile),Cancer (excluding skin cancer) among adults aged >=18 years (min-max normalized),Current lack of health insurance among adults aged 18-64 years (percentile),Current lack of health insurance among adults aged 18-64 years (min-max normalized),Diagnosed diabetes among adults aged >=18 years (percentile),Diagnosed diabetes among adults aged >=18 years (min-max normalized),Physical health not good for >=14 days among adults aged >=18 years (percentile),Physical health not good for >=14 days among adults aged >=18 years (min-max normalized),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 100% Federal Poverty Line (min-max normalized),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (min-max normalized),Percent of individuals < 200% Federal Poverty Line (percentile),Percent of individuals < 200% Federal Poverty Line (min-max normalized),Area Median Income (State or metropolitan) (percentile),Area Median Income (State or metropolitan) (min-max normalized),Median household income (% of AMI) (percentile),Median household income (% of AMI) (min-max normalized),Median household income in the past 12 months (percentile),Median household income in the past 12 months (min-max normalized),Life expectancy (years) (percentile),Life expectancy (years) (min-max normalized),Energy burden (percentile),Energy burden (min-max normalized),FEMA Risk Index Expected Annual Loss Score (percentile),FEMA Risk Index Expected Annual Loss Score (min-max normalized),Urban Heuristic Flag (percentile),Urban Heuristic Flag (min-max normalized),Air toxics cancer risk (percentile),Air toxics cancer risk (min-max normalized),Respiratory hazard index (percentile),Respiratory hazard index (min-max normalized),Diesel particulate matter (percentile),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (percentile),Particulate matter (PM2.5) (min-max normalized),Ozone (percentile),Ozone (min-max normalized),Traffic proximity and volume (percentile),Traffic proximity and volume (min-max normalized),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to Risk Management Plan (RMP) facilities (min-max normalized),Proximity to TSDF sites (percentile),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (percentile),Proximity to NPL sites (min-max normalized),Wastewater discharge (percentile),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (percentile),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (percentile),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (percentile),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (percentile),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (percentile),Percent of households in linguistic isolation (min-max normalized),Poverty (Less than 200% of federal poverty line) (percentile),Poverty (Less than 200% of federal poverty line) (min-max normalized),Percent individuals age 25 or over with less than high school degree (percentile),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (percentile),Unemployed civilians (percent) (min-max normalized),Housing + Transportation Costs % Income for the Regional Typical Household (percentile),Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution Burden,Population Characteristics,Score C,Score D,Score E,"Low AMI, Low HS graduation",Meets socioeconomic criteria,Meets burden criteria,Score F (communities),Score G (communities),Score G,Score G (percentile),Score H (communities),Score H,Score I (communities),Score I,Score I (percentile),NMTC (communities),Score K (communities),Climate Factor (Definition L),Energy Factor (Definition L),Transportation Factor (Definition L),Housing Factor (Definition L),Pollution Factor (Definition L),Water Factor (Definition L),Health Factor (Definition L),Workforce Factor (Definition L),Definition L (communities),Any Non-Workforce Factor (Definition L),Definition L (percentile),Score A (percentile),Score A (top 25th percentile),Score A (top 30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th percentile),Score E (top 35th percentile),Score E (top 40th percentile) -010010201001,False,0.15,692.0,0.7064864650941903,9.9,6.7,6.7,12.3,10.9,12.8,0.1863013698630137,0.3082191780821918,0.3821917808219178,52649.0,0.6781325381298791,35703.0,73.1,0.03,18.03609814156384,1.0,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,91.0159000855,0.0852006888915,0.0655778245369,0.0709415490545,0.0,0.29,0.0491329479769,0.0953757225434,0.0,0.04,0.293352601156,0.195011337868,0.028125,55.0,0.1569620830388289,0.109090909090909,0.1208920134523652,0.0133405305367057,0.2540730285250378,0.1378200915276168,0.5476983764902142,0.2960526315789473,0.5368504002322592,0.1746478873239436,0.4917349849538477,0.305,0.4465204908778381,0.1549520766773163,0.5253319139719538,0.2404761904761904,0.4797764044995599,0.2419354838709677,0.7110645998610756,0.1863013698630137,0.7030604504076335,0.3082191780821918,0.634843527218221,0.3821917808219178,0.1191862722261045,0.1659221213569039,0.2346087750128644,0.1371656757821119,0.1572642318672191,0.1341564916647138,0.0996933512753685,0.407766990291262,0.4633221856296583,0.0555555555555555,0.4144230149893722,0.1803609814156384,0.5963434959162439,1.0,0.9797143208291796,0.0288536971670882,0.9829416396964772,0.1827788608752678,0.3462721963520827,0.0458595919015693,0.9086451463612172,0.5883290826337872,0.2841490223302094,0.3121515260630353,0.3410837232734089,0.002422213277071,0.1348050450908397,0.0046212521643362,0.1346098859453645,0.0001541621476145,0.5500810137382961,0.0078930142119797,0.1823870900231575,0.0,0.5188510118774764,0.29,0.4494787435381899,0.0943352601157083,0.2532099140845901,0.0953757225434,0.2596066814778244,0.0,0.7027453899325112,0.04,0.4660650016111975,0.293352601156,0.7623733167523703,0.195011337868,0.3628393561824028,0.028125,0.5794871072813119,0.2711864406779661,0.6142191591817839,0.3553155211005275,0.5747020343519587,0.3207651130335348,0.3041468093350269,0.640467674807096,0.5283607196497396,0.4477335736927467,0.2365648332076493,0.1251159696229818,0.4015692878125249,False,False,True,False,True,1,1,True,1,True,1,1,True,True,False,True,False,False,False,False,True,False,True,True,1,0.6357808408182161,False,False,False,True,0.6315486105122701,False,False,False,True,0.5104500914524833,False,False,False,False,0.4426779344086705,False,False,False,False,0.351716031116396,False,False,False,False -010010201002,False,0.15,1153.0,1.5632420452746556,9.9,6.7,6.7,12.3,10.9,12.8,0.1551860649247822,0.1955661124307205,0.2129849564528899,52649.0,1.5005033333966458,79000.0,73.1,0.03,18.03609814156384,1.0,49.3770316066,0.788051737456,0.2786630687,9.99813169399,40.1217287582,2.61874365577,0.0737963352265,0.0604962870646,0.0643436665275,0.0,0.094623655914,0.0416305290546,0.150043365134,0.0,0.0,0.182133564614,0.039119804401,0.0287878787878787,57.0,0.1569620830388289,0.109090909090909,0.4287510268548061,0.0222277914867365,0.8522662275360714,0.3122290170974229,0.5476983764902142,0.2960526315789473,0.5368504002322592,0.1746478873239436,0.4917349849538477,0.305,0.4465204908778381,0.1549520766773163,0.5253319139719538,0.2404761904761904,0.4797764044995599,0.2419354838709677,0.645061784813366,0.1551860649247822,0.5006443534530033,0.1955661124307205,0.349989032281651,0.2129849564528899,0.1191862722261045,0.1659221213569039,0.8592240550695651,0.3105808262489309,0.7098233128945732,0.3090924517781674,0.0996933512753685,0.407766990291262,0.4633221856296583,0.0555555555555555,0.4144230149893722,0.1803609814156384,0.5963434959162439,1.0,0.9797143208291796,0.0288536971670882,0.9829416396964772,0.1827788608752678,0.3462721963520827,0.0458595919015693,0.9086451463612172,0.5883290826337872,0.2841490223302094,0.3121515260630353,0.0963450776778784,6.96928300032502e-05,0.1100470651241529,0.0040026844656131,0.1228504127842856,0.0001422163300237,0.5178479846414291,0.0071589284575994,0.1823870900231575,0.0,0.2827016379752465,0.094623655914,0.3660890561105236,0.0799306157848831,0.5188963977252613,0.150043365134,0.2596066814778244,0.0,0.2559217184897405,0.0,0.2701365660159849,0.182133564614,0.2207635715031339,0.039119804401,0.3696173450745396,0.0287878787878787,0.6379947997334159,0.2824858757062147,0.2454500687595593,0.0596363131072809,0.350886800163363,0.381530711771203,0.2431668381096544,0.5996779005411742,0.4808408797306676,0.366208755967283,0.1760881403843817,0.0718264313787575,0.2554172494220624,False,False,True,False,False,0,0,False,0,False,0,0,False,False,False,False,False,False,False,False,False,False,False,False,0,0.2110260378608742,False,False,False,False,0.2509565067420677,False,False,False,False,0.2850458170133389,False,False,False,False,0.1623898263545344,False,False,False,False,0.1105594234208065,False,False,False,False +GEOID10_TRACT,Persistent Poverty Census Tract,Housing burden (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged >=18 years,Coronary heart disease among adults aged >=18 years,Cancer (excluding skin cancer) among adults aged >=18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged >=18 years,Physical health not good for >=14 days among adults aged >=18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals < 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income (% of AMI),Median household income in the past 12 months,Life expectancy (years),Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter,Particulate matter (PM2.5),Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to TSDF sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployed civilians (percent),Housing + Transportation Costs % Income for the Regional Typical Household,Median value ($) of owner-occupied housing units,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Housing burden (percent) (percentile),Housing burden (percent) (min-max normalized),Total population (percentile),Total population (min-max normalized),Median household income (% of state median household income) (percentile),Median household income (% of state median household income) (min-max normalized),Current asthma among adults aged >=18 years (percentile),Current asthma among adults aged >=18 years (min-max normalized),Coronary heart disease among adults aged >=18 years (percentile),Coronary heart disease among adults aged >=18 years (min-max normalized),Cancer (excluding skin cancer) among adults aged >=18 years (percentile),Cancer (excluding skin cancer) among adults aged >=18 years (min-max normalized),Current lack of health insurance among adults aged 18-64 years (percentile),Current lack of health insurance among adults aged 18-64 years (min-max normalized),Diagnosed diabetes among adults aged >=18 years (percentile),Diagnosed diabetes among adults aged >=18 years (min-max normalized),Physical health not good for >=14 days among adults aged >=18 years (percentile),Physical health not good for >=14 days among adults aged >=18 years (min-max normalized),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 100% Federal Poverty Line (min-max normalized),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (min-max normalized),Percent of individuals < 200% Federal Poverty Line (percentile),Percent of individuals < 200% Federal Poverty Line (min-max normalized),Area Median Income (State or metropolitan) (percentile),Area Median Income (State or metropolitan) (min-max normalized),Median household income (% of AMI) (percentile),Median household income (% of AMI) (min-max normalized),Median household income in the past 12 months (percentile),Median household income in the past 12 months (min-max normalized),Life expectancy (years) (percentile),Life expectancy (years) (min-max normalized),Energy burden (percentile),Energy burden (min-max normalized),FEMA Risk Index Expected Annual Loss Score (percentile),FEMA Risk Index Expected Annual Loss Score (min-max normalized),Urban Heuristic Flag (percentile),Urban Heuristic Flag (min-max normalized),Air toxics cancer risk (percentile),Air toxics cancer risk (min-max normalized),Respiratory hazard index (percentile),Respiratory hazard index (min-max normalized),Diesel particulate matter (percentile),Diesel particulate matter (min-max normalized),Particulate matter (PM2.5) (percentile),Particulate matter (PM2.5) (min-max normalized),Ozone (percentile),Ozone (min-max normalized),Traffic proximity and volume (percentile),Traffic proximity and volume (min-max normalized),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to Risk Management Plan (RMP) facilities (min-max normalized),Proximity to TSDF sites (percentile),Proximity to TSDF sites (min-max normalized),Proximity to NPL sites (percentile),Proximity to NPL sites (min-max normalized),Wastewater discharge (percentile),Wastewater discharge (min-max normalized),Percent pre-1960s housing (lead paint indicator) (percentile),Percent pre-1960s housing (lead paint indicator) (min-max normalized),Individuals under 5 years old (percentile),Individuals under 5 years old (min-max normalized),Individuals over 64 years old (percentile),Individuals over 64 years old (min-max normalized),Linguistic isolation (percent) (percentile),Linguistic isolation (percent) (min-max normalized),Percent of households in linguistic isolation (percentile),Percent of households in linguistic isolation (min-max normalized),Poverty (Less than 200% of federal poverty line) (percentile),Poverty (Less than 200% of federal poverty line) (min-max normalized),Percent individuals age 25 or over with less than high school degree (percentile),Percent individuals age 25 or over with less than high school degree (min-max normalized),Unemployed civilians (percent) (percentile),Unemployed civilians (percent) (min-max normalized),Housing + Transportation Costs % Income for the Regional Typical Household (percentile),Housing + Transportation Costs % Income for the Regional Typical Household (min-max normalized),Median value ($) of owner-occupied housing units (percentile),Median value ($) of owner-occupied housing units (min-max normalized),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected building loss rate (Natural Hazards Risk Index) (min-max normalized),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (min-max normalized),Expected population loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (min-max normalized),Score A,Score B,Socioeconomic Factors,Sensitive populations,Environmental effects,Exposures,Pollution Burden,Population Characteristics,Score C,Score D,Score E,"Low AMI, Low HS graduation",Meets socioeconomic criteria,Meets burden criteria,Score F (communities),Score G (communities),Score G,Score G (percentile),Score H (communities),Score H,Score I (communities),Score I,Score I (percentile),NMTC (communities),Score K (communities),Climate Factor (Definition L),Energy Factor (Definition L),Transportation Factor (Definition L),Housing Factor (Definition L),Pollution Factor (Definition L),Water Factor (Definition L),Health Factor (Definition L),Workforce Factor (Definition L),Definition L (communities),Any Non-Workforce Factor (Definition L),Definition L (percentile),Score A (percentile),Score A (top 25th percentile),Score A (top 30th percentile),Score A (top 35th percentile),Score A (top 40th percentile),Score B (percentile),Score B (top 25th percentile),Score B (top 30th percentile),Score B (top 35th percentile),Score B (top 40th percentile),Score C (percentile),Score C (top 25th percentile),Score C (top 30th percentile),Score C (top 35th percentile),Score C (top 40th percentile),Score D (percentile),Score D (top 25th percentile),Score D (top 30th percentile),Score D (top 35th percentile),Score D (top 40th percentile),Score E (percentile),Score E (top 25th percentile),Score E (top 30th percentile),Score E (top 35th percentile),Score E (top 40th percentile) +01073001100,True,0.3555555555555555,4897.0,0.7327449738800064,11.6,8.0,6.6,15.3,18.5,15.6,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,0.6445941476491375,37030.0,70.3,0.05,37.016020762747445,1.0,51.1530304943,0.735568574566,0.63998588,10.3951975342,39.404630719,253.995131498,2.03499777769,0.708723792992,0.134193041308,1.0556674669,0.205868653936,0.0308352052277,0.185011231366,0.0,0.0,0.407205697528,0.0911016949153,0.0092071611253196,38.0,85500.0,0.0199399484122346,0.0100179814652433,0.0003727376748275,0.7759834149819756,0.2585858585858585,0.6522319452040524,0.0696873532467162,0.2579768630616088,0.1567394572745385,0.8778835393887099,0.4078947368421052,0.7543109840773348,0.2112676056338028,0.4831470520823502,0.3,0.5996548516266466,0.2028753993610224,0.9542880640681668,0.4214285714285714,0.7464709927060441,0.3172043010752688,0.6168263717723972,0.150375939849624,0.7284793525682426,0.318796992481203,0.6286362018742833,0.3744360902255639,0.2971496201383377,0.2232322025800286,0.1757741606170599,0.1364573208439614,0.159739042112665,0.1395180645004889,0.0288201562824969,0.3398058252427184,0.8503016535630522,0.1111111111111111,0.8804885569871426,0.3701602076274744,0.5990586969676596,1.0,0.9853145934585578,0.0300384933188293,0.9694864048338367,0.1696726613313424,0.7707211349008275,0.1053235274194042,0.9307172437981412,0.5130755332417333,0.2444726227893863,0.2834163202031902,0.4679683805148301,0.0081194347824311,0.9043675287131808,0.1192079731904979,0.437416985702008,0.0016327034632494,0.7570383102846576,0.0172289593763426,0.924320063066614,9.412822248180045e-06,0.4488762142875745,0.205868653936,0.1217933692736411,0.0793595720750642,0.6929577831601427,0.185011231366,0.1279192436010707,0.0,0.1287405266036409,0.0,0.665989530432065,0.407205697528,0.4562465817642003,0.0911016949153,0.0304587252654599,0.0092071611253196,0.0933347766081238,0.2372881355932203,0.1107833945360314,0.0379401628742081,0.7913088977138564,0.0640255252302748,0.6494845360824743,0.0687832451226217,0.8598534256706785,0.0093618689816499,0.5611180560981327,0.3038554467503745,0.274954028134698,0.3142234653449515,0.694403820410807,0.7281133967159299,0.716876871280889,0.2945887467398247,0.2111838590774037,0.1532200824308956,0.4113194992089544,False,True,True,True,True,1,1,True,1,True,1,1,True,True,False,False,False,False,False,False,False,False,False,False,0,0.5707151600385447,False,False,False,False,0.5743417454488632,False,False,False,False,0.420866651834208,False,False,False,False,0.5840820897465948,False,False,False,False,0.3836627861551683,False,False,False,False +01073001400,True,0.2601092896174863,1906.0,0.7136694633528574,11.0,9.4,7.2,18.4,20.4,16.6,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,0.6278134628440127,36066.0,71.0,0.07,47.948511946777465,1.0,54.6950518653,0.76056054321,0.9098084377,10.498270137,39.3676601307,3015.87969265,1.81382525188,3.24085850684,0.214095348703,0.365101735929,0.628839590444,0.0582371458552,0.178908709339,0.0245098039215686,0.0165289256198,0.425498426023,0.148840688108,0.1150121065375302,44.0,67800.0,0.0995560141720271,0.0108783755900761,0.0018653260326052,0.5663862883099391,0.1891703924490809,0.0873245305622835,0.0271235644860611,0.2380414312617702,0.1524802086740484,0.8045192865135969,0.3684210526315788,0.8943994283480067,0.2507042253521127,0.6245163203947635,0.33,0.7302382332717638,0.2523961661341853,0.9743093661943348,0.4666666666666666,0.8111121597389815,0.3440860215053763,0.8631193240981165,0.2816032887975334,0.7965704042494299,0.3679342240493319,0.7848791996731208,0.4835560123329907,0.2971496201383377,0.2232322025800286,0.1615528584392014,0.1326629858757091,0.1480323983644555,0.1356231464796244,0.0390929078646344,0.3567961165048544,0.9660879441830278,0.1666666666666666,0.9676082963519111,0.4794851194677746,0.5990586969676596,1.0,0.9898725863654276,0.0324014302096176,0.9773807960068304,0.1759137053158734,0.9050177328254302,0.1497290027189099,0.9346157477978174,0.5213510451938572,0.2429887631355303,0.2826912370192468,0.943152489696642,0.0964083008791804,0.8841706383311196,0.1062519253676046,0.761049040289606,0.0074660410167418,0.8677110190900331,0.027487565893985,0.8909234242304798,3.255416928678718e-06,0.8349740865171759,0.628839590444,0.5187970414355288,0.1498830619032175,0.6589707529234055,0.178908709339,0.5804662877639023,0.0245098039215686,0.5016472641091752,0.0165289256198,0.6940255749147068,0.425498426023,0.6771362346016616,0.148840688108,0.9015492846415304,0.1150121065375302,0.2296362947527941,0.288135593220339,0.050687553715134,0.0290456994515583,0.983216164001288,0.3197166420251072,0.6757017645468937,0.0746906926653988,0.9859745848181268,0.0468739884070143,0.6855809047581842,0.469949864514998,0.6007989306039736,0.5860780273742789,0.8477656416916828,0.8321713526379463,0.8373694489891917,0.5934384789891263,0.4969272521601087,0.1806062834078359,0.683912734046348,False,True,True,True,True,1,1,True,1,True,1,1,True,True,True,True,True,False,False,False,True,True,True,True,1,0.7019884365966091,False,True,True,True,0.7102898663958122,False,True,True,True,0.9565888835084873,True,True,True,True,0.6881136547126078,False,False,True,True,0.7675469437716488,True,True,True,True diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 2b3bab06..be501bd1 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index f8fb284d..1416ef23 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index 68ca05cf..a91b1464 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 198bdc2b..a0e40c2a 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py index 84d560e7..5735bb83 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py @@ -29,7 +29,7 @@ def test_extract_states(etl, state_data_initial): def test_extract_score(etl, score_data_initial): extracted = etl._extract_score(score_data_initial) - string_cols = ["GEOID10"] + string_cols = ["GEOID10_TRACT"] assert all(ptypes.is_string_dtype(extracted[col]) for col in string_cols) @@ -63,14 +63,14 @@ def test_transform_score(etl, score_data_initial, score_transformed_expected): # pylint: disable=too-many-arguments def test_create_score_data( etl, - national_cbg_df, + national_tract_df, counties_transformed_expected, states_transformed_expected, score_transformed_expected, score_data_expected, ): score_data_actual = etl._create_score_data( - national_cbg_df, + national_tract_df, counties_transformed_expected, states_transformed_expected, score_transformed_expected,