diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index 0036686a..14d5b570 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -359,6 +359,12 @@ fields: - score_name: Is there at least one abandoned mine in this census tract? label: Is there at least one abandoned mine in this census tract? format: bool +- score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False? + label: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False? + format: bool +- score_name: Is there at least one abandoned mine in this census tract, where missing data is treated as False? + label: Is there at least one abandoned mine in this census tract, where missing data is treated as False? + format: bool - score_name: There is at least one abandoned mine in this census tract and the tract is low income. label: There is at least one abandoned mine in this census tract and the tract is low income. format: bool diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index 62e657b1..18847687 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -369,6 +369,12 @@ sheets: - score_name: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. label: There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income. format: bool + - score_name: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False? + label: Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False? + format: bool + - score_name: Is there at least one abandoned mine in this census tract, where missing data is treated as False? + label: Is there at least one abandoned mine in this census tract, where missing data is treated as False? + format: bool - score_name: Tract-level redlining score meets or exceeds 3.25 and is low income label: Tract experienced historic underinvestment and remains low income format: bool diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 45415a57..d5573232 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -309,8 +309,10 @@ TILES_SCORE_COLUMNS = { field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME + field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS", field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET", - field_names.AML_BOOLEAN: "AML_ET", - field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET", + field_names.AML_BOOLEAN: "AML_RAW", + field_names.AML_BOOLEAN_FILLED_IN: "AML_ET", + field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_RAW", + field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME: "FUDS_ET", field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG", ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather ## FPL_200 (there is no higher ed in narwhal) diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index e8631258..663dc8d7 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -322,6 +322,8 @@ class ScoreETL(ExtractTransformLoad): # which are now deprecated. if not drop_tracts: # Create the "basic" percentile. + ## note: I believe this is less performant than if we made a bunch of these PFS columns + ## and then concatenated the list. For the refactor! df[ f"{output_column_name_root}" f"{field_names.PERCENTILE_FIELD_SUFFIX}" @@ -538,9 +540,12 @@ class ScoreETL(ExtractTransformLoad): df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric) - # coerce all booleans to bools + # coerce all booleans to bools preserving nan character + # since this is a boolean, need to use `None` for col in boolean_columns: - df_copy[col] = df_copy[col].astype(bool) + tmp = df_copy[col].copy() + df_copy[col] = np.where(tmp.notna(), tmp.astype(bool), None) + logger.info(f"{col} contains {df_copy[col].isna().sum()} nulls.") # Convert all columns to numeric and do math # Note that we have a few special conditions here and we handle them explicitly. diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index af29b11f..bf428750 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,At least one climate threshold exceeded,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,At least one housing threshold exceeded,Housing Factor (Definition N),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for wastewater discharge,Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,At least one water threshold exceeded,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?,Greater than or equal to the 90th percentile for unemployment and has low HS attainment?,At least one workforce threshold exceeded,Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition N),Total categories exceeded,Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition N community, including adjacency index tracts" -01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,0.0,False,0,True,0.8571428571428571,False,False,False -01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,False,False,True,False,False,True,False,False,True,False,False,True,True,False,True,False,True,True,True,False,True,True,True,True,False,True,True,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,6.0,True,1,True,1.0,True,True,True +GEOID10_TRACT,Persistent Poverty Census Tract,Does the tract have at least 35 acres in it?,Contains agricultural value,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Tract-level redlining score meets or exceeds 3.25,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,At least one climate threshold exceeded,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,At least one housing threshold exceeded,Housing Factor (Definition N),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,"Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?","Is there at least one abandoned mine in this census tract, where missing data is treated as False?",At least one pollution threshold exceeded,Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for wastewater discharge,Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,At least one water threshold exceeded,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS attainment?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS attainment?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS attainment?,Greater than or equal to the 90th percentile for unemployment and has low HS attainment?,At least one workforce threshold exceeded,Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition N),Total categories exceeded,Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition N community, including adjacency index tracts" +01073001100,True,True,True,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,,False,,True,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,True,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,0.0,False,0,True,0.8571428571428571,False,False,False +01073001400,True,True,True,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,,False,,True,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,False,False,True,False,False,True,False,False,True,False,False,True,True,False,True,False,True,True,True,False,True,True,True,True,False,True,True,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,6.0,True,1,True,1.0,True,True,True diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index a91ff8ca..675cf7e5 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index 5ed5203d..aaec36b6 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index 1d34250f..9defffce 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 4baa3447..613e7c18 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 2029fba2..3a721f60 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -355,9 +355,12 @@ TRANSPORTATION_COSTS = "Transportation Costs" # eAMLIS and FUDS variables AML_BOOLEAN = "Is there at least one abandoned mine in this census tract?" +AML_BOOLEAN_FILLED_IN = "Is there at least one abandoned mine in this census tract, where missing data is treated as False?" + ELIGIBLE_FUDS_BINARY_FIELD_NAME = ( "Is there at least one Formerly Used Defense Site (FUDS) in the tract?" ) +ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME = "Is there at least one Formerly Used Defense Site (FUDS) in the tract, where missing data is treated as False?" ##### # Names for individual factors being exceeded diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py index f76fff8b..5fb2923c 100644 --- a/data/data-pipeline/data_pipeline/score/score_narwhal.py +++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py @@ -488,13 +488,21 @@ class ScoreNarwhal(Score): >= self.ENVIRONMENTAL_BURDEN_THRESHOLD ) + self.df[field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME] = self.df[ + field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME + ].fillna(False) + + self.df[field_names.AML_BOOLEAN_FILLED_IN] = self.df[ + field_names.AML_BOOLEAN + ].fillna(False) + self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = self.df[ [ field_names.RMP_PCTILE_THRESHOLD, field_names.NPL_PCTILE_THRESHOLD, field_names.TSDF_PCTILE_THRESHOLD, - field_names.AML_BOOLEAN, - field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, + field_names.AML_BOOLEAN_FILLED_IN, + field_names.ELIGIBLE_FUDS_FILLED_IN_FIELD_NAME, ] ].any(axis="columns") @@ -513,7 +521,7 @@ class ScoreNarwhal(Score): ) self.df[field_names.AML_LOW_INCOME_FIELD] = ( - self.df[field_names.AML_BOOLEAN] + self.df[field_names.AML_BOOLEAN_FILLED_IN] & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] )