mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 13:41:17 -07:00
Removed unused files
This commit is contained in:
parent
8712ff53ec
commit
d298f7dedb
11 changed files with 0 additions and 1983 deletions
|
@ -1,19 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreA(Score):
    """Score A: row-wise mean of two percentile columns.

    The inputs are the percentile-suffixed ``POVERTY_FIELD`` and
    ``HIGH_SCHOOL_ED_FIELD`` columns of ``self.df``.
    """

    def add_columns(self) -> pd.DataFrame:
        """Write ``field_names.SCORE_A`` onto ``self.df`` and return the frame."""
        logger.debug("Adding Score A")

        suffix = field_names.PERCENTILE_FIELD_SUFFIX
        input_columns = [
            field_names.POVERTY_FIELD + suffix,
            field_names.HIGH_SCHOOL_ED_FIELD + suffix,
        ]

        # axis=1 averages across the two columns for each row.
        row_means = self.df[input_columns].mean(axis=1)
        self.df[field_names.SCORE_A] = row_means
        return self.df
|
|
@ -1,21 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreB(Score):
    """Score B: element-wise product of two percentile columns.

    The inputs are the percentile-suffixed ``POVERTY_FIELD`` and
    ``HIGH_SCHOOL_ED_FIELD`` columns of ``self.df``.
    """

    def add_columns(self) -> pd.DataFrame:
        """Write ``field_names.SCORE_B`` onto ``self.df`` and return the frame."""
        logger.debug("Adding Score B")

        suffix = field_names.PERCENTILE_FIELD_SUFFIX
        poverty_percentile = self.df[field_names.POVERTY_FIELD + suffix]
        high_school_percentile = self.df[
            field_names.HIGH_SCHOOL_ED_FIELD + suffix
        ]

        self.df[field_names.SCORE_B] = (
            poverty_percentile * high_school_percentile
        )
        return self.df
|
|
@ -1,102 +0,0 @@
|
|||
from collections import namedtuple
|
||||
|
||||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreC(Score):
    """Score C: a bucketed score built entirely from percentile columns.

    Four buckets of percentile columns are averaged, then combined into a
    pollution aggregate and a population aggregate whose product is the score.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        """Define the four buckets, then defer to ``Score.__init__``.

        The bucket attributes are assigned before ``super().__init__(df)`` is
        called, mirroring the original statement order.
        """
        Bucket = namedtuple(typename="Bucket", field_names=["name", "fields"])

        def percentile_columns(*base_fields):
            # Every field in every bucket is used in its percentile form, so
            # the percentile suffix is appended to each base field name here.
            return [
                base + field_names.PERCENTILE_FIELD_SUFFIX
                for base in base_fields
            ]

        self.BUCKET_SOCIOECONOMIC = Bucket(
            field_names.C_SOCIOECONOMIC,
            percentile_columns(
                field_names.HOUSEHOLDS_LINGUISTIC_ISO_FIELD,
                field_names.POVERTY_FIELD,
                field_names.HIGH_SCHOOL_ED_FIELD,
                field_names.UNEMPLOYMENT_FIELD,
                field_names.HOUSING_BURDEN_FIELD,
            ),
        )
        self.BUCKET_SENSITIVE = Bucket(
            field_names.C_SENSITIVE,
            percentile_columns(
                field_names.UNDER_5_FIELD,
                field_names.OVER_64_FIELD,
                field_names.LINGUISTIC_ISO_FIELD,
            ),
        )
        self.BUCKET_ENVIRONMENTAL = Bucket(
            field_names.C_ENVIRONMENTAL,
            percentile_columns(
                field_names.RMP_FIELD,
                field_names.TSDF_FIELD,
                field_names.NPL_FIELD,
                field_names.WASTEWATER_FIELD,
                field_names.LEAD_PAINT_FIELD,
            ),
        )
        self.BUCKET_EXPOSURES = Bucket(
            field_names.C_EXPOSURES,
            percentile_columns(
                field_names.AIR_TOXICS_CANCER_RISK_FIELD,
                field_names.RESPIRATORY_HAZARD_FIELD,
                field_names.DIESEL_FIELD,
                field_names.PM25_FIELD,
                field_names.OZONE_FIELD,
                field_names.TRAFFIC_FIELD,
            ),
        )
        self.BUCKETS = [
            self.BUCKET_SOCIOECONOMIC,
            self.BUCKET_SENSITIVE,
            self.BUCKET_ENVIRONMENTAL,
            self.BUCKET_EXPOSURES,
        ]
        super().__init__(df)

    # "CalEnviroScreen for the US" score
    def add_columns(self) -> pd.DataFrame:
        """Add the bucket, aggregate and Score C columns; return ``self.df``."""
        logger.debug("Adding Score C")

        # One column per bucket: the row-wise mean of that bucket's percentiles.
        for bucket in self.BUCKETS:
            bucket_mean = self.df[bucket.fields].mean(axis=1)
            self.df[bucket.name] = bucket_mean

        # "Pollution Burden": Exposures at full weight, Environmental Effects
        # at half weight, i.e. (1.0 * exposures + 0.5 * environmental) / 1.5.
        exposures_score = self.df[self.BUCKET_EXPOSURES.name]
        environmental_score = self.df[self.BUCKET_ENVIRONMENTAL.name]
        self.df[field_names.AGGREGATION_POLLUTION_FIELD] = (
            1.0 * exposures_score + 0.5 * environmental_score
        ) / 1.5

        # "Population Characteristics": plain mean of the Sensitive
        # populations and Socioeconomic factors bucket scores.
        population_inputs = [
            self.BUCKET_SENSITIVE.name,
            self.BUCKET_SOCIOECONOMIC.name,
        ]
        self.df[field_names.AGGREGATION_POPULATION_FIELD] = self.df[
            population_inputs
        ].mean(axis=1)

        # The cumulative impact score is the product of the two aggregates.
        pollution_burden = self.df[field_names.AGGREGATION_POLLUTION_FIELD]
        population_characteristics = self.df[
            field_names.AGGREGATION_POPULATION_FIELD
        ]
        self.df[field_names.SCORE_C] = (
            pollution_burden * population_characteristics
        )
        return self.df
|
|
@ -1,34 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreD(Score):
    """Scores D and E: means of the same five fields under two normalizations.

    Score D averages the min-max-suffixed columns; Score E averages the
    percentile-suffixed columns.
    """

    def add_columns(self) -> pd.DataFrame:
        """Write ``SCORE_D`` and ``SCORE_E`` onto ``self.df`` and return it."""
        logger.debug("Adding Scores D and E")

        base_fields = [
            field_names.UNEMPLOYMENT_FIELD,
            field_names.LINGUISTIC_ISO_FIELD,
            field_names.HOUSING_BURDEN_FIELD,
            field_names.POVERTY_FIELD,
            field_names.HIGH_SCHOOL_ED_FIELD,
        ]

        min_max_columns = []
        percentile_columns = []
        for base in base_fields:
            min_max_columns.append(f"{base}{field_names.MIN_MAX_FIELD_SUFFIX}")
            percentile_columns.append(
                f"{base}{field_names.PERCENTILE_FIELD_SUFFIX}"
            )

        # Score D uses the min-max normalized columns.
        score_d = self.df[min_max_columns].mean(axis=1)
        self.df[field_names.SCORE_D] = score_d

        # Score E uses the percentile normalized columns for the same fields.
        score_e = self.df[percentile_columns].mean(axis=1)
        self.df[field_names.SCORE_E] = score_e

        return self.df
|
|
@ -1,97 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreF(Score):
    """Score F: rows that meet a socioeconomic criterion AND a burden criterion.

    Three helper columns are added to ``self.df`` on the way to the final
    ``field_names.SCORE_F_COMMUNITIES`` boolean column.
    """

    # TODO Make variables and constants clearer (meaning and type)

    def add_columns(self) -> pd.DataFrame:
        """Add the helper columns and the Score F column; return ``self.df``."""
        logger.debug("Adding Score F")

        ami_and_high_school_field = "Low AMI, Low HS graduation"
        meets_socio_field = "Meets socioeconomic criteria"
        meets_burden_field = "Meets burden criteria"

        # Helper 1: income-as-percent-of-state below 0.80 together with the
        # high school field above 0.2.
        income_below_cutoff = (
            self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD] < 0.80
        )
        high_school_above_low_cutoff = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD] > 0.2
        )
        self.df[ami_and_high_school_field] = (
            income_below_cutoff & high_school_above_low_cutoff
        )

        # Helper 2: any one of four socioeconomic conditions holds.
        poverty_above_cutoff = self.df[field_names.POVERTY_FIELD] > 0.40
        linguistic_iso_above_cutoff = (
            self.df[field_names.LINGUISTIC_ISO_FIELD] > 0.10
        )
        high_school_above_high_cutoff = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD] > 0.4
        )
        self.df[meets_socio_field] = (
            self.df[ami_and_high_school_field]
            | poverty_above_cutoff
            | linguistic_iso_above_cutoff
            | high_school_above_high_cutoff
        )

        # Helper 3: any one of these percentile columns is above 0.9.
        burden_fields = [
            field_names.PM25_FIELD,
            field_names.RESPIRATORY_HAZARD_FIELD,
            field_names.TRAFFIC_FIELD,
            field_names.LEAD_PAINT_FIELD,
            field_names.RMP_FIELD,
            field_names.ASTHMA_FIELD,
            field_names.HEART_DISEASE_FIELD,
            field_names.CANCER_FIELD,
            field_names.DIABETES_FIELD,
        ]
        burden_flags = [
            self.df[base + field_names.PERCENTILE_FIELD_SUFFIX] > 0.9
            for base in burden_fields
        ]
        # OR the flags together left to right, in the listed order.
        meets_burden = burden_flags[0]
        for flag in burden_flags[1:]:
            meets_burden = meets_burden | flag
        self.df[meets_burden_field] = meets_burden

        # Final designation requires both the socioeconomic and burden flags.
        self.df[field_names.SCORE_F_COMMUNITIES] = (
            self.df[meets_socio_field] & self.df[meets_burden_field]
        )

        return self.df
|
|
@ -1,34 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreG(Score):
    """Score G: an income-or-poverty test gated by a high school cutoff."""

    def add_columns(self) -> pd.DataFrame:
        """Add the Score G boolean, integer and "percentile" columns.

        Returns ``self.df`` after the three columns have been written.
        """
        logger.debug("Adding Score G")

        high_school_cutoff_threshold = 0.05

        above_high_school_cutoff = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD]
            > high_school_cutoff_threshold
        )
        income_below_cutoff = (
            self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.8
        )
        poverty_above_cutoff = (
            self.df[field_names.POVERTY_LESS_THAN_100_FPL_FIELD] > 0.20
        )

        # Score G is now modified NMTC
        self.df[field_names.SCORE_G_COMMUNITIES] = (
            income_below_cutoff & above_high_school_cutoff
        ) | (poverty_above_cutoff & above_high_school_cutoff)

        # Integer (0/1) form of the boolean designation.
        communities = self.df[field_names.SCORE_G_COMMUNITIES]
        self.df[field_names.SCORE_G] = communities.astype(int)

        # The "percentile" column is a straight copy of the integer score.
        self.df["Score G (percentile)"] = self.df[field_names.SCORE_G]

        return self.df
|
|
@ -1,32 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreH(Score):
    """Score H: an income-or-poverty test gated by a high school cutoff."""

    def add_columns(self) -> pd.DataFrame:
        """Add the Score H boolean and integer columns; return ``self.df``."""
        logger.debug("Adding Score H")

        high_school_cutoff_threshold = 0.06

        above_high_school_cutoff = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD]
            > high_school_cutoff_threshold
        )
        income_below_cutoff = (
            self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.8
        )
        poverty_above_cutoff = (
            self.df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD] > 0.40
        )

        # Either condition qualifies, but only with the high school gate.
        self.df[field_names.SCORE_H_COMMUNITIES] = (
            income_below_cutoff & above_high_school_cutoff
        ) | (poverty_above_cutoff & above_high_school_cutoff)

        # Integer (0/1) form of the boolean designation.
        communities = self.df[field_names.SCORE_H_COMMUNITIES]
        self.df[field_names.SCORE_H] = communities.astype(int)

        return self.df
|
|
@ -1,33 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreI(Score):
    """Score I: an income-or-poverty test gated by a high school cutoff."""

    def add_columns(self) -> pd.DataFrame:
        """Add the Score I boolean, integer and "percentile" columns.

        Returns ``self.df`` after the three columns have been written.
        """
        logger.debug("Adding Score I")

        high_school_cutoff_threshold = 0.05

        above_high_school_cutoff = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD]
            > high_school_cutoff_threshold
        )
        income_below_cutoff = (
            self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.7
        )
        poverty_above_cutoff = (
            self.df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD] > 0.50
        )

        # Either condition qualifies, but only with the high school gate.
        self.df[field_names.SCORE_I_COMMUNITIES] = (
            income_below_cutoff & above_high_school_cutoff
        ) | (poverty_above_cutoff & above_high_school_cutoff)

        # Integer (0/1) form of the boolean designation.
        communities = self.df[field_names.SCORE_I_COMMUNITIES]
        self.df[field_names.SCORE_I] = communities.astype(int)

        # The "percentile" column is a straight copy of the integer score.
        self.df["Score I (percentile)"] = self.df[field_names.SCORE_I]

        return self.df
|
|
@ -1,33 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreK(Score):
    """Score K: an income-or-poverty flag, with and without a high school gate."""

    def add_columns(self) -> pd.DataFrame:
        """Add ``SCORE_K`` and ``SCORE_K_COMMUNITIES``; return ``self.df``.

        ``SCORE_K`` is the ungated income-or-poverty flag, while
        ``SCORE_K_COMMUNITIES`` additionally requires the high school field
        to exceed the cutoff.
        """
        logger.debug("Adding Score K")

        high_school_cutoff_threshold = 0.06

        income_below_cutoff = (
            self.df[field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD] < 0.8
        )
        poverty_above_cutoff = (
            self.df[field_names.POVERTY_LESS_THAN_100_FPL_FIELD] > 0.20
        )
        above_high_school_cutoff = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD]
            > high_school_cutoff_threshold
        )

        # Ungated flag: either condition alone is sufficient.
        self.df[field_names.SCORE_K] = (
            income_below_cutoff | poverty_above_cutoff
        )

        # Gated flag: each condition must coincide with the high school gate.
        self.df[field_names.SCORE_K_COMMUNITIES] = (
            income_below_cutoff & above_high_school_cutoff
        ) | (poverty_above_cutoff & above_high_school_cutoff)

        return self.df
|
|
@ -1,690 +0,0 @@
|
|||
import data_pipeline.score.field_names as field_names
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreL(Score):
|
||||
def __init__(self, df: pd.DataFrame) -> None:
    """Set the cutoffs used by this class's methods, then defer to ``Score``.

    The thresholds are assigned before ``super().__init__(df)`` runs,
    mirroring the original statement order.
    """
    # Lower bound applied to percentile columns by the factor methods.
    self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
    # Upper bound applied to the median house value percentile.
    self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90
    # Lower bound applied to the 200%-FPL poverty percentile.
    self.LOW_INCOME_THRESHOLD: float = 0.65
    # Lower bound applied to the raw (non-percentile) high school field.
    self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10

    super().__init__(df)
|
||||
|
||||
def _combine_island_areas_with_states_and_set_thresholds(
    self,
    df: pd.DataFrame,
    column_from_island_areas: str,
    column_from_decennial_census: str,
    combined_column_name: str,
    threshold_cutoff_for_island_areas: float,
) -> (pd.DataFrame, str):
    """Flag island-area tracts that exceed a quantile of a combined field.

    The work happens in three steps:

    1. Merge the two input columns into ``combined_column_name`` by taking
       their row-wise mean with nulls ignored.
    2. Find the raw value of the combined column at the quantile
       ``threshold_cutoff_for_island_areas`` (nulls ignored).
    3. Add a boolean column that is true where the island-area column (and
       only that column) is at or above the raw value from step 2.

    Background carried over from the original notes: the combined field is
    meant to hold either an island area's Decennial Census value or a
    state's 5-year ACS value. Stateside decennial data stopped asking
    economic comparisons, so this is as close to apples-to-apples as is
    available, and 5-year ACS is preferred over 1-year for robustness.

    Args:
        df: frame to modify in place; it gains two columns.
        column_from_island_areas: column holding the island-area values.
        column_from_decennial_census: the second column merged in step 1.
        combined_column_name: name of the merged column to create.
        threshold_cutoff_for_island_areas: quantile (0-1) passed to
            ``np.nanquantile``.

    Returns:
        The same ``df`` and the name of the new boolean threshold column.
    """
    # TODO: move this combined field percentile calculation to `etl_score`,
    # since most other percentile logic is there.
    # Only one of the two source columns is expected to be populated per
    # row; averaging with skipna=True still gives a sensible value if both
    # happen to be present.
    source_columns = [column_from_island_areas, column_from_decennial_census]
    df[combined_column_name] = df[source_columns].mean(axis=1, skipna=True)

    missing_count = df[combined_column_name].isnull().sum()
    logger.debug(
        f"Combined field `{combined_column_name}` has "
        f"{missing_count} "
        f"({missing_count * 100 / len(df):.2f}%) "
        f"missing values for census tracts. "
    )

    # Raw value of the combined field at the requested quantile.
    raw_threshold = np.nanquantile(
        a=df[combined_column_name], q=threshold_cutoff_for_island_areas
    )

    cutoff_label = f"{threshold_cutoff_for_island_areas*100:.0f}"
    logger.debug(
        f"For combined field `{combined_column_name}`, "
        f"the {cutoff_label} percentile cutoff is a "
        f"raw value of {raw_threshold:.3f}."
    )

    threshold_column_name = (
        f"{column_from_island_areas} exceeds " f"{cutoff_label}th percentile"
    )

    # Only the island-area column is compared, so rows without island-area
    # data evaluate to False here.
    island_values = df[column_from_island_areas]
    df[threshold_column_name] = island_values >= raw_threshold

    flagged_count = df[threshold_column_name].sum()
    non_null_count = df[column_from_island_areas].notnull().sum()
    percent_of_tracts_highlighted = 100 * flagged_count / non_null_count

    logger.info(
        f"For `{threshold_column_name}`, "
        f"{flagged_count} ("
        f"{percent_of_tracts_highlighted:.2f}% of tracts that have non-null data "
        f"in the column) have a value of TRUE."
    )

    return df, threshold_column_name
|
||||
|
||||
def _create_low_income_threshold(self, df: pd.DataFrame) -> pd.Series:
    """Return a boolean series marking rows at or above the low-income cutoff.

    The comparison is made on the percentile-suffixed
    ``POVERTY_LESS_THAN_200_FPL_FIELD`` column of ``df`` against
    ``self.LOW_INCOME_THRESHOLD``.
    """
    percentile_column = (
        field_names.POVERTY_LESS_THAN_200_FPL_FIELD
        + field_names.PERCENTILE_FIELD_SUFFIX
    )
    poverty_percentile = df[percentile_column]
    return poverty_percentile >= self.LOW_INCOME_THRESHOLD
|
||||
|
||||
def _increment_total_eligibility_exceeded(
    self, columns_for_subset: list
) -> None:
    """Add the per-row count of true flags to the running threshold count.

    ``columns_for_subset`` names columns of ``self.df``; their row-wise sum
    (nulls skipped) is added onto ``field_names.THRESHOLD_COUNT``, which
    must already exist on ``self.df``.
    """
    flags_met_per_row = self.df[columns_for_subset].sum(axis=1, skipna=True)
    self.df[field_names.THRESHOLD_COUNT] += flags_met_per_row
|
||||
|
||||
def _climate_factor(self) -> pd.Series:
    """Flag rows meeting any climate criterion together with low income.

    A row meets a criterion when the percentile column for one of

    * expected population loss rate,
    * expected agriculture loss rate, or
    * expected building loss rate

    is at or above ``self.ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the row's
    ``field_names.FPL_200_SERIES`` flag is true. That flag is populated
    outside this method (presumably from ``_create_low_income_threshold``;
    verify against the caller).

    Side effects: writes one boolean column per criterion onto ``self.df``
    and adds the number of criteria met to the threshold count.

    Returns:
        A boolean series (one entry per row), true where at least one of
        the three criteria is met.
    """
    percentile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD

    # Output column -> base field whose percentile is tested.
    source_field_by_output = {
        field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: (
            field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
        ),
        field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: (
            field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
        ),
        field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: (
            field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
        ),
    }
    climate_eligibility_columns = list(source_field_by_output)

    # Evaluate every threshold before writing anything, so a missing input
    # column fails before self.df has been modified.
    exceeds_cutoff = {
        output: self.df[source + percentile_suffix] >= burden_cutoff
        for output, source in source_field_by_output.items()
    }

    low_income = self.df[field_names.FPL_200_SERIES]
    for output in climate_eligibility_columns:
        self.df[output] = exceeds_cutoff[output] & low_income

    self._increment_total_eligibility_exceeded(climate_eligibility_columns)

    return self.df[climate_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _energy_factor(self) -> pd.Series:
    """Flag rows meeting any energy criterion together with low income.

    A row meets a criterion when the percentile column for one of

    * PM2.5 (``field_names.PM25_FIELD``), or
    * energy burden (``field_names.ENERGY_BURDEN_FIELD``)

    is at or above ``self.ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the row's
    ``field_names.FPL_200_SERIES`` flag is true. That flag is populated
    outside this method (presumably from ``_create_low_income_threshold``;
    verify against the caller).

    Side effects: writes one boolean column per criterion onto ``self.df``
    and adds the number of criteria met to the threshold count.

    Returns:
        A boolean series (one entry per row), true where at least one of
        the two criteria is met.
    """
    percentile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD

    energy_eligibility_columns = [
        field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
        field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
    ]

    # Evaluate both thresholds before writing anything to self.df.
    energy_burden_threshold = (
        self.df[field_names.ENERGY_BURDEN_FIELD + percentile_suffix]
        >= burden_cutoff
    )
    pm25_threshold = (
        self.df[field_names.PM25_FIELD + percentile_suffix] >= burden_cutoff
    )

    low_income = self.df[field_names.FPL_200_SERIES]
    self.df[field_names.PM25_EXPOSURE_LOW_INCOME_FIELD] = (
        pm25_threshold & low_income
    )
    self.df[field_names.ENERGY_BURDEN_LOW_INCOME_FIELD] = (
        energy_burden_threshold & low_income
    )

    self._increment_total_eligibility_exceeded(energy_eligibility_columns)

    return self.df[energy_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _transportation_factor(self) -> pd.Series:
    """Flag rows meeting any transportation criterion together with low income.

    A row meets a criterion when the percentile column for one of

    * diesel particulate matter (``field_names.DIESEL_FIELD``), or
    * traffic proximity and volume (``field_names.TRAFFIC_FIELD``)

    is at or above ``self.ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the row's
    ``field_names.FPL_200_SERIES`` flag is true. That flag is populated
    outside this method (presumably from ``_create_low_income_threshold``;
    verify against the caller).

    Note that PM2.5 is not tested here; it is handled by
    ``_energy_factor``.

    Side effects: writes one boolean column per criterion onto ``self.df``
    and adds the number of criteria met to the threshold count.

    Returns:
        A boolean series (one entry per row), true where at least one of
        the two criteria is met.
    """
    percentile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD

    transportation_eligibility_columns = [
        field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
        field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
    ]

    # Evaluate both thresholds before writing anything to self.df.
    diesel_threshold = (
        self.df[field_names.DIESEL_FIELD + percentile_suffix] >= burden_cutoff
    )
    traffic_threshold = (
        self.df[field_names.TRAFFIC_FIELD + percentile_suffix] >= burden_cutoff
    )

    low_income = self.df[field_names.FPL_200_SERIES]
    self.df[field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD] = (
        diesel_threshold & low_income
    )
    self.df[field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD] = (
        traffic_threshold & low_income
    )

    self._increment_total_eligibility_exceeded(
        transportation_eligibility_columns
    )

    return self.df[transportation_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _housing_factor(self) -> pd.Series:
    """Flag rows meeting any housing criterion together with low income.

    The two criteria are:

    * lead paint percentile at or above
      ``self.ENVIRONMENTAL_BURDEN_THRESHOLD`` AND median house value
      percentile at or below ``self.MEDIAN_HOUSE_VALUE_THRESHOLD``; or
    * housing burden percentile at or above
      ``self.ENVIRONMENTAL_BURDEN_THRESHOLD``.

    Each criterion additionally requires the row's
    ``field_names.FPL_200_SERIES`` flag to be true. That flag is populated
    outside this method (presumably from ``_create_low_income_threshold``;
    verify against the caller).

    Side effects: writes one boolean column per criterion onto ``self.df``
    and adds the number of criteria met to the threshold count.

    Returns:
        A boolean series (one entry per row), true where at least one of
        the two criteria is met.
    """
    percentile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD

    housing_eligibility_columns = [
        field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
        field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
    ]

    # Lead paint only counts where the house value percentile is not above
    # its own cutoff.
    lead_paint_at_or_above_cutoff = (
        self.df[field_names.LEAD_PAINT_FIELD + percentile_suffix]
        >= burden_cutoff
    )
    house_value_at_or_below_cutoff = (
        self.df[field_names.MEDIAN_HOUSE_VALUE_FIELD + percentile_suffix]
        <= self.MEDIAN_HOUSE_VALUE_THRESHOLD
    )
    lead_paint_median_home_value_threshold = (
        lead_paint_at_or_above_cutoff & house_value_at_or_below_cutoff
    )

    housing_burden_threshold = (
        self.df[field_names.HOUSING_BURDEN_FIELD + percentile_suffix]
        >= burden_cutoff
    )

    # Per-criterion indicator columns, each gated on the low-income flag.
    low_income = self.df[field_names.FPL_200_SERIES]
    self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = (
        lead_paint_median_home_value_threshold & low_income
    )
    self.df[field_names.HOUSING_BURDEN_LOW_INCOME_FIELD] = (
        housing_burden_threshold & low_income
    )

    self._increment_total_eligibility_exceeded(housing_eligibility_columns)

    return self.df[housing_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _pollution_factor(self) -> pd.Series:
    """Flag rows meeting any pollution criterion together with low income.

    A row meets a criterion when the percentile column for one of

    * ``field_names.RMP_FIELD``,
    * ``field_names.NPL_FIELD``, or
    * ``field_names.TSDF_FIELD``

    is at or above ``self.ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the row's
    ``field_names.FPL_200_SERIES`` flag is true. That flag is populated
    outside this method (presumably from ``_create_low_income_threshold``;
    verify against the caller).

    Side effects: writes the RMP, Superfund and hazardous-waste low-income
    columns onto ``self.df`` and adds the number of criteria met to the
    threshold count.

    Returns:
        A boolean series (one entry per row), true where at least one of
        the three criteria is met.
    """
    percentile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD

    # Output column -> base field whose percentile is tested.
    source_field_by_output = {
        field_names.RMP_LOW_INCOME_FIELD: field_names.RMP_FIELD,
        field_names.SUPERFUND_LOW_INCOME_FIELD: field_names.NPL_FIELD,
        field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD: field_names.TSDF_FIELD,
    }
    pollution_eligibility_columns = list(source_field_by_output)

    # Evaluate every threshold before writing anything, so a missing input
    # column fails before self.df has been modified.
    exceeds_cutoff = {
        output: self.df[source + percentile_suffix] >= burden_cutoff
        for output, source in source_field_by_output.items()
    }

    low_income = self.df[field_names.FPL_200_SERIES]
    for output in pollution_eligibility_columns:
        self.df[output] = exceeds_cutoff[output] & low_income

    self._increment_total_eligibility_exceeded(pollution_eligibility_columns)

    return self.df[pollution_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _water_factor(self) -> pd.Series:
    """Flag rows meeting the wastewater criterion together with low income.

    A row qualifies when the percentile column for
    ``field_names.WASTEWATER_FIELD`` is at or above
    ``self.ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the row's
    ``field_names.FPL_200_SERIES`` flag is true. That flag is populated
    outside this method (presumably from ``_create_low_income_threshold``;
    verify against the caller).

    Side effects: writes
    ``field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD`` onto ``self.df``
    and adds it to the threshold count.

    Returns:
        The newly written wastewater low-income column of ``self.df``.
    """
    output_column = field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD

    wastewater_threshold = (
        self.df[
            field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
        ]
        >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
    )

    low_income = self.df[field_names.FPL_200_SERIES]
    self.df[output_column] = wastewater_threshold & low_income

    # This factor has a single criterion, so the subset is one column.
    self._increment_total_eligibility_exceeded([output_column])

    return self.df[output_column]
|
||||
|
||||
def _health_factor(self) -> pd.Series:
    """Flag rows meeting any health criterion together with low income.

    A row meets a criterion when the percentile column for one of

    * diabetes,
    * asthma,
    * heart disease, or
    * low life expectancy

    is at or above ``self.ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the row's
    ``field_names.FPL_200_SERIES`` flag is true. That flag is populated
    outside this method (presumably from ``_create_low_income_threshold``;
    verify against the caller).

    Side effects: writes one boolean column per criterion onto ``self.df``
    and adds the number of criteria met to the threshold count.

    Returns:
        A boolean series (one entry per row), true where at least one of
        the four criteria is met.
    """
    percentile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD

    # Output column -> base field whose percentile is tested.
    source_field_by_output = {
        field_names.DIABETES_LOW_INCOME_FIELD: field_names.DIABETES_FIELD,
        field_names.ASTHMA_LOW_INCOME_FIELD: field_names.ASTHMA_FIELD,
        field_names.HEART_DISEASE_LOW_INCOME_FIELD: (
            field_names.HEART_DISEASE_FIELD
        ),
        field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD: (
            field_names.LOW_LIFE_EXPECTANCY_FIELD
        ),
    }
    health_eligibility_columns = list(source_field_by_output)

    # Evaluate every threshold before writing anything, so a missing input
    # column fails before self.df has been modified.
    exceeds_cutoff = {
        output: self.df[source + percentile_suffix] >= burden_cutoff
        for output, source in source_field_by_output.items()
    }

    low_income = self.df[field_names.FPL_200_SERIES]
    for output in health_eligibility_columns:
        self.df[output] = exceeds_cutoff[output] & low_income

    self._increment_total_eligibility_exceeded(health_eligibility_columns)

    return self.df[health_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _workforce_factor(self) -> bool:
    """Flag tracts meeting the workforce criteria for states or island areas.

    States: a tract qualifies when any of unemployment, low median income as
    a percent of area median income, poverty (below 100% of the federal
    poverty level) or linguistic isolation has a percentile at or above
    ``ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the ``HIGH_SCHOOL_ED_FIELD`` value
    is at or above ``LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD`` (the education
    test screens out university tracts).

    Island areas: the same pattern using the decennial-census columns for
    unemployment, poverty and low median income, paired with
    ``CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009``.

    Side effects: writes the intermediate and combined flag columns to
    ``self.df`` and passes both eligibility column lists to
    ``_increment_total_eligibility_exceeded``.

    Returns:
        A boolean Series with one entry per row of ``self.df`` (the ``bool``
        annotation is kept only for interface compatibility).
    """
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    high_school_cutoff = self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX

    # ---- Criteria for the states ----
    state_columns = [
        field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
        field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
        field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
        field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
    ]

    self.df[field_names.LOW_HS_EDUCATION_FIELD] = (
        self.df[field_names.HIGH_SCHOOL_ED_FIELD] >= high_school_cutoff
    )

    unemployment_flag = (
        self.df[field_names.UNEMPLOYMENT_FIELD + pctile_suffix]
        >= burden_cutoff
    )
    low_median_income_flag = (
        self.df[
            field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
            + pctile_suffix
        ]
        >= burden_cutoff
    )
    linguistic_isolation_flag = (
        self.df[field_names.LINGUISTIC_ISO_FIELD + pctile_suffix]
        >= burden_cutoff
    )
    poverty_flag = (
        self.df[field_names.POVERTY_LESS_THAN_100_FPL_FIELD + pctile_suffix]
        >= burden_cutoff
    )

    # Each burden only counts when the tract also has low HS attainment.
    for target_column, burden_flag in (
        (
            field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
            linguistic_isolation_flag,
        ),
        (field_names.POVERTY_LOW_HS_EDUCATION_FIELD, poverty_flag),
        (
            field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
            low_median_income_flag,
        ),
        (field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD, unemployment_flag),
    ):
        self.df[target_column] = (
            burden_flag & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

    states_qualify = self.df[state_columns].any(axis="columns")
    self._increment_total_eligibility_exceeded(state_columns)

    # ---- Criteria for the island territories ----
    island_columns = [
        field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
        field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
        field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
    ]

    # Unemployment: combine island and state columns, then threshold.
    (
        self.df,
        island_unemployment_flag_column,
    ) = self._combine_island_areas_with_states_and_set_thresholds(
        df=self.df,
        column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
        column_from_decennial_census=field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
        combined_column_name=field_names.COMBINED_UNEMPLOYMENT_2010,
        threshold_cutoff_for_island_areas=burden_cutoff,
    )

    # Poverty: same treatment.
    (
        self.df,
        island_poverty_flag_column,
    ) = self._combine_island_areas_with_states_and_set_thresholds(
        df=self.df,
        column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
        column_from_decennial_census=field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
        combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
        threshold_cutoff_for_island_areas=burden_cutoff,
    )

    # Low area median income uses the existing percentile column directly.
    island_income_source = (
        field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
    )
    island_income_flag_column = (
        f"{island_income_source} exceeds "
        f"{field_names.PERCENTILE}th percentile"
    )
    self.df[island_income_flag_column] = (
        self.df[island_income_source + pctile_suffix] >= burden_cutoff
    )

    self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD] = (
        self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
        >= high_school_cutoff
    )

    for target_column, flag_column in (
        (
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            island_unemployment_flag_column,
        ),
        (
            field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
            island_poverty_flag_column,
        ),
        (
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
            island_income_flag_column,
        ),
    ):
        self.df[target_column] = (
            self.df[flag_column]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

    islands_qualify = self.df[island_columns].any(axis="columns")
    self._increment_total_eligibility_exceeded(island_columns)

    # Denominator: rows with a non-null value in one island-area column.
    flagged_island_tracts = islands_qualify.sum()
    island_tracts_with_data = (
        self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009]
        .notnull()
        .sum()
    )
    percent_of_island_tracts_highlighted = (
        100 * flagged_island_tracts / island_tracts_with_data
    )

    logger.debug(
        f"For workforce criteria in island areas, "
        f"{flagged_island_tracts} ("
        f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
        f"in the column) have a value of TRUE."
    )

    # Either set of criteria is sufficient.
    return states_qualify | islands_qualify
|
||||
|
||||
def add_columns(self) -> pd.DataFrame:
    """Compute every Score L factor column and the overall Score L columns.

    Resets ``THRESHOLD_COUNT`` to 0, stores the low-income series, runs the
    eight factor methods in order (each one writes its own columns to
    ``self.df``), then derives the "any factor" and "any non-workforce
    factor" flags plus an integer copy of the community flag.

    Returns:
        ``self.df`` with the new columns added.
    """
    logger.debug("Adding Score L")

    self.df[field_names.THRESHOLD_COUNT] = 0
    self.df[field_names.FPL_200_SERIES] = self._create_low_income_threshold(
        self.df
    )

    # Output column -> method that computes it. Workforce must stay last:
    # the non-workforce list below is everything but the final entry.
    factor_builders = (
        (field_names.L_CLIMATE, self._climate_factor),
        (field_names.L_ENERGY, self._energy_factor),
        (field_names.L_TRANSPORTATION, self._transportation_factor),
        (field_names.L_HOUSING, self._housing_factor),
        (field_names.L_POLLUTION, self._pollution_factor),
        (field_names.L_WATER, self._water_factor),
        (field_names.L_HEALTH, self._health_factor),
        (field_names.L_WORKFORCE, self._workforce_factor),
    )
    for factor_column, build_factor in factor_builders:
        self.df[factor_column] = build_factor()

    factors = [factor_column for factor_column, _ in factor_builders]
    self.df[field_names.SCORE_L_COMMUNITIES] = self.df[factors].any(axis=1)

    # Note: this is purely used for comparison tool analysis, and can be
    # removed at a later date. - LMB.
    non_workforce_factors = factors[:-1]
    self.df[field_names.L_NON_WORKFORCE] = self.df[
        non_workforce_factors
    ].any(axis=1)

    score_column = field_names.SCORE_L + field_names.PERCENTILE_FIELD_SUFFIX
    self.df[score_column] = self.df[field_names.SCORE_L_COMMUNITIES].astype(
        int
    )

    return self.df
|
|
@ -1,888 +0,0 @@
|
|||
from typing import Tuple
|
||||
|
||||
import data_pipeline.etl.score.constants as constants
|
||||
import data_pipeline.score.field_names as field_names
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from data_pipeline.score.score import Score
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class ScoreM(Score):
|
||||
"""Very similar to Score L, with a few minor modifications."""
|
||||
|
||||
def __init__(self, df: pd.DataFrame) -> None:
    """Set the Score M cutoffs, then pass ``df`` to the base initializer."""
    # Cutoffs compared against percentile-suffixed columns.
    self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
    self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90

    # Cutoffs compared against un-suffixed columns in the workforce factor.
    self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10
    self.MAX_COLLEGE_ATTENDANCE_THRESHOLD: float = 0.20

    # NOTE(review): not referenced by the methods visible in this section.
    self.LOW_INCOME_THRESHOLD: float = 0.65

    super().__init__(df)
|
||||
|
||||
def _combine_island_areas_with_states_and_set_thresholds(
    self,
    df: pd.DataFrame,
    column_from_island_areas: str,
    column_from_decennial_census: str,
    combined_column_name: str,
    threshold_cutoff_for_island_areas: float,
) -> Tuple[pd.DataFrame, str]:
    """Rank island-area values against state values and flag high tracts.

    Steps:

    1. Merge the two source columns into ``combined_column_name`` (row-wise
       mean, ignoring nulls).
    2. Percentile-rank the merged column, keeping the rank only for rows
       where ``column_from_decennial_census`` is null.
    3. Add a boolean column that is true where that rank is at or above
       ``threshold_cutoff_for_island_areas``.

    The stateside decennial census stopped asking economic questions, so the
    state column supplied by callers is the closest comparable figure; the
    5-year ACS is used for robustness over the 1-year ACS.

    Args:
        df: Frame to read from and add columns to (modified in place).
        column_from_island_areas: Name of the island-area source column.
        column_from_decennial_census: Name of the state source column.
        combined_column_name: Name for the merged column.
        threshold_cutoff_for_island_areas: Cutoff as a fraction (0-1).

    Returns:
        ``(df, threshold_column_name)`` where the second item is the name of
        the boolean column added in step 3.
    """
    # TODO: move this combined field percentile calculation to `etl_score`,
    # since most other percentile logic is there.
    # A tract is expected to have a value in only one of the two columns;
    # the null-skipping mean still gives a sensible answer if both are set.
    source_columns = [column_from_island_areas, column_from_decennial_census]
    df[combined_column_name] = df[source_columns].mean(axis=1, skipna=True)

    # TODO: move this code
    # Percentiles are computed over the combined data but reported only for
    # rows lacking the state value, which have no other comprehensive
    # percentile information.
    return_series_name = "".join(
        (
            column_from_island_areas,
            field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD,
            field_names.PERCENTILE_FIELD_SUFFIX,
        )
    )
    lacks_state_value = df[column_from_decennial_census].isna()
    combined_rank = df[combined_column_name].rank(pct=True)
    df[return_series_name] = np.where(lacks_state_value, combined_rank, np.nan)

    threshold_column_name = (
        f"{column_from_island_areas} exceeds "
        f"{threshold_cutoff_for_island_areas*100:.0f}th percentile"
    )
    # Comparisons against NaN are False, so rows with a state value never flag.
    df[threshold_column_name] = (
        df[return_series_name] >= threshold_cutoff_for_island_areas
    )

    return df, threshold_column_name
|
||||
|
||||
def _create_low_income_and_low_college_attendance_threshold(
    self, df: pd.DataFrame
) -> pd.Series:
    """Combine the low-income flag with the low-college-attendance flag.

    A row is true when ``LOW_INCOME_THRESHOLD`` is true AND either
    ``COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD`` is true or the tract has no
    college attendance data at all.

    Returns:
        The element-wise result for every row of ``df``.
    """
    is_low_income = df[field_names.LOW_INCOME_THRESHOLD]
    has_low_college_attendance = df[
        field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD
    ]
    # If college attendance data is null for this tract, just rely on the
    # poverty data.
    college_data_missing = df[field_names.COLLEGE_ATTENDANCE_FIELD].isna()

    return is_low_income & (has_low_college_attendance | college_data_missing)
|
||||
|
||||
def _increment_total_eligibility_exceeded(
    self, columns_for_subset: list, skip_fips: tuple = ()
) -> None:
    """Add each tract's count of true flags to ``THRESHOLD_COUNT``.

    Args:
        columns_for_subset: Columns of ``self.df`` to sum row-wise.
        skip_fips: FIPS prefixes; tracts whose ``GEOID_TRACT_FIELD`` starts
            with one of them get an increment of 0. This lets us skip data
            we think is of limited veracity without overriding any values.
            THIS IS A TEMPORARY FIX.
    """
    increment = self.df[columns_for_subset].sum(axis=1, skipna=True)

    if skip_fips:
        tract_is_skipped = self.df[
            field_names.GEOID_TRACT_FIELD
        ].str.startswith(skip_fips)
        increment = np.where(tract_is_skipped, 0, increment)

    self.df[field_names.THRESHOLD_COUNT] += increment
|
||||
|
||||
def _climate_factor(self) -> bool:
    """Flag tracts with a high expected-loss rate that are also low income.

    For expected population, agriculture and building loss rates (the
    original note attributes these to FEMA's Risk Index), a tract exceeds the
    threshold when the percentile is at or above
    ``ENVIRONMENTAL_BURDEN_THRESHOLD``. It becomes eligible when that is
    combined with ``FPL_200_AND_COLLEGE_ATTENDANCE_SERIES`` (low income and
    low higher-ed attendance; source: Census's American Community Survey).

    Side effects: writes the per-hazard flag columns,
    ``CLIMATE_THRESHOLD_EXCEEDED`` and the combined columns to ``self.df``,
    and increments the threshold count (skipping
    ``constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS``).

    Returns:
        A boolean Series, true where any combined column is true (the
        ``bool`` annotation is kept for interface compatibility).
    """
    cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX

    # Per hazard: (loss-rate field, "exceeds percentile" flag column,
    # flag-AND-low-income column).
    hazards = (
        (
            field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,
            field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
        (
            field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
            field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
        (
            field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
            field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD,
            field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
    )
    climate_eligibility_columns = [combined for _, _, combined in hazards]

    for rate_field, flag_column, _ in hazards:
        self.df[flag_column] = self.df[rate_field + pctile_suffix] >= cutoff

    population_flag, agriculture_flag, building_flag = (
        flag_column for _, flag_column, _ in hazards
    )
    self.df[field_names.CLIMATE_THRESHOLD_EXCEEDED] = (
        self.df[population_flag]
        | self.df[agriculture_flag]
        | self.df[building_flag]
    )

    for _, flag_column, combined_column in hazards:
        self.df[combined_column] = (
            self.df[flag_column]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

    self._increment_total_eligibility_exceeded(
        climate_eligibility_columns,
        skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
    )

    return self.df[climate_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _energy_factor(self) -> bool:
    """Flag tracts with high energy burden or PM2.5 that are also low income.

    A tract exceeds the threshold when the energy burden percentile (the
    original note cites DOE's LEAD Score) or the PM2.5 percentile is at or
    above ``ENVIRONMENTAL_BURDEN_THRESHOLD``. It becomes eligible when that is
    combined with ``FPL_200_AND_COLLEGE_ATTENDANCE_SERIES`` (low income and
    low higher-ed attendance; source: Census's American Community Survey).

    Side effects: writes both flag columns, ``ENERGY_THRESHOLD_EXCEEDED`` and
    the two combined columns to ``self.df``, and increments the threshold
    count (skipping ``constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS``).

    Returns:
        A boolean Series, true where either combined column is true (the
        ``bool`` annotation is kept for interface compatibility).
    """
    cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX

    energy_eligibility_columns = [
        field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD,
    ]

    burden_flag = (
        self.df[field_names.ENERGY_BURDEN_FIELD + pctile_suffix] >= cutoff
    )
    self.df[field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD] = burden_flag

    pm25_flag = self.df[field_names.PM25_FIELD + pctile_suffix] >= cutoff
    self.df[field_names.PM25_EXCEEDS_PCTILE_THRESHOLD] = pm25_flag

    self.df[field_names.ENERGY_THRESHOLD_EXCEEDED] = burden_flag | pm25_flag

    # Pair each burden flag with the socioeconomic screen.
    low_income = self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
    self.df[field_names.PM25_EXPOSURE_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
        pm25_flag & low_income
    )
    self.df[field_names.ENERGY_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
        burden_flag & low_income
    )

    self._increment_total_eligibility_exceeded(
        energy_eligibility_columns,
        skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
    )

    return self.df[energy_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _transportation_factor(self) -> bool:
    """Flag tracts with high diesel or traffic exposure that are low income.

    A tract exceeds the threshold when the diesel particulate matter
    percentile (original note: EPA National Air Toxics Assessment) or the
    traffic proximity and volume percentile (original note: 2017 U.S. DOT
    traffic data) is at or above ``ENVIRONMENTAL_BURDEN_THRESHOLD``. It
    becomes eligible when that is combined with
    ``FPL_200_AND_COLLEGE_ATTENDANCE_SERIES`` (low income and a low percent
    of higher-ed students; source: Census's American Community Survey).

    Side effects: writes both flag columns, ``TRAFFIC_THRESHOLD_EXCEEDED``
    and the two combined columns to ``self.df``, and increments the threshold
    count (skipping ``constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS``).

    Returns:
        A boolean Series, true where either combined column is true (the
        ``bool`` annotation is kept for interface compatibility).
    """
    cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX

    transportation_eligibility_columns = [
        field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD,
    ]

    diesel_flag = self.df[field_names.DIESEL_FIELD + pctile_suffix] >= cutoff
    self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD] = diesel_flag

    traffic_flag = self.df[field_names.TRAFFIC_FIELD + pctile_suffix] >= cutoff
    self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD] = traffic_flag

    self.df[field_names.TRAFFIC_THRESHOLD_EXCEEDED] = traffic_flag | diesel_flag

    # Pair each burden flag with the socioeconomic screen.
    low_income = self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
    self.df[
        field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_LOW_HIGHER_ED_FIELD
    ] = (diesel_flag & low_income)
    self.df[field_names.TRAFFIC_PROXIMITY_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
        traffic_flag & low_income
    )

    self._increment_total_eligibility_exceeded(
        transportation_eligibility_columns,
        skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
    )

    return self.df[transportation_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _housing_factor(self) -> bool:
    """Flag tracts with lead-paint or housing-cost burden that are low income.

    Two burdens are considered:

    * Lead paint proxy: the lead paint percentile (original note: percent of
      housing units built pre-1960, from the American Community Survey) is at
      or above ``ENVIRONMENTAL_BURDEN_THRESHOLD`` AND the median house value
      percentile is at or below ``MEDIAN_HOUSE_VALUE_THRESHOLD``.
    * Housing cost burden (original note: HUD's Comprehensive Housing
      Affordability Strategy dataset): percentile at or above
      ``ENVIRONMENTAL_BURDEN_THRESHOLD``.

    A tract becomes eligible when either burden is combined with
    ``FPL_200_AND_COLLEGE_ATTENDANCE_SERIES`` (low income and a low percent
    of higher-ed students; source: Census's American Community Survey).

    Side effects: writes both flag columns, ``HOUSING_THREHSOLD_EXCEEDED``
    and the two combined columns to ``self.df``, and increments the threshold
    count (skipping ``constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS``).

    Returns:
        A boolean Series, true where either combined column is true (the
        ``bool`` annotation is kept for interface compatibility).
    """
    cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX

    housing_eligibility_columns = [
        field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD,
    ]

    high_lead_paint = (
        self.df[field_names.LEAD_PAINT_FIELD + pctile_suffix] >= cutoff
    )
    house_value_within_cap = (
        self.df[field_names.MEDIAN_HOUSE_VALUE_FIELD + pctile_suffix]
        <= self.MEDIAN_HOUSE_VALUE_THRESHOLD
    )
    lead_paint_flag = high_lead_paint & house_value_within_cap
    self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] = lead_paint_flag

    housing_burden_flag = (
        self.df[field_names.HOUSING_BURDEN_FIELD + pctile_suffix] >= cutoff
    )
    self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD] = housing_burden_flag

    self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] = (
        lead_paint_flag | housing_burden_flag
    )

    # Series-by-series indicators, each paired with the socioeconomic screen.
    low_income = self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
    self.df[
        field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_LOW_HIGHER_ED_FIELD
    ] = (lead_paint_flag & low_income)
    self.df[field_names.HOUSING_BURDEN_LOW_INCOME_LOW_HIGHER_ED_FIELD] = (
        housing_burden_flag & low_income
    )

    self._increment_total_eligibility_exceeded(
        housing_eligibility_columns,
        skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
    )

    return self.df[housing_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _pollution_factor(self) -> bool:
    """Flag tracts near pollution sites that are also low income.

    A tract exceeds the threshold when the percentile for ``RMP_FIELD``
    (proximity to Risk Management Plan sites, per the original note),
    ``NPL_FIELD`` or ``TSDF_FIELD`` is at or above
    ``ENVIRONMENTAL_BURDEN_THRESHOLD``. It becomes eligible when that is
    combined with ``FPL_200_AND_COLLEGE_ATTENDANCE_SERIES`` (low income and a
    low percent of higher-ed students; source: Census's American Community
    Survey).

    Side effects: writes the three flag columns,
    ``POLLUTION_THRESHOLD_EXCEEDED`` and the three combined columns to
    ``self.df``, and increments the threshold count (skipping
    ``constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS``).

    Returns:
        A boolean Series, true where any combined column is true (the
        ``bool`` annotation is kept for interface compatibility).
    """
    cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX

    # Per site type: (proximity field, "exceeds percentile" flag column,
    # flag-AND-low-income column).
    site_types = (
        (
            field_names.RMP_FIELD,
            field_names.RMP_PCTILE_THRESHOLD,
            field_names.RMP_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
        (
            field_names.NPL_FIELD,
            field_names.NPL_PCTILE_THRESHOLD,
            field_names.SUPERFUND_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
        (
            field_names.TSDF_FIELD,
            field_names.TSDF_PCTILE_THRESHOLD,
            field_names.HAZARDOUS_WASTE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
    )
    pollution_eligibility_columns = [combined for _, _, combined in site_types]

    for proximity_field, flag_column, _ in site_types:
        self.df[flag_column] = (
            self.df[proximity_field + pctile_suffix] >= cutoff
        )

    rmp_flag, npl_flag, tsdf_flag = (
        flag_column for _, flag_column, _ in site_types
    )
    self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = (
        self.df[rmp_flag] | self.df[npl_flag]
    ) | self.df[tsdf_flag]

    # Individual series-by-series indicators.
    for _, flag_column, combined_column in site_types:
        self.df[combined_column] = (
            self.df[flag_column]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

    self._increment_total_eligibility_exceeded(
        pollution_eligibility_columns,
        skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
    )

    return self.df[pollution_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _water_factor(self) -> bool:
    """Flag tracts with high wastewater discharge that are also low income.

    A tract exceeds the threshold when the wastewater discharge percentile
    (original note: EPA Risk-Screening Environmental Indicators model) is at
    or above ``ENVIRONMENTAL_BURDEN_THRESHOLD``. It becomes eligible when
    that is combined with ``FPL_200_AND_COLLEGE_ATTENDANCE_SERIES`` (low
    income and a low percent of higher-ed students; source: Census's American
    Community Survey).

    Side effects: writes the flag column, ``WATER_THRESHOLD_EXCEEDED`` and
    the combined column to ``self.df``, and increments the threshold count
    (skipping ``constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS``).

    Returns:
        The combined column as stored in ``self.df`` (a Series; the ``bool``
        annotation is kept for interface compatibility).
    """
    flag_column = field_names.WASTEWATER_PCTILE_THRESHOLD
    eligibility_column = (
        field_names.WASTEWATER_DISCHARGE_LOW_INCOME_LOW_HIGHER_ED_FIELD
    )
    wastewater_percentile = self.df[
        field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
    ]

    self.df[flag_column] = (
        wastewater_percentile >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
    )

    # Straight copy here in case we add additional water fields.
    self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[flag_column].copy()

    self.df[eligibility_column] = (
        self.df[flag_column]
        & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
    )

    self._increment_total_eligibility_exceeded(
        [eligibility_column],
        skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
    )

    return self.df[eligibility_column]
|
||||
|
||||
def _health_factor(self) -> bool:
    """Flag tracts with a high health burden that are also low income.

    A tract exceeds the threshold when the percentile for diabetes, asthma,
    heart disease or low life expectancy is at or above
    ``ENVIRONMENTAL_BURDEN_THRESHOLD`` (the original note cites CDC Places
    for diabetes, asthma and low life expectancy). It becomes eligible when
    that is combined with ``FPL_200_AND_COLLEGE_ATTENDANCE_SERIES`` (low
    income and a low percent of higher-ed students; source: Census's
    American Community Survey).

    Side effects: writes the four flag columns,
    ``HEALTH_THRESHOLD_EXCEEDED`` and the four combined columns to
    ``self.df``, and increments the threshold count (skipping
    ``constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS``).

    Returns:
        A boolean Series, true where any combined column is true (the
        ``bool`` annotation is kept for interface compatibility).
    """
    cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX

    # Per condition: (indicator field, "exceeds percentile" flag column,
    # flag-AND-low-income column).
    conditions = (
        (
            field_names.DIABETES_FIELD,
            field_names.DIABETES_PCTILE_THRESHOLD,
            field_names.DIABETES_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
        (
            field_names.ASTHMA_FIELD,
            field_names.ASTHMA_PCTILE_THRESHOLD,
            field_names.ASTHMA_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
        (
            field_names.HEART_DISEASE_FIELD,
            field_names.HEART_DISEASE_PCTILE_THRESHOLD,
            field_names.HEART_DISEASE_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
        (
            field_names.LOW_LIFE_EXPECTANCY_FIELD,
            field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD,
            field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_LOW_HIGHER_ED_FIELD,
        ),
    )
    health_eligibility_columns = [combined for _, _, combined in conditions]

    for indicator_field, flag_column, _ in conditions:
        self.df[flag_column] = (
            self.df[indicator_field + pctile_suffix] >= cutoff
        )

    diabetes_flag, asthma_flag, heart_flag, life_expectancy_flag = (
        flag_column for _, flag_column, _ in conditions
    )
    self.df[field_names.HEALTH_THRESHOLD_EXCEEDED] = (
        (self.df[diabetes_flag] | self.df[asthma_flag])
        | self.df[heart_flag]
    ) | self.df[life_expectancy_flag]

    for _, flag_column, combined_column in conditions:
        self.df[combined_column] = (
            self.df[flag_column]
            & self.df[field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES]
        )

    self._increment_total_eligibility_exceeded(
        health_eligibility_columns,
        skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
    )

    return self.df[health_eligibility_columns].any(axis="columns")
|
||||
|
||||
def _workforce_factor(self) -> bool:
    """Flag the rows of ``self.df`` that meet the workforce criteria.

    A row qualifies through the "states" path when at least one of these
    percentile columns is at or above ``ENVIRONMENTAL_BURDEN_THRESHOLD``:

    * unemployment,
    * low median income as a percent of area median income,
    * poverty (below 100% of the federal poverty level),
    * linguistic isolation,

    AND the high-school-education field is at or above
    ``LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD`` AND college attendance is
    either at or below ``MAX_COLLEGE_ATTENDANCE_THRESHOLD`` or null (the
    original authors note that the college check is there to screen out
    university tracts).

    A row qualifies through the "island areas" path when the island
    unemployment, poverty, or low-median-income threshold column is true
    AND the island high-school-education field is at or above
    ``LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD``.

    Side effects: writes every intermediate boolean column to ``self.df``,
    rebinds ``self.df`` to the frame returned by
    ``_combine_island_areas_with_states_and_set_thresholds``, and calls
    ``_increment_total_eligibility_exceeded`` once per path.

    Returns:
        The element-wise OR of the two per-path ``any(axis="columns")``
        results. NOTE(review): the signature says ``bool``, but what is
        returned is that row-wise boolean result, not a scalar.
    """
    pctile_suffix = field_names.PERCENTILE_FIELD_SUFFIX
    burden_cutoff = self.ENVIRONMENTAL_BURDEN_THRESHOLD
    low_hs_cutoff = self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD

    # ------------------------------------------------------------------
    # States
    # ------------------------------------------------------------------
    state_columns = [
        field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD,
        field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD,
        field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD,
        field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD,
    ]

    # Socioeconomic gate shared by every states criterion. A null college
    # attendance value does not disqualify a tract.
    college_share = self.df[field_names.COLLEGE_ATTENDANCE_FIELD]
    lacks_high_school = (
        self.df[field_names.HIGH_SCHOOL_ED_FIELD] >= low_hs_cutoff
    )
    low_or_unknown_college = (
        college_share <= self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
    ) | college_share.isna()
    self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD] = (
        lacks_high_school & low_or_unknown_college
    )

    # (threshold column to create, measure whose percentile is compared).
    # The order is the order in which the columns are added to the frame.
    state_thresholds = (
        (
            field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
            field_names.UNEMPLOYMENT_FIELD,
        ),
        (
            field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
            field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD,
        ),
        (
            field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
            field_names.LINGUISTIC_ISO_FIELD,
        ),
        (
            field_names.POVERTY_PCTILE_THRESHOLD,
            field_names.POVERTY_LESS_THAN_100_FPL_FIELD,
        ),
    )
    for threshold_field, measure_field in state_thresholds:
        self.df[threshold_field] = (
            self.df[measure_field + pctile_suffix] >= burden_cutoff
        )

    # (combined column to create, threshold column it is gated from).
    state_combinations = (
        (
            field_names.LINGUISTIC_ISOLATION_LOW_HS_LOW_HIGHER_ED_FIELD,
            field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
        ),
        (
            field_names.POVERTY_LOW_HS_LOW_HIGHER_ED_FIELD,
            field_names.POVERTY_PCTILE_THRESHOLD,
        ),
        (
            field_names.LOW_MEDIAN_INCOME_LOW_HS_LOW_HIGHER_ED_FIELD,
            field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
        ),
        (
            field_names.UNEMPLOYMENT_LOW_HS_LOW_HIGHER_ED_FIELD,
            field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
        ),
    )
    for combined_field, threshold_field in state_combinations:
        self.df[combined_field] = (
            self.df[threshold_field]
            & self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD]
        )

    meets_state_criteria = self.df[state_columns].any(axis="columns")

    self._increment_total_eligibility_exceeded(state_columns)

    # ------------------------------------------------------------------
    # Island territories
    # ------------------------------------------------------------------
    island_columns = [
        field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
        field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
        field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
    ]

    # Unemployment: the helper returns the updated frame and the name of
    # the threshold column it created. Per the original authors, this also
    # yields an adjusted percentile column for the island areas that the
    # front end uses.
    (
        self.df,
        island_unemployment_threshold_field,
    ) = self._combine_island_areas_with_states_and_set_thresholds(
        df=self.df,
        column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
        column_from_decennial_census=field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
        combined_column_name=field_names.COMBINED_UNEMPLOYMENT_2010,
        threshold_cutoff_for_island_areas=burden_cutoff,
    )

    # TODO: Remove this, it's for checking only
    assert (
        island_unemployment_threshold_field
        == field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD
    ), "Error combining island columns"

    # Poverty: same treatment as unemployment.
    (
        self.df,
        island_poverty_threshold_field,
    ) = self._combine_island_areas_with_states_and_set_thresholds(
        df=self.df,
        column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
        column_from_decennial_census=field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
        combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
        threshold_cutoff_for_island_areas=burden_cutoff,
    )

    # TODO: Remove this, it's for checking only
    assert (
        island_poverty_threshold_field
        == field_names.ISLAND_POVERTY_PCTILE_THRESHOLD
    ), "Error combining island columns"

    # Low area median income needs no combining step, so its existing
    # percentile column is compared directly against the burden cutoff.
    # (The original authors flag this as a candidate for a later refactor.)
    island_income_pctile_field = (
        field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
        + pctile_suffix
    )
    self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] = (
        self.df[island_income_pctile_field] >= burden_cutoff
    )

    self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD] = (
        self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
        >= low_hs_cutoff
    )

    # (combined column to create, threshold column it is gated from).
    island_combinations = (
        (
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            island_unemployment_threshold_field,
        ),
        (
            field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
            island_poverty_threshold_field,
        ),
        (
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
            field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
        ),
    )
    for combined_field, threshold_field in island_combinations:
        self.df[combined_field] = (
            self.df[threshold_field]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

    meets_island_criteria = self.df[island_columns].any(axis="columns")

    self._increment_total_eligibility_exceeded(island_columns)

    # Debug statistic only. The denominator is the number of rows with a
    # non-null island unemployment value (an arbitrary island-area column).
    highlighted_island_count = meets_island_criteria.sum()
    island_rows_with_data = (
        self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009]
        .notnull()
        .sum()
    )
    highlighted_island_percent = (
        100 * highlighted_island_count / island_rows_with_data
    )

    logger.debug(
        f"For workforce criteria in island areas, "
        f"{highlighted_island_count} ("
        f"{highlighted_island_percent:.2f}% of tracts that have non-null data "
        f"in the column) have a value of TRUE."
    )

    def _or_in_order(column_names):
        # Left-to-right OR of the named columns; the operand order is kept
        # exactly as listed by the caller.
        combined = self.df[column_names[0]]
        for column_name in column_names[1:]:
            combined = combined | self.df[column_name]
        return combined

    # The burden thresholds passed to the front end are computed per path
    # (states first, then island areas) and then OR-ed together, because
    # the island criteria are derived differently.
    state_burden_exceeded = _or_in_order(
        [
            field_names.POVERTY_PCTILE_THRESHOLD,
            field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD,
            field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
            field_names.UNEMPLOYMENT_PCTILE_THRESHOLD,
        ]
    )
    island_burden_exceeded = _or_in_order(
        [
            field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD,
            field_names.ISLAND_POVERTY_PCTILE_THRESHOLD,
            field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD,
        ]
    )
    self.df[field_names.WORKFORCE_THRESHOLD_EXCEEDED] = (
        state_burden_exceeded | island_burden_exceeded
    )

    # The socioeconomic indicator likewise has an island variant and a
    # states variant that must be merged.
    self.df[field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED] = (
        self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        | self.df[field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD]
    )

    # A tract is included if it satisfies either path.
    return meets_state_criteria | meets_island_criteria
|
||||
|
||||
def add_columns(self) -> pd.DataFrame:
    """Add the Score M columns to ``self.df`` and return the frame.

    Steps, in order:

    1. Reset the threshold counter column to 0.
    2. Create the low-income and low-college-attendance boolean columns
       and the combined low-income/college-attendance series.
    3. Run the eight category factor methods and store each result.
    4. Derive the category count, the overall Score M community flag, the
       non-workforce flag, and the integer-valued Score M percentile
       column.

    Returns:
        ``self.df`` with the columns above added.
    """
    logger.debug("Adding Score M")

    self.df[field_names.THRESHOLD_COUNT] = 0

    # TODO: move this inside of
    # `_create_low_income_and_low_college_attendance_threshold`
    # and change the return signature of that method.
    # Standalone boolean for the low-income threshold.
    low_income_pctile_field = (
        field_names.POVERTY_LESS_THAN_200_FPL_FIELD
        + field_names.PERCENTILE_FIELD_SUFFIX
    )
    self.df[field_names.LOW_INCOME_THRESHOLD] = (
        self.df[low_income_pctile_field] >= self.LOW_INCOME_THRESHOLD
    )

    # Standalone boolean for the college attendance threshold. The
    # comparison is a strict `<`. The original authors report that two
    # tracts sit at exactly 20% college students and that neither has been
    # a DAC under any score.
    # NOTE(review): `_workforce_factor` compares the same field with `<=`;
    # confirm which boundary behaviour is intended.
    self.df[field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD] = (
        self.df[field_names.COLLEGE_ATTENDANCE_FIELD]
        < self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
    )

    self.df[
        field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES
    ] = self._create_low_income_and_low_college_attendance_threshold(
        self.df
    )

    # (output column, method that computes it), in execution order. Each
    # method is called before `self.df` is looked up for the assignment,
    # so a factor method that rebinds `self.df` is still honoured.
    factor_builders = (
        (field_names.M_CLIMATE, self._climate_factor),
        (field_names.M_ENERGY, self._energy_factor),
        (field_names.M_TRANSPORTATION, self._transportation_factor),
        (field_names.M_HOUSING, self._housing_factor),
        (field_names.M_POLLUTION, self._pollution_factor),
        (field_names.M_WATER, self._water_factor),
        (field_names.M_HEALTH, self._health_factor),
        (field_names.M_WORKFORCE, self._workforce_factor),
    )
    for factor_field, build_factor in factor_builders:
        self.df[factor_field] = build_factor()

    factors = [factor_field for factor_field, _ in factor_builders]
    self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
    self.df[field_names.SCORE_M_COMMUNITIES] = self.df[factors].any(axis=1)

    # Note: this is purely used for comparison tool analysis, and can be
    # removed at a later date. - LMB.
    # Workforce is the last entry of `factors`, so dropping it leaves the
    # seven non-workforce categories.
    non_workforce_factors = factors[:-1]
    self.df[field_names.M_NON_WORKFORCE] = self.df[
        non_workforce_factors
    ].any(axis=1)

    score_pctile_field = (
        field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX
    )
    self.df[score_pctile_field] = self.df[
        field_names.SCORE_M_COMMUNITIES
    ].astype(int)

    return self.df
|
Loading…
Add table
Add a link
Reference in a new issue