mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-25 13:11:39 -07:00
Adding etl files for ej screen areas of concern
This commit is contained in:
parent
3bb1ecf0ed
commit
d49c28ca25
2 changed files with 115 additions and 0 deletions
|
@ -69,6 +69,11 @@ DATASET_LIST = [
|
|||
"module_dir": "persistent_poverty",
|
||||
"class_name": "PersistentPovertyETL",
|
||||
},
|
||||
{
|
||||
"name": "ejscreen_areas_of_concern",
|
||||
"module_dir": "ejscreen_areas_of_concern",
|
||||
"class_name": "EJScreenAreasOfConcernETL",
|
||||
},
|
||||
]
|
||||
CENSUS_INFO = {
|
||||
"name": "census",
|
||||
|
|
|
@ -86,6 +86,44 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Persistent poverty
|
||||
self.PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
||||
|
||||
# EJ Areas of Concern
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, National, 70th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, National, 75th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, National, 80th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, National, 85th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, National, 90th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, National, 95th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_STATE_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, State, 70th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_STATE_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, State, 75th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_STATE_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, State, 80th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_STATE_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, State, 85th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_STATE_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, State, 90th percentile (communities)"
|
||||
)
|
||||
self.EJSCREEN_AREAS_OF_CONCERN_STATE_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME = (
|
||||
"EJSCREEN Areas of Concern, State, 95th percentile (communities)"
|
||||
)
|
||||
|
||||
# dataframes
|
||||
self.df: pd.DataFrame
|
||||
self.ejscreen_df: pd.DataFrame
|
||||
|
@ -99,6 +137,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.national_risk_index_df: pd.DataFrame
|
||||
self.geocorr_urban_rural_df: pd.DataFrame
|
||||
self.persistent_poverty_df: pd.DataFrame
|
||||
self.ejscreen_areas_of_concern_df: pd.DataFrame
|
||||
|
||||
def data_sets(self) -> list:
|
||||
# Define a named tuple that will be used for each data set input.
|
||||
|
@ -215,6 +254,66 @@ class ScoreETL(ExtractTransformLoad):
|
|||
renamed_field=self.PERSISTENT_POVERTY_FIELD,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_NATIONAL_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_70TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_75TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_80TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_85TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_90TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
renamed_field=self.EJSCREEN_AREAS_OF_CONCERN_STATE_95TH_PERCENTILE_COMMUNITIES_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
# The following data sets have buckets, because they're used in Score C
|
||||
DataSet(
|
||||
input_field="CANCER",
|
||||
|
@ -424,6 +523,16 @@ class ScoreETL(ExtractTransformLoad):
|
|||
low_memory=False,
|
||||
)
|
||||
|
||||
# Load EJ Screen Areas of Concern
|
||||
ejscreen_areas_of_concern_csv = (
|
||||
self.DATA_PATH / "dataset" / "ejscreen_areas_of_concern" / "usa.csv"
|
||||
)
|
||||
self.ejscreen_areas_of_concern_df = pd.read_csv(
|
||||
ejscreen_areas_of_concern_csv,
|
||||
dtype={self.GEOID_FIELD_NAME: "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
|
||||
def _join_cbg_dfs(self, census_block_group_dfs: list) -> pd.DataFrame:
|
||||
logger.info("Joining Census Block Group dataframes")
|
||||
census_block_group_df = functools.reduce(
|
||||
|
@ -701,6 +810,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.housing_and_transportation_df,
|
||||
self.census_acs_median_incomes_df,
|
||||
self.national_risk_index_df,
|
||||
self.ejscreen_areas_of_concern_df,
|
||||
]
|
||||
census_block_group_df = self._join_cbg_dfs(census_block_group_dfs)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue