mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-29 20:01:16 -07:00
Adding persistent poverty tracts (#738)
* persistent poverty working
* fixing left-padding
* running black and adding persistent poverty to comp tool
* fixing bug
* running black and fixing linter
* fixing linter
* fixing linter error
This commit is contained in:
parent
d1ced6d584
commit
b1a4d26be8
15 changed files with 518 additions and 201 deletions
|
@ -83,6 +83,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Urban Rural Map
|
||||
self.URBAN_HERUISTIC_FIELD_NAME = "Urban Heuristic Flag"
|
||||
|
||||
# Persistent poverty
|
||||
self.PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
|
||||
|
||||
# dataframes
|
||||
self.df: pd.DataFrame
|
||||
self.ejscreen_df: pd.DataFrame
|
||||
|
@ -95,6 +98,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.doe_energy_burden_df: pd.DataFrame
|
||||
self.national_risk_index_df: pd.DataFrame
|
||||
self.geocorr_urban_rural_df: pd.DataFrame
|
||||
self.persistent_poverty_df: pd.DataFrame
|
||||
|
||||
def data_sets(self) -> list:
|
||||
# Define a named tuple that will be used for each data set input.
|
||||
|
@ -206,6 +210,11 @@ class ScoreETL(ExtractTransformLoad):
|
|||
renamed_field=self.URBAN_HERUISTIC_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field=self.PERSISTENT_POVERTY_FIELD,
|
||||
renamed_field=self.PERSISTENT_POVERTY_FIELD,
|
||||
bucket=None,
|
||||
),
|
||||
# The following data sets have buckets, because they're used in Score C
|
||||
DataSet(
|
||||
input_field="CANCER",
|
||||
|
@ -405,6 +414,16 @@ class ScoreETL(ExtractTransformLoad):
|
|||
low_memory=False,
|
||||
)
|
||||
|
||||
# Load persistent poverty
|
||||
persistent_poverty_csv = (
|
||||
self.DATA_PATH / "dataset" / "persistent_poverty" / "usa.csv"
|
||||
)
|
||||
self.persistent_poverty_df = pd.read_csv(
|
||||
persistent_poverty_csv,
|
||||
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
|
||||
def _join_cbg_dfs(self, census_block_group_dfs: list) -> pd.DataFrame:
|
||||
logger.info("Joining Census Block Group dataframes")
|
||||
census_block_group_df = functools.reduce(
|
||||
|
@ -692,6 +711,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.cdc_life_expectancy_df,
|
||||
self.doe_energy_burden_df,
|
||||
self.geocorr_urban_rural_df,
|
||||
self.persistent_poverty_df,
|
||||
]
|
||||
census_tract_df = self._join_tract_dfs(census_tract_dfs)
|
||||
|
||||
|
@ -743,7 +763,11 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# TODO do this at the same time as calculating percentiles in future refactor
|
||||
for data_set in data_sets:
|
||||
# Skip GEOID_FIELD_NAME, because it's a string.
|
||||
if data_set.renamed_field == self.GEOID_FIELD_NAME:
|
||||
# Skip `PERSISTENT_POVERTY_FIELD` because it's a straight pass-through.
|
||||
if data_set.renamed_field in (
|
||||
self.GEOID_FIELD_NAME,
|
||||
self.PERSISTENT_POVERTY_FIELD,
|
||||
):
|
||||
continue
|
||||
|
||||
df[data_set.renamed_field] = pd.to_numeric(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue