Adding persistent poverty tracts (#738)

* persistent poverty working

* fixing left-padding

* running black and adding persistent poverty to comp tool

* fixing bug

* running black and fixing linter

* fixing linter

* fixing linter error
This commit is contained in:
Lucas Merrill Brown 2021-09-22 16:57:08 -05:00 committed by GitHub
commit b1a4d26be8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 518 additions and 201 deletions

View file

@ -83,6 +83,9 @@ class ScoreETL(ExtractTransformLoad):
# Urban Rural Map
self.URBAN_HERUISTIC_FIELD_NAME = "Urban Heuristic Flag"
# Persistent poverty
self.PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
# dataframes
self.df: pd.DataFrame
self.ejscreen_df: pd.DataFrame
@ -95,6 +98,7 @@ class ScoreETL(ExtractTransformLoad):
self.doe_energy_burden_df: pd.DataFrame
self.national_risk_index_df: pd.DataFrame
self.geocorr_urban_rural_df: pd.DataFrame
self.persistent_poverty_df: pd.DataFrame
def data_sets(self) -> list:
# Define a named tuple that will be used for each data set input.
@ -206,6 +210,11 @@ class ScoreETL(ExtractTransformLoad):
renamed_field=self.URBAN_HERUISTIC_FIELD_NAME,
bucket=None,
),
DataSet(
input_field=self.PERSISTENT_POVERTY_FIELD,
renamed_field=self.PERSISTENT_POVERTY_FIELD,
bucket=None,
),
# The following data sets have buckets, because they're used in Score C
DataSet(
input_field="CANCER",
@ -405,6 +414,16 @@ class ScoreETL(ExtractTransformLoad):
low_memory=False,
)
# Load the persistent poverty dataset; the tract GEOID column is read as a string.
persistent_poverty_csv = (
self.DATA_PATH / "dataset" / "persistent_poverty" / "usa.csv"
)
self.persistent_poverty_df = pd.read_csv(
persistent_poverty_csv,
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
low_memory=False,
)
def _join_cbg_dfs(self, census_block_group_dfs: list) -> pd.DataFrame:
logger.info("Joining Census Block Group dataframes")
census_block_group_df = functools.reduce(
@ -692,6 +711,7 @@ class ScoreETL(ExtractTransformLoad):
self.cdc_life_expectancy_df,
self.doe_energy_burden_df,
self.geocorr_urban_rural_df,
self.persistent_poverty_df,
]
census_tract_df = self._join_tract_dfs(census_tract_dfs)
@ -743,7 +763,11 @@ class ScoreETL(ExtractTransformLoad):
# TODO do this at the same time as calculating percentiles in future refactor
for data_set in data_sets:
# Skip GEOID_FIELD_NAME, because it's a string.
if data_set.renamed_field == self.GEOID_FIELD_NAME:
# Also skip `PERSISTENT_POVERTY_FIELD`: it is a straight pass-through and is not converted with `pd.to_numeric`.
if data_set.renamed_field in (
self.GEOID_FIELD_NAME,
self.PERSISTENT_POVERTY_FIELD,
):
continue
df[data_set.renamed_field] = pd.to_numeric(