mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-03 18:44:18 -07:00
Add demos for island areas (#1932)
* Backfill population in island areas (#1882) * Update smoketest to account for backfills (#1882) As I wrote in the comment: we backfill island areas with data from the 2010 census, so if THOSE tracts have data beyond the data source, that's to be expected and is fine to pass. If some other state or territory does, though, this should fail. This ends up being a nice way of documenting that behavior, I guess! * Fixup lint issues (#1882) * Add in race demos to 2010 census pull (#1851) * Add backfill data to score (#1851) * Change column name (#1851) * Fill demos after the score (#1851) * Add income back, adjust test (#1882) * Apply code-review feedback (#1851) * Add test for island area backfill (#1851) * Fix bad rename (#1851)
This commit is contained in:
parent
0f0d6db2d0
commit
8e5ed5b593
7 changed files with 274 additions and 17 deletions
|
@ -381,8 +381,6 @@ TILES_SCORE_COLUMNS = {
|
|||
field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT",
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC",
|
||||
|
||||
|
||||
}
|
||||
|
||||
# columns to round floats to 2 decimals
|
||||
|
@ -456,5 +454,5 @@ TILES_SCORE_FLOAT_COLUMNS = [
|
|||
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
|
||||
field_names.AML_BOOLEAN,
|
||||
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
|
||||
]
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
import functools
|
||||
from typing import List
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
|
@ -56,6 +58,8 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.fuds_df: pd.DataFrame
|
||||
self.tribal_overlap_df: pd.DataFrame
|
||||
|
||||
self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS: List[str] = []
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Loading data sets from disk.")
|
||||
|
||||
|
@ -402,6 +406,25 @@ class ScoreETL(ExtractTransformLoad):
|
|||
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
|
||||
)
|
||||
|
||||
self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS = [
|
||||
field_names.PERCENT_BLACK_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
field_names.PERCENT_ASIAN_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
field_names.PERCENT_HAWAIIAN_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
field_names.PERCENT_HISPANIC_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
field_names.PERCENT_OTHER_RACE_FIELD_NAME
|
||||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
|
||||
]
|
||||
|
||||
# Donut columns get added later
|
||||
numeric_columns = [
|
||||
field_names.HOUSING_BURDEN_FIELD,
|
||||
|
@ -471,7 +494,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.PERCENT_AGE_OVER_64,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
]
|
||||
] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS
|
||||
|
||||
non_numeric_columns = [
|
||||
self.GEOID_TRACT_FIELD_NAME,
|
||||
|
@ -636,6 +659,32 @@ class ScoreETL(ExtractTransformLoad):
|
|||
|
||||
return df_copy
|
||||
|
||||
@staticmethod
def _get_island_areas(df: pd.DataFrame) -> pd.Series:
    """Build a boolean mask selecting the island-area rows of ``df``.

    Args:
        df: Frame holding a ``field_names.GEOID_TRACT_FIELD`` column of
            string tract identifiers.

    Returns:
        A boolean Series aligned with ``df``; an entry is True when the
        first two characters of that row's tract identifier appear in
        ``constants.TILES_ISLAND_AREA_FIPS_CODES``.
    """
    # Only the two-character prefix of the tract identifier is compared.
    geoid_prefix = df[field_names.GEOID_TRACT_FIELD].str[:2]
    return geoid_prefix.isin(constants.TILES_ISLAND_AREA_FIPS_CODES)
|
||||
|
||||
def _backfill_island_demographics(self, df: pd.DataFrame) -> pd.DataFrame:
    """Copy backfill values into the regular columns for island-area rows.

    For each name in ``self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS`` the
    destination column is that name with
    ``field_names.ISLAND_AREA_BACKFILL_SUFFIX`` removed. Rows selected by
    ``_get_island_areas`` receive the backfill value; all other rows are
    left untouched. The backfill columns are dropped afterwards, and the
    total-population column of the island rows is overwritten with the
    combined 2010 census population column.

    Args:
        df: Score frame containing both the regular and the backfill columns.

    Returns:
        The frame without the backfill columns and with island rows filled.
    """
    logger.info("Backfilling island demographic data")

    is_island = self._get_island_areas(df)
    suffix = field_names.ISLAND_AREA_BACKFILL_SUFFIX

    for source_column in self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS:
        # Removing the suffix yields the name of the column being filled.
        target_column = source_column.replace(suffix, "")
        island_values = df.loc[is_island, source_column]
        df.loc[is_island, target_column] = island_values

    # The backfill columns are no longer needed once their values are copied.
    df = df.drop(columns=self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS)

    # The mask was built before the drop; dropping columns keeps the row
    # index, so it still lines up with the new frame.
    island_population = df.loc[
        is_island, field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010
    ]
    df.loc[is_island, field_names.TOTAL_POP_FIELD] = island_population

    return df
|
||||
|
||||
def transform(self) -> None:
|
||||
logger.info("Transforming Score Data")
|
||||
|
||||
|
@ -645,6 +694,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# calculate scores
|
||||
self.df = ScoreRunner(df=self.df).calculate_scores()
|
||||
|
||||
# We add island demographic data since it doesn't matter to the score anyway
|
||||
self.df = self._backfill_island_demographics(self.df)
|
||||
|
||||
def load(self) -> None:
|
||||
logger.info("Saving Score CSV")
|
||||
constants.DATA_SCORE_CSV_FULL_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue