Add demos for island areas (#1932)

* Backfill population in island areas (#1882)

* Update smoketest to account for backfills (#1882)

As I wrote in the comment:
We backfill island areas with data from the 2010 census, so if THOSE tracts
have data beyond the data source, that's to be expected and is fine to pass.
If some other state or territory does, though, this should fail.

This ends up being a nice way of documenting that behavior, I guess!

* Fixup lint issues (#1882)

* Add in race demos to 2010 census pull (#1851)

* Add backfill data to score (#1851)

* Change column name (#1851)

* Fill demos after the score (#1851)

* Add income back, adjust test (#1882)

* Apply code-review feedback (#1851)

* Add test for island area backfill (#1851)

* Fix bad rename (#1851)
This commit is contained in:
Matt Bowen 2022-09-29 12:42:56 -04:00 committed by GitHub
commit 8e5ed5b593
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 274 additions and 17 deletions

View file

@ -381,8 +381,6 @@ TILES_SCORE_COLUMNS = {
field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT",
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC",
}
# columns to round floats to 2 decimals
@ -456,5 +454,5 @@ TILES_SCORE_FLOAT_COLUMNS = [
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.AML_BOOLEAN,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
]

View file

@ -1,4 +1,6 @@
import functools
from typing import List
from dataclasses import dataclass
import numpy as np
@ -56,6 +58,8 @@ class ScoreETL(ExtractTransformLoad):
self.fuds_df: pd.DataFrame
self.tribal_overlap_df: pd.DataFrame
self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS: List[str] = []
def extract(self) -> None:
logger.info("Loading data sets from disk.")
@ -402,6 +406,25 @@ class ScoreETL(ExtractTransformLoad):
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
)
self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS = [
field_names.PERCENT_BLACK_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_ASIAN_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_HAWAIIAN_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_HISPANIC_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_OTHER_RACE_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
]
# Donut columns get added later
numeric_columns = [
field_names.HOUSING_BURDEN_FIELD,
@ -471,7 +494,7 @@ class ScoreETL(ExtractTransformLoad):
field_names.PERCENT_AGE_OVER_64,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
]
] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS
non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
@ -636,6 +659,32 @@ class ScoreETL(ExtractTransformLoad):
return df_copy
@staticmethod
def _get_island_areas(df: pd.DataFrame) -> pd.Series:
    """Return a boolean mask selecting the island-area rows of ``df``.

    A row is selected when the first two characters of its tract GEOID
    (the ``field_names.GEOID_TRACT_FIELD`` column, read through the
    ``.str`` accessor) appear in
    ``constants.TILES_ISLAND_AREA_FIPS_CODES``.

    Args:
        df: Frame containing the tract GEOID column.

    Returns:
        A boolean Series aligned with ``df``'s index.
    """
    # The leading two characters of the tract GEOID are what gets compared
    # against the island-area FIPS code list.
    geoid_prefix = df[field_names.GEOID_TRACT_FIELD].str[:2]
    return geoid_prefix.isin(constants.TILES_ISLAND_AREA_FIPS_CODES)
def _backfill_island_demographics(self, df: pd.DataFrame) -> pd.DataFrame:
    """Overwrite island-area demographics and population with backfill data.

    For every column listed in ``self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS``,
    the island-area rows of the matching column without
    ``field_names.ISLAND_AREA_BACKFILL_SUFFIX`` are replaced by the values
    from the suffixed column. The suffixed columns are then dropped, and the
    island-area rows of ``field_names.TOTAL_POP_FIELD`` are replaced by
    ``field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010``.

    Args:
        df: Score frame holding both the regular and the backfill columns.
            The per-column copy writes into this frame in place.

    Returns:
        A frame without the backfill columns (the result of ``df.drop``).
    """
    logger.info("Backfilling island demographic data")
    is_island_tract = self._get_island_areas(df)

    for source_column in self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS:
        # The destination column is the backfill column name with the
        # suffix text removed.
        target_column = source_column.replace(
            field_names.ISLAND_AREA_BACKFILL_SUFFIX, ""
        )
        island_values = df.loc[is_island_tract, source_column]
        df.loc[is_island_tract, target_column] = island_values

    # The backfill columns are no longer needed once copied across.
    trimmed_df = df.drop(columns=self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS)

    island_population = trimmed_df.loc[
        is_island_tract, field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010
    ]
    trimmed_df.loc[is_island_tract, field_names.TOTAL_POP_FIELD] = (
        island_population
    )
    return trimmed_df
def transform(self) -> None:
logger.info("Transforming Score Data")
@ -645,6 +694,9 @@ class ScoreETL(ExtractTransformLoad):
# calculate scores
self.df = ScoreRunner(df=self.df).calculate_scores()
# We backfill island demographic data after scoring, since it doesn't matter to the score anyway
self.df = self._backfill_island_demographics(self.df)
def load(self) -> None:
logger.info("Saving Score CSV")
constants.DATA_SCORE_CSV_FULL_DIR.mkdir(parents=True, exist_ok=True)