Analysis by region (#385)

* Adding regional comparisons

* Small ETL fixes
This commit is contained in:
Lucas Merrill Brown 2021-07-26 08:02:25 -07:00 committed by GitHub
commit 67b39475f7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 440 additions and 158 deletions

View file

@@ -394,13 +394,15 @@ class ScoreETL(ExtractTransformLoad):
"Score C",
"Score D",
"Score E",
"Poverty (Less than 200% of federal poverty line)",
]:
self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] = self.df[
score_field
].rank(pct=True)
self.df[f"{score_field} (top 25th percentile)"] = (
self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] >= 0.75
)
self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] = self.df[score_field].rank(pct=True)
for threshold in [0.25, 0.3, 0.35, 0.4]:
fraction_converted_to_percent = int(100 * threshold)
self.df[f"{score_field} (top {fraction_converted_to_percent}th percentile)"] = (
self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] >= 1 - threshold
)
def load(self) -> None:
logger.info(f"Saving Score CSV")