mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
wip
This commit is contained in:
parent
a6ba9f6970
commit
a7a4df037e
3 changed files with 29 additions and 1 deletions
|
@ -58,6 +58,10 @@ SCORE_DOWNLOADABLE_CSV_FILE_PATH = (
|
||||||
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = (
|
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = (
|
||||||
SCORE_DOWNLOADABLE_DIR / f"communities-{timestamp_str}.xlsx"
|
SCORE_DOWNLOADABLE_DIR / f"communities-{timestamp_str}.xlsx"
|
||||||
)
|
)
|
||||||
|
ZIP_CODES_DOWNLOADABLE_CSV_FILE_PATH = (
|
||||||
|
SCORE_DOWNLOADABLE_DIR
|
||||||
|
/ f"communities-compared-to-zip-codes-{timestamp_str}.csv"
|
||||||
|
)
|
||||||
SCORE_DOWNLOADABLE_CODEBOOK_FILE_PATH = (
|
SCORE_DOWNLOADABLE_CODEBOOK_FILE_PATH = (
|
||||||
SCORE_DOWNLOADABLE_DIR / f"codebook-{timestamp_str}.csv"
|
SCORE_DOWNLOADABLE_DIR / f"codebook-{timestamp_str}.csv"
|
||||||
)
|
)
|
||||||
|
|
|
@ -387,6 +387,9 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
return final_df
|
return final_df
|
||||||
|
|
||||||
|
def _create_zip_codes_data(self, downloadable_df):
|
||||||
|
pass
|
||||||
|
|
||||||
def transform(self) -> None:
|
def transform(self) -> None:
|
||||||
logger.info("Transforming data sources for Score + County CSVs")
|
logger.info("Transforming data sources for Score + County CSVs")
|
||||||
|
|
||||||
|
@ -543,6 +546,9 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
codebook_df.to_csv(codebook_path, index=False)
|
codebook_df.to_csv(codebook_path, index=False)
|
||||||
|
|
||||||
# TODO: Write zip-code based files
|
# TODO: Write zip-code based files
|
||||||
|
zip_codes_df = self._create_zip_codes_data(
|
||||||
|
downloadable_df=downloadable_df
|
||||||
|
)
|
||||||
|
|
||||||
logger.info("Compressing files")
|
logger.info("Compressing files")
|
||||||
files_to_compress = [
|
files_to_compress = [
|
||||||
|
|
|
@ -11,7 +11,25 @@ logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class GeoCorrAlternativesETL(ExtractTransformLoad):
|
class GeoCorrAlternativesETL(ExtractTransformLoad):
|
||||||
"""Calculates overlap between Census tracts & various alternative geographies."""
|
"""Calculates overlap between Census tracts & various alternative geographies.
|
||||||
|
|
||||||
|
Note: for almost all 2020 zip codes in the USA (33,781 zip codes), this ETL
|
||||||
|
divides them into census tracts such that 100% of the zip code is represented
|
||||||
|
within the census tracts in the output of this file.
|
||||||
|
|
||||||
|
For a very small number of 2020 zip codes in the USA (9 zip codes), this ETL
|
||||||
|
only matches 98% or more of the zip code into tracts. For one 2020 zip code, this
|
||||||
|
ETL only matches 86% of the zip code.
|
||||||
|
|
||||||
|
The reason for these 10 outliers is unclear.
|
||||||
|
|
||||||
|
Here are the value counts for `PERCENT_OF_ZIP_CODE_IN_TRACT` aggregated at two
|
||||||
|
digits of precision:
|
||||||
|
1.00 33781
|
||||||
|
0.99 7
|
||||||
|
0.98 2
|
||||||
|
0.86 1
|
||||||
|
"""
|
||||||
|
|
||||||
NAME = "geocorr_alternatives"
|
NAME = "geocorr_alternatives"
|
||||||
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
||||||
|
|
Loading…
Add table
Reference in a new issue