mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
wip
This commit is contained in:
parent
a6ba9f6970
commit
a7a4df037e
3 changed files with 29 additions and 1 deletions
|
@ -58,6 +58,10 @@ SCORE_DOWNLOADABLE_CSV_FILE_PATH = (
|
|||
SCORE_DOWNLOADABLE_EXCEL_FILE_PATH = (
|
||||
SCORE_DOWNLOADABLE_DIR / f"communities-{timestamp_str}.xlsx"
|
||||
)
|
||||
ZIP_CODES_DOWNLOADABLE_CSV_FILE_PATH = (
|
||||
SCORE_DOWNLOADABLE_DIR
|
||||
/ f"communities-compared-to-zip-codes-{timestamp_str}.csv"
|
||||
)
|
||||
SCORE_DOWNLOADABLE_CODEBOOK_FILE_PATH = (
|
||||
SCORE_DOWNLOADABLE_DIR / f"codebook-{timestamp_str}.csv"
|
||||
)
|
||||
|
|
|
@ -387,6 +387,9 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
|
||||
return final_df
|
||||
|
||||
def _create_zip_codes_data(self, downloadable_df):
|
||||
pass
|
||||
|
||||
def transform(self) -> None:
|
||||
logger.info("Transforming data sources for Score + County CSVs")
|
||||
|
||||
|
@ -543,6 +546,9 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
codebook_df.to_csv(codebook_path, index=False)
|
||||
|
||||
# TODO: Write zip-code based files
|
||||
zip_codes_df = self._create_zip_codes_data(
|
||||
downloadable_df=downloadable_df
|
||||
)
|
||||
|
||||
logger.info("Compressing files")
|
||||
files_to_compress = [
|
||||
|
|
|
@ -11,7 +11,25 @@ logger = get_module_logger(__name__)
|
|||
|
||||
|
||||
class GeoCorrAlternativesETL(ExtractTransformLoad):
|
||||
"""Calculates overlap between Census tracts & various alternative geographies."""
|
||||
"""Calculates overlap between Census tracts & various alternative geographies.
|
||||
|
||||
Note: for almost all 2020 zip codes in the USA (33,781 zip codes), this ETL
|
||||
divides them into census tracts such that 100% of the zip code is represented
|
||||
within the census tracts in the output of this file.
|
||||
|
||||
For a very small number of 2020 zip codes in the USA (9 zip codes), this ETL
|
||||
only matches 98% or more of the zip code into tracts. For one 2020 zip code, this
|
||||
ETL only matches 86% of the zip code.
|
||||
|
||||
The reason for these 10 outliers is unclear.
|
||||
|
||||
Here are the value counts for `PERCENT_OF_ZIP_CODE_IN_TRACT` aggregated at two
|
||||
digits of precision:
|
||||
1.00 33781
|
||||
0.99 7
|
||||
0.98 2
|
||||
0.86 1
|
||||
"""
|
||||
|
||||
NAME = "geocorr_alternatives"
|
||||
GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
|
||||
|
|
Loading…
Add table
Reference in a new issue