mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-25 05:11:40 -07:00
Fix and enable smoke tests
This commit is contained in:
parent
0f184a63f2
commit
6093ce0f53
9 changed files with 64 additions and 93 deletions
|
@@ -34,12 +34,12 @@ DATA_SCORE_CSV_DIR = DATA_SCORE_DIR / "csv"
|
|||
DATA_SCORE_CSV_FULL_DIR = DATA_SCORE_CSV_DIR / "full"
|
||||
DATA_SCORE_CSV_FULL_FILE_PATH = DATA_SCORE_CSV_FULL_DIR / "usa_score.parquet"
|
||||
FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH = (
|
||||
DATA_SCORE_CSV_FULL_DIR / "usa_counties.csv"
|
||||
DATA_SCORE_CSV_FULL_DIR / "usa_counties.parquet"
|
||||
)
|
||||
|
||||
# Score Tile CSV source path
|
||||
DATA_SCORE_CSV_TILES_PATH = DATA_SCORE_CSV_DIR / "tiles"
|
||||
DATA_SCORE_CSV_TILES_FILE_PATH = DATA_SCORE_CSV_TILES_PATH / "usa.csv"
|
||||
DATA_SCORE_CSV_TILES_FILE_PATH = DATA_SCORE_CSV_TILES_PATH / "usa.parquet"
|
||||
DATA_SCORE_JSON_INDEX_FILE_PATH = (
|
||||
DATA_SCORE_CSV_TILES_PATH / "tile_indexes.json"
|
||||
)
|
||||
|
|
|
@@ -35,7 +35,6 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
self.SCORE_SHP_FILE = self.SCORE_SHP_PATH / "usa.shp"
|
||||
|
||||
self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
|
||||
self.TILE_SCORE_CSV = self.SCORE_CSV_PATH / "tiles" / "usa.csv"
|
||||
|
||||
self.CENSUS_USA_GEOJSON = constants.DATA_CENSUS_GEOJSON_FILE_PATH
|
||||
|
||||
|
@@ -100,13 +99,9 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
full_geojson_usa_df[self.LAND_FIELD_NAME] > 0
|
||||
]
|
||||
|
||||
logger.info("Reading tile score CSV")
|
||||
self.score_usa_df = pd.read_csv(
|
||||
self.TILE_SCORE_CSV,
|
||||
dtype={
|
||||
self.TRACT_SHORT_FIELD: str,
|
||||
},
|
||||
low_memory=False,
|
||||
logger.info("Reading tile score")
|
||||
self.score_usa_df = pd.read_parquet(
|
||||
constants.DATA_SCORE_CSV_TILES_FILE_PATH,
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
|
|
|
@@ -442,15 +442,14 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
self.input_census_geo_df
|
||||
)
|
||||
|
||||
def _load_score_csv_full(
|
||||
self, score_county_state_merged: pd.DataFrame, score_csv_path: Path
|
||||
def _load_score_full(
|
||||
self, score_county_state_merged: pd.DataFrame, score_path: Path
|
||||
) -> None:
|
||||
logger.debug("Saving Full Score CSV with County Information")
|
||||
score_csv_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
score_county_state_merged.to_csv(
|
||||
score_csv_path,
|
||||
score_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
score_county_state_merged.to_parquet(
|
||||
score_path,
|
||||
index=False,
|
||||
encoding="utf-8-sig", # windows compat https://stackoverflow.com/a/43684587
|
||||
)
|
||||
|
||||
def _load_excel_from_df(
|
||||
|
@@ -514,12 +513,12 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
|
||||
return excel_csv_config
|
||||
|
||||
def _load_tile_csv(
|
||||
def _load_tile_score(
|
||||
self, score_tiles_df: pd.DataFrame, tile_score_path: Path
|
||||
) -> None:
|
||||
logger.debug("Saving Tile Score CSV")
|
||||
logger.debug("Saving Tile Score")
|
||||
tile_score_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
|
||||
score_tiles_df.to_parquet(tile_score_path, index=False)
|
||||
|
||||
def _load_downloadable_zip(self, downloadable_info_path: Path) -> None:
|
||||
downloadable_info_path.mkdir(parents=True, exist_ok=True)
|
||||
|
@@ -631,11 +630,11 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
self.output_tract_search_df.to_json(output_path, orient="records")
|
||||
|
||||
def load(self) -> None:
|
||||
self._load_score_csv_full(
|
||||
self._load_score_full(
|
||||
self.output_score_county_state_merged_df,
|
||||
constants.FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH,
|
||||
)
|
||||
self._load_tile_csv(
|
||||
self._load_tile_score(
|
||||
self.output_score_tiles_df, constants.DATA_SCORE_CSV_TILES_FILE_PATH
|
||||
)
|
||||
self._load_search_tract_data(constants.SCORE_TRACT_SEARCH_FILE_PATH)
|
||||
|
|
|
@@ -43,17 +43,17 @@ def check_score_data_source(
|
|||
settings.AWS_JUSTICE40_DATAPIPELINE_URL
|
||||
+ "/data/score/csv/tiles/usa.csv"
|
||||
)
|
||||
TILE_SCORE_CSV = score_csv_data_path / "tiles" / "usa.csv"
|
||||
TILE_SCORE_FILE = constants.DATA_SCORE_CSV_TILES_FILE_PATH
|
||||
|
||||
# download from s3 if score_data_source is aws
|
||||
if score_data_source == "aws":
|
||||
logger.debug("Fetching Score Tile data from AWS S3")
|
||||
Downloader.download_file_from_url(
|
||||
file_url=TILE_SCORE_CSV_S3_URL, download_file_name=TILE_SCORE_CSV
|
||||
file_url=TILE_SCORE_CSV_S3_URL, download_file_name=TILE_SCORE_FILE
|
||||
)
|
||||
else:
|
||||
# check if score data is found locally
|
||||
if not os.path.isfile(TILE_SCORE_CSV):
|
||||
if not os.path.isfile(TILE_SCORE_FILE):
|
||||
logger.warning(
|
||||
"No local score tiles data found. Please use '-s aws` to fetch from AWS"
|
||||
)
|
||||
|
|
|
@@ -110,9 +110,9 @@ def test_create_downloadable_data(
|
|||
)
|
||||
|
||||
|
||||
def test_load_score_csv_full(etl, score_data_expected):
|
||||
def test_load_score_full(etl, score_data_expected):
|
||||
reload(constants)
|
||||
etl._load_score_csv_full(
|
||||
etl._load_score_full(
|
||||
score_data_expected,
|
||||
constants.FULL_SCORE_CSV_FULL_PLUS_COUNTIES_FILE_PATH,
|
||||
)
|
||||
|
@@ -121,7 +121,7 @@ def test_load_score_csv_full(etl, score_data_expected):
|
|||
|
||||
def test_load_tile_csv(etl, tile_data_expected):
|
||||
reload(constants)
|
||||
etl._load_score_csv_full(
|
||||
etl._load_score_full(
|
||||
tile_data_expected, constants.DATA_SCORE_CSV_TILES_FILE_PATH
|
||||
)
|
||||
assert constants.DATA_SCORE_CSV_TILES_FILE_PATH.is_file()
|
||||
|
|
|
@@ -970,9 +970,8 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
# Then the imputed field should have no nulls
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
|
||||
]
|
||||
.isna()
|
||||
.sum()
|
||||
== 0
|
||||
.notna()
|
||||
.all()
|
||||
), "Error: not all values were filled..."
|
||||
|
||||
logger.debug("Renaming columns...")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue