mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-29 13:41:17 -07:00
Data Pipeline performance improvements for Census GeoJson and Score file
This commit is contained in:
parent
d5d055864f
commit
c32bd1f363
37 changed files with 1305 additions and 1413 deletions
|
@@ -7,10 +7,13 @@ from data_pipeline.score.field_names import GEOID_TRACT_FIELD
|
|||
|
||||
@pytest.fixture(scope="session")
|
||||
def final_score_df():
|
||||
return pd.read_csv(
|
||||
settings.APP_ROOT / "data" / "score" / "csv" / "full" / "usa.csv",
|
||||
dtype={GEOID_TRACT_FIELD: str},
|
||||
low_memory=False,
|
||||
return pd.read_parquet(
|
||||
settings.APP_ROOT
|
||||
/ "data"
|
||||
/ "score"
|
||||
/ "csv"
|
||||
/ "full"
|
||||
/ "usa_score.parquet",
|
||||
)
|
||||
|
||||
|
||||
|
@@ -173,7 +176,7 @@ def geocorr_urban_rural_df():
|
|||
@pytest.fixture()
|
||||
def census_decennial_df():
|
||||
census_decennial_csv = (
|
||||
constants.DATA_PATH / "dataset" / "census_decennial_2010" / "usa.csv"
|
||||
constants.DATA_PATH / "dataset" / "census_decennial_2020" / "usa.csv"
|
||||
)
|
||||
return pd.read_csv(
|
||||
census_decennial_csv,
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
|
@@ -17,7 +17,7 @@ from data_pipeline.score.utils import (
|
|||
@contextmanager
|
||||
def patch_calculate_tract_adjacency_scores():
|
||||
# Use fixtures for tract data.
|
||||
tract_data_path = Path(__file__).parent / "data" / "us.geojson"
|
||||
tract_data_path = Path(__file__).parent / "data" / "us_geo.parquet"
|
||||
|
||||
get_tract_geojson_mock = partial(
|
||||
get_tract_geojson, _tract_data_path=tract_data_path
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
|
@@ -68,7 +68,7 @@ def transformed_data_fixture(
|
|||
"""Load the test data and call the ETL transform"""
|
||||
dec = CensusDecennialETL()
|
||||
dec.df_all = extracted_data_fixture
|
||||
dec.transform(imputed_path_fixture / "census-us-territory-geojson.json")
|
||||
dec.transform(imputed_path_fixture / "census-us-territory-geojson.parquet")
|
||||
return dec.df_all
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue