mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 14:31:16 -07:00
Data Pipeline performance improvements for Census GeoJson and Score file
This commit is contained in:
parent
d5d055864f
commit
c32bd1f363
37 changed files with 1305 additions and 1413 deletions
|
@@ -507,7 +507,7 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
# geojson file for all of the US, this will read it off of S3
|
||||
logger.debug("Reading in geojson for the country")
|
||||
if not os.path.exists(
|
||||
self.DATA_PATH / "census" / "geojson" / "us.json"
|
||||
self.DATA_PATH / "census" / "geojson" / "us_geo.parquet"
|
||||
):
|
||||
logger.debug("Fetching Census data from AWS S3")
|
||||
unzip_file_from_url(
|
||||
|
@@ -515,9 +515,8 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.DATA_PATH / "tmp",
|
||||
self.DATA_PATH,
|
||||
)
|
||||
|
||||
self.geo_df = gpd.read_file(
|
||||
self.DATA_PATH / "census" / "geojson" / "us.json",
|
||||
self.geo_df = gpd.read_parquet(
|
||||
self.DATA_PATH / "census" / "geojson" / "us_geo.parquet",
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue