mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-29 05:01:17 -07:00
User Story 2152 – Clean up logging (#2155)
Update logging messages and message consistency This update includes changes to the level of many log messages. Rather than everything being logged at the info level, it differentiates between debug, info, warning, and error messages. It also changes the default log level to info to avoid much of the noise previously in the logs. It also removes many extra log messages, and adds additional decorators at the beginning of each pipeline run.
This commit is contained in:
parent
7cfb56476e
commit
03a6d3c660
63 changed files with 307 additions and 339 deletions
|
@ -70,14 +70,9 @@ class CensusETL(ExtractTransformLoad):
|
|||
None
|
||||
"""
|
||||
shp_file_path = self._path_for_fips_file(fips_code, GeoFileType.SHP)
|
||||
logger.info(f"Checking if {fips_code} shp file exists")
|
||||
|
||||
# check if file exists
|
||||
if not shp_file_path.is_file():
|
||||
logger.info(
|
||||
f"{fips_code} shp file does not exist. Downloading and extracting shape file"
|
||||
)
|
||||
|
||||
tract_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/TRACT/2010/tl_2010_{fips_code}_tract10.zip"
|
||||
unzip_file_from_url(
|
||||
tract_state_url,
|
||||
|
@ -86,8 +81,11 @@ class CensusETL(ExtractTransformLoad):
|
|||
)
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Downloading Census Data")
|
||||
for fips_code in self.STATE_FIPS_CODES:
|
||||
logger.debug("Extracting census data")
|
||||
for index, fips_code in enumerate(self.STATE_FIPS_CODES):
|
||||
logger.debug(
|
||||
f"Extracting shape for FIPS {fips_code} – {index+1} of {len(self.STATE_FIPS_CODES)}"
|
||||
)
|
||||
self._extract_shp(fips_code)
|
||||
|
||||
def _transform_to_geojson(self, fips_code: str) -> None:
|
||||
|
@ -100,11 +98,8 @@ class CensusETL(ExtractTransformLoad):
|
|||
geojson_file_path = self._path_for_fips_file(
|
||||
fips_code, GeoFileType.GEOJSON
|
||||
)
|
||||
logger.info(f"Checking if {fips_code} geoJSON file exists ")
|
||||
|
||||
if not geojson_file_path.is_file():
|
||||
logger.info(
|
||||
f"GeoJSON file {fips_code} does not exist. Converting shp to geoJSON"
|
||||
)
|
||||
cmd = [
|
||||
"ogr2ogr",
|
||||
"-f",
|
||||
|
@ -120,9 +115,11 @@ class CensusETL(ExtractTransformLoad):
|
|||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.debug("Transforming tracts")
|
||||
|
||||
for file in self.GEOJSON_BASE_PATH.iterdir():
|
||||
if file.suffix == ".json":
|
||||
logger.info(f"Ingesting geoid10 for file {file}")
|
||||
logger.debug(f"Adding GEOID10 for file {file.name}")
|
||||
with open(self.GEOJSON_BASE_PATH / file, encoding="utf-8") as f:
|
||||
geojson = json.load(f)
|
||||
for feature in geojson["features"]:
|
||||
|
@ -142,13 +139,19 @@ class CensusETL(ExtractTransformLoad):
|
|||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.info("Transforming Census Data")
|
||||
for fips_code in self.STATE_FIPS_CODES:
|
||||
logger.debug("Transforming census data")
|
||||
|
||||
logger.debug("Transforming SHP files to GeoJSON")
|
||||
for index, fips_code in enumerate(self.STATE_FIPS_CODES):
|
||||
logger.debug(
|
||||
f"Transforming FIPS {fips_code} to GeoJSON – {index+1} of {len(self.STATE_FIPS_CODES)}"
|
||||
)
|
||||
self._transform_to_geojson(fips_code)
|
||||
|
||||
self._generate_tract_table()
|
||||
|
||||
def _load_into_state_csvs(self, fips_code: str) -> None:
|
||||
"""Load state CSVS into individual CSV files
|
||||
"""Load state CSVs into individual CSV files
|
||||
|
||||
Args:
|
||||
fips_code (str): the FIPS code for the region of interest
|
||||
|
@ -182,10 +185,9 @@ class CensusETL(ExtractTransformLoad):
|
|||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.info("Writing national us.csv file")
|
||||
logger.debug("Loading national US.csv")
|
||||
|
||||
if not self.NATIONAL_TRACT_CSV_PATH.is_file():
|
||||
logger.info(f"Creating {self.NATIONAL_TRACT_CSV_PATH}")
|
||||
with open(
|
||||
self.NATIONAL_TRACT_CSV_PATH,
|
||||
mode="w",
|
||||
|
@ -211,22 +213,21 @@ class CensusETL(ExtractTransformLoad):
|
|||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.info("Generating national geojson file")
|
||||
logger.debug("Loading National GeoJson")
|
||||
|
||||
usa_df = gpd.GeoDataFrame()
|
||||
|
||||
for file_name in self.GEOJSON_BASE_PATH.rglob("*.json"):
|
||||
logger.info(f"Ingesting {file_name}")
|
||||
logger.debug(f"Adding national GeoJSON file {file_name.name}")
|
||||
state_gdf = gpd.read_file(file_name)
|
||||
usa_df = usa_df.append(state_gdf)
|
||||
|
||||
usa_df = usa_df.to_crs(
|
||||
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
|
||||
)
|
||||
logger.info("Writing national geojson file")
|
||||
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
|
||||
|
||||
logger.info("Census tract downloading complete")
|
||||
logger.debug("Saving national GeoJSON file")
|
||||
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
|
||||
|
||||
def load(self) -> None:
|
||||
"""Create state CSVs, National CSV, and National GeoJSON
|
||||
|
@ -234,8 +235,13 @@ class CensusETL(ExtractTransformLoad):
|
|||
Returns:
|
||||
None
|
||||
"""
|
||||
logger.info("Saving Census CSV")
|
||||
logger.debug("Loading census data")
|
||||
|
||||
logger.debug("Loading individual state csv files")
|
||||
for fips_code in self.TRACT_PER_STATE:
|
||||
self._load_into_state_csvs(fips_code)
|
||||
|
||||
self._load_national_csv()
|
||||
self._load_national_geojson()
|
||||
|
||||
logger.debug("Census data complete")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue