User Story 2152 – Clean up logging (#2155)

Update logging messages and improve message consistency

This update includes changes to the level of many log messages. Rather than everything being logged at the info level, it differentiates between debug, info, warning, and error messages. It also changes the default log level to info to avoid much of the noise previously in the logs.

It also removes many extra log messages and adds decorators at the beginning of each pipeline run.
This commit is contained in:
Travis Newby 2023-02-08 13:08:55 -06:00 committed by GitHub
commit 03a6d3c660
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
63 changed files with 307 additions and 339 deletions

View file

@ -70,14 +70,9 @@ class CensusETL(ExtractTransformLoad):
None
"""
shp_file_path = self._path_for_fips_file(fips_code, GeoFileType.SHP)
logger.info(f"Checking if {fips_code} shp file exists")
# check if file exists
if not shp_file_path.is_file():
logger.info(
f"{fips_code} shp file does not exist. Downloading and extracting shape file"
)
tract_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/TRACT/2010/tl_2010_{fips_code}_tract10.zip"
unzip_file_from_url(
tract_state_url,
@ -86,8 +81,11 @@ class CensusETL(ExtractTransformLoad):
)
def extract(self) -> None:
logger.info("Downloading Census Data")
for fips_code in self.STATE_FIPS_CODES:
logger.debug("Extracting census data")
for index, fips_code in enumerate(self.STATE_FIPS_CODES):
logger.debug(
f"Extracting shape for FIPS {fips_code} {index+1} of {len(self.STATE_FIPS_CODES)}"
)
self._extract_shp(fips_code)
def _transform_to_geojson(self, fips_code: str) -> None:
@ -100,11 +98,8 @@ class CensusETL(ExtractTransformLoad):
geojson_file_path = self._path_for_fips_file(
fips_code, GeoFileType.GEOJSON
)
logger.info(f"Checking if {fips_code} geoJSON file exists ")
if not geojson_file_path.is_file():
logger.info(
f"GeoJSON file {fips_code} does not exist. Converting shp to geoJSON"
)
cmd = [
"ogr2ogr",
"-f",
@ -120,9 +115,11 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.debug("Transforming tracts")
for file in self.GEOJSON_BASE_PATH.iterdir():
if file.suffix == ".json":
logger.info(f"Ingesting geoid10 for file {file}")
logger.debug(f"Adding GEOID10 for file {file.name}")
with open(self.GEOJSON_BASE_PATH / file, encoding="utf-8") as f:
geojson = json.load(f)
for feature in geojson["features"]:
@ -142,13 +139,19 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Transforming Census Data")
for fips_code in self.STATE_FIPS_CODES:
logger.debug("Transforming census data")
logger.debug("Transforming SHP files to GeoJSON")
for index, fips_code in enumerate(self.STATE_FIPS_CODES):
logger.debug(
f"Transforming FIPS {fips_code} to GeoJSON {index+1} of {len(self.STATE_FIPS_CODES)}"
)
self._transform_to_geojson(fips_code)
self._generate_tract_table()
def _load_into_state_csvs(self, fips_code: str) -> None:
"""Load state CSVS into individual CSV files
"""Load state CSVs into individual CSV files
Args:
fips_code (str): the FIPS code for the region of interest
@ -182,10 +185,9 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Writing national us.csv file")
logger.debug("Loading national US.csv")
if not self.NATIONAL_TRACT_CSV_PATH.is_file():
logger.info(f"Creating {self.NATIONAL_TRACT_CSV_PATH}")
with open(
self.NATIONAL_TRACT_CSV_PATH,
mode="w",
@ -211,22 +213,21 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Generating national geojson file")
logger.debug("Loading National GeoJson")
usa_df = gpd.GeoDataFrame()
for file_name in self.GEOJSON_BASE_PATH.rglob("*.json"):
logger.info(f"Ingesting {file_name}")
logger.debug(f"Adding national GeoJSON file {file_name.name}")
state_gdf = gpd.read_file(file_name)
usa_df = usa_df.append(state_gdf)
usa_df = usa_df.to_crs(
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
)
logger.info("Writing national geojson file")
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
logger.info("Census tract downloading complete")
logger.debug("Saving national GeoJSON file")
usa_df.to_file(self.NATIONAL_TRACT_JSON_PATH, driver="GeoJSON")
def load(self) -> None:
"""Create state CSVs, National CSV, and National GeoJSON
@ -234,8 +235,13 @@ class CensusETL(ExtractTransformLoad):
Returns:
None
"""
logger.info("Saving Census CSV")
logger.debug("Loading census data")
logger.debug("Loading individual state csv files")
for fips_code in self.TRACT_PER_STATE:
self._load_into_state_csvs(fips_code)
self._load_national_csv()
self._load_national_geojson()
logger.debug("Census data complete")

View file

@ -39,7 +39,6 @@ def get_state_fips_codes(data_path: Path) -> list:
"""Returns a list with state data"""
fips_csv_path = data_path / "census" / "csv" / "fips_states_2010.csv"
logger.info("Downloading fips from S3 repository")
unzip_file_from_url(
settings.AWS_JUSTICE40_DATASOURCES_URL + "/fips_states_2010.zip",
data_path / "tmp",
@ -97,7 +96,6 @@ def check_census_data_source(
# download from s3 if census_data_source is aws
if census_data_source == "aws":
logger.info("Fetching Census data from AWS S3")
unzip_file_from_url(
CENSUS_DATA_S3_URL,
DATA_PATH / "tmp",
@ -106,14 +104,13 @@ def check_census_data_source(
else:
# check if census data is found locally
if not os.path.isfile(census_data_path / "geojson" / "us.json"):
logger.info(
logger.error(
"No local census data found. Please use '-s aws` to fetch from AWS"
)
sys.exit()
def zip_census_data():
logger.info("Compressing census files to data/tmp folder")
CENSUS_DATA_PATH = settings.APP_ROOT / "data" / "census"
TMP_PATH = settings.APP_ROOT / "data" / "tmp"