User Story 2152 – Clean up logging (#2155)

Update logging messages and message consistency

This update changes the level of many log messages. Rather than logging everything at the info level, it now differentiates between debug, info, warning, and error messages. It also changes the default log level to info, which avoids much of the noise that previously filled the logs.

It also removes many extra log messages and adds decorators at the beginning of each pipeline run.
This commit is contained in:
Travis Newby 2023-02-08 13:08:55 -06:00 committed by GitHub
commit 03a6d3c660
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
63 changed files with 307 additions and 339 deletions

View file

@ -352,7 +352,7 @@ class CensusDecennialETL(ExtractTransformLoad):
dfs = []
dfs_vi = []
for island in self.ISLAND_TERRITORIES:
logger.info(
logger.debug(
f"Downloading data for state/territory {island['state_abbreviation']}"
)
for county in island["county_fips"]:
@ -369,7 +369,13 @@ class CensusDecennialETL(ExtractTransformLoad):
timeout=settings.REQUESTS_DEFAULT_TIMOUT,
)
df = json.loads(download.content)
try:
df = json.loads(download.content)
except ValueError as e:
logger.error(
f"Could not load content in census decennial ETL because {e}. Content is {download.content}."
)
# First row is the header
df = pd.DataFrame(df[1:], columns=df[0])
@ -393,8 +399,6 @@ class CensusDecennialETL(ExtractTransformLoad):
self.df_vi = pd.concat(dfs_vi)
def transform(self) -> None:
logger.info("Starting Census Decennial Transform")
# Rename All Fields
self.df.rename(columns=self.FIELD_NAME_XWALK, inplace=True)
self.df_vi.rename(columns=self.FIELD_NAME_XWALK, inplace=True)
@ -489,13 +493,11 @@ class CensusDecennialETL(ExtractTransformLoad):
# Reporting Missing Values
for col in self.df_all.columns:
missing_value_count = self.df_all[col].isnull().sum()
logger.info(
logger.debug(
f"There are {missing_value_count} missing values in the field {col} out of a total of {self.df_all.shape[0]} rows"
)
def load(self) -> None:
logger.info("Saving Census Decennial Data")
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)