User Story 2152 – Clean up logging (#2155)

Update log messages and improve message consistency

This update changes the level of many log messages. Rather than logging everything at the info level, it differentiates between debug, info, warning, and error messages. It also changes the default log level to info, which removes much of the noise previously in the logs.
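
As an illustration of the pattern (a minimal sketch only; the actual get_module_logger helper in this repository may be implemented differently), a module-level logger factory that defaults to the info level could look like:

import logging
import sys


def get_module_logger(module_name: str) -> logging.Logger:
    """Return a logger for the given module, defaulting to the info level."""
    logger = logging.getLogger(module_name)
    if not logger.handlers:
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(
            logging.Formatter("%(asctime)s [%(name)s] %(levelname)-8s %(message)s")
        )
        logger.addHandler(handler)
    # Defaulting to INFO suppresses routine debug chatter such as
    # "Adding Score A"; lower the level to DEBUG when diagnosing a run.
    logger.setLevel(logging.INFO)
    return logger


logger = get_module_logger(__name__)
logger.debug("Adding Score A")        # hidden at the default info level
logger.info("Score generation done")  # shown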

It also removes many redundant log messages and adds decorators at the beginning of each pipeline run.
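
Here "decorators" is taken to mean banner-style separator lines in the log output rather than Python decorators; that reading is an assumption, since the helper itself does not appear in the hunks below. The idea is roughly:

import logging

logger = logging.getLogger(__name__)


def log_run_start(pipeline_name: str) -> None:
    # Hypothetical helper (name and format are illustrative only): logs a
    # visual banner so the start of each pipeline run is easy to find.
    logger.info("=" * 64)
    logger.info("Starting pipeline run: %s", pipeline_name)
    logger.info("=" * 64)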

Travis Newby, 2023-02-08 13:08:55 -06:00 (committed by GitHub)
commit 03a6d3c660
63 changed files with 307 additions and 339 deletions

@@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
 class ScoreA(Score):
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score A")
+        logger.debug("Adding Score A")
         self.df[field_names.SCORE_A] = self.df[
             [
                 field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,

@@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
 class ScoreB(Score):
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score B")
+        logger.debug("Adding Score B")
         self.df[field_names.SCORE_B] = (
             self.df[
                 field_names.POVERTY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX

@@ -72,7 +72,7 @@ class ScoreC(Score):
     # "CalEnviroScreen for the US" score
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score C")
+        logger.debug("Adding Score C")
         # Average all the percentile values in each bucket into a single score for each of the four buckets.
         for bucket in self.BUCKETS:
             self.df[bucket.name] = self.df[bucket.fields].mean(axis=1)

@@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
 class ScoreD(Score):
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Scores D and E")
+        logger.debug("Adding Scores D and E")
         fields_to_use_in_score = [
             field_names.UNEMPLOYMENT_FIELD,
             field_names.LINGUISTIC_ISO_FIELD,

@@ -10,7 +10,7 @@ class ScoreF(Score):
     # TODO Make variables and constants clearer (meaning and type)
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score F")
+        logger.debug("Adding Score F")
         ami_and_high_school_field = "Low AMI, Low HS graduation"
         meets_socio_field = "Meets socioeconomic criteria"
         meets_burden_field = "Meets burden criteria"

@@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
 class ScoreG(Score):
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score G")
+        logger.debug("Adding Score G")
         high_school_cutoff_threshold = 0.05

@@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
 class ScoreH(Score):
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score H")
+        logger.debug("Adding Score H")
         high_school_cutoff_threshold = 0.06

@@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
 class ScoreI(Score):
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score I")
+        logger.debug("Adding Score I")
         high_school_cutoff_threshold = 0.05

@@ -8,7 +8,7 @@ logger = get_module_logger(__name__)
 class ScoreK(Score):
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score K")
+        logger.debug("Adding Score K")
         high_school_cutoff_threshold = 0.06

@@ -52,7 +52,7 @@ class ScoreL(Score):
             [column_from_island_areas, column_from_decennial_census]
         ].mean(axis=1, skipna=True)
-        logger.info(
+        logger.debug(
             f"Combined field `{combined_column_name}` has "
             f"{df[combined_column_name].isnull().sum()} "
             f"({df[combined_column_name].isnull().sum() * 100 / len(df):.2f}%) "
@@ -64,7 +64,7 @@ class ScoreL(Score):
             a=df[combined_column_name], q=threshold_cutoff_for_island_areas
         )
-        logger.info(
+        logger.debug(
             f"For combined field `{combined_column_name}`, "
             f"the {threshold_cutoff_for_island_areas*100:.0f} percentile cutoff is a "
             f"raw value of {raw_threshold:.3f}."
@@ -627,7 +627,7 @@ class ScoreL(Score):
             .sum()
         )
-        logger.info(
+        logger.debug(
             f"For workforce criteria in island areas, "
             f"{workforce_combined_criteria_for_island_areas.sum()} ("
             f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
@@ -642,7 +642,7 @@ class ScoreL(Score):
         )
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score L")
+        logger.debug("Adding Score L")
         self.df[field_names.THRESHOLD_COUNT] = 0
         self.df[field_names.FPL_200_SERIES] = self._create_low_income_threshold(

@@ -768,7 +768,7 @@ class ScoreM(Score):
             .sum()
         )
-        logger.info(
+        logger.debug(
             f"For workforce criteria in island areas, "
             f"{workforce_combined_criteria_for_island_areas.sum()} ("
             f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
@@ -812,7 +812,7 @@ class ScoreM(Score):
         )
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score M")
+        logger.debug("Adding Score M")
         self.df[field_names.THRESHOLD_COUNT] = 0

@@ -889,7 +889,7 @@ class ScoreNarwhal(Score):
             .sum()
         )
-        logger.info(
+        logger.debug(
             f"For workforce criteria in island areas, "
             f"{workforce_combined_criteria_for_island_areas.sum()} ("
             f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
@@ -947,7 +947,7 @@ class ScoreNarwhal(Score):
         We calculate "donut holes" after the initial score generation
         """
-        logger.info("Marking donut hole tracts")
+        logger.debug("Marking donut hole tracts")
         # This is the boolean we pass to the front end for the donut-hole-specific
         # low income criterion
@@ -1025,7 +1025,7 @@ class ScoreNarwhal(Score):
         )
     def add_columns(self) -> pd.DataFrame:
-        logger.info("Adding Score Narhwal")
+        logger.debug("Adding Score Narhwal")
         self.df[field_names.THRESHOLD_COUNT] = 0
         self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (