Excel formatting and tract id ordering (#1172)

* excel formatting and tract id ordering * lint * lint try $2 * lint 3 * addressed comments * typo
2025-09-30 04:33:19 -07:00 · 2022-02-04 18:35:45 -05:00 · 2022-02-04 18:35:45 -05:00 · f5fe8d90e2
commit f5fe8d90e2
parent 6a00b29f5d
1 changed files with 40 additions and 2 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py
@ -271,7 +271,10 @@ class PostScoreETL(ExtractTransformLoad):
                    number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
                )
-        return df
+        # sort by tract id
        df_sorted = df.sort_values(self.GEOID_TRACT_FIELD_NAME)
        return df_sorted
    def transform(self) -> None:
        logger.info("Transforming data sources for Score + County CSVs")
@ -307,6 +310,41 @@ class PostScoreETL(ExtractTransformLoad):
            encoding="utf-8-sig",  # windows compat https://stackoverflow.com/a/43684587
        )
    def _load_excel_from_df(
        self, excel_df: pd.DataFrame, excel_path: Path
    ) -> None:
        # Define Excel Columns Column Width
        num_excel_cols_width = 30
        # Create a Pandas Excel writer using XlsxWriter as the engine.
        with pd.ExcelWriter(  # pylint: disable=abstract-class-instantiated
            # (https://github.com/PyCQA/pylint/issues/3060)
            excel_path,
            engine="xlsxwriter",
        ) as writer:
            # Convert the dataframe to an XlsxWriter Excel object. We also turn off the
            # index column at the left of the output dataframe.
            excel_df.to_excel(writer, sheet_name="Data", index=False)
            # Get the xlsxwriter workbook and worksheet objects.
            workbook = writer.book
            worksheet = writer.sheets["Data"]
            # set header format
            header_format = workbook.add_format(
                {"bold": True, "text_wrap": True, "valign": "bottom"}
            )
            # write headers
            for col_num, value in enumerate(excel_df.columns.array):
                worksheet.write(0, col_num, value, header_format)
            num_cols = len(excel_df.columns)
            worksheet.set_column(0, num_cols - 1, num_excel_cols_width)
            writer.save()
    def _load_tile_csv(
        self, score_tiles_df: pd.DataFrame, tile_score_path: Path
    ) -> None:
@ -334,7 +372,7 @@ class PostScoreETL(ExtractTransformLoad):
        )
        logger.info("Writing downloadable excel")
-        downloadable_df_copy.to_excel(excel_path, index=False)
+        self._load_excel_from_df(downloadable_df_copy, excel_path)
        logger.info("Writing downloadable csv")
        downloadable_df_copy[self.GEOID_TRACT_FIELD_NAME] = (