diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index a745e66b..8f281897 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -271,7 +271,10 @@ class PostScoreETL(ExtractTransformLoad): number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS, ) - return df + # sort by tract id + df_sorted = df.sort_values(self.GEOID_TRACT_FIELD_NAME) + + return df_sorted def transform(self) -> None: logger.info("Transforming data sources for Score + County CSVs") @@ -307,6 +310,41 @@ class PostScoreETL(ExtractTransformLoad): encoding="utf-8-sig", # windows compat https://stackoverflow.com/a/43684587 ) + def _load_excel_from_df( + self, excel_df: pd.DataFrame, excel_path: Path + ) -> None: + # Define Excel Columns Column Width + num_excel_cols_width = 30 + + # Create a Pandas Excel writer using XlsxWriter as the engine. + with pd.ExcelWriter( # pylint: disable=abstract-class-instantiated + # (https://github.com/PyCQA/pylint/issues/3060) + excel_path, + engine="xlsxwriter", + ) as writer: + + # Convert the dataframe to an XlsxWriter Excel object. We also turn off the + # index column at the left of the output dataframe. + excel_df.to_excel(writer, sheet_name="Data", index=False) + + # Get the xlsxwriter workbook and worksheet objects. + workbook = writer.book + worksheet = writer.sheets["Data"] + + # set header format + header_format = workbook.add_format( + {"bold": True, "text_wrap": True, "valign": "bottom"} + ) + + # write headers + for col_num, value in enumerate(excel_df.columns.array): + worksheet.write(0, col_num, value, header_format) + + num_cols = len(excel_df.columns) + worksheet.set_column(0, num_cols - 1, num_excel_cols_width) + + writer.save() + def _load_tile_csv( self, score_tiles_df: pd.DataFrame, tile_score_path: Path ) -> None: @@ -334,7 +372,7 @@ class PostScoreETL(ExtractTransformLoad): ) logger.info("Writing downloadable excel") - downloadable_df_copy.to_excel(excel_path, index=False) + self._load_excel_from_df(downloadable_df_copy, excel_path) logger.info("Writing downloadable csv") downloadable_df_copy[self.GEOID_TRACT_FIELD_NAME] = (