Excel formatting and tract id ordering (#1172)

* excel formatting and tract id ordering

* lint

* lint try $2

* lint 3

* addressed comments

* typo
This commit is contained in:
Jorge Escobar 2022-02-04 18:35:45 -05:00 committed by GitHub
parent 6a00b29f5d
commit f5fe8d90e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -271,7 +271,10 @@ class PostScoreETL(ExtractTransformLoad):
number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
)
return df
# sort by tract id
df_sorted = df.sort_values(self.GEOID_TRACT_FIELD_NAME)
return df_sorted
def transform(self) -> None:
logger.info("Transforming data sources for Score + County CSVs")
@ -307,6 +310,41 @@ class PostScoreETL(ExtractTransformLoad):
encoding="utf-8-sig", # windows compat https://stackoverflow.com/a/43684587
)
def _load_excel_from_df(
self, excel_df: pd.DataFrame, excel_path: Path
) -> None:
# Define Excel Columns Column Width
num_excel_cols_width = 30
# Create a Pandas Excel writer using XlsxWriter as the engine.
with pd.ExcelWriter( # pylint: disable=abstract-class-instantiated
# (https://github.com/PyCQA/pylint/issues/3060)
excel_path,
engine="xlsxwriter",
) as writer:
# Convert the dataframe to an XlsxWriter Excel object. We also turn off the
# index column at the left of the output dataframe.
excel_df.to_excel(writer, sheet_name="Data", index=False)
# Get the xlsxwriter workbook and worksheet objects.
workbook = writer.book
worksheet = writer.sheets["Data"]
# set header format
header_format = workbook.add_format(
{"bold": True, "text_wrap": True, "valign": "bottom"}
)
# write headers
for col_num, value in enumerate(excel_df.columns.array):
worksheet.write(0, col_num, value, header_format)
num_cols = len(excel_df.columns)
worksheet.set_column(0, num_cols - 1, num_excel_cols_width)
writer.save()
def _load_tile_csv(
self, score_tiles_df: pd.DataFrame, tile_score_path: Path
) -> None:
@ -334,7 +372,7 @@ class PostScoreETL(ExtractTransformLoad):
)
logger.info("Writing downloadable excel")
downloadable_df_copy.to_excel(excel_path, index=False)
self._load_excel_from_df(downloadable_df_copy, excel_path)
logger.info("Writing downloadable csv")
downloadable_df_copy[self.GEOID_TRACT_FIELD_NAME] = (