mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Excel formatting and tract id ordering (#1172)
* excel formatting and tract id ordering * lint * lint try $2 * lint 3 * addressed comments * typo
This commit is contained in:
parent
6a00b29f5d
commit
f5fe8d90e2
1 changed files with 40 additions and 2 deletions
|
@ -271,7 +271,10 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
|
number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS,
|
||||||
)
|
)
|
||||||
|
|
||||||
return df
|
# sort by tract id
|
||||||
|
df_sorted = df.sort_values(self.GEOID_TRACT_FIELD_NAME)
|
||||||
|
|
||||||
|
return df_sorted
|
||||||
|
|
||||||
def transform(self) -> None:
|
def transform(self) -> None:
|
||||||
logger.info("Transforming data sources for Score + County CSVs")
|
logger.info("Transforming data sources for Score + County CSVs")
|
||||||
|
@ -307,6 +310,41 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
encoding="utf-8-sig", # windows compat https://stackoverflow.com/a/43684587
|
encoding="utf-8-sig", # windows compat https://stackoverflow.com/a/43684587
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _load_excel_from_df(
|
||||||
|
self, excel_df: pd.DataFrame, excel_path: Path
|
||||||
|
) -> None:
|
||||||
|
# Define Excel Columns Column Width
|
||||||
|
num_excel_cols_width = 30
|
||||||
|
|
||||||
|
# Create a Pandas Excel writer using XlsxWriter as the engine.
|
||||||
|
with pd.ExcelWriter( # pylint: disable=abstract-class-instantiated
|
||||||
|
# (https://github.com/PyCQA/pylint/issues/3060)
|
||||||
|
excel_path,
|
||||||
|
engine="xlsxwriter",
|
||||||
|
) as writer:
|
||||||
|
|
||||||
|
# Convert the dataframe to an XlsxWriter Excel object. We also turn off the
|
||||||
|
# index column at the left of the output dataframe.
|
||||||
|
excel_df.to_excel(writer, sheet_name="Data", index=False)
|
||||||
|
|
||||||
|
# Get the xlsxwriter workbook and worksheet objects.
|
||||||
|
workbook = writer.book
|
||||||
|
worksheet = writer.sheets["Data"]
|
||||||
|
|
||||||
|
# set header format
|
||||||
|
header_format = workbook.add_format(
|
||||||
|
{"bold": True, "text_wrap": True, "valign": "bottom"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# write headers
|
||||||
|
for col_num, value in enumerate(excel_df.columns.array):
|
||||||
|
worksheet.write(0, col_num, value, header_format)
|
||||||
|
|
||||||
|
num_cols = len(excel_df.columns)
|
||||||
|
worksheet.set_column(0, num_cols - 1, num_excel_cols_width)
|
||||||
|
|
||||||
|
writer.save()
|
||||||
|
|
||||||
def _load_tile_csv(
|
def _load_tile_csv(
|
||||||
self, score_tiles_df: pd.DataFrame, tile_score_path: Path
|
self, score_tiles_df: pd.DataFrame, tile_score_path: Path
|
||||||
) -> None:
|
) -> None:
|
||||||
|
@ -334,7 +372,7 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Writing downloadable excel")
|
logger.info("Writing downloadable excel")
|
||||||
downloadable_df_copy.to_excel(excel_path, index=False)
|
self._load_excel_from_df(downloadable_df_copy, excel_path)
|
||||||
|
|
||||||
logger.info("Writing downloadable csv")
|
logger.info("Writing downloadable csv")
|
||||||
downloadable_df_copy[self.GEOID_TRACT_FIELD_NAME] = (
|
downloadable_df_copy[self.GEOID_TRACT_FIELD_NAME] = (
|
||||||
|
|
Loading…
Add table
Reference in a new issue