mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-24 10:34:18 -08:00
completed ticket
This commit is contained in:
parent
63ad12dbbb
commit
28e5020858
7 changed files with 91 additions and 43 deletions
|
@ -7,8 +7,7 @@ global_config:
|
|||
excel_config:
|
||||
default_column_width: 30
|
||||
sheets:
|
||||
- main:
|
||||
label: "Data"
|
||||
- label: "Data"
|
||||
fields:
|
||||
- score_name: GEOID10_TRACT
|
||||
label: Census tract ID
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import List
|
||||
|
||||
|
||||
class FieldType(Enum):
|
||||
STRING = "string"
|
||||
INT64 = "int64"
|
||||
BOOL = "bool"
|
||||
FLOAT = "float"
|
||||
PERCENTAGE = "percentage"
|
||||
LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
|
||||
|
||||
|
||||
@dataclass
|
||||
class CSVConfig:
|
||||
@dataclass
|
||||
class GlobalConfig:
|
||||
@dataclass
|
||||
class RoundingNum:
|
||||
float: int
|
||||
loss_rate_percentage: int
|
||||
|
||||
sort_by_label: str
|
||||
rounding_num: RoundingNum
|
||||
|
||||
@dataclass
|
||||
class Field:
|
||||
score_name: str
|
||||
label: str
|
||||
format: FieldType = field(metadata={"by_value": True})
|
||||
|
||||
global_config: GlobalConfig
|
||||
fields: List[Field]
|
|
@ -0,0 +1,57 @@
|
|||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import List
|
||||
|
||||
|
||||
class FieldType(Enum):
|
||||
STRING = "string"
|
||||
INT64 = "int64"
|
||||
BOOL = "bool"
|
||||
FLOAT = "float"
|
||||
PERCENTAGE = "percentage"
|
||||
LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
|
||||
|
||||
|
||||
@dataclass
|
||||
class RoundingNum:
|
||||
float: int
|
||||
loss_rate_percentage: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class Field:
|
||||
score_name: str
|
||||
label: str
|
||||
format: FieldType = field(metadata={"by_value": True})
|
||||
|
||||
|
||||
@dataclass
|
||||
class CSVConfig:
|
||||
@dataclass
|
||||
class GlobalConfig:
|
||||
sort_by_label: str
|
||||
rounding_num: RoundingNum
|
||||
|
||||
global_config: GlobalConfig
|
||||
fields: List[Field]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExcelConfig:
|
||||
@dataclass
|
||||
class GlobalConfig:
|
||||
@dataclass
|
||||
class ExcelGlobalConfig:
|
||||
default_column_width: int
|
||||
|
||||
sort_by_label: str
|
||||
rounding_num: RoundingNum
|
||||
excel_config: ExcelGlobalConfig
|
||||
|
||||
@dataclass
|
||||
class SheetItem:
|
||||
label: str
|
||||
fields: List[Field]
|
||||
|
||||
global_config: GlobalConfig
|
||||
sheets: List[SheetItem]
|
|
@ -3,6 +3,10 @@ import json
|
|||
from numpy import float64
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from data_pipeline.content.schemas.download_schemas import (
|
||||
CSVConfig,
|
||||
ExcelConfig,
|
||||
)
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad
|
||||
from data_pipeline.etl.score.etl_utils import floor_series
|
||||
|
@ -452,7 +456,7 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
|
||||
# open excel yaml config
|
||||
excel_csv_config = load_yaml_dict_from_file(
|
||||
self.CONTENT_CONFIG / "excel.yml"
|
||||
self.CONTENT_CONFIG / "excel.yml", ExcelConfig
|
||||
)
|
||||
|
||||
# Define Excel Columns Column Width
|
||||
|
@ -525,7 +529,7 @@ class PostScoreETL(ExtractTransformLoad):
|
|||
logger.info("Writing downloadable csv")
|
||||
# open yaml config
|
||||
downloadable_csv_config = load_yaml_dict_from_file(
|
||||
self.CONTENT_CONFIG / "csv.yml"
|
||||
self.CONTENT_CONFIG / "csv.yml", CSVConfig
|
||||
)
|
||||
downloadable_df = self._create_downloadable_data(
|
||||
score_df=self.output_score_county_state_merged_df,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import List, Type
|
||||
from typing import List, Union
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
|
@ -14,7 +14,10 @@ import yaml
|
|||
from marshmallow_dataclass import class_schema
|
||||
|
||||
from data_pipeline.config import settings
|
||||
from data_pipeline.content.schemas.csv import CSVConfig
|
||||
from data_pipeline.content.schemas.download_schemas import (
|
||||
CSVConfig,
|
||||
ExcelConfig,
|
||||
)
|
||||
|
||||
|
||||
## zlib is not available on all systems
|
||||
|
@ -330,7 +333,7 @@ def zip_directory(
|
|||
|
||||
|
||||
def load_yaml_dict_from_file(
|
||||
yaml_file_path: Path, yaml_schema: Type[CSVConfig]
|
||||
yaml_file_path: Path, schema_class: Union[CSVConfig, ExcelConfig]
|
||||
) -> dict:
|
||||
"""Load a YAML file specified in path into a Python dictionary.
|
||||
|
||||
|
@ -343,7 +346,10 @@ def load_yaml_dict_from_file(
|
|||
with open(yaml_file_path, encoding="UTF-8") as file:
|
||||
yaml_dict = yaml.load(file, Loader=yaml.FullLoader)
|
||||
|
||||
pass
|
||||
# validate YAML
|
||||
yaml_config_schema = class_schema(schema_class)
|
||||
yaml_config_schema().load(yaml_dict)
|
||||
|
||||
return yaml_dict
|
||||
|
||||
|
||||
|
|
17
data/data-pipeline/poetry.lock
generated
17
data/data-pipeline/poetry.lock
generated
|
@ -870,6 +870,17 @@ lint = ["pre-commit (>=1.18,<2.0)"]
|
|||
tests = ["pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2,<3.8.0)"]
|
||||
union = ["typeguard"]
|
||||
|
||||
[[package]]
|
||||
name = "marshmallow-enum"
|
||||
version = "1.5.1"
|
||||
description = "Enum field for Marshmallow"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[package.dependencies]
|
||||
marshmallow = ">=2.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "matplotlib"
|
||||
version = "3.5.1"
|
||||
|
@ -1847,7 +1858,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "4278ecd8aab0cf352d62961687c33f92cda5e6d246309b046484dd797edf4986"
|
||||
content-hash = "2dda480b8f50a2414ff01d3c73663c28d64bc6df01f950367763c241a84b02f6"
|
||||
|
||||
[metadata.files]
|
||||
appnope = [
|
||||
|
@ -2371,6 +2382,10 @@ marshmallow-dataclass = [
|
|||
{file = "marshmallow_dataclass-8.5.3-py3-none-any.whl", hash = "sha256:eefeff62ee975c64d293d2db9370e7e748a2ff83dcb5109416b75e087a2ac02e"},
|
||||
{file = "marshmallow_dataclass-8.5.3.tar.gz", hash = "sha256:c0c5e1ea8d0e557b6fa00343799a9a9e60757b948fb096076beb6aa76bd68d30"},
|
||||
]
|
||||
marshmallow-enum = [
|
||||
{file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"},
|
||||
{file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"},
|
||||
]
|
||||
matplotlib = [
|
||||
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:456cc8334f6d1124e8ff856b42d2cc1c84335375a16448189999496549f7182b"},
|
||||
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8a77906dc2ef9b67407cec0bdbf08e3971141e535db888974a915be5e1e3efc6"},
|
||||
|
|
|
@ -25,6 +25,7 @@ ipython = "^7.31.1"
|
|||
jupyter = "^1.0.0"
|
||||
jupyter-contrib-nbextensions = "^0.5.1"
|
||||
marshmallow-dataclass = "^8.5.3"
|
||||
marshmallow-enum = "^1.5.1"
|
||||
matplotlib = "^3.4.2"
|
||||
numpy = "^1.22.1"
|
||||
pandas = "^1.2.5"
|
||||
|
|
Loading…
Add table
Reference in a new issue