completed ticket

This commit is contained in:
Jorge Escobar 2022-03-28 18:43:32 -04:00
parent 63ad12dbbb
commit 28e5020858
7 changed files with 91 additions and 43 deletions

View file

@ -7,8 +7,7 @@ global_config:
excel_config:
default_column_width: 30
sheets:
- main:
label: "Data"
- label: "Data"
fields:
- score_name: GEOID10_TRACT
label: Census tract ID

View file

@ -1,34 +0,0 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import List
class FieldType(Enum):
    """Closed set of format names a configured field may declare.

    Each member's value is the literal string expected in the YAML
    configuration, so ``FieldType("percentage")`` resolves a raw config
    value to its member.
    """

    # Plain value kinds.
    STRING = "string"
    INT64 = "int64"
    BOOL = "bool"
    FLOAT = "float"
    # Percentage-style kinds; presumably rendered/rounded differently from
    # plain floats -- confirm against the code that consumes ``format``.
    PERCENTAGE = "percentage"
    LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
@dataclass
class CSVConfig:
    """Typed shape of the downloadable-CSV YAML configuration.

    The nested dataclasses mirror the nesting of the YAML document: a
    ``global_config`` mapping plus a list of ``fields`` entries.
    """

    @dataclass
    class GlobalConfig:
        """Settings that apply to the whole download."""

        @dataclass
        class RoundingNum:
            """Per-format rounding settings.

            NOTE(review): values are presumably decimal-place counts keyed
            by field format -- confirm against the consumer.
            """

            # Attribute names are YAML keys, so ``float`` cannot be renamed
            # even though it reads like the builtin.
            float: int
            loss_rate_percentage: int

        sort_by_label: str
        rounding_num: RoundingNum

    @dataclass
    class Field:
        """One configured output column."""

        score_name: str
        label: str
        # ``by_value`` metadata is passed through to the generated schema
        # field so the enum is matched on its string value rather than on
        # its member name.
        format: FieldType = field(metadata={"by_value": True})

    global_config: GlobalConfig
    fields: List[Field]

View file

@ -0,0 +1,57 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import List
class FieldType(Enum):
    """Closed set of format names a configured field may declare.

    Each member's value is the literal string expected in the YAML
    configuration, so ``FieldType("percentage")`` resolves a raw config
    value to its member.
    """

    # Plain value kinds.
    STRING = "string"
    INT64 = "int64"
    BOOL = "bool"
    FLOAT = "float"
    # Percentage-style kinds; presumably rendered/rounded differently from
    # plain floats -- confirm against the code that consumes ``format``.
    PERCENTAGE = "percentage"
    LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
@dataclass
class RoundingNum:
    """Per-format rounding settings shared by the download configs.

    NOTE(review): values are presumably decimal-place counts keyed by
    field format -- confirm against the code that consumes them.
    """

    # Attribute names are YAML keys, so ``float`` cannot be renamed even
    # though it reads like the builtin.
    float: int
    loss_rate_percentage: int
@dataclass
class Field:
    """One configured output column, shared by the CSV and Excel configs."""

    score_name: str
    label: str
    # ``by_value`` metadata is passed through to the generated schema field
    # so the enum is matched on its string value rather than on its member
    # name.
    format: FieldType = field(metadata={"by_value": True})
@dataclass
class CSVConfig:
    """Typed shape of ``csv.yml``, the downloadable-CSV configuration.

    The nested dataclass mirrors the nesting of the YAML document: a
    ``global_config`` mapping plus a flat list of ``fields`` entries.
    """

    @dataclass
    class GlobalConfig:
        """Settings that apply to the whole CSV download."""

        sort_by_label: str
        rounding_num: RoundingNum

    global_config: GlobalConfig
    fields: List[Field]
@dataclass
class ExcelConfig:
    """Typed shape of ``excel.yml``, the downloadable-Excel configuration.

    Unlike the CSV config, fields are grouped per sheet: the document holds
    a ``global_config`` mapping and a list of ``sheets``, each carrying its
    own ``label`` and ``fields``.
    """

    @dataclass
    class GlobalConfig:
        """Settings that apply to the whole workbook."""

        @dataclass
        class ExcelGlobalConfig:
            """Excel-only settings nested under ``excel_config``."""

            default_column_width: int

        sort_by_label: str
        rounding_num: RoundingNum
        excel_config: ExcelGlobalConfig

    @dataclass
    class SheetItem:
        """One worksheet: its label and the columns it contains."""

        label: str
        fields: List[Field]

    global_config: GlobalConfig
    sheets: List[SheetItem]

View file

@ -3,6 +3,10 @@ import json
from numpy import float64
import numpy as np
import pandas as pd
from data_pipeline.content.schemas.download_schemas import (
CSVConfig,
ExcelConfig,
)
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.score.etl_utils import floor_series
@ -452,7 +456,7 @@ class PostScoreETL(ExtractTransformLoad):
# open excel yaml config
excel_csv_config = load_yaml_dict_from_file(
self.CONTENT_CONFIG / "excel.yml"
self.CONTENT_CONFIG / "excel.yml", ExcelConfig
)
# Define Excel column widths
@ -525,7 +529,7 @@ class PostScoreETL(ExtractTransformLoad):
logger.info("Writing downloadable csv")
# open yaml config
downloadable_csv_config = load_yaml_dict_from_file(
self.CONTENT_CONFIG / "csv.yml"
self.CONTENT_CONFIG / "csv.yml", CSVConfig
)
downloadable_df = self._create_downloadable_data(
score_df=self.output_score_county_state_merged_df,

View file

@ -1,4 +1,4 @@
from typing import List, Type
from typing import List, Union
import datetime
import json
import logging
@ -14,7 +14,10 @@ import yaml
from marshmallow_dataclass import class_schema
from data_pipeline.config import settings
from data_pipeline.content.schemas.csv import CSVConfig
from data_pipeline.content.schemas.download_schemas import (
CSVConfig,
ExcelConfig,
)
## zlib is not available on all systems
@ -330,7 +333,7 @@ def zip_directory(
def load_yaml_dict_from_file(
yaml_file_path: Path, yaml_schema: Type[CSVConfig]
yaml_file_path: Path, schema_class: Union[CSVConfig, ExcelConfig]
) -> dict:
"""Load a YAML file specified in path into a Python dictionary.
@ -343,7 +346,10 @@ def load_yaml_dict_from_file(
with open(yaml_file_path, encoding="UTF-8") as file:
yaml_dict = yaml.load(file, Loader=yaml.FullLoader)
pass
# validate YAML
yaml_config_schema = class_schema(schema_class)
yaml_config_schema().load(yaml_dict)
return yaml_dict

View file

@ -870,6 +870,17 @@ lint = ["pre-commit (>=1.18,<2.0)"]
tests = ["pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2,<3.8.0)"]
union = ["typeguard"]
[[package]]
name = "marshmallow-enum"
version = "1.5.1"
description = "Enum field for Marshmallow"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
marshmallow = ">=2.0.0"
[[package]]
name = "matplotlib"
version = "3.5.1"
@ -1847,7 +1858,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "4278ecd8aab0cf352d62961687c33f92cda5e6d246309b046484dd797edf4986"
content-hash = "2dda480b8f50a2414ff01d3c73663c28d64bc6df01f950367763c241a84b02f6"
[metadata.files]
appnope = [
@ -2371,6 +2382,10 @@ marshmallow-dataclass = [
{file = "marshmallow_dataclass-8.5.3-py3-none-any.whl", hash = "sha256:eefeff62ee975c64d293d2db9370e7e748a2ff83dcb5109416b75e087a2ac02e"},
{file = "marshmallow_dataclass-8.5.3.tar.gz", hash = "sha256:c0c5e1ea8d0e557b6fa00343799a9a9e60757b948fb096076beb6aa76bd68d30"},
]
marshmallow-enum = [
{file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"},
{file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"},
]
matplotlib = [
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:456cc8334f6d1124e8ff856b42d2cc1c84335375a16448189999496549f7182b"},
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8a77906dc2ef9b67407cec0bdbf08e3971141e535db888974a915be5e1e3efc6"},

View file

@ -25,6 +25,7 @@ ipython = "^7.31.1"
jupyter = "^1.0.0"
jupyter-contrib-nbextensions = "^0.5.1"
marshmallow-dataclass = "^8.5.3"
marshmallow-enum = "^1.5.1"
matplotlib = "^3.4.2"
numpy = "^1.22.1"
pandas = "^1.2.5"