mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-24 18:44:20 -08:00
completed ticket
This commit is contained in:
parent
63ad12dbbb
commit
28e5020858
7 changed files with 91 additions and 43 deletions
|
@ -7,8 +7,7 @@ global_config:
|
||||||
excel_config:
|
excel_config:
|
||||||
default_column_width: 30
|
default_column_width: 30
|
||||||
sheets:
|
sheets:
|
||||||
- main:
|
- label: "Data"
|
||||||
label: "Data"
|
|
||||||
fields:
|
fields:
|
||||||
- score_name: GEOID10_TRACT
|
- score_name: GEOID10_TRACT
|
||||||
label: Census tract ID
|
label: Census tract ID
|
||||||
|
|
|
@ -1,34 +0,0 @@
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from enum import Enum
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
|
|
||||||
class FieldType(Enum):
|
|
||||||
STRING = "string"
|
|
||||||
INT64 = "int64"
|
|
||||||
BOOL = "bool"
|
|
||||||
FLOAT = "float"
|
|
||||||
PERCENTAGE = "percentage"
|
|
||||||
LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class CSVConfig:
|
|
||||||
@dataclass
|
|
||||||
class GlobalConfig:
|
|
||||||
@dataclass
|
|
||||||
class RoundingNum:
|
|
||||||
float: int
|
|
||||||
loss_rate_percentage: int
|
|
||||||
|
|
||||||
sort_by_label: str
|
|
||||||
rounding_num: RoundingNum
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Field:
|
|
||||||
score_name: str
|
|
||||||
label: str
|
|
||||||
format: FieldType = field(metadata={"by_value": True})
|
|
||||||
|
|
||||||
global_config: GlobalConfig
|
|
||||||
fields: List[Field]
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from enum import Enum
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
|
class FieldType(Enum):
    """Formats a field in a download config may declare.

    Each member's value is the literal string that identifies the format.
    """

    STRING = "string"
    INT64 = "int64"
    BOOL = "bool"
    FLOAT = "float"
    PERCENTAGE = "percentage"
    LOSS_RATE_PERCENTAGE = "loss_rate_percentage"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RoundingNum:
    """Integer rounding settings, one per numeric field format.

    NOTE(review): the values are presumably numbers of decimal places;
    confirm against the code that consumes this config.
    """

    # Setting for the "float" format. The attribute name mirrors the config
    # key; it is only an annotation, so the builtin ``float`` is not rebound.
    float: int
    # Setting for the "loss_rate_percentage" format.
    loss_rate_percentage: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Field:
    """A single field entry of a download config."""

    # Name of the score column, e.g. "GEOID10_TRACT".
    score_name: str
    # Human-readable label for the column, e.g. "Census tract ID".
    label: str
    # Declared format of the column. The "by_value" metadata is passed through
    # to the schema generator; presumably it makes the enum load by member
    # value rather than member name (confirm against marshmallow docs).
    format: FieldType = field(metadata={"by_value": True})
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CSVConfig:
    """Schema describing the structure of the CSV download config."""

    @dataclass
    class GlobalConfig:
        """Settings found under the ``global_config`` key."""

        sort_by_label: str
        rounding_num: RoundingNum

    # Nested class above must be defined first so the annotation resolves.
    global_config: GlobalConfig
    fields: List[Field]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ExcelConfig:
    """Schema describing the structure of the Excel download config."""

    @dataclass
    class GlobalConfig:
        """Settings found under the ``global_config`` key."""

        @dataclass
        class ExcelGlobalConfig:
            """Settings found under ``global_config.excel_config``."""

            default_column_width: int

        sort_by_label: str
        rounding_num: RoundingNum
        excel_config: ExcelGlobalConfig

    @dataclass
    class SheetItem:
        """One entry of the ``sheets`` list: a labelled group of fields."""

        label: str
        fields: List[Field]

    # Nested classes above must be defined first so the annotations resolve.
    global_config: GlobalConfig
    sheets: List[SheetItem]
|
|
@ -3,6 +3,10 @@ import json
|
||||||
from numpy import float64
|
from numpy import float64
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from data_pipeline.content.schemas.download_schemas import (
|
||||||
|
CSVConfig,
|
||||||
|
ExcelConfig,
|
||||||
|
)
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
from data_pipeline.etl.score.etl_utils import floor_series
|
from data_pipeline.etl.score.etl_utils import floor_series
|
||||||
|
@ -452,7 +456,7 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
# open excel yaml config
|
# open excel yaml config
|
||||||
excel_csv_config = load_yaml_dict_from_file(
|
excel_csv_config = load_yaml_dict_from_file(
|
||||||
self.CONTENT_CONFIG / "excel.yml"
|
self.CONTENT_CONFIG / "excel.yml", ExcelConfig
|
||||||
)
|
)
|
||||||
|
|
||||||
# Define Excel Columns Column Width
|
# Define Excel Columns Column Width
|
||||||
|
@ -525,7 +529,7 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
logger.info("Writing downloadable csv")
|
logger.info("Writing downloadable csv")
|
||||||
# open yaml config
|
# open yaml config
|
||||||
downloadable_csv_config = load_yaml_dict_from_file(
|
downloadable_csv_config = load_yaml_dict_from_file(
|
||||||
self.CONTENT_CONFIG / "csv.yml"
|
self.CONTENT_CONFIG / "csv.yml", CSVConfig
|
||||||
)
|
)
|
||||||
downloadable_df = self._create_downloadable_data(
|
downloadable_df = self._create_downloadable_data(
|
||||||
score_df=self.output_score_county_state_merged_df,
|
score_df=self.output_score_county_state_merged_df,
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import List, Type
|
from typing import List, Union
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
@ -14,7 +14,10 @@ import yaml
|
||||||
from marshmallow_dataclass import class_schema
|
from marshmallow_dataclass import class_schema
|
||||||
|
|
||||||
from data_pipeline.config import settings
|
from data_pipeline.config import settings
|
||||||
from data_pipeline.content.schemas.csv import CSVConfig
|
from data_pipeline.content.schemas.download_schemas import (
|
||||||
|
CSVConfig,
|
||||||
|
ExcelConfig,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
## zlib is not available on all systems
|
## zlib is not available on all systems
|
||||||
|
@ -330,7 +333,7 @@ def zip_directory(
|
||||||
|
|
||||||
|
|
||||||
def load_yaml_dict_from_file(
|
def load_yaml_dict_from_file(
|
||||||
yaml_file_path: Path, yaml_schema: Type[CSVConfig]
|
yaml_file_path: Path, schema_class: Union[CSVConfig, ExcelConfig]
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Load a YAML file specified in path into a Python dictionary.
|
"""Load a YAML file specified in path into a Python dictionary.
|
||||||
|
|
||||||
|
@ -343,7 +346,10 @@ def load_yaml_dict_from_file(
|
||||||
with open(yaml_file_path, encoding="UTF-8") as file:
|
with open(yaml_file_path, encoding="UTF-8") as file:
|
||||||
yaml_dict = yaml.load(file, Loader=yaml.FullLoader)
|
yaml_dict = yaml.load(file, Loader=yaml.FullLoader)
|
||||||
|
|
||||||
pass
|
# validate YAML
|
||||||
|
yaml_config_schema = class_schema(schema_class)
|
||||||
|
yaml_config_schema().load(yaml_dict)
|
||||||
|
|
||||||
return yaml_dict
|
return yaml_dict
|
||||||
|
|
||||||
|
|
||||||
|
|
17
data/data-pipeline/poetry.lock
generated
17
data/data-pipeline/poetry.lock
generated
|
@ -870,6 +870,17 @@ lint = ["pre-commit (>=1.18,<2.0)"]
|
||||||
tests = ["pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2,<3.8.0)"]
|
tests = ["pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2,<3.8.0)"]
|
||||||
union = ["typeguard"]
|
union = ["typeguard"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "marshmallow-enum"
|
||||||
|
version = "1.5.1"
|
||||||
|
description = "Enum field for Marshmallow"
|
||||||
|
category = "main"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
marshmallow = ">=2.0.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matplotlib"
|
name = "matplotlib"
|
||||||
version = "3.5.1"
|
version = "3.5.1"
|
||||||
|
@ -1847,7 +1858,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "1.1"
|
lock-version = "1.1"
|
||||||
python-versions = "^3.8"
|
python-versions = "^3.8"
|
||||||
content-hash = "4278ecd8aab0cf352d62961687c33f92cda5e6d246309b046484dd797edf4986"
|
content-hash = "2dda480b8f50a2414ff01d3c73663c28d64bc6df01f950367763c241a84b02f6"
|
||||||
|
|
||||||
[metadata.files]
|
[metadata.files]
|
||||||
appnope = [
|
appnope = [
|
||||||
|
@ -2371,6 +2382,10 @@ marshmallow-dataclass = [
|
||||||
{file = "marshmallow_dataclass-8.5.3-py3-none-any.whl", hash = "sha256:eefeff62ee975c64d293d2db9370e7e748a2ff83dcb5109416b75e087a2ac02e"},
|
{file = "marshmallow_dataclass-8.5.3-py3-none-any.whl", hash = "sha256:eefeff62ee975c64d293d2db9370e7e748a2ff83dcb5109416b75e087a2ac02e"},
|
||||||
{file = "marshmallow_dataclass-8.5.3.tar.gz", hash = "sha256:c0c5e1ea8d0e557b6fa00343799a9a9e60757b948fb096076beb6aa76bd68d30"},
|
{file = "marshmallow_dataclass-8.5.3.tar.gz", hash = "sha256:c0c5e1ea8d0e557b6fa00343799a9a9e60757b948fb096076beb6aa76bd68d30"},
|
||||||
]
|
]
|
||||||
|
marshmallow-enum = [
|
||||||
|
{file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"},
|
||||||
|
{file = "marshmallow_enum-1.5.1-py2.py3-none-any.whl", hash = "sha256:57161ab3dbfde4f57adeb12090f39592e992b9c86d206d02f6bd03ebec60f072"},
|
||||||
|
]
|
||||||
matplotlib = [
|
matplotlib = [
|
||||||
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:456cc8334f6d1124e8ff856b42d2cc1c84335375a16448189999496549f7182b"},
|
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:456cc8334f6d1124e8ff856b42d2cc1c84335375a16448189999496549f7182b"},
|
||||||
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8a77906dc2ef9b67407cec0bdbf08e3971141e535db888974a915be5e1e3efc6"},
|
{file = "matplotlib-3.5.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8a77906dc2ef9b67407cec0bdbf08e3971141e535db888974a915be5e1e3efc6"},
|
||||||
|
|
|
@ -25,6 +25,7 @@ ipython = "^7.31.1"
|
||||||
jupyter = "^1.0.0"
|
jupyter = "^1.0.0"
|
||||||
jupyter-contrib-nbextensions = "^0.5.1"
|
jupyter-contrib-nbextensions = "^0.5.1"
|
||||||
marshmallow-dataclass = "^8.5.3"
|
marshmallow-dataclass = "^8.5.3"
|
||||||
|
marshmallow-enum = "^1.5.1"
|
||||||
matplotlib = "^3.4.2"
|
matplotlib = "^3.4.2"
|
||||||
numpy = "^1.22.1"
|
numpy = "^1.22.1"
|
||||||
pandas = "^1.2.5"
|
pandas = "^1.2.5"
|
||||||
|
|
Loading…
Add table
Reference in a new issue