Marshmallow Schemas for YAML files (#1497)

* Marshmallow Schemas for YAML files

* completed ticket

* passing tests

* lint

* click dep

* staging BE map

* PR review
This commit is contained in:
Jorge Escobar 2022-03-31 13:56:10 -04:00 committed by GitHub
commit 859177a877
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 387 additions and 183 deletions

View file

@ -3,6 +3,11 @@ import json
from numpy import float64
import numpy as np
import pandas as pd
from data_pipeline.content.schemas.download_schemas import (
CSVConfig,
CodebookConfig,
ExcelConfig,
)
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.score.etl_utils import floor_series, create_codebook
@ -460,7 +465,7 @@ class PostScoreETL(ExtractTransformLoad):
# open excel yaml config
excel_csv_config = load_yaml_dict_from_file(
self.CONTENT_CONFIG / "excel.yml"
self.CONTENT_CONFIG / "excel.yml", ExcelConfig
)
# Define Excel column widths
@ -535,7 +540,7 @@ class PostScoreETL(ExtractTransformLoad):
logger.info("Writing downloadable csv")
# open yaml config
downloadable_csv_config = load_yaml_dict_from_file(
self.CONTENT_CONFIG / "csv.yml"
self.CONTENT_CONFIG / "csv.yml", CSVConfig
)
downloadable_df = self._create_downloadable_data(
score_df=self.output_score_county_state_merged_df,
@ -557,7 +562,8 @@ class PostScoreETL(ExtractTransformLoad):
# load supplemental codebook yml
field_descriptions_for_codebook_config = load_yaml_dict_from_file(
self.CONTENT_CONFIG / "field_descriptions_for_codebook.yml"
self.CONTENT_CONFIG / "field_descriptions_for_codebook.yml",
CodebookConfig,
)
# create codebook

View file

@ -5,6 +5,9 @@ from importlib import reload
from pathlib import Path
import pandas.api.types as ptypes
import pandas.testing as pdt
from data_pipeline.content.schemas.download_schemas import (
CSVConfig,
)
from data_pipeline.etl.score import constants
from data_pipeline.utils import load_yaml_dict_from_file
@ -94,7 +97,7 @@ def test_create_downloadable_data(
etl, score_data_expected, downloadable_data_expected
):
downloadable_csv_config = load_yaml_dict_from_file(
etl.CONTENT_CONFIG / "csv.yml"
etl.CONTENT_CONFIG / "csv.yml", CSVConfig
)
output_downloadable_df_actual = etl._create_downloadable_data(
score_data_expected,