docstrings

This commit is contained in:
Jorge Escobar 2022-06-27 11:19:11 -04:00
parent 158c8bffb2
commit 7415a15bc0
2 changed files with 36 additions and 9 deletions

View file

@ -33,6 +33,9 @@ class ExtractTransformLoad:
Attributes: Attributes:
DATA_PATH (pathlib.Path): Local path where all data will be stored DATA_PATH (pathlib.Path): Local path where all data will be stored
TMP_PATH (pathlib.Path): Local path where temporary data will be stored TMP_PATH (pathlib.Path): Local path where temporary data will be stored
TODO: Fill missing attrs here
GEOID_FIELD_NAME (str): The common column name for a Census Block Group identifier GEOID_FIELD_NAME (str): The common column name for a Census Block Group identifier
GEOID_TRACT_FIELD_NAME (str): The common column name for a Census Tract identifier GEOID_TRACT_FIELD_NAME (str): The common column name for a Census Tract identifier
""" """
@ -97,6 +100,8 @@ class ExtractTransformLoad:
@classmethod @classmethod
def yaml_config_load(cls) -> dict: def yaml_config_load(cls) -> dict:
"""Generate config dictionary and set instance variables from YAML dataset."""
# check if the class instance has score YAML definitions # check if the class instance has score YAML definitions
datasets_config = load_yaml_dict_from_file( datasets_config = load_yaml_dict_from_file(
cls.DATASET_CONFIG / "datasets.yml", cls.DATASET_CONFIG / "datasets.yml",

View file

@ -16,24 +16,46 @@ class FieldType(Enum):
class DatasetsConfig: class DatasetsConfig:
@dataclass @dataclass
class Dataset: class Dataset:
"""A class that defines a dataset and its load variables.
Attributes:
long_name (str): A human readable title for the dataset.
short_name (str): Used to compose the short variable names for tiles/arcgis. All short variable names will be prepended
with the short name of the dataset they come from, e.g. `nri__ex_loss`.
module_name (str): A string that matches both the Python module name for the dataset and the `NAME` property on the ETL class.
description (str): A human readable description of the dataset.
load_fields (List[LoadField]): A list of LoadField entries that will drive the score ETL and side effects (tiles, downloadables).
"""
@dataclass @dataclass
class LoadField: class LoadField:
"""A class to define the fields to be saved on the dataset's output.
These fields will be then imported by the score generation ETL.
Attributes:
short_name (str): Used in conjunction with the dataset's `short_name` for files where short names are needed.
df_field_name (str): Name for the field in the etl class.
long_name (str): Column name for the dataset's output csv.
field_type (FieldType): An enum that dictates what type of field this is. This will be used on the `etl_score_post`
for the data manipulation.
The `by_value` metadata prop will load the field type's Enum value instead of the member name, e.g. "string" and not STRING.
include_in_tiles (bool): Include this field on the tile export.
include_in_csv (bool): Include this field on the CSV export.
include_in_excel (bool): Include this field on the Excel export.
"""
short_name: str short_name: str
df_field_name: str df_field_name: str
long_name: str long_name: str
field_type: FieldType = field( field_type: FieldType = field(metadata={"by_value": True})
metadata={"by_value": True} include_in_tiles: bool
) # this will load the field type's Enum value include_in_csv: bool
# instead of the index, i.e. "string" and not include_in_excel: bool
# STRING
tile_include: bool
csv_download: bool
excel_download: bool
long_name: str long_name: str
short_name: str short_name: str
module_name: str module_name: str
last_updated_year: int
description: str description: str
input_geoid_tract_field_name: str input_geoid_tract_field_name: str
load_fields: List[LoadField] load_fields: List[LoadField]