From 7415a15bc02411e2e333ef15a2dfd9ab2d08a1c9 Mon Sep 17 00:00:00 2001
From: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Date: Mon, 27 Jun 2022 11:19:11 -0400
Subject: [PATCH] docstrings

---
 data/data-pipeline/data_pipeline/etl/base.py  |  5 +++
 .../etl/score/schemas/datasets.py             | 40 ++++++++++++++-----
 2 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/base.py b/data/data-pipeline/data_pipeline/etl/base.py
index 532bc8fb..b1aed335 100644
--- a/data/data-pipeline/data_pipeline/etl/base.py
+++ b/data/data-pipeline/data_pipeline/etl/base.py
@@ -33,6 +33,9 @@ class ExtractTransformLoad:
     Attributes:
         DATA_PATH (pathlib.Path): Local path where all data will be stored
         TMP_PATH (pathlib.Path): Local path where temporary data will be stored
+
+        TODO: Fill missing attrs here
+
         GEOID_FIELD_NAME (str): The common column name for a Census Block Group identifier
         GEOID_TRACT_FIELD_NAME (str): The common column name for a Census Tract identifier
     """
@@ -97,6 +100,8 @@ class ExtractTransformLoad:
 
     @classmethod
     def yaml_config_load(cls) -> dict:
+        """Generate config dictionary and set class attributes from YAML dataset."""
+
         # check if the class instance has score YAML definitions
         datasets_config = load_yaml_dict_from_file(
             cls.DATASET_CONFIG / "datasets.yml",
diff --git a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
index b78c1445..0d1e6f84 100644
--- a/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
+++ b/data/data-pipeline/data_pipeline/etl/score/schemas/datasets.py
@@ -16,24 +16,46 @@ class FieldType(Enum):
 class DatasetsConfig:
     @dataclass
     class Dataset:
+        """A class that defines a dataset and its load variables.
+
+        Attributes:
+            long_name (str): A human readable title for the dataset.
+            short_name (str): Used to compose the short variable names for tiles/ArcGIS. All short variable names will be prefixed
+                with the short name of the dataset they come from, e.g. `nri__ex_loss`.
+            module_name (str): A string that matches both the Python module name for the dataset and the `NAME` property on the ETL class.
+            description (str): A human readable description of the dataset.
+            load_fields (List[LoadField]): A list of LoadField entries that will drive the score ETL and side effects (tiles, downloadables).
+        """
+
         @dataclass
         class LoadField:
+            """A class to define the fields to be saved on the dataset's output.
+
+            These fields will then be imported by the score generation ETL.
+
+            Attributes:
+                short_name (str): Used in conjunction with the dataset's `short_name` for files where short names are needed.
+                df_field_name (str): Name for the field in the ETL class.
+                long_name (str): Column name for the dataset's output csv.
+                field_type (FieldType): An enum that dictates what type of field this is. This will be used in `etl_score_post`
+                    for the data manipulation.
+                    The `by_value` metadata prop will load the field type's Enum value instead of its name, e.g. "string" and not STRING.
+                include_in_tiles (bool): Include this field on the tile export.
+                include_in_csv (bool): Include this field on the CSV export.
+                include_in_excel (bool): Include this field on the Excel export.
+            """
+
             short_name: str
             df_field_name: str
             long_name: str
-            field_type: FieldType = field(
-                metadata={"by_value": True}
-            )  # this will load the field type's Enum value
-            # instead of the index, i.e. "string" and not
-            # STRING
-            tile_include: bool
-            csv_download: bool
-            excel_download: bool
+            field_type: FieldType = field(metadata={"by_value": True})
+            include_in_tiles: bool
+            include_in_csv: bool
+            include_in_excel: bool
 
         long_name: str
         short_name: str
         module_name: str
-        last_updated_year: int
         description: str
         input_geoid_tract_field_name: str
         load_fields: List[LoadField]