From aa68a43c84ab4944ad469d94f76a7bf5d2dba1e1 Mon Sep 17 00:00:00 2001
From: Saran Ahluwalia <Saran.S.Ahluwalia@omb.eop.gov>
Date: Wed, 29 Dec 2021 08:47:50 -0500
Subject: [PATCH] Tree Equity Score ETL: add class docstring, rename geoid column, write selected columns to CSV

---
 .../etl/sources/tree_equity_score/etl.py      | 51 ++++++++++++++++++-
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py b/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
index 4c93d911..4f662d8a 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
@@ -5,8 +5,22 @@ from data_pipeline.utils import get_module_logger
 
 logger = get_module_logger(__name__)
 
-
+# ETL for the Tree Equity Score dataset.
 class TreeEquityScoreETL(ExtractTransformLoad):
+    """
+    ETL for the Tree Equity Score (TES) dataset. From the documentation:
+    Tree equity score methodology: https://www.treeequityscore.org/methodology/
+    A lower Tree Equity Score indicates a greater priority for closing the tree canopy gap.
+    In order to estimate a general number of trees associated with an increase in tree
+    canopy, we utilize a basic multiplier of 600 sq-ft (55.74 sq-m) of canopy area
+    per urban tree assuming a medium-size urban tree crown width of 25-30 ft.
+    Sources:
+        1. Tree canopy cover. High resolution tree canopy where available,
+        the National Land Cover Database where it is not.
+        2. Census American Community Survey (ACS) 2018 5-year Block Group population estimates
+        3. Census ACS 2018 5-year city and block group Median Income estimates
+    """
+
     def __init__(self):
         self.TES_URL = "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/"
         self.TES_CSV = self.TMP_PATH / "tes_2021_data.csv"
@@ -83,8 +97,41 @@ class TreeEquityScoreETL(ExtractTransformLoad):
             pd.concat(tes_state_dfs), crs=tes_state_dfs[0].crs
         )
 
+        # rename the source "geoid" column to the pipeline's standard GEOID field name
+        self.df.rename(
+            columns={"geoid": self.GEOID_FIELD_NAME},
+            inplace=True,
+        )
+
     def load(self) -> None:
         logger.info("Saving Tree Equity Score GeoJSON")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_file(self.CSV_PATH / "tes_conus.geojson", driver="GeoJSON")
+        self.df = self.df[
+            [
+                self.GEOID_FIELD_NAME,
+                "total_pop",  # Total Population according to ACS Estimates
+                "state",
+                "county",
+                "dep_ratio",  # Dependency ratio
+                "child_perc",  # Children (Age 0-17)
+                "seniorperc",  # Seniors (Age 65+) (ACS 2014 - 2018)
+                "treecanopy",  # Tree canopy cover
+                "area",  # Source: https://www.fs.fed.us/nrs/pubs/gtr/gtr_nrs200.pdf
+                "source",
+                "avg_temp",  # Average Temperature from USGS Earth Explorer
+                "ua_name",
+                "incorpname",
+                "congressio",  # Congressional District
+                "biome",
+                "bgpopdense",
+                "popadjust",  # Adjusted population estimate
+                "tc_gap",  # Tree canopy gap
+                "tc_goal",  # Tree canopy goal
+                "priority",  # Priority community according to the index
+                "tes",  # tree equity score
+                "tesctyscor",  # tree equity score for the county
+                "geometry",  # block group geometry coordinates
+            ]
+        ]
+        self.df.to_csv(self.CSV_PATH / "usa.csv", index=False)