From aa68a43c84ab4944ad469d94f76a7bf5d2dba1e1 Mon Sep 17 00:00:00 2001
From: Saran Ahluwalia
Date: Wed, 29 Dec 2021 08:47:50 -0500
Subject: [PATCH] draft wip

---
Review notes (free-form area before the diff; not part of the commit message
and not part of the change itself):
 * load() keeps the log message "Saving Tree Equity Score GeoJSON" although
   the output changes from tes_conus.geojson (GeoJSON driver) to usa.csv
   written with to_csv().
 * "geometry" stays in the column selection, so the geometry values are
   written into the CSV as text -- confirm this is intended.
 * The first hunk replaces a blank line above the class with a bare "#".
 * The rename comment says "Tract ID" while the docstring and the geometry
   comment refer to block groups -- confirm which level "geoid" holds.

 .../etl/sources/tree_equity_score/etl.py      | 51 ++++++++++++++++++-
 1 file changed, 49 insertions(+), 2 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py b/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
index 4c93d911..4f662d8a 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
@@ -5,8 +5,22 @@ from data_pipeline.utils import get_module_logger
 
 logger = get_module_logger(__name__)
 
-
+#
 class TreeEquityScoreETL(ExtractTransformLoad):
+    """
+    From the documentation:
+    Tree equity score methodology: https://www.treeequityscore.org/methodology/
+    A lower Tree Equity Score indicates a greater priority for closing the tree canopy gap
+    In order to estimate a general number of trees associated with an increase in tree
+    canopy, we utilize a basic multiplier of 600 sq-ft (55.74 sq-m) of canopy area
+    per urban tree assuming a medium-size urban tree crown width of 25-30 ft.
+    Sources:
+    1. Tree canopy cover. High resolution tree canopy where available,
+    the National Land Cover Database where it is not.
+    2. Census American Community Survey (ACS) 2018 5-year Block Group population estimates
+    3. Census ACS 2018 5-year city and block group Median Income estimates
+    """
+
     def __init__(self):
         self.TES_URL = "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/"
         self.TES_CSV = self.TMP_PATH / "tes_2021_data.csv"
@@ -83,8 +97,41 @@ class TreeEquityScoreETL(ExtractTransformLoad):
             pd.concat(tes_state_dfs), crs=tes_state_dfs[0].crs
         )
 
+        # rename ID to Tract ID
+        self.df.rename(
+            columns={"geoid": self.GEOID_FIELD_NAME},
+            inplace=True,
+        )
+
     def load(self) -> None:
         logger.info("Saving Tree Equity Score GeoJSON")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_file(self.CSV_PATH / "tes_conus.geojson", driver="GeoJSON")
+        self.df = self.df[
+            [
+                self.GEOID_FIELD_NAME,
+                "total_pop",  # Total Population according to ACS Estimates
+                "state",
+                "county",
+                "dep_ratio",  # dependent ratio
+                "child_perc",  # Children (Age 0 -17)
+                "seniorperc",  # Seniors (Age 65+) (ACS 2014 - 2018)
+                "treecanopy",  # Tree canopy cover
+                "area",  # Source: https://www.fs.fed.us/nrs/pubs/gtr/gtr_nrs200.pdf
+                "source",
+                "avg_temp",  # Average Temperature from USGS Earth Explorer
+                "ua_name",
+                "incorpname",
+                "congressio",  # Congressional District
+                "biome",
+                "bgpopdense",
+                "popadjust",  # Adjusted population estimate
+                "tc_gap",  # Tree canopy gap
+                "tc_goal",  # Tree canopy goal
+                "priority",  # Priority community according to the index
+                "tes",  # tree equity score
+                "tesctyscor",  # tree equity score for the county
+                "geometry",  # block group geometry coordinates
+            ]
+        ]
+        self.df.to_csv(self.CSV_PATH / "usa.csv", index=False)