From 81290ce672ca7cb707a1da7e3f0a4c77cdb77924 Mon Sep 17 00:00:00 2001 From: Rohit Musti Date: Mon, 26 Jul 2021 08:00:57 -0400 Subject: [PATCH] adding tree equity score to the data pipeline (#398) * adding tree equity score to the downloading pipeline so it can be easily compared as a reference index! * removed redundant dependencies --- data/data-pipeline/etl/runner.py | 5 +++ .../etl/sources/tree_equity_score/README.md | 3 ++ .../etl/sources/tree_equity_score/__init__.py | 0 .../etl/sources/tree_equity_score/etl.py | 42 +++++++++++++++++++ 4 files changed, 50 insertions(+) create mode 100644 data/data-pipeline/etl/sources/tree_equity_score/README.md create mode 100644 data/data-pipeline/etl/sources/tree_equity_score/__init__.py create mode 100644 data/data-pipeline/etl/sources/tree_equity_score/etl.py diff --git a/data/data-pipeline/etl/runner.py b/data/data-pipeline/etl/runner.py index 01ade7bc..f6ec09b4 100644 --- a/data/data-pipeline/etl/runner.py +++ b/data/data-pipeline/etl/runner.py @@ -16,6 +16,11 @@ def etl_runner(dataset_to_run: str = None) -> None: # this list comes from YAMLs dataset_list = [ + { + "name": "tree_equity_score", + "module_dir": "tree_equity_score", + "class_name": "TreeEquityScoreETL", + }, { "name": "census_acs", "module_dir": "census_acs", diff --git a/data/data-pipeline/etl/sources/tree_equity_score/README.md b/data/data-pipeline/etl/sources/tree_equity_score/README.md new file mode 100644 index 00000000..92c7bd9c --- /dev/null +++ b/data/data-pipeline/etl/sources/tree_equity_score/README.md @@ -0,0 +1,3 @@ +# Tree Equity Score + +The Tree Equity Score was built by American Forests to assess how equitably trees were planted in a city. For more information, check out [https://treeequityscore.org](https://treeequityscore.org). 
diff --git a/data/data-pipeline/etl/sources/tree_equity_score/__init__.py b/data/data-pipeline/etl/sources/tree_equity_score/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/data/data-pipeline/etl/sources/tree_equity_score/etl.py b/data/data-pipeline/etl/sources/tree_equity_score/etl.py
new file mode 100644
index 00000000..ce188d2b
--- /dev/null
+++ b/data/data-pipeline/etl/sources/tree_equity_score/etl.py
@@ -0,0 +1,72 @@
+import pandas as pd
+import geopandas as gpd
+
+from etl.base import ExtractTransformLoad
+from utils import get_module_logger
+import os
+
+logger = get_module_logger(__name__)
+
+
+class TreeEquityScoreETL(ExtractTransformLoad):
+    """Merge the per-state Tree Equity Score shapefiles into one GeoJSON file.
+
+    extract() requests one archive per entry in ``self.states``, transform()
+    concatenates the per-state shapefiles into ``self.df``, and load() writes
+    that frame to ``tes_conus.geojson``.
+    """
+
+    def __init__(self):
+        # TMP_PATH and DATA_PATH are not set in this class; presumably they are
+        # provided by ExtractTransformLoad -- confirm against etl.base.
+        # Common URL prefix; extract() appends "<state>.zip.zip" to it.
+        self.TES_URL = "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/"
+        # NOTE(review): TES_CSV is not read by any method of this class.
+        self.TES_CSV = self.TMP_PATH / "tes_2021_data.csv"
+        # Output directory used by load(); despite the name it receives GeoJSON.
+        self.CSV_PATH = self.DATA_PATH / "dataset" / "tree_equity_score"
+        # Annotation only; the value is assigned in transform().
+        self.df: gpd.GeoDataFrame
+        # 49 lowercase codes; "ak" and "hi" are absent, which is consistent with
+        # the "conus" output file name.
+        self.states = [
+            "al", "az", "ar", "ca", "co", "ct", "de", "dc", "fl",
+            "ga", "id", "il", "in", "ia", "ks", "ky", "la", "me",
+            "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh",
+            "nj", "nm", "ny", "nc", "nd", "oh", "ok", "or", "pa",
+            "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy",
+        ]
+
+    def extract(self) -> None:
+        """Request every state's archive through the parent class's extract().
+
+        Each call passes the archive URL and a per-state path under TMP_PATH.
+        What the parent does with them is not visible here; transform() expects
+        ``<TMP_PATH>/<state>/<state>.shp`` to exist afterwards.
+        """
+        logger.info("Downloading Tree Equity Score Data")
+        for state in self.states:
+            super().extract(
+                # NOTE(review): the doubled ".zip.zip" suffix is preserved as-is;
+                # confirm it matches the object names in the bucket.
+                f"{self.TES_URL}{state}.zip.zip",
+                f"{self.TMP_PATH}/{state}",
+            )
+
+    def transform(self) -> None:
+        """Read each state's shapefile and concatenate them into ``self.df``."""
+        logger.info("Transforming Tree Equity Score Data")
+        tes_state_dfs = [
+            gpd.read_file(f"{self.TMP_PATH}/{state}/{state}.shp")
+            for state in self.states
+        ]
+        # The combined frame takes its CRS from the first state's frame; the
+        # other frames' CRS values are not compared here.
+        self.df = gpd.GeoDataFrame(pd.concat(tes_state_dfs), crs=tes_state_dfs[0].crs)
+
+    def load(self) -> None:
+        """Write ``self.df`` to ``CSV_PATH / "tes_conus.geojson"``."""
+        logger.info("Saving Tree Equity Score GeoJSON")
+        # Create the output directory if needed, then write the merged GeoJSON.
+        self.CSV_PATH.mkdir(parents=True, exist_ok=True)
+        self.df.to_file(self.CSV_PATH / "tes_conus.geojson", driver="GeoJSON")