From e539db86ab4c031e2c3d35ef51ee0a49990865dd Mon Sep 17 00:00:00 2001 From: Jorge Escobar Date: Fri, 26 Aug 2022 13:11:51 -0400 Subject: [PATCH 1/2] tuple type --- data/data-pipeline/README.md | 5 +++-- .../data_pipeline/etl/sources/census_acs/etl_imputations.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index 517c3ccb..3f46b22a 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -196,7 +196,7 @@ Here's a list of commands: ## Local development -You can run the Python code locally without Docker to develop, using Poetry. However, to generate the census data you will need the [GDAL library](https://github.com/OSGeo/gdal) installed locally. Also to generate tiles for a local map, you will need [Mapbox tippecanoe](https://github.com/mapbox/tippecanoe). Please refer to the repos for specific instructions for your OS. +You can run the Python code locally without Docker to develop, using Poetry. However, to generate the census data you will need the [GDAL library](https://github.com/OSGeo/gdal) installed locally. For score generation, you will need [libspatialindex](https://libspatialindex.org/en/latest/). And to generate tiles for a local map, you will need [Mapbox tippecanoe](https://github.com/mapbox/tippecanoe). Please refer to the repos for specific instructions for your OS. ### VSCode @@ -218,6 +218,7 @@ To install the above-named executables: - gdal: `brew install gdal` - Tippecanoe: `brew install tippecanoe` +- spatialindex: `brew install spatialindex` Note: For MacOS Monterey or M1 Macs, [you might need to follow these steps](https://stackoverflow.com/a/70880741) to install Scipy. @@ -229,7 +230,7 @@ If you want to run tile generation, please install TippeCanoe [following these i - Start a terminal - Change to this directory (`/data/data-pipeline/`) -- Make sure you have at least Python 3.7 installed: `python -V` or `python3 -V` +- Make sure you have at least Python 3.8 installed: `python -V` or `python3 -V` - We use [Poetry](https://python-poetry.org/) for managing dependencies and building the application. Please follow the instructions on their site to download. 
- Install Poetry requirements with `poetry install` diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py index 408a3341..22381477 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py @@ -1,4 +1,4 @@ -from typing import List, NamedTuple +from typing import List, NamedTuple, Tuple import pandas as pd import geopandas as gpd @@ -41,7 +41,7 @@ def _prepare_dataframe_for_imputation( impute_var_named_tup_list: List[NamedTuple], geo_df: gpd.GeoDataFrame, geoid_field: str = "GEOID10_TRACT", -) -> tuple[list, gpd.GeoDataFrame]: +) -> Tuple[list, gpd.GeoDataFrame]: imputing_cols = [ impute_var_pair.raw_field_name for impute_var_pair in impute_var_named_tup_list From 1c4d3e4142d83bf86cec32b307c8d33b833549a3 Mon Sep 17 00:00:00 2001 From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com> Date: Fri, 26 Aug 2022 15:23:20 -0400 Subject: [PATCH 2/2] Score tests (#1847) * update Python version on README; tuple typing fix * Alaska tribal points fix (#1821) * Bump mistune from 0.8.4 to 2.0.3 in /data/data-pipeline (#1777) Bumps [mistune](https://github.com/lepture/mistune) from 0.8.4 to 2.0.3. - [Release notes](https://github.com/lepture/mistune/releases) - [Changelog](https://github.com/lepture/mistune/blob/master/docs/changes.rst) - [Commits](https://github.com/lepture/mistune/compare/v0.8.4...v2.0.3) --- updated-dependencies: - dependency-name: mistune dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * poetry update * initial pass of score tests * add threshold tests * added ses threshold (not donut, not island) * testing suite -- stopping for the day * added test for lead proxy indicator * Refactor score tests to make them less verbose and more direct (#1865) * Cleanup tests slightly before refactor (#1846) * Refactor score calculations tests * Feedback from review * Refactor output tests like calculation tests (#1846) (#1870) * Reorganize files (#1846) * Switch from lru_cache to fixture scopes (#1846) * Add tests for all factors (#1846) * Mark smoketests and run as part of BE deploy (#1846) * Update renamed var (#1846) * Switch from named tuple to dataclass (#1846) This is annoying, but pylint in python3.8 was crashing parsing the named tuple. We weren't using any namedtuple-specific features, so I made the type a dataclass just to get pylint to behave (see the sketch below).
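For context, here is a minimal, self-contained sketch of the two typing fixes in this patch series, assuming only that the pipeline targets Python 3.8 (`ReversePercentile` mirrors the etl_score.py diff below; `as_pair` and the field values are hypothetical illustrations):

```python
from dataclasses import dataclass
from typing import Tuple


@dataclass
class ReversePercentile:
    # Same two fields the replaced namedtuple carried; pylint on
    # Python 3.8 crashed while parsing the namedtuple definition, and
    # no namedtuple-specific behavior was in use.
    field_name: str
    low_field_name: str


def as_pair(record: ReversePercentile) -> Tuple[str, str]:
    # typing.Tuple keeps the annotation usable on Python 3.8; the
    # builtin generic `tuple[str, str]` is only supported at runtime
    # from Python 3.9, which is why the diffs swap tuple[...] for
    # Tuple[...].
    return (record.field_name, record.low_field_name)


print(as_pair(ReversePercentile("some raw field", "some low field")))
```

A frozen dataclass (`@dataclass(frozen=True)`) would also preserve the namedtuple's immutability; the plain dataclass above is simply the smallest change that keeps pylint parsing.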
* Add default timeout to requests (#1846) * Fix type (#1846) * Fix merge mistake on poetry.lock (#1846) Signed-off-by: dependabot[bot] Co-authored-by: Jorge Escobar Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Matt Bowen <83967628+mattbowen-usds@users.noreply.github.com> Co-authored-by: matt bowen --- .github/workflows/deploy_be_staging.yml | 3 + data/data-pipeline/data_pipeline/config.py | 2 +- .../data_pipeline/etl/score/etl_score.py | 11 +- .../data_pipeline/etl/score/etl_score_geo.py | 2 +- .../etl/sources/census_acs/etl_imputations.py | 4 +- .../sources/census_acs_median_income/etl.py | 12 +- .../etl/sources/census_decennial/etl.py | 4 +- .../etl/sources/hud_recap/etl.py | 7 +- .../compare_tiles_and_geoJson_files.ipynb | 354 +++++++++++++ .../ipython/geojson_compare_tiles.ipynb | 496 ++++++++++++++++++ .../data_pipeline/score/field_names.py | 4 +- .../data_pipeline/score/score_narwhal.py | 23 +- .../data_pipeline/tests/conftest.py | 13 + .../data_pipeline/tests/score/fixtures.py | 12 + .../tests/score/test_calculation.py | 291 ++++++++++ .../data_pipeline/tests/score/test_output.py | 205 ++++++++ .../data_pipeline/tile/generate.py | 5 +- data/data-pipeline/data_pipeline/utils.py | 4 +- data/data-pipeline/pytest.ini | 2 + 19 files changed, 1425 insertions(+), 29 deletions(-) create mode 100644 data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb create mode 100644 data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb create mode 100644 data/data-pipeline/data_pipeline/tests/score/fixtures.py create mode 100644 data/data-pipeline/data_pipeline/tests/score/test_calculation.py create mode 100644 data/data-pipeline/data_pipeline/tests/score/test_output.py diff --git a/.github/workflows/deploy_be_staging.yml b/.github/workflows/deploy_be_staging.yml index fd324c73..8a10cf38 100644 --- a/.github/workflows/deploy_be_staging.yml +++ b/.github/workflows/deploy_be_staging.yml @@ -62,6 +62,9 @@ jobs: - name: Generate Score Post run: | poetry run python3 data_pipeline/application.py generate-score-post -s aws + - name: Run Smoketests + run: | + poetry run pytest data_pipeline/ -m smoketest - name: Deploy Score to Geoplatform AWS run: | poetry run s4cmd put ./data_pipeline/data/score/csv/ s3://justice40-data/data-pipeline-staging/${{env.PR_NUMBER}}/${{env.SHA_NUMBER}}/data/score/csv --recursive --force --API-ACL=public-read diff --git a/data/data-pipeline/data_pipeline/config.py b/data/data-pipeline/data_pipeline/config.py index c32389ca..23e550a8 100644 --- a/data/data-pipeline/data_pipeline/config.py +++ b/data/data-pipeline/data_pipeline/config.py @@ -12,7 +12,7 @@ settings = Dynaconf( # set root dir settings.APP_ROOT = pathlib.Path(data_pipeline.__file__).resolve().parent - +settings.REQUESTS_DEFAULT_TIMOUT = 3600 # To set an environment use: # Linux/OSX: export ENV_FOR_DYNACONF=staging # Windows: set ENV_FOR_DYNACONF=staging diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 663dc8d7..cfcd123d 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -1,5 +1,5 @@ import functools -from collections import namedtuple +from dataclasses import dataclass import numpy as np import pandas as pd @@ -496,10 +496,11 @@ class ScoreETL(ExtractTransformLoad): # >= some threshold.
# TODO: Add more fields here. # https://github.com/usds/justice40-tool/issues/970 - ReversePercentile = namedtuple( - typename="ReversePercentile", - field_names=["field_name", "low_field_name"], - ) + @dataclass + class ReversePercentile: + field_name: str + low_field_name: str + reverse_percentiles = [ # This dictionary follows the format: # : diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py index 14f72ad2..4ad3cb58 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py @@ -51,7 +51,7 @@ class GeoScoreETL(ExtractTransformLoad): ## TODO: We really should not have this any longer changing self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[ - field_names.SCORE_N + field_names.FINAL_SCORE_N_BOOLEAN ] self.TARGET_SCORE_RENAME_TO = "SCORE" diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py index 22381477..17180026 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl_imputations.py @@ -1,4 +1,4 @@ -from typing import List, NamedTuple, Tuple +from typing import Any, List, NamedTuple, Tuple import pandas as pd import geopandas as gpd @@ -41,7 +41,7 @@ def _prepare_dataframe_for_imputation( impute_var_named_tup_list: List[NamedTuple], geo_df: gpd.GeoDataFrame, geoid_field: str = "GEOID10_TRACT", -) -> Tuple[list, gpd.GeoDataFrame]: +) -> Tuple[Any, gpd.GeoDataFrame]: imputing_cols = [ impute_var_pair.raw_field_name for impute_var_pair in impute_var_named_tup_list diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py index 32325842..a39f8891 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py @@ -282,12 +282,20 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad): # Download MSA median incomes logger.info("Starting download of MSA median incomes.") - download = requests.get(self.MSA_MEDIAN_INCOME_URL, verify=None) + download = requests.get( + self.MSA_MEDIAN_INCOME_URL, + verify=None, + timeout=settings.REQUESTS_DEFAULT_TIMOUT, + ) self.msa_median_incomes = json.loads(download.content) # Download state median incomes logger.info("Starting download of state median incomes.") - download_state = requests.get(self.STATE_MEDIAN_INCOME_URL, verify=None) + download_state = requests.get( + self.STATE_MEDIAN_INCOME_URL, + verify=None, + timeout=settings.REQUESTS_DEFAULT_TIMOUT, + ) self.state_median_incomes = json.loads(download_state.content) ## NOTE we already have PR's MI here diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py index 56aa4745..ea503f62 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py @@ -7,6 +7,7 @@ import pandas as pd from data_pipeline.etl.base import ExtractTransformLoad from data_pipeline.utils import get_module_logger from data_pipeline.score import field_names +from data_pipeline.config import settings pd.options.mode.chained_assignment = "raise" @@ 
-270,7 +271,8 @@ class CensusDecennialETL(ExtractTransformLoad): island["var_list"], island["fips"], county, - ) + ), + timeout=settings.REQUESTS_DEFAULT_TIMOUT, ) df = json.loads(download.content) diff --git a/data/data-pipeline/data_pipeline/etl/sources/hud_recap/etl.py b/data/data-pipeline/data_pipeline/etl/sources/hud_recap/etl.py index c5f6ce63..cf611137 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/hud_recap/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/hud_recap/etl.py @@ -3,6 +3,7 @@ import requests from data_pipeline.etl.base import ExtractTransformLoad from data_pipeline.utils import get_module_logger +from data_pipeline.config import settings logger = get_module_logger(__name__) @@ -26,7 +27,11 @@ class HudRecapETL(ExtractTransformLoad): def extract(self) -> None: logger.info("Downloading HUD Recap Data") - download = requests.get(self.HUD_RECAP_CSV_URL, verify=None) + download = requests.get( + self.HUD_RECAP_CSV_URL, + verify=None, + timeout=settings.REQUESTS_DEFAULT_TIMOUT, + ) file_contents = download.content csv_file = open(self.HUD_RECAP_CSV, "wb") csv_file.write(file_contents) diff --git a/data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb b/data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb new file mode 100644 index 00000000..f3585578 --- /dev/null +++ b/data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb @@ -0,0 +1,354 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "c9fab286", + "metadata": {}, + "outputs": [], + "source": [ + "# %load_ext lab_black\n", + "import json\n", + "import pandas as pd\n", + "import geopandas as gpd" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "dbd84e10", + "metadata": {}, + "outputs": [ + { + "ename": "DriverError", + "evalue": "/mnt/e/opt/justice40-tool/data/data-pipeline/data_pipeline/data/score/csv/tiles/usa.csv: No such file or directory", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mCPLE_OpenFailedError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32mfiona/_shim.pyx\u001b[0m in \u001b[0;36mfiona._shim.gdal_open_vector\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mfiona/_err.pyx\u001b[0m in \u001b[0;36mfiona._err.exc_wrap_pointer\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mCPLE_OpenFailedError\u001b[0m: /mnt/e/opt/justice40-tool/data/data-pipeline/data_pipeline/data/score/csv/tiles/usa.csv: No such file or directory", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mDriverError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_10603/1449522338.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Read in the score geojson file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mdata_pipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0metl\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconstants\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mDATA_SCORE_CSV_TILES_FILE_PATH\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnation\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mgpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDATA_SCORE_CSV_TILES_FILE_PATH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.cache/pypoetry/virtualenvs/data-pipeline-WziHKidv-py3.8/lib/python3.8/site-packages/geopandas/io/file.py\u001b[0m in \u001b[0;36m_read_file\u001b[0;34m(filename, bbox, mask, rows, **kwargs)\u001b[0m\n\u001b[1;32m 158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mfiona_env\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 160\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mreader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath_or_bytes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfeatures\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 161\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;31m# In a future Fiona release the crs attribute of features will\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.cache/pypoetry/virtualenvs/data-pipeline-WziHKidv-py3.8/lib/python3.8/site-packages/fiona/env.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 406\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlocal\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_env\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 408\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 409\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.cache/pypoetry/virtualenvs/data-pipeline-WziHKidv-py3.8/lib/python3.8/site-packages/fiona/__init__.py\u001b[0m in \u001b[0;36mopen\u001b[0;34m(fp, mode, driver, schema, crs, encoding, layer, vfs, enabled_drivers, crs_wkt, **kwargs)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'r'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 264\u001b[0;31m c = Collection(path, mode, driver=driver, encoding=encoding,\n\u001b[0m\u001b[1;32m 265\u001b[0m layer=layer, enabled_drivers=enabled_drivers, **kwargs)\n\u001b[1;32m 
266\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.cache/pypoetry/virtualenvs/data-pipeline-WziHKidv-py3.8/lib/python3.8/site-packages/fiona/collection.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, path, mode, driver, schema, crs, encoding, layer, vsi, archive, enabled_drivers, crs_wkt, ignore_fields, ignore_geometry, **kwargs)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'r'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 162\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 163\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'w'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mWritingSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mfiona/ogrext.pyx\u001b[0m in \u001b[0;36mfiona.ogrext.Session.start\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mfiona/_shim.pyx\u001b[0m in \u001b[0;36mfiona._shim.gdal_open_vector\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mDriverError\u001b[0m: /mnt/e/opt/justice40-tool/data/data-pipeline/data_pipeline/data/score/csv/tiles/usa.csv: No such file or directory" + ] + } + ], + "source": [ + "# Read in the score geojson file\n", + "from data_pipeline.etl.score.constants import DATA_SCORE_CSV_TILES_FILE_PATH\n", + "nation = gpd.read_file(DATA_SCORE_CSV_TILES_FILE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f850529", + "metadata": {}, + "outputs": [], + "source": [ + "nation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f342d36", + "metadata": {}, + "outputs": [], + "source": [ + "# get the columns of the df and sort the list:\n", + "sorted_nation = sorted(nation.columns.to_list())" + ] + }, + { + "cell_type": "markdown", + "id": "97aac08f", + "metadata": {}, + "source": [ + "CLI to covert a pbf into a json file (requires tippecannoe and jq to be installed)\n", + "\n", + "```bash\n", + "curl https://justice40-data.s3.amazonaws.com/data-pipeline-staging/1822/e6385c172f1d2adf588050375b7c0985035cfb24/data/score/tiles/high/8/67/101.pbf -o uh-1822-e638-8-67-101.pbf | tippecanoe-decode uh-1822-e638-8-67-101.pbf 8 67 101 | jq > cat uh-1822-e638-8-67-101.json\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbe37ccb", + "metadata": {}, + "outputs": [], + "source": [ + "# load a random high-tile json (after decoding a 
pbf) file using json.loads()\n", + "with open(\"/Users/vims/Downloads/uh-1822-e638-8-67-101.json\", \"r\") as f:\n", + " random_tile_features = json.loads(f.read())\n", + "\n", + "# Flatten data around the features key:\n", + "flatten_features = pd.json_normalize(random_tile_features, record_path=[\"features\"])\n", + "\n", + "# index into the feature properties, get keys and turn into a sorted list\n", + "random_tile = sorted(list(flatten_features[\"features\"][0][0][\"properties\"].keys()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a33f5126", + "metadata": {}, + "outputs": [], + "source": [ + "set_dif = set(sorted_nation).symmetric_difference(set(random_tile))\n", + "list(set_dif)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d228360b", + "metadata": {}, + "outputs": [], + "source": [ + "nation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6925138", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f2d7ba0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOID10SFCFHRS_ETAML_ETFUDS_ET
7127061480300MinnesotaItasca CountyNoneNone0
7527061940000MinnesotaItasca CountyNoneNone0
11527077460400MinnesotaLake of the Woods CountyNoneNone0
12727123042001MinnesotaRamsey CountyNoneNone0
16027123033400MinnesotaRamsey County0None0
.....................
7404716055000200IdahoKootenai CountyNoneNone0
7406816011950500IdahoBingham CountyNoneNone0
7407616001010503IdahoAda CountyNoneNone0
7410716001001000IdahoAda CountyNoneNone0
7412316001002100IdahoAda CountyNoneNone0
\n", + "

3170 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " GEOID10 SF CF HRS_ET AML_ET FUDS_ET\n", + "71 27061480300 Minnesota Itasca County None None 0\n", + "75 27061940000 Minnesota Itasca County None None 0\n", + "115 27077460400 Minnesota Lake of the Woods County None None 0\n", + "127 27123042001 Minnesota Ramsey County None None 0\n", + "160 27123033400 Minnesota Ramsey County 0 None 0\n", + "... ... ... ... ... ... ...\n", + "74047 16055000200 Idaho Kootenai County None None 0\n", + "74068 16011950500 Idaho Bingham County None None 0\n", + "74076 16001010503 Idaho Ada County None None 0\n", + "74107 16001001000 Idaho Ada County None None 0\n", + "74123 16001002100 Idaho Ada County None None 0\n", + "\n", + "[3170 rows x 6 columns]" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_HRS_GEO = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'FUDS_ET']]\n", + "nation_HRS_GEO.loc[nation_HRS_GEO['FUDS_ET'] == '0']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02eef4b5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "678bea72", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([None, '0', '1'], dtype=object)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation['HRS_ET'].unique()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.10 ('data-pipeline-WziHKidv-py3.8')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + }, + "vscode": { + "interpreter": { + "hash": "c28609757c27a373a12dad8bc3a2aec46aa91130799a09665fba7d386f9c3756" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb b/data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb new file mode 100644 index 00000000..f134f9a6 --- /dev/null +++ b/data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb @@ -0,0 +1,496 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "27da604f", + "metadata": {}, + "outputs": [], + "source": [ + "# %load_ext lab_black\n", + "import json\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "\n", + "# Read in the above json file\n", + "nation=gpd.read_file(\"/Users/vims/Downloads/usa-high-1822-637b.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "7b7083fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 None\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", + " ... \n", + "74129 None\n", + "74130 None\n", + "74131 None\n", + "74132 None\n", + "74133 None\n", + "Name: FUDS_RAW, Length: 74134, dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation['FUDS_RAW']" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "117477e6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOID10SFCFHRS_ETAML_ETAML_RAWFUDS_ETFUDS_RAW
027139080202MinnesotaScott CountyNoneFalseNoneFalseNone
127139080204MinnesotaScott CountyNoneFalseNoneFalseNone
227139080100MinnesotaScott CountyNoneFalseNoneFalseNone
327139080302MinnesotaScott CountyNoneFalseNoneFalseNone
427139080400MinnesotaScott CountyNoneFalseNoneFalseNone
...........................
7412916005001601IdahoBannock CountyNoneFalseNoneFalseNone
7413016005001300IdahoBannock CountyNoneFalseNoneFalseNone
7413116005001000IdahoBannock CountyNoneFalseNoneFalseNone
7413216005000900IdahoBannock CountyNoneFalseNoneFalseNone
7413316005000800IdahoBannock CountyNoneFalseNoneFalseNone
\n", + "

74134 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " GEOID10 SF CF HRS_ET AML_ET AML_RAW FUDS_ET \\\n", + "0 27139080202 Minnesota Scott County None False None False \n", + "1 27139080204 Minnesota Scott County None False None False \n", + "2 27139080100 Minnesota Scott County None False None False \n", + "3 27139080302 Minnesota Scott County None False None False \n", + "4 27139080400 Minnesota Scott County None False None False \n", + "... ... ... ... ... ... ... ... \n", + "74129 16005001601 Idaho Bannock County None False None False \n", + "74130 16005001300 Idaho Bannock County None False None False \n", + "74131 16005001000 Idaho Bannock County None False None False \n", + "74132 16005000900 Idaho Bannock County None False None False \n", + "74133 16005000800 Idaho Bannock County None False None False \n", + "\n", + " FUDS_RAW \n", + "0 None \n", + "1 None \n", + "2 None \n", + "3 None \n", + "4 None \n", + "... ... \n", + "74129 None \n", + "74130 None \n", + "74131 None \n", + "74132 None \n", + "74133 None \n", + "\n", + "[74134 rows x 8 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'AML_RAW','FUDS_ET', 'FUDS_RAW']]\n", + "nation_new_ind" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "0f37acf4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([None, '0', '1'], dtype=object)" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['HRS_ET'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "4ae865ae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 8843\n", + "1 4045\n", + "Name: HRS_ET, dtype: int64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['HRS_ET'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "2f0d29db", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, True])" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['AML_ET'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "646b3754", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 72100\n", + "True 2034\n", + "Name: AML_ET, dtype: int64" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['AML_ET'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "0571df6d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([None, '1'], dtype=object)" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['AML_RAW'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "171fa3c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 2034\n", + "Name: AML_RAW, dtype: int64" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['AML_RAW'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "370b0769", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([False, True])" + ] + }, + "execution_count": 
60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['FUDS_ET'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "f8afb668", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 72056\n", + "True 2078\n", + "Name: FUDS_ET, dtype: int64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['FUDS_ET'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "f2e3b78a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([None, '0', '1'], dtype=object)" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['FUDS_RAW'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "b722e802", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3170\n", + "1 2078\n", + "Name: FUDS_RAW, dtype: int64" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nation_new_ind['FUDS_RAW'].value_counts()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 3a721f60..fc68ebbb 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -10,7 +10,9 @@ STATE_FIELD = "State/Territory" COUNTY_FIELD = "County Name" # Definition Narwhal fields -SCORE_N = "Definition N (communities)" +FINAL_SCORE_N_BOOLEAN = ( + "Definition M community, including adjacency index tracts" +) SCORE_N_COMMUNITIES = "Definition N (communities)" N_CLIMATE = "Climate Factor (Definition N)" N_ENERGY = "Energy Factor (Definition N)" diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py index 5fb2923c..66fb3251 100644 --- a/data/data-pipeline/data_pipeline/score/score_narwhal.py +++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py @@ -14,20 +14,17 @@ logger = get_module_logger(__name__) class ScoreNarwhal(Score): """Very similar to Score M, at present.""" - def __init__(self, df: pd.DataFrame) -> None: - self.LOW_INCOME_THRESHOLD: float = 0.65 - self.MAX_COLLEGE_ATTENDANCE_THRESHOLD: float = 0.20 - self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90 - self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90 - self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10 + LOW_INCOME_THRESHOLD: float = 0.65 + MAX_COLLEGE_ATTENDANCE_THRESHOLD: float = 0.20 + ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90 + MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90 + LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10 - # We define a donut hole DAC as a tract that is entirely surrounded by - # DACs (score threshold = 1) and above median for low income, as a starting - # point. As we ground-truth, these thresholds might change. 
- self.LOW_INCOME_THRESHOLD_DONUT: float = 0.50 - self.SCORE_THRESHOLD_DONUT: float = 1.00 - - super().__init__(df) + # We define a donut hole DAC as a tract that is entirely surrounded by + # DACs (score threshold = 1) and above median for low income, as a starting + # point. As we ground-truth, these thresholds might change. + LOW_INCOME_THRESHOLD_DONUT: float = 0.50 + SCORE_THRESHOLD_DONUT: float = 1.00 def _combine_island_areas_with_states_and_set_thresholds( self, diff --git a/data/data-pipeline/data_pipeline/tests/conftest.py b/data/data-pipeline/data_pipeline/tests/conftest.py index f1dc63ac..6fb3d138 100644 --- a/data/data-pipeline/data_pipeline/tests/conftest.py +++ b/data/data-pipeline/data_pipeline/tests/conftest.py @@ -52,3 +52,16 @@ def mock_etl(monkeypatch, mock_paths) -> None: data_path, tmp_path = mock_paths monkeypatch.setattr(ExtractTransformLoad, "DATA_PATH", data_path) monkeypatch.setattr(ExtractTransformLoad, "TMP_PATH", tmp_path) + + +def pytest_collection_modifyitems(config, items): + keywordexpr = config.option.keyword + markexpr = config.option.markexpr + if keywordexpr or markexpr: + return # let pytest handle this + + smoketest = "smoketest" + skip_mymarker = pytest.mark.skip(reason=f"{smoketest} not selected") + for item in items: + if smoketest in item.keywords: + item.add_marker(skip_mymarker) diff --git a/data/data-pipeline/data_pipeline/tests/score/fixtures.py b/data/data-pipeline/data_pipeline/tests/score/fixtures.py new file mode 100644 index 00000000..5a819da0 --- /dev/null +++ b/data/data-pipeline/data_pipeline/tests/score/fixtures.py @@ -0,0 +1,12 @@ +import pandas as pd +import pytest +from data_pipeline.config import settings +from data_pipeline.score import field_names + + +@pytest.fixture(scope="session") +def final_score_df(): + return pd.read_csv( + settings.APP_ROOT / "data" / "score" / "csv" / "full" / "usa.csv", + dtype={field_names.GEOID_TRACT_FIELD: str}, + ) diff --git a/data/data-pipeline/data_pipeline/tests/score/test_calculation.py b/data/data-pipeline/data_pipeline/tests/score/test_calculation.py new file mode 100644 index 00000000..783474e4 --- /dev/null +++ b/data/data-pipeline/data_pipeline/tests/score/test_calculation.py @@ -0,0 +1,291 @@ +# flake8: noqa: W0613,W0611,F811 +from dataclasses import dataclass +import pytest +from data_pipeline.score import field_names +from data_pipeline.utils import get_module_logger +from data_pipeline.score.score_narwhal import ScoreNarwhal +from .fixtures import final_score_df # pylint: disable=unused-import + +logger = get_module_logger(__name__) + +pytestmark = pytest.mark.smoketest + + +@dataclass +class PercentileTestConfig: + percentile_column_name: str + threshold_column_name: str + threshold: float + percentile_column_need_suffix: bool = True + + @property + def full_percentile_column_name(self): + if self.percentile_column_need_suffix: + return ( + self.percentile_column_name + + field_names.PERCENTILE_FIELD_SUFFIX + ) + return self.percentile_column_name + + +### TODO: we need to blow this out for all eight categories +def _check_percentile_against_threshold(df, config: PercentileTestConfig): + """Note - for the purpose of testing, this fills with False""" + is_minimum_flagged_ok = ( + df[df[config.threshold_column_name].fillna(False)][ + config.full_percentile_column_name + ].min() + >= config.threshold + ) + + is_maximum_not_flagged_ok = ( + df[~df[config.threshold_column_name].fillna(False)][ + config.full_percentile_column_name + ].max() + < config.threshold + ) + errors = [] + if not 
is_minimum_flagged_ok: + errors.append( + f"For column {config.threshold_column_name}, there is someone flagged below {config.threshold} percentile!" + ) + if not is_maximum_not_flagged_ok: + errors.append( + f"For column {config.threshold_column_name}, there is someone not flagged above {config.threshold} percentile!" + ) + return errors + + +def test_percentile_columns(final_score_df): + low_income = PercentileTestConfig( + field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, + field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED, + ScoreNarwhal.LOW_INCOME_THRESHOLD, + ) + population_loss = PercentileTestConfig( + field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD, + field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + agricultural_loss = PercentileTestConfig( + field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD, + field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + building_loss = PercentileTestConfig( + field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD, + field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + flood = PercentileTestConfig( + field_names.FUTURE_FLOOD_RISK_FIELD, + field_names.HIGH_FUTURE_FLOOD_RISK_FIELD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + wildfire = PercentileTestConfig( + field_names.FUTURE_WILDFIRE_RISK_FIELD, + field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + low_high_school = PercentileTestConfig( + field_names.HIGH_SCHOOL_ED_FIELD, + field_names.LOW_HS_EDUCATION_FIELD, + ScoreNarwhal.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD, + percentile_column_need_suffix=False, + ) + donut_hole_income = PercentileTestConfig( + field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, + field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED_DONUTS, + ScoreNarwhal.LOW_INCOME_THRESHOLD_DONUT, + ) + donut_hole_adjacency = PercentileTestConfig( + (field_names.SCORE_N_COMMUNITIES + field_names.ADJACENCY_INDEX_SUFFIX), + field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD, + ScoreNarwhal.SCORE_THRESHOLD_DONUT, + percentile_column_need_suffix=False, + ) + diesel = PercentileTestConfig( + field_names.DIESEL_FIELD, + field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + dot_burden = PercentileTestConfig( + field_names.DOT_TRAVEL_BURDEN_FIELD, + field_names.DOT_BURDEN_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + traffic_proximity = PercentileTestConfig( + field_names.TRAFFIC_FIELD, + field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + energy_burden = PercentileTestConfig( + field_names.ENERGY_BURDEN_FIELD, + field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + pm25 = PercentileTestConfig( + field_names.PM25_FIELD, + field_names.PM25_EXCEEDS_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + kitchen_plumbing = PercentileTestConfig( + field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD, + field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + # Leadpaint is handled below in a separate method + housing = PercentileTestConfig( + field_names.HOUSING_BURDEN_FIELD, + field_names.HOUSING_BURDEN_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + non_natural_space = PercentileTestConfig( + 
field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME, + field_names.NON_NATURAL_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + rmp = PercentileTestConfig( + field_names.RMP_FIELD, + field_names.RMP_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + npl = PercentileTestConfig( + field_names.NPL_FIELD, + field_names.NPL_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + tsdf = PercentileTestConfig( + field_names.TSDF_FIELD, + field_names.TSDF_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + wastewater = PercentileTestConfig( + field_names.WASTEWATER_FIELD, + field_names.WASTEWATER_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + ust = PercentileTestConfig( + field_names.UST_FIELD, + field_names.UST_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + diabetes = PercentileTestConfig( + field_names.DIABETES_FIELD, + field_names.DIABETES_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + asthma = PercentileTestConfig( + field_names.ASTHMA_FIELD, + field_names.ASTHMA_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + heart_disease = PercentileTestConfig( + field_names.HEART_DISEASE_FIELD, + field_names.HEART_DISEASE_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + low_life_expectancy = PercentileTestConfig( + field_names.LOW_LIFE_EXPECTANCY_FIELD, + field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + unemployment = PercentileTestConfig( + field_names.UNEMPLOYMENT_FIELD, + field_names.UNEMPLOYMENT_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + low_median_income = PercentileTestConfig( + field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD, + field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + linguist_isolation = PercentileTestConfig( + field_names.LINGUISTIC_ISO_FIELD, + field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + poverty = PercentileTestConfig( + field_names.POVERTY_LESS_THAN_100_FPL_FIELD, + field_names.POVERTY_PCTILE_THRESHOLD, + ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD, + ) + errors = [] + for threshhold_config in ( + low_income, + population_loss, + agricultural_loss, + building_loss, + flood, + wildfire, + low_high_school, + donut_hole_income, + donut_hole_adjacency, + dot_burden, + diesel, + traffic_proximity, + energy_burden, + pm25, + kitchen_plumbing, + housing, + non_natural_space, + rmp, + npl, + tsdf, + wastewater, + ust, + diabetes, + asthma, + heart_disease, + low_life_expectancy, + unemployment, + low_median_income, + linguist_isolation, + poverty, + ): + errors.extend( + _check_percentile_against_threshold( + final_score_df, threshhold_config + ) + ) + error_text = "\n".join(errors) + assert not errors, error_text + + +def test_lead_paint_indicator( + final_score_df, +): + """We need special logic here because this is a combined threshold, so we need this test to have two parts. + + 1. We construct our own threshold columns + 2. 
We make sure it's the same as the threshold column in the dataframe + """ + lead_pfs = ( + field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX + ) + home_val_pfs = ( + field_names.MEDIAN_HOUSE_VALUE_FIELD + + field_names.PERCENTILE_FIELD_SUFFIX + ) + combined_proxy_boolean = field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD + + tmp_lead_threshold = ( + final_score_df[lead_pfs] >= ScoreNarwhal.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + tmp_mhv_threshold = ( + final_score_df[home_val_pfs] + <= ScoreNarwhal.MEDIAN_HOUSE_VALUE_THRESHOLD + ) + + true_combined_proxy = tmp_lead_threshold & tmp_mhv_threshold + + assert ( + tmp_mhv_threshold.sum() > 0 + ), "MHV threshold alone does not capture any homes" + + assert final_score_df[combined_proxy_boolean].equals( + true_combined_proxy + ), "Lead proxy calculated improperly" + assert ( + tmp_lead_threshold.sum() > true_combined_proxy.sum() + ), "House value is not further limiting this proxy" diff --git a/data/data-pipeline/data_pipeline/tests/score/test_output.py b/data/data-pipeline/data_pipeline/tests/score/test_output.py new file mode 100644 index 00000000..70e95be4 --- /dev/null +++ b/data/data-pipeline/data_pipeline/tests/score/test_output.py @@ -0,0 +1,205 @@ +# flake8: noqa: W0613,W0611,F811 +from dataclasses import dataclass +from typing import List +import pytest +import pandas as pd +from data_pipeline.score import field_names +from .fixtures import final_score_df # pylint: disable=unused-import + +pytestmark = pytest.mark.smoketest + + +def _helper_test_count_exceeding_threshold(df, col, error_check=1000): + """Fills NA with False""" + return df[df[col].fillna(False)].shape[0] >= error_check + + +def _helper_single_threshold_test(df, col, socioeconomic_column, score_column): + """Note that this fills nulls in the threshold column where nulls exist""" + nulls_dont_exist = ( + df[df[col].fillna(False) & df[socioeconomic_column]][score_column] + .isna() + .sum() + == 0 + ) + only_trues = df[df[col].fillna(False) & df[socioeconomic_column]][ + score_column + ].min() + return nulls_dont_exist, only_trues + + +@dataclass +class ThresholdTestConfig: + name: str + threshhold_columns: List[str] + ses_column_name: str = field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED + score_column_name: str = field_names.SCORE_N_COMMUNITIES + + @property + def error_message(self): + return f"Eligibility columns have an error, {self.name}" + + +def check_for_threshhold_errors( + df: pd.DataFrame, config: ThresholdTestConfig +) -> List[str]: + errors = [] + for col in config.threshhold_columns: + nulls_dont_exist, only_trues = _helper_single_threshold_test( + df, + col, + config.ses_column_name, + config.score_column_name, + ) + proper_threshold_identification = ( + _helper_test_count_exceeding_threshold(df, col) + ) + if not nulls_dont_exist: + errors.append( + f"For {col}, threshold is not calculated right -- there are NaNs in Score" + ) + if not only_trues: + errors.append( + f"For {col} and {config.ses_column_name}, threshold is not calculated right " + f"-- there are Falses where there should only be Trues" + ) + if not proper_threshold_identification: + errors.append( + f"Threshold {col} returns too few tracts, are you sure it's nationally-representative?" 
+ ) + if errors: + errors.append(config.error_message) + return errors + + +def test_threshholds(final_score_df): + climate_thresholds = ThresholdTestConfig( + "climate", + [ + field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD, + field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD, + field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD, + field_names.HIGH_FUTURE_FLOOD_RISK_FIELD, + field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD, + ], + ) + energy_thresholds = ThresholdTestConfig( + "energy", + [ + field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD, + field_names.PM25_EXCEEDS_PCTILE_THRESHOLD, + ], + ) + transportation_thresholds = ThresholdTestConfig( + "transportation", + [ + field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD, + field_names.DOT_BURDEN_PCTILE_THRESHOLD, + field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD, + ], + ) + housing_thresholds = ThresholdTestConfig( + "housing", + [ + field_names.HISTORIC_REDLINING_SCORE_EXCEEDED, + field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD, + field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD, + field_names.HOUSING_BURDEN_PCTILE_THRESHOLD, + field_names.NON_NATURAL_PCTILE_THRESHOLD, + ], + ) + pollution_thresholds = ThresholdTestConfig( + "pollution", + [ + field_names.RMP_PCTILE_THRESHOLD, + field_names.NPL_PCTILE_THRESHOLD, + field_names.TSDF_PCTILE_THRESHOLD, + field_names.AML_BOOLEAN, + field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, + ], + ) + water_thresholds = ThresholdTestConfig( + "water", + [ + field_names.WASTEWATER_PCTILE_THRESHOLD, + field_names.UST_PCTILE_THRESHOLD, + ], + ) + health_thresholds = ThresholdTestConfig( + "health", + [ + field_names.DIABETES_PCTILE_THRESHOLD, + field_names.ASTHMA_PCTILE_THRESHOLD, + field_names.HEART_DISEASE_PCTILE_THRESHOLD, + field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD, + ], + ) + workforce_base_thresholds = ThresholdTestConfig( + "workforce (not island areas)", + [ + field_names.UNEMPLOYMENT_PCTILE_THRESHOLD, + field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD, + field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD, + field_names.POVERTY_PCTILE_THRESHOLD, + ], + ses_column_name=field_names.LOW_HS_EDUCATION_FIELD, + ) + errors = [] + for threshhold_config in [ + climate_thresholds, + energy_thresholds, + transportation_thresholds, + housing_thresholds, + pollution_thresholds, + water_thresholds, + health_thresholds, + workforce_base_thresholds, + ]: + errors.extend( + check_for_threshhold_errors(final_score_df, threshhold_config) + ) + error_text = "\n".join(errors) + assert not errors, error_text + + +def test_max_40_percent_DAC(final_score_df): + score_col_with_donuts = field_names.FINAL_SCORE_N_BOOLEAN + total_population_col = field_names.TOTAL_POP_FIELD + assert ( + final_score_df[score_col_with_donuts].isna().sum() == 0 + ), f"Error: {score_col_with_donuts} contains NULLs" + assert ( + final_score_df[final_score_df[score_col_with_donuts]][ + total_population_col + ].sum() + / final_score_df[total_population_col].sum() + ) < 0.4, "Error: the scoring methodology identifies >40% of people in the US as disadvantaged" + assert ( + final_score_df[score_col_with_donuts].sum() > 0 + ), "FYI: You've identified no tracts at all!" 
+ + +def test_donut_hole_addition_to_score_n(final_score_df): + score_col_with_donuts = field_names.FINAL_SCORE_N_BOOLEAN + score_col = field_names.SCORE_N_COMMUNITIES + donut_hole_score_only = ( + field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX + ) + count_donuts = final_score_df[donut_hole_score_only].sum() + count_n = final_score_df[score_col].sum() + count_n_with_donuts = final_score_df[score_col_with_donuts].sum() + new_donuts = final_score_df[ + final_score_df[donut_hole_score_only] & ~final_score_df[score_col] + ].shape[0] + + assert ( + new_donuts + count_n == count_n_with_donuts + ), "The math doesn't work! The number of new donut hole tracts plus score tracts (base) does not equal the total number of tracts identified" + + assert ( + count_donuts < count_n + ), "There are more donut hole tracts than base tracts. How can it be?" + + assert ( + new_donuts > 0 + ), "FYI: The adjacency index is doing nothing. Consider removing it?" diff --git a/data/data-pipeline/data_pipeline/tile/generate.py b/data/data-pipeline/data_pipeline/tile/generate.py index d5676b79..82e9404e 100644 --- a/data/data-pipeline/data_pipeline/tile/generate.py +++ b/data/data-pipeline/data_pipeline/tile/generate.py @@ -87,6 +87,7 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None: logger.info("Generating Tribal mbtiles file") cmd = "tippecanoe " cmd += "--layer=blocks " + cmd += "--base-zoom=3 " cmd += f"--minimum-zoom={USA_TRIBAL_MIN_ZOOM} --maximum-zoom={USA_TRIBAL_MAX_ZOOM} " cmd += f"--output={tribal_tiles_path}/usa.mbtiles " cmd += str(tribal_geojson_dir / "usa.json") @@ -95,10 +96,12 @@ def generate_tiles(data_path: Path, generate_tribal_layer: bool) -> None: # generate mvts logger.info("Generating Tribal mvt folders and files") cmd = "tippecanoe " + cmd += "--layer=blocks " + cmd += "--base-zoom=3 " cmd += "--no-tile-compression " cmd += "--drop-densest-as-needed " cmd += f"--minimum-zoom={USA_TRIBAL_MIN_ZOOM} --maximum-zoom={USA_TRIBAL_MAX_ZOOM} " - cmd += f"--output-to-directory={tribal_tiles_path} --layer=blocks " + cmd += f"--output-to-directory={tribal_tiles_path} " cmd += str(tribal_geojson_dir / "usa.json") call(cmd, shell=True) diff --git a/data/data-pipeline/data_pipeline/utils.py b/data/data-pipeline/data_pipeline/utils.py index 865e888b..063da627 100644 --- a/data/data-pipeline/data_pipeline/utils.py +++ b/data/data-pipeline/data_pipeline/utils.py @@ -149,7 +149,9 @@ def download_file_from_url( os.mkdir(download_file_name.parent) logger.info(f"Downloading {file_url}") - response = requests.get(file_url, verify=verify) + response = requests.get( + file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT + ) if response.status_code == 200: file_contents = response.content else: diff --git a/data/data-pipeline/pytest.ini b/data/data-pipeline/pytest.ini index 7022c5f7..17099dfd 100644 --- a/data/data-pipeline/pytest.ini +++ b/data/data-pipeline/pytest.ini @@ -1,2 +1,4 @@ [pytest] norecursedirs = .git data +markers = + smoketest: marks a test as depending on the full score output
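For anyone extending this suite, a hypothetical new smoketest module following the marker and fixture conventions above could look like the sketch below (the module name and the uniqueness assertion are illustrative, not part of this change):

```python
# Hypothetical data_pipeline/tests/score/test_example.py
import pytest

from data_pipeline.score import field_names
from .fixtures import final_score_df  # pylint: disable=unused-import

# Everything in this module depends on the full score output, so mark
# it all as a smoketest; the conftest.py hook above skips these unless
# a -m/-k expression such as `-m smoketest` selects them.
pytestmark = pytest.mark.smoketest


def test_geoid_tract_is_unique(final_score_df):
    # Illustrative assertion: each census tract should appear exactly
    # once in the final usa.csv score output.
    assert final_score_df[field_names.GEOID_TRACT_FIELD].is_unique
```

Locally, `poetry run pytest data_pipeline/ -m smoketest` selects these tests, matching the new Run Smoketests step in deploy_be_staging.yml.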