mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 09:41:26 -08:00
* Add tribal count notebook (#1917) * test without caching * added comment Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
d8dd4cf047
commit
6e0ef33d81
2 changed files with 172 additions and 1 deletions
3
.github/workflows/data-checks.yml
vendored
3
.github/workflows/data-checks.yml
vendored
|
@ -39,6 +39,7 @@ jobs:
|
|||
run: poetry show -v
|
||||
- name: Install dependencies
|
||||
run: poetry install
|
||||
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||
# TODO: investigate why caching layer started failing.
|
||||
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||
- name: Run tox
|
||||
run: poetry run tox
|
||||
|
|
|
@ -0,0 +1,170 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8f03fec9-e4a0-4621-b94c-ff6459a42032",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This code is to do the one time check described in [ticket 1917]( Run a one-time check on count of federal Indian reservations)\n",
|
||||
"\n",
|
||||
"> We should do a one-time check on the BIA data we import, such that after import that have (for now in our staging environment) 326 federal Indian reservations on our map. The number 326 from https://www.bia.gov/faqs/what-federal-indian-reservation.\n",
|
||||
">\n",
|
||||
"> If that one-time check to make sure we have all reservations fails, there's a list of federal Indian reservation names that we can use to track down which reservations are not shown on our map and troubleshoot."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "76c8eeac-21e0-4ff1-886d-b9cdd6199411",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"module_path = os.path.abspath(os.path.join(\"../..\"))\n",
|
||||
"if module_path not in sys.path:\n",
|
||||
" sys.path.append(module_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "aaf46946-4fa7-4744-8cd6-723ce86a0213",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from data_pipeline.etl.sources.tribal.etl import TribalETL\n",
|
||||
"from data_pipeline.etl.sources.geo_utils import get_tribal_geojson\n",
|
||||
"import geopandas as gpd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "039d244f-6c97-4b1b-af04-6f879cd2cd86",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%load_ext lab_black"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0f0b3597-844e-4328-b6c9-ac760301383e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2022-09-23 11:56:25,551 [data_pipeline.etl.sources.tribal.etl] INFO Downloading Tribal Data\n",
|
||||
"2022-09-23 11:56:25,552 [data_pipeline.utils] INFO Downloading https://justice40-data.s3.amazonaws.com/data-sources/BIA_National_LAR_json.zip\n",
|
||||
"2022-09-23 11:56:26,068 [data_pipeline.utils] INFO Extracting /home/matt/active/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/downloaded-47ce415c-cc72-4e6f-9cbc-7ad833e08813.zip\n",
|
||||
"2022-09-23 11:56:26,190 [data_pipeline.utils] INFO Downloading https://justice40-data.s3.amazonaws.com/data-sources/Alaska_Native_Villages_json.zip\n",
|
||||
"2022-09-23 11:56:26,290 [data_pipeline.utils] INFO Extracting /home/matt/active/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/downloaded-7c700e59-83cd-4752-889c-159e58c71154.zip\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"etl = TribalETL()\n",
|
||||
"etl.extract()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "51a5a5c7-c02b-4a6f-9f3d-19868e45d7b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"GEOJSON_BASE_PATH = etl.GEOJSON_BASE_PATH"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "c4224aeb-6aa5-47fa-9c6f-e2cd9354720d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bia_national_lar_geojson = (\n",
|
||||
" GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_National_LAR.json\"\n",
|
||||
")\n",
|
||||
"bia_aian_supplemental_geojson = (\n",
|
||||
" GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_AIAN_Supplemental.json\"\n",
|
||||
")\n",
|
||||
"bia_tsa_geojson_geojson = GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_TSA.json\"\n",
|
||||
"alaska_native_villages_geojson = (\n",
|
||||
" GEOJSON_BASE_PATH / \"alaska_native_villages\" / \"AlaskaNativeVillages.gdb.geojson\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "7dd74d24-4f55-45db-bdd9-f4ac945d6a78",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bia_national_lar_df = gpd.read_file(bia_national_lar_geojson)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "ae8dafb7-f997-4977-87da-53f0b4f98a98",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"326"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"len(\n",
|
||||
" sorted(\n",
|
||||
" list(\n",
|
||||
" bia_national_lar_df.LARName.str.replace(r\"\\(.*\\) \", \"\", regex=True).unique()\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "40849cc4-345b-4658-94ac-498154107e9f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Looking at the main BIA LAR data file, and removing paranthecials (so that `'Acoma (Red Lake) LAR','Acoma LAR'` are counted as a single tribal entry), **we have 326 tribal areas**, which is the number we expected."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Add table
Reference in a new issue