mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 21:41:18 -07:00
Issue 1900: Tribal overlap with Census tracts (#1903)
* working notebook * updating notebook * wip * fixing broken tests * adding tribal overlap files * WIP * WIP * WIP, calculated count and names * working * partial cleanup * partial cleanup * updating field names * fixing bug * removing pyogrio * removing unused imports * updating test fixtures to be more realistic * cleaning up notebook * fixing black * fixing flake8 errors * adding tox instructions * updating etl_score * suppressing warning * Use projected CRSes, ignore geom types (#1900) I looked into this a bit, and in general the geometry type mismatch changes very little about the calculation; we have a mix of multipolygons and polygons. The fastest thing to do is just not keep geom type; I did some runs with it set to both True and False, and they're the same within 9 digits of precision. Logically we just want the overlaps, regardless of how the actual geometries are encoded between the frames, so we can in this case ignore the geom types and feel OKAY. I also moved to projected CRSes, since we are actually trying to do area calculations and so we should use them. Again, the change is small in magnitude but logically more sound. * Re-add CDC dataset config (#1900) * adding comments to fips code * delete unnecessary loggers Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
This commit is contained in:
parent
876655d2b2
commit
aca226165c
19 changed files with 1921 additions and 36 deletions
|
@ -290,6 +290,32 @@ datasets:
|
|||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: true
|
||||
- long_name: "Overlap between Census tract boundaries and Tribal area boundaries."
|
||||
short_name: "tribal_overlap"
|
||||
module_name: "tribal_overlap"
|
||||
input_geoid_tract_field_name: "GEOID10_TRACT"
|
||||
load_fields:
|
||||
- short_name: "tribal_count"
|
||||
df_field_name: "COUNT_OF_TRIBAL_AREAS_IN_TRACT"
|
||||
long_name: "Number of Tribal areas within Census tract"
|
||||
field_type: int64
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
- short_name: "tribal_percent"
|
||||
df_field_name: "PERCENT_OF_TRIBAL_AREA_IN_TRACT"
|
||||
long_name: "Percent of the Census tract that is within Tribal areas"
|
||||
field_type: float
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
number_of_decimals_in_output: 6
|
||||
- short_name: "tribal_names"
|
||||
df_field_name: "NAMES_OF_TRIBAL_AREAS_IN_TRACT"
|
||||
long_name: "Names of Tribal areas within Census tract"
|
||||
field_type: string
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
- long_name: "CDC Life Expectancy"
|
||||
short_name: "cdc_life_expectancy"
|
||||
module_name: "cdc_life_expectancy"
|
||||
|
@ -302,5 +328,4 @@ datasets:
|
|||
include_in_tiles: false
|
||||
include_in_downloadable_files: true
|
||||
create_percentile: false
|
||||
create_reverse_percentile: true
|
||||
|
||||
create_reverse_percentile: true
|
|
@ -15,6 +15,7 @@ from data_pipeline.etl.sources.fsf_flood_risk.etl import (
|
|||
FloodRiskETL,
|
||||
)
|
||||
from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL
|
||||
from data_pipeline.etl.sources.tribal_overlap.etl import TribalOverlapETL
|
||||
from data_pipeline.etl.sources.us_army_fuds.etl import USArmyFUDS
|
||||
from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL
|
||||
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
|
||||
|
@ -52,6 +53,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.nature_deprived_df: pd.DataFrame
|
||||
self.eamlis_df: pd.DataFrame
|
||||
self.fuds_df: pd.DataFrame
|
||||
self.tribal_overlap_df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
logger.info("Loading data sets from disk.")
|
||||
|
@ -148,6 +150,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Load FUDS dataset
|
||||
self.fuds_df = USArmyFUDS.get_data_frame()
|
||||
|
||||
# Load Tribal overlap dataset
|
||||
self.tribal_overlap_df = TribalOverlapETL.get_data_frame()
|
||||
|
||||
# Load GeoCorr Urban Rural Map
|
||||
geocorr_urban_rural_csv = (
|
||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||
|
@ -359,6 +364,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.nature_deprived_df,
|
||||
self.eamlis_df,
|
||||
self.fuds_df,
|
||||
self.tribal_overlap_df
|
||||
]
|
||||
|
||||
# Sanity check each data frame before merging.
|
||||
|
@ -469,12 +475,15 @@ class ScoreETL(ExtractTransformLoad):
|
|||
field_names.PERCENT_AGE_UNDER_10,
|
||||
field_names.PERCENT_AGE_10_TO_64,
|
||||
field_names.PERCENT_AGE_OVER_64,
|
||||
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
|
||||
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
]
|
||||
|
||||
non_numeric_columns = [
|
||||
self.GEOID_TRACT_FIELD_NAME,
|
||||
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
|
||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
|
||||
]
|
||||
|
||||
boolean_columns = [
|
||||
|
|
|
@ -229,3 +229,25 @@ def test_compare_to_list_of_expected_state_fips_codes():
|
|||
continental_us_expected=False,
|
||||
alaska_and_hawaii_expected=False,
|
||||
)
|
||||
|
||||
# Missing Hawaii but not Alaska
|
||||
fips_codes_test_5 = [x for x in fips_codes_test_1 if x not in ["15"]]
|
||||
|
||||
# Should raise error because both Hawaii and Alaska are expected
|
||||
with pytest.raises(ValueError) as exception_info:
|
||||
compare_to_list_of_expected_state_fips_codes(
|
||||
actual_state_fips_codes=fips_codes_test_5,
|
||||
alaska_and_hawaii_expected=True,
|
||||
)
|
||||
partial_expected_error_message = (
|
||||
"FIPS state codes expected that are not present in the data:\n"
|
||||
"['15']\n"
|
||||
)
|
||||
assert partial_expected_error_message in str(exception_info.value)
|
||||
|
||||
# Should work as expected
|
||||
compare_to_list_of_expected_state_fips_codes(
|
||||
actual_state_fips_codes=fips_codes_test_5,
|
||||
alaska_and_hawaii_expected=True,
|
||||
additional_fips_codes_not_expected=["15"],
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue