mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 09:01:17 -07:00
Merge branch 'emma-nechamkin/release/score-narwhal' of https://github.com/usds/justice40-tool into emma-nechamkin/release/score-narwhal
This commit is contained in:
commit
932179841f
22 changed files with 2534 additions and 416 deletions
15
.github/workflows/deploy_be_staging.yml
vendored
15
.github/workflows/deploy_be_staging.yml
vendored
|
@ -38,6 +38,12 @@ jobs:
|
|||
uses: snok/install-poetry@v1
|
||||
- name: Print Poetry settings
|
||||
run: poetry show -v
|
||||
- name: Install GDAL/ogr2ogr
|
||||
run: |
|
||||
sudo add-apt-repository ppa:ubuntugis/ppa
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install gdal-bin
|
||||
ogrinfo --version
|
||||
- name: Install dependencies
|
||||
run: poetry add s4cmd && poetry install
|
||||
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
|
||||
|
@ -47,6 +53,9 @@ jobs:
|
|||
aws-access-key-id: ${{ secrets.DATA_DEV_AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.DATA_DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: us-east-1
|
||||
- name: Download census geo data for later use
|
||||
run: |
|
||||
poetry run python3 data_pipeline/application.py pull-census-data -s aws
|
||||
- name: Generate Score
|
||||
run: |
|
||||
poetry run python3 data_pipeline/application.py score-full-run
|
||||
|
@ -71,12 +80,6 @@ jobs:
|
|||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
repo-token-user-login: "github-actions[bot]"
|
||||
allow-repeats: false
|
||||
- name: Install GDAL/ogr2ogr
|
||||
run: |
|
||||
sudo add-apt-repository ppa:ubuntugis/ppa
|
||||
sudo apt-get update
|
||||
sudo apt-get -y install gdal-bin
|
||||
ogrinfo --version
|
||||
- name: Set timezone for tippecanoe
|
||||
uses: szenius/set-timezone@v1.0
|
||||
with:
|
||||
|
|
|
@ -9,7 +9,8 @@ RUN apt-get update && apt-get install -y \
|
|||
unzip \
|
||||
wget \
|
||||
python3-dev \
|
||||
python3-pip
|
||||
python3-pip \
|
||||
gdal-bin
|
||||
|
||||
# tippecanoe
|
||||
ENV TZ=America/Los_Angeles
|
||||
|
|
|
@ -10,6 +10,7 @@ from data_pipeline.etl.runner import (
|
|||
score_post,
|
||||
)
|
||||
from data_pipeline.etl.sources.census.etl_utils import (
|
||||
check_census_data_source,
|
||||
reset_data_directories as census_reset,
|
||||
zip_census_data,
|
||||
)
|
||||
|
@ -96,6 +97,23 @@ def census_data_download(zip_compress):
|
|||
sys.exit()
|
||||
|
||||
|
||||
@cli.command(help="Retrieve census data from source")
@click.option(
    "-s",
    "--data-source",
    default="local",
    required=False,
    type=str,
    help=dataset_cli_help,
)
def pull_census_data(data_source: str):
    """CLI command: make the census data available under the app's data folder.

    The work is delegated to ``check_census_data_source``, which receives the
    ``<APP_ROOT>/data/census`` directory and the requested source.

    Args:
        data_source (str): value of ``-s/--data-source``; defaults to
            ``"local"``.

    Note:
        Always finishes by calling ``sys.exit()``, so control never returns
        to the caller.
    """
    logger.info("Pulling census data from %s", data_source)
    check_census_data_source(
        settings.APP_ROOT / "data" / "census",
        data_source,
    )
    logger.info("Finished pulling census data")

    sys.exit()
|
||||
|
||||
|
||||
@cli.command(
|
||||
help="Run all ETL processes or a specific one",
|
||||
)
|
||||
|
|
|
@ -127,6 +127,7 @@ class ExtractTransformLoad:
|
|||
sys.exit()
|
||||
|
||||
# set some of the basic fields
|
||||
if "input_geoid_tract_field_name" in dataset_config:
|
||||
cls.INPUT_GEOID_TRACT_FIELD_NAME = dataset_config[
|
||||
"input_geoid_tract_field_name"
|
||||
]
|
||||
|
|
|
@ -130,6 +130,11 @@ DATASET_LIST = [
|
|||
"module_dir": "census_acs_2010",
|
||||
"class_name": "CensusACS2010ETL",
|
||||
},
|
||||
{
|
||||
"name": "us_army_fuds",
|
||||
"module_dir": "us_army_fuds",
|
||||
"class_name": "USArmyFUDS",
|
||||
},
|
||||
]
|
||||
|
||||
CENSUS_INFO = {
|
||||
|
|
|
@ -117,6 +117,34 @@ datasets:
|
|||
field_type: float
|
||||
include_in_downloadable_files: true
|
||||
include_in_tiles: true
|
||||
- long_name: "Formerly Used Defense Sites"
|
||||
short_name: "FUDS"
|
||||
module_name: "us_army_fuds"
|
||||
load_fields:
|
||||
- short_name: "fuds_count"
|
||||
df_field_name: "ELIGIBLE_FUDS_COUNT_FIELD_NAME"
|
||||
long_name: "Count of eligible Formerly Used Defense Site (FUDS) properties centroids"
|
||||
description_short:
|
||||
"The number of FUDS marked as Eligible and Has Project in the tract."
|
||||
field_type: int64
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: false
|
||||
- short_name: "not_fuds_ct"
|
||||
df_field_name: "INELIGIBLE_FUDS_COUNT_FIELD_NAME"
|
||||
long_name: "Count of ineligible Formerly Used Defense Site (FUDS) properties centroids"
|
||||
description_short:
|
||||
"The number of FUDS marked as Ineligible or Project in the tract."
|
||||
field_type: int64
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: false
|
||||
- short_name: "has_fuds"
|
||||
df_field_name: "ELIGIBLE_FUDS_BINARY_FIELD_NAME"
|
||||
long_name: "Is there at least one Formerly Used Defense Site (FUDS) in the tract?"
|
||||
description_short:
|
||||
"Whether the tract has a FUDS"
|
||||
field_type: bool
|
||||
include_in_tiles: false
|
||||
include_in_downloadable_files: false
|
||||
- long_name: "Example ETL"
|
||||
short_name: "Example"
|
||||
module_name: "example_dataset"
|
||||
|
@ -128,4 +156,3 @@ datasets:
|
|||
field_type: float
|
||||
include_in_tiles: true
|
||||
include_in_downloadable_files: true
|
||||
|
||||
|
|
|
@ -77,7 +77,7 @@ class DatasetsConfig:
|
|||
long_name: str
|
||||
short_name: str
|
||||
module_name: str
|
||||
input_geoid_tract_field_name: str
|
||||
load_fields: List[LoadField]
|
||||
input_geoid_tract_field_name: Optional[str] = None
|
||||
|
||||
datasets: List[Dataset]
|
||||
|
|
|
@ -20,19 +20,20 @@ class GeoFileType(Enum):
|
|||
|
||||
|
||||
class CensusETL(ExtractTransformLoad):
|
||||
SHP_BASE_PATH = ExtractTransformLoad.DATA_PATH / "census" / "shp"
|
||||
GEOJSON_BASE_PATH = ExtractTransformLoad.DATA_PATH / "census" / "geojson"
|
||||
CSV_BASE_PATH = ExtractTransformLoad.DATA_PATH / "census" / "csv"
|
||||
GEOJSON_PATH = ExtractTransformLoad.DATA_PATH / "census" / "geojson"
|
||||
NATIONAL_TRACT_CSV_PATH = CSV_BASE_PATH / "us.csv"
|
||||
NATIONAL_TRACT_JSON_PATH = GEOJSON_BASE_PATH / "us.json"
|
||||
GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
|
||||
|
||||
def __init__(self):
|
||||
self.SHP_BASE_PATH = self.DATA_PATH / "census" / "shp"
|
||||
self.GEOJSON_BASE_PATH = self.DATA_PATH / "census" / "geojson"
|
||||
self.CSV_BASE_PATH = self.DATA_PATH / "census" / "csv"
|
||||
# the fips_states_2010.csv is generated from data here
|
||||
# https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html
|
||||
self.STATE_FIPS_CODES = get_state_fips_codes(self.DATA_PATH)
|
||||
self.GEOJSON_PATH = self.DATA_PATH / "census" / "geojson"
|
||||
self.TRACT_PER_STATE: dict = {} # in-memory dict per state
|
||||
self.TRACT_NATIONAL: list = [] # in-memory global list
|
||||
self.NATIONAL_TRACT_CSV_PATH = self.CSV_BASE_PATH / "us.csv"
|
||||
self.NATIONAL_TRACT_JSON_PATH = self.GEOJSON_BASE_PATH / "us.json"
|
||||
self.GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
|
||||
|
||||
def _path_for_fips_file(
|
||||
self, fips_code: str, file_type: GeoFileType
|
||||
|
|
62
data/data-pipeline/data_pipeline/etl/sources/geo_utils.py
Normal file
62
data/data-pipeline/data_pipeline/etl/sources/geo_utils.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
"""Utililities for turning geographies into tracts, using census data"""
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from functools import lru_cache
|
||||
import geopandas as gpd
|
||||
from data_pipeline.utils import get_module_logger
|
||||
from .census.etl import CensusETL
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
@lru_cache()
def get_tract_geojson(
    _tract_data_path: Optional[Path] = None,
) -> gpd.GeoDataFrame:
    """Load census tract geometries with the tract id in ``GEOID10_TRACT``.

    Results are memoized with ``lru_cache``, so repeated calls with the same
    path return the same GeoDataFrame object without re-reading the file.

    Args:
        _tract_data_path (Path): optional GeoJSON file to read instead of the
            national tract file produced by ``CensusETL``.

    Returns:
        GeoDataFrame: the tract data read from the file, with the ``GEOID10``
            column renamed to ``GEOID10_TRACT``.
    """
    logger.info("Loading tract geometry data from census ETL")
    geojson_path = _tract_data_path
    # NOTE(review): the nesting below (existence check only applies to the
    # default CensusETL path) is reconstructed -- confirm against the file.
    if geojson_path is None:
        geojson_path = CensusETL.NATIONAL_TRACT_JSON_PATH
        if geojson_path.exists():
            logger.debug("Loading existing tract geojson")
        else:
            # The national file is missing: build it by running the census
            # ETL stages in order.
            logger.debug("Census data has not been computed, running")
            etl = CensusETL()
            etl.extract()
            etl.transform()
            etl.load()
    tracts = gpd.read_file(geojson_path, include_fields=["GEOID10"])
    tracts.rename(columns={"GEOID10": "GEOID10_TRACT"}, inplace=True)
    return tracts
|
||||
|
||||
|
||||
def add_tracts_for_geometries(
    df: gpd.GeoDataFrame, _tract_data_path: Optional[Path] = None
) -> gpd.GeoDataFrame:
    """Adds tract-geoids to dataframe df that contains spatial geometries

    Depends on CensusETL for the geodata to do its conversion

    Args:
        df (GeoDataFrame): a geopandas GeoDataFrame with a point geometry column
        _tract_data_path (Path): an override to directly pass a GEOJSON file of
            tracts->Geometries, to simplify testing.

    Returns:
        GeoDataFrame: the above dataframe, with an additional GEOID10_TRACT column that
            maps the points in DF to census tracts and a geometry column for later
            spatial analysis. The join is an inner join, so rows whose geometry
            intersects no tract are dropped.

    Raises:
        ValueError: if ``df`` is not in the same CRS as the tract data.
    """
    logger.debug("Appending tract data to dataframe")
    tract_data = get_tract_geojson(_tract_data_path)
    # Raise explicitly instead of using `assert`: assertions are removed when
    # Python runs with -O, which would let a mis-projected frame through to
    # the spatial join and produce wrong (or empty) matches.
    if not tract_data.crs == df.crs:
        raise ValueError(f"Dataframe must be projected to {tract_data.crs}")
    # NOTE(review): newer geopandas releases spell `op` as `predicate`;
    # confirm against the pinned geopandas version before upgrading.
    return gpd.sjoin(
        df,
        tract_data[["GEOID10_TRACT", "geometry"]],
        how="inner",
        op="intersects",
    )
|
|
@ -0,0 +1,98 @@
|
|||
from pathlib import Path
|
||||
import geopandas as gpd
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
|
||||
from data_pipeline.utils import get_module_logger, download_file_from_url
|
||||
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class USArmyFUDS(ExtractTransformLoad):
    """ETL for the US Army Formerly Used Defense Sites (FUDS) dataset.

    Downloads the FUDS GeoJSON, attaches a census tract to every site and
    produces, per tract, the number of eligible sites, the number of all
    other sites, and a flag telling whether any eligible site exists.
    """

    NAME: str = "us_army_fuds"

    # Output column names; only declared here, no value is assigned in this
    # class. NOTE(review): presumably populated by the base class from the
    # dataset config -- confirm.
    ELIGIBLE_FUDS_COUNT_FIELD_NAME: str
    INELIGIBLE_FUDS_COUNT_FIELD_NAME: str
    ELIGIBLE_FUDS_BINARY_FIELD_NAME: str
    GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT

    def __init__(self):
        """Set the download location, output path and output columns."""
        self.FILE_URL: str = (
            "https://opendata.arcgis.com/api/v3/datasets/"
            "3f8354667d5b4b1b8ad7a6e00c3cf3b1_1/downloads/"
            "data?format=geojson&spatialRefId=4326&where=1%3D1"
        )

        self.OUTPUT_PATH: Path = self.DATA_PATH / "dataset" / "us_army_fuds"

        # Columns written to the output, tract id first
        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            self.ELIGIBLE_FUDS_COUNT_FIELD_NAME,
            self.INELIGIBLE_FUDS_COUNT_FIELD_NAME,
            self.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
        ]
        self.DOWNLOAD_FILE_NAME = self.get_tmp_path() / "fuds.geojson"

        self.raw_df: gpd.GeoDataFrame
        self.output_df: pd.DataFrame

    def extract(self) -> None:
        """Download the FUDS GeoJSON from ``FILE_URL`` to ``DOWNLOAD_FILE_NAME``."""
        logger.info("Starting FUDS data download.")

        download_file_from_url(
            file_url=self.FILE_URL,
            download_file_name=self.DOWNLOAD_FILE_NAME,
            verify=True,
        )

    def transform(self) -> None:
        """Aggregate the downloaded sites into per-tract counts in ``output_df``.

        A site counts as eligible when ELIGIBILITY is "Eligible" and
        HASPROJECTS is "Yes"; every other site is counted as ineligible.
        """
        logger.info("Starting FUDS transform.")

        logger.info("Loading FUDs data as GeoDataFrame for transform")
        sites = gpd.read_file(
            filename=self.DOWNLOAD_FILE_NAME,
            low_memory=False,
        )

        # The joined frame will not have exactly as many rows as the raw one,
        # because some bases lack coordinates or have coordinates in Mexico
        # or in the ocean. To inspect the dropped rows (using the variable
        # names of this method):
        # sites[~sites.OBJECTID.isin(sites_with_tracts.OBJECTID)][
        #     ['OBJECTID', 'CLOSESTCITY', 'COUNTY', 'ELIGIBILITY',
        #      'STATE', 'LATITUDE', "LONGITUDE"]]
        logger.debug("Adding tracts to FUDS data")
        sites_with_tracts = add_tracts_for_geometries(sites)
        self.output_df = pd.DataFrame()

        # Boolean marker per site: eligible and has projects
        is_eligible = (sites_with_tracts.ELIGIBILITY == "Eligible") & (
            sites_with_tracts.HASPROJECTS == "Yes"
        )
        sites_with_tracts["tmp_fuds"] = is_eligible

        tract_field = self.GEOID_TRACT_FIELD_NAME

        # Summing the boolean marker counts the eligible sites of each tract
        eligible_per_tract = sites_with_tracts.groupby(tract_field)[
            "tmp_fuds"
        ].sum()
        self.output_df[self.ELIGIBLE_FUDS_COUNT_FIELD_NAME] = eligible_per_tract

        # Everything not marked eligible is counted as ineligible
        ineligible_sites = sites_with_tracts[~sites_with_tracts.tmp_fuds]
        ineligible_per_tract = ineligible_sites.groupby(tract_field).size()
        self.output_df[
            self.INELIGIBLE_FUDS_COUNT_FIELD_NAME
        ] = ineligible_per_tract

        # Tracts missing from one of the two counts get 0 there
        counts = self.output_df.fillna(0)
        counts = counts.astype("int64")
        counts = counts.sort_index()
        self.output_df = counts.reset_index()

        has_eligible = self.output_df[self.ELIGIBLE_FUDS_COUNT_FIELD_NAME] > 0.0
        self.output_df[self.ELIGIBLE_FUDS_BINARY_FIELD_NAME] = np.where(
            has_eligible,
            True,
            False,
        )
|
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "06", "COUNTYFP10": "037", "TRACTCE10": "207400", "GEOID10_TRACT": "06037207400", "NAME10": "2074", "NAMELSAD10": "Census Tract 2074", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 862884, "AWATER10": 6531, "INTPTLAT10": "+34.0561941", "INTPTLON10": "-118.2466502" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -118.25165, 34.057561 ], [ -118.251856, 34.057693 ], [ -118.251973, 34.057769 ], [ -118.253069, 34.058478 ], [ -118.253333, 34.058635 ], [ -118.253175, 34.058788 ], [ -118.252985, 34.058967 ], [ -118.252934, 34.059012 ], [ -118.252592, 34.059315 ], [ -118.252391, 34.059485 ], [ -118.252131, 34.059695 ], [ -118.251474, 34.060224 ], [ -118.251082, 34.060543 ], [ -118.250554, 34.060988 ], [ -118.249996, 34.061475 ], [ -118.248871, 34.06247 ], [ -118.248822, 34.062513 ], [ -118.248754, 34.062434 ], [ -118.247476, 34.060942 ], [ -118.247368, 34.060818 ], [ -118.247013, 34.06041 ], [ -118.24698, 34.060373 ], [ -118.246769, 34.060147 ], [ -118.246548, 34.059926 ], [ -118.246318, 34.059712 ], [ -118.246079, 34.059505 ], [ -118.245633, 34.059146 ], [ -118.245532, 34.059066 ], [ -118.245262, 34.058851 ], [ -118.244952, 34.058609 ], [ -118.244638, 34.05837 ], [ -118.244425, 34.058215 ], [ -118.244007, 34.057917 ], [ -118.243393, 34.057507 ], [ -118.243099, 34.057319 ], [ -118.24245, 34.056913 ], [ -118.241377, 34.056241 ], [ -118.241204, 34.056133 ], [ -118.240288, 34.055562 ], [ -118.239443, 34.055035 ], [ -118.238512, 34.054454 ], [ -118.238227, 34.054289 ], [ -118.238023, 34.054178 ], [ -118.237887, 34.054108 ], [ -118.2379, 34.054002 ], [ -118.237936, 34.053725 ], [ -118.237945, 34.053651 ], [ -118.237976, 34.052819 ], [ -118.238039, 34.05107 ], [ -118.239698, 34.052451 ], [ -118.239867, 34.051906 ], [ -118.240115, 34.0514 ], [ -118.240172, 34.051284 ], [ -118.240271, 34.051083 ], [ -118.240856, 34.050405 ], [ -118.242151, 34.051344 ], [ -118.242382, 34.051511 ], [ -118.24334, 34.050273 ], [ 
-118.244519, 34.051003 ], [ -118.245067, 34.051354 ], [ -118.245606, 34.051703 ], [ -118.246677, 34.052395 ], [ -118.247754, 34.053091 ], [ -118.248466, 34.053552 ], [ -118.248818, 34.05378 ], [ -118.249888, 34.054472 ], [ -118.25095, 34.055158 ], [ -118.251081, 34.055241 ], [ -118.250895, 34.055373 ], [ -118.250712, 34.05553 ], [ -118.250052, 34.056232 ], [ -118.249838, 34.056391 ], [ -118.25165, 34.057561 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "13", "COUNTYFP10": "121", "TRACTCE10": "011900", "GEOID10_TRACT": "13121011900", "NAME10": "119", "NAMELSAD10": "Census Tract 119", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 1530847, "AWATER10": 0, "INTPTLAT10": "+33.7539369", "INTPTLON10": "-084.3826910" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -84.393243, 33.754604 ], [ -84.393434, 33.754711 ], [ -84.393836, 33.75492 ], [ -84.39376, 33.755141 ], [ -84.394037, 33.756265 ], [ -84.394411, 33.757235 ], [ -84.394982, 33.758491 ], [ -84.394325, 33.758955 ], [ -84.393831, 33.759308 ], [ -84.393459, 33.759573 ], [ -84.393366, 33.759627 ], [ -84.393273, 33.759663 ], [ -84.393187, 33.759685 ], [ -84.392783, 33.75973 ], [ -84.392071, 33.759729 ], [ -84.390564, 33.759722 ], [ -84.389801, 33.759719 ], [ -84.389083, 33.759716 ], [ -84.387584, 33.759709 ], [ -84.387539, 33.759708 ], [ -84.386062, 33.759685 ], [ -84.384198, 33.759666 ], [ -84.38422, 33.758392 ], [ -84.384242, 33.757117 ], [ -84.384268, 33.755571 ], [ -84.384283, 33.75473 ], [ -84.384287, 33.754521 ], [ -84.384305, 33.754462 ], [ -84.382272, 33.754439 ], [ -84.381907, 33.754434 ], [ -84.380277, 33.754417 ], [ -84.3802, 33.754414 ], [ -84.379455, 33.754397 ], [ -84.379157, 33.75439 ], [ -84.378673, 33.754379 ], [ -84.378332, 33.75438 ], [ -84.378297, 33.75437 ], [ -84.378044, 33.754368 ], [ -84.377363, 33.754378 ], [ -84.377298, 33.754379 ], [ -84.377099, 33.754376 ], [ -84.376604, 33.754371 ], [ -84.375544, 33.754355 ], [ -84.374384, 33.754337 ], [ -84.37336, 33.754322 ], [ -84.372422, 33.754309 ], [ -84.37215, 33.754305 ], [ -84.371286, 33.754295 ], [ -84.369769, 33.754278 ], [ -84.368828, 33.754282 ], [ -84.368562, 33.754283 ], [ -84.368027, 33.754285 ], [ -84.367498, 33.754287 ], [ -84.366551, 33.75429 ], [ -84.366444, 33.754291 ], [ -84.365863, 33.754297 ], [ -84.365599, 33.754312 ], [ -84.365617, 33.754242 ], [ -84.365791, 33.753851 ], [ -84.366268, 33.75328 ], [ -84.366323, 33.753215 
], [ -84.3666, 33.752984 ], [ -84.366842, 33.752754 ], [ -84.366935, 33.752666 ], [ -84.36698, 33.752629 ], [ -84.367086, 33.752523 ], [ -84.367248, 33.75237 ], [ -84.368362, 33.752078 ], [ -84.369133, 33.751836 ], [ -84.369871, 33.751612 ], [ -84.370491, 33.751434 ], [ -84.370976, 33.751284 ], [ -84.37217, 33.750916 ], [ -84.373348, 33.750533 ], [ -84.374128, 33.750253 ], [ -84.375093, 33.749926 ], [ -84.376294, 33.749564 ], [ -84.376636, 33.749461 ], [ -84.376945, 33.749372 ], [ -84.37768, 33.749186 ], [ -84.378404, 33.74904 ], [ -84.378835, 33.748964 ], [ -84.379047, 33.748935 ], [ -84.379541, 33.748892 ], [ -84.379663, 33.748881 ], [ -84.380133, 33.748853 ], [ -84.380525, 33.748853 ], [ -84.380758, 33.748868 ], [ -84.381016, 33.748884 ], [ -84.381506, 33.748923 ], [ -84.382132, 33.748903 ], [ -84.38251, 33.748886 ], [ -84.382727, 33.748877 ], [ -84.383153, 33.748907 ], [ -84.383313, 33.748923 ], [ -84.383493, 33.748941 ], [ -84.383746, 33.749 ], [ -84.383896, 33.749035 ], [ -84.384064, 33.749089 ], [ -84.384277, 33.749158 ], [ -84.384328, 33.74918 ], [ -84.384564, 33.749282 ], [ -84.38487, 33.749449 ], [ -84.385214, 33.749686 ], [ -84.385654, 33.749989 ], [ -84.386389, 33.750471 ], [ -84.387563, 33.75124 ], [ -84.387886, 33.751452 ], [ -84.388865, 33.752093 ], [ -84.389895, 33.752768 ], [ -84.390844, 33.753391 ], [ -84.39132, 33.753703 ], [ -84.391525, 33.753837 ], [ -84.392156, 33.754065 ], [ -84.392373, 33.754172 ], [ -84.392834, 33.754399 ], [ -84.39318, 33.754569 ], [ -84.393243, 33.754604 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "25", "COUNTYFP10": "025", "TRACTCE10": "030300", "GEOID10_TRACT": "25025030300", "NAME10": "303", "NAMELSAD10": "Census Tract 303", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 691377, "AWATER10": 234496, "INTPTLAT10": "+42.3600562", "INTPTLON10": "-071.0532861" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -71.045566, 42.359733 ], [ -71.049073, 42.354939 ], [ -71.049333, 42.354585 ], [ -71.049396, 42.354498 ], [ -71.049595, 42.354497 ], [ -71.050434, 42.354846 ], [ -71.050471, 42.354898 ], [ -71.050892, 42.35506 ], [ -71.05106, 42.355131 ], [ -71.050981, 42.355309 ], [ -71.050889, 42.355475 ], [ -71.050856, 42.355555 ], [ -71.050762, 42.356011 ], [ -71.050749, 42.356124 ], [ -71.050816, 42.35664 ], [ -71.051009, 42.356937 ], [ -71.051198, 42.357241 ], [ -71.05137, 42.357474 ], [ -71.051411, 42.357539 ], [ -71.051508, 42.357692 ], [ -71.051613, 42.357921 ], [ -71.051784, 42.358295 ], [ -71.051941, 42.358637 ], [ -71.051976, 42.358699 ], [ -71.052005, 42.358693 ], [ -71.052065, 42.358682 ], [ -71.052158, 42.358666 ], [ -71.052294, 42.358646 ], [ -71.052749, 42.358576 ], [ -71.053192, 42.358496 ], [ -71.053248, 42.358478 ], [ -71.053321, 42.358455 ], [ -71.053518, 42.358356 ], [ -71.053765, 42.358183 ], [ -71.053961, 42.358012 ], [ -71.054265, 42.357737 ], [ -71.05437, 42.357662 ], [ -71.054524, 42.357551 ], [ -71.054848, 42.35735 ], [ -71.05502, 42.357245 ], [ -71.05519, 42.357143 ], [ -71.055539, 42.356971 ], [ -71.055759, 42.356913 ], [ -71.056292, 42.356874 ], [ -71.05659, 42.356852 ], [ -71.057191, 42.356822 ], [ -71.05771, 42.356777 ], [ -71.057993, 42.356789 ], [ -71.058235, 42.356832 ], [ -71.058737, 42.356988 ], [ -71.058561, 42.357161 ], [ -71.05829, 42.35741 ], [ -71.058759, 42.357577 ], [ -71.059299, 42.357766 ], [ -71.059613, 42.357863 ], [ -71.060354, 42.358092 ], [ -71.061259, 42.358283 ], [ -71.06151, 42.358336 ], [ -71.061714, 42.358318 ], [ -71.061977, 42.358246 ], [ -71.062375, 
42.358095 ], [ -71.062642, 42.357977 ], [ -71.062727, 42.358311 ], [ -71.062817, 42.358665 ], [ -71.062823, 42.358714 ], [ -71.062846, 42.358889 ], [ -71.062862, 42.359204 ], [ -71.062875, 42.359483 ], [ -71.062864, 42.36009 ], [ -71.062911, 42.361229 ], [ -71.062762, 42.361642 ], [ -71.062626, 42.361842 ], [ -71.062499, 42.362001 ], [ -71.062354, 42.362143 ], [ -71.062268, 42.362205 ], [ -71.062195, 42.362258 ], [ -71.061856, 42.36243 ], [ -71.061669, 42.362493 ], [ -71.061223, 42.362633 ], [ -71.060878, 42.362731 ], [ -71.060042, 42.362967 ], [ -71.059606, 42.36307 ], [ -71.059491, 42.363104 ], [ -71.058769, 42.363318 ], [ -71.058559, 42.363381 ], [ -71.0584, 42.363412 ], [ -71.058216, 42.363431 ], [ -71.058037, 42.363481 ], [ -71.057979, 42.363511 ], [ -71.057882, 42.363546 ], [ -71.057776, 42.363542 ], [ -71.057709, 42.363543 ], [ -71.05757, 42.36342 ], [ -71.057332, 42.36318 ], [ -71.057051, 42.362987 ], [ -71.056227, 42.362386 ], [ -71.056176, 42.362357 ], [ -71.05525, 42.36183 ], [ -71.055228, 42.361869 ], [ -71.055183, 42.361919 ], [ -71.055187, 42.361941 ], [ -71.055159, 42.361989 ], [ -71.055123, 42.362045 ], [ -71.055026, 42.362149 ], [ -71.05489, 42.362265 ], [ -71.054661, 42.36238 ], [ -71.054626, 42.362404 ], [ -71.054581, 42.362434 ], [ -71.054494, 42.362511 ], [ -71.054407, 42.362634 ], [ -71.054311, 42.362802 ], [ -71.054296, 42.36283 ], [ -71.05419, 42.362973 ], [ -71.054061, 42.363108 ], [ -71.053826, 42.363303 ], [ -71.053709, 42.363367 ], [ -71.053585, 42.363405 ], [ -71.053549, 42.363416 ], [ -71.053199, 42.363474 ], [ -71.053043, 42.363495 ], [ -71.052769, 42.36353 ], [ -71.05246, 42.363586 ], [ -71.05224, 42.363626 ], [ -71.052061, 42.36371 ], [ -71.051895, 42.363501 ], [ -71.051661, 42.363192 ], [ -71.051647, 42.36311 ], [ -71.051414, 42.363386 ], [ -71.05135, 42.36347 ], [ -71.051195, 42.36372 ], [ -71.051115, 42.363979 ], [ -71.051088, 42.364065 ], [ -71.05109, 42.364175 ], [ -71.0496, 42.364044 ], [ -71.049409, 42.364045 ], [ -71.046389, 
42.363935 ], [ -71.045985, 42.362294 ], [ -71.045918, 42.361164 ], [ -71.0455, 42.359825 ], [ -71.045566, 42.359733 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "28", "COUNTYFP10": "047", "TRACTCE10": "003800", "GEOID10_TRACT": "28047003800", "NAME10": "38", "NAMELSAD10": "Census Tract 38", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 2304789, "AWATER10": 3104014, "INTPTLAT10": "+30.3577592", "INTPTLON10": "-089.1130708" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -89.101237, 30.347697 ], [ -89.117538, 30.342797 ], [ -89.124278, 30.343971 ], [ -89.124335, 30.353194 ], [ -89.124336, 30.353446 ], [ -89.124338, 30.353697 ], [ -89.124555, 30.354007 ], [ -89.124595, 30.353991 ], [ -89.124991, 30.354701 ], [ -89.125114, 30.354921 ], [ -89.125679, 30.355921 ], [ -89.127359, 30.358407 ], [ -89.127508, 30.358574 ], [ -89.127077, 30.35871 ], [ -89.124073, 30.359753 ], [ -89.12318, 30.360048 ], [ -89.122255, 30.360367 ], [ -89.121353, 30.360674 ], [ -89.120354, 30.36101 ], [ -89.117854, 30.36182 ], [ -89.116359, 30.362304 ], [ -89.11492, 30.362785 ], [ -89.113579, 30.363225 ], [ -89.112509, 30.363583 ], [ -89.11135, 30.363984 ], [ -89.11121, 30.364005 ], [ -89.110283, 30.364326 ], [ -89.109295, 30.364647 ], [ -89.108217, 30.365012 ], [ -89.107137, 30.365376 ], [ -89.105342, 30.365959 ], [ -89.102779, 30.36682 ], [ -89.101505, 30.367176 ], [ -89.100242, 30.367636 ], [ -89.098984, 30.368 ], [ -89.097738, 30.368327 ], [ -89.097572, 30.368365 ], [ -89.096742, 30.368555 ], [ -89.096574, 30.368614 ], [ -89.095317, 30.368959 ], [ -89.095334, 30.371183 ], [ -89.095338, 30.371317 ], [ -89.093988, 30.371319 ], [ -89.09397, 30.371327 ], [ -89.093034, 30.371329 ], [ -89.092869, 30.371322 ], [ -89.09153, 30.371326 ], [ -89.090312, 30.371327 ], [ -89.090136, 30.371327 ], [ -89.088809, 30.371327 ], [ -89.088797, 30.372373 ], [ -89.087557, 30.372377 ], [ -89.087432, 30.372371 ], [ -89.087429, 30.371074 ], [ -89.087429, 30.370979 ], [ -89.087431, 30.36924 ], [ -89.087424, 30.368559 ], [ -89.087394, 30.368228 ], [ -89.087398, 30.3681 ], [ -89.087408, 30.367653 ], [ -89.087405, 
30.367552 ], [ -89.088805, 30.367086 ], [ -89.090137, 30.366643 ], [ -89.090263, 30.366603 ], [ -89.091459, 30.366215 ], [ -89.092643, 30.365831 ], [ -89.092912, 30.365758 ], [ -89.093006, 30.365732 ], [ -89.093168, 30.365712 ], [ -89.094308, 30.36534 ], [ -89.094388, 30.365301 ], [ -89.094683, 30.365183 ], [ -89.094739, 30.365156 ], [ -89.094852, 30.365118 ], [ -89.095644, 30.364853 ], [ -89.096427, 30.364604 ], [ -89.096534, 30.364567 ], [ -89.097512, 30.364275 ], [ -89.097679, 30.364234 ], [ -89.098915, 30.363843 ], [ -89.10016, 30.363411 ], [ -89.100979, 30.363155 ], [ -89.101422, 30.362993 ], [ -89.101423, 30.362631 ], [ -89.101426, 30.36174 ], [ -89.101417, 30.361088 ], [ -89.101237, 30.347697 ] ] ] } }
|
||||
]
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
from pathlib import Path
|
||||
from collections import namedtuple
|
||||
import geopandas as gpd
|
||||
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
|
||||
|
||||
|
||||
def test_add_tracts_for_geometries():
    """Points with a known census tract get that tract as GEOID10_TRACT."""
    field_names = ["latitude", "longitude", "expected_geoid"]
    DataPoint = namedtuple("DataPoint", field_names)
    # Expected tract IDs were looked up with the census geocoder
    known_points = [
        DataPoint(33.75649254612824, -84.39215035031984, "13121011900"),
        DataPoint(34.05289139656212, -118.2402117966315, "06037207400"),
        DataPoint(42.357500146415475, -71.0563146836545, "25025030300"),
        DataPoint(30.368185144529168, -89.0930992763473, "28047003800"),
    ]
    points = gpd.GeoDataFrame.from_records(known_points, columns=field_names)
    point_geometry = gpd.points_from_xy(
        x=points["longitude"],
        y=points["latitude"],
    )
    df = gpd.GeoDataFrame(points, geometry=point_geometry, crs="epsg:4326")

    # Small fixture holding only the four tracts used above
    fixture_path = Path(__file__).parent / "data" / "us.geojson"
    enriched_df = add_tracts_for_geometries(df, _tract_data_path=fixture_path)

    matches = df["expected_geoid"] == enriched_df["GEOID10_TRACT"]
    assert matches.all()
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Count of eligible Formerly Used Defense Site (FUDS) properties centroids,Count of ineligible Formerly Used Defense Site (FUDS) properties centroids,Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||
06027000800,3,14,True
|
||||
06061021322,1,2,True
|
||||
06069000802,1,0,True
|
||||
15001021010,1,2,True
|
||||
15001021101,0,1,False
|
||||
15001021402,1,2,True
|
||||
15001021800,1,2,True
|
||||
15003010201,2,1,True
|
||||
15007040603,0,2,False
|
||||
15007040604,1,2,True
|
||||
15007040700,1,2,True
|
||||
15009030100,0,1,False
|
||||
15009030201,1,2,True
|
||||
15009030402,1,2,True
|
||||
15009030800,1,2,True
|
|
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Count of eligible Formerly Used Defense Site (FUDS) properties centroids,Count of ineligible Formerly Used Defense Site (FUDS) properties centroids,Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||
06027000800,3,14,True
|
||||
06061021322,1,2,True
|
||||
06069000802,1,0,True
|
||||
15001021010,1,2,True
|
||||
15001021101,0,1,False
|
||||
15001021402,1,2,True
|
||||
15001021800,1,2,True
|
||||
15003010201,2,1,True
|
||||
15007040603,0,2,False
|
||||
15007040604,1,2,True
|
||||
15007040700,1,2,True
|
||||
15009030100,0,1,False
|
||||
15009030201,1,2,True
|
||||
15009030402,1,2,True
|
||||
15009030800,1,2,True
|
|
|
@ -0,0 +1,187 @@
|
|||
# pylint: disable=protected-access
|
||||
from unittest import mock
|
||||
import pathlib
|
||||
from data_pipeline.etl.base import ValidGeoLevel
|
||||
|
||||
from data_pipeline.etl.sources.us_army_fuds.etl import (
|
||||
USArmyFUDS,
|
||||
)
|
||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
def _fake_add_tracts_for_geometries(df):
    """Stand-in for the real spatial join, which is too slow for tests.

    Looks every point of ``df.geometry`` up in a table of precomputed
    (x, y) -> tract id results, stores the ids in a ``GEOID10_TRACT`` column
    of ``df`` and returns ``df``. A point missing from the table raises
    ``KeyError``.
    """
    # Precomputed join results, grouped by the tract each point falls in
    points_by_tract = {
        "06061021322": [
            (-121.39361572299998, 38.87463378900003),
            (-121.40020751999998, 38.897583008000026),
            (-121.40020751999998, 38.75158691400003),
        ],
        "15003010201": [
            (-157.84301757799997, 21.53619384800004),
            (-157.85168456999997, 21.553405762000068),
            (-157.90679931599996, 21.554199219000054),
        ],
        "15007040700": [
            (-159.52191162099996, 21.976623535000044),
            (-159.52996826199998, 21.93762207000003),
            (-159.52111816399997, 21.922607422000056),
        ],
        "15009030100": [
            (-156.14270019499997, 20.840393066000047),
        ],
        "15001021800": [
            (-155.85968017599998, 20.26519775400004),
            (-155.73327636699997, 20.166809082000043),
            (-155.89270019499997, 20.23522949200003),
        ],
        "15009030201": [
            (-156.26019287099996, 20.899414062000062),
            (-156.22076415999996, 20.91241455100004),
            (-156.20739746099997, 20.890991211000028),
        ],
        "15007040603": [
            (-159.46496581999997, 21.90460205100004),
            (-159.46441650399998, 21.905212402000075),
        ],
        "15001021101": [
            (-154.82519531299997, 19.49182128900003),
        ],
        "06069000802": [
            (-121.06768798799999, 36.61480712900004),
        ],
        "06027000800": [
            (-117.391601563, 36.33343505900007),
            (-117.85546874999994, 36.46960449200003),
            (-117.23529052699996, 36.387634277000075),
            (-118.15270996099997, 36.725219727000024),
            (-118.13891601599994, 36.56683349600007),
            (-117.311096191, 36.783386230000076),
            (-118.00030517599998, 36.283813477000024),
            (-116.86248779299996, 36.46124267600004),
            (-117.16418456999997, 36.60681152300003),
            (-117.06939697299998, 36.158386230000076),
            (-117.873596191, 36.487609863000046),
            (-116.82971191399997, 36.283386230000076),
            (-117.21667480499997, 35.95843505900007),
            (-118.04998779299996, 36.59478759800004),
            (-117.03576660199997, 36.27801513700007),
            (-116.10028076199995, 35.83380127000004),
            (-117.86499023399995, 36.14422607400007),
        ],
        "15001021010": [
            (-155.10320912843935, 19.497857096442765),
        ],
        "15001021402": [
            (-155.91378674587037, 19.516632121497878),
        ],
        "15009030402": [
            (-156.3306524489697, 20.825377142028497),
        ],
        "15009030800": [
            (-156.5429023670438, 20.917074254751412),
        ],
        "15007040604": [
            (-159.48416820625405, 21.907546119100093),
        ],
    }
    # Flatten into the (x, y) -> tract id table used for the lookup
    tract_for_point = {
        coordinates: geoid
        for geoid, coordinate_list in points_by_tract.items()
        for coordinates in coordinate_list
    }

    def _tract_of(point):
        return tract_for_point[(point.x, point.y)]

    df["GEOID10_TRACT"] = df.geometry.apply(_tract_of)
    return df
|
||||
|
||||
|
||||
class TestUSArmyFUDSETL(TestETL):
    """Tests the FUDS ETL.

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
    data_pipeline/tests/sources/us_army_fuds/test_etl.py::TestUSArmyFUDSETL::<testname>
    --snapshot-update
    """

    _ETL_CLASS = USArmyFUDS

    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "fuds.geojson"
    _SAMPLE_DATA_ZIP_FILE_NAME = "fuds.geojson"
    _EXTRACT_TMP_FOLDER_NAME = "USArmyFUDS"

    # Dotted path of the tract-lookup function as imported by the ETL module.
    # Every test that runs the ETL patches this name, so it lives in one place.
    _ADD_TRACTS_PATCH_TARGET = (
        "data_pipeline.etl.sources.us_army_fuds.etl.add_tracts_for_geometries"
    )

    def _patch_add_tracts(self):
        """Return a patcher that swaps in the fake tract lookup.

        The returned object is meant to be used as a context manager. While
        active, the ETL module's `add_tracts_for_geometries` is replaced by
        `_fake_add_tracts_for_geometries`, which assigns tract IDs from a
        hard-coded coordinate lookup table.
        """
        return mock.patch(
            self._ADD_TRACTS_PATCH_TARGET,
            new=_fake_add_tracts_for_geometries,
        )

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.

        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    def test_init(self, mock_etl, mock_paths):
        """Tests that the USArmyFUDS ETL class instance was initialized
        correctly.

        Validates the following conditions:
        - GEOID_FIELD_NAME and GEOID_TRACT_FIELD_NAME hold the expected names
        - NAME is "us_army_fuds"
        - GEO_LEVEL is the census tract level
        - COLUMNS_TO_KEEP lists the tract ID and the three FUDS fields, in order
        """
        # setup
        etl = self._ETL_CLASS()
        # validation
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.NAME == "us_army_fuds"
        assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
        assert etl.COLUMNS_TO_KEEP == [
            etl.GEOID_TRACT_FIELD_NAME,
            etl.ELIGIBLE_FUDS_COUNT_FIELD_NAME,
            etl.INELIGIBLE_FUDS_COUNT_FIELD_NAME,
            etl.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
        ]

    def test_get_output_file_path(self, mock_etl, mock_paths):
        """Tests the right file name is returned."""
        etl = self._ETL_CLASS()
        # Only the data path is needed; the tmp path is unused here.
        data_path, _ = mock_paths

        output_file_path = etl._get_output_file_path()
        expected_output_file_path = (
            data_path / "dataset" / self._ETL_CLASS.NAME / "usa.csv"
        )
        assert output_file_path == expected_output_file_path

    def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
        """Runs the parent test with the tract lookup patched."""
        with self._patch_add_tracts():
            super().test_fixtures_contain_shared_tract_ids_base(
                mock_etl, mock_paths
            )

    def test_transform_base(self, snapshot, mock_etl, mock_paths):
        """Runs the parent snapshot test with the tract lookup patched."""
        with self._patch_add_tracts():
            super().test_transform_base(
                snapshot=snapshot, mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_transform_sets_output_df_base(self, mock_etl, mock_paths):
        """Runs the parent test with the tract lookup patched."""
        with self._patch_add_tracts():
            super().test_transform_sets_output_df_base(
                mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_validate_base(self, mock_etl, mock_paths):
        """Runs the parent test with the tract lookup patched."""
        with self._patch_add_tracts():
            super().test_validate_base(mock_etl=mock_etl, mock_paths=mock_paths)

    def test_full_etl_base(self, mock_etl, mock_paths):
        """Runs the parent test with the tract lookup patched."""
        with self._patch_add_tracts():
            super().test_full_etl_base(mock_etl, mock_paths)

    def test_get_data_frame_base(self, mock_etl, mock_paths):
        """Runs the parent test with the tract lookup patched."""
        with self._patch_add_tracts():
            super().test_get_data_frame_base(mock_etl, mock_paths)

    def test_tracts_without_fuds_not_in_results(self, mock_etl, mock_paths):
        """Tests the output has one row per shared fixture tract ID.

        Runs extract, transform, validate and load with the tract lookup
        patched, then checks that the tract ID column of the resulting data
        frame has as many entries as `_FIXTURES_SHARED_TRACT_IDS`.
        """
        with self._patch_add_tracts():
            etl = self._setup_etl_instance_and_run_extract(
                mock_etl=mock_etl, mock_paths=mock_paths
            )
            etl.transform()
            etl.validate()
            etl.load()
            df = etl.get_data_frame()
            assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
                self._FIXTURES_SHARED_TRACT_IDS
            )
|
945
data/data-pipeline/poetry.lock
generated
945
data/data-pipeline/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -19,7 +19,7 @@ packages = [
|
|||
CensusData = "^1.13"
|
||||
click = "8.0.4" # pinning for now per https://github.com/psf/black/issues/2964
|
||||
dynaconf = "^3.1.4"
|
||||
geopandas = "^0.9.0"
|
||||
geopandas = "^0.11.0"
|
||||
ipdb = "^0.13.9"
|
||||
ipython = "^7.31.1"
|
||||
jupyter = "^1.0.0"
|
||||
|
@ -40,6 +40,7 @@ types-requests = "^2.25.0"
|
|||
us = "^2.0.2"
|
||||
xlsxwriter = "^2.0.0"
|
||||
pydantic = "^1.9.0"
|
||||
Rtree = "^1.0.0"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
black = {version = "^21.6b0", allow-prereleases = true}
|
||||
|
@ -58,6 +59,7 @@ pytest-snapshot = "^0.8.1"
|
|||
nb-black = "^1.0.7"
|
||||
seaborn = "^0.11.2"
|
||||
papermill = "^2.3.4"
|
||||
jupyterlab = "^3.4.4"
|
||||
|
||||
[build-system]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue