mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 12:01:17 -07:00
Add FUDS ETL (#1817)
* Add spatial join method (#1871) Since we'll need to figure out the tracts for a large number of points in future tickets, add a utility to handle grabbing the tract geometries and adding tract data to a point dataset. * Add FUDS, also jupyter lab (#1871) * Add YAML configs for FUDS (#1871) * Allow input geoid to be optional (#1871) * Add FUDS ETL, tests, test-data notebook (#1871) This adds the ETL class for Formerly Used Defense Sites (FUDS). This is different from most other ETLs since these FUDS are not provided by tract, but instead by geographic point, so we need to assign FUDS to tracts and then do calculations from there. * Floats -> Ints, as I intended (#1871) * Floats -> Ints, as I intended (#1871) * Formatting fixes (#1871) * Add test false positive GEOIDs (#1871) * Add gdal binaries (#1871) * Refactor pandas code to be more idiomatic (#1871) Per Emma, the more pandas-y way of doing my counts is using np.where to add the values I need, then groupby and size. It is definitely more compact, and also I think more correct! * Update configs per Emma suggestions (#1871) * Type fixed! (#1871) * Remove spurious import from vscode (#1871) * Snapshot update after changing col name (#1871) * Move up GDAL (#1871) * Adjust geojson strategy (#1871) * Try running census separately first (#1871) * Fix import order (#1871) * Cleanup cache strategy (#1871) * Download census data from S3 instead of re-calculating (#1871) * Clarify pandas code per Emma (#1871)
This commit is contained in:
parent
13e79087d1
commit
d5fbb802e8
22 changed files with 2534 additions and 416 deletions
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "06", "COUNTYFP10": "037", "TRACTCE10": "207400", "GEOID10_TRACT": "06037207400", "NAME10": "2074", "NAMELSAD10": "Census Tract 2074", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 862884, "AWATER10": 6531, "INTPTLAT10": "+34.0561941", "INTPTLON10": "-118.2466502" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -118.25165, 34.057561 ], [ -118.251856, 34.057693 ], [ -118.251973, 34.057769 ], [ -118.253069, 34.058478 ], [ -118.253333, 34.058635 ], [ -118.253175, 34.058788 ], [ -118.252985, 34.058967 ], [ -118.252934, 34.059012 ], [ -118.252592, 34.059315 ], [ -118.252391, 34.059485 ], [ -118.252131, 34.059695 ], [ -118.251474, 34.060224 ], [ -118.251082, 34.060543 ], [ -118.250554, 34.060988 ], [ -118.249996, 34.061475 ], [ -118.248871, 34.06247 ], [ -118.248822, 34.062513 ], [ -118.248754, 34.062434 ], [ -118.247476, 34.060942 ], [ -118.247368, 34.060818 ], [ -118.247013, 34.06041 ], [ -118.24698, 34.060373 ], [ -118.246769, 34.060147 ], [ -118.246548, 34.059926 ], [ -118.246318, 34.059712 ], [ -118.246079, 34.059505 ], [ -118.245633, 34.059146 ], [ -118.245532, 34.059066 ], [ -118.245262, 34.058851 ], [ -118.244952, 34.058609 ], [ -118.244638, 34.05837 ], [ -118.244425, 34.058215 ], [ -118.244007, 34.057917 ], [ -118.243393, 34.057507 ], [ -118.243099, 34.057319 ], [ -118.24245, 34.056913 ], [ -118.241377, 34.056241 ], [ -118.241204, 34.056133 ], [ -118.240288, 34.055562 ], [ -118.239443, 34.055035 ], [ -118.238512, 34.054454 ], [ -118.238227, 34.054289 ], [ -118.238023, 34.054178 ], [ -118.237887, 34.054108 ], [ -118.2379, 34.054002 ], [ -118.237936, 34.053725 ], [ -118.237945, 34.053651 ], [ -118.237976, 34.052819 ], [ -118.238039, 34.05107 ], [ -118.239698, 34.052451 ], [ -118.239867, 34.051906 ], [ -118.240115, 34.0514 ], [ -118.240172, 34.051284 ], [ -118.240271, 34.051083 ], [ -118.240856, 34.050405 ], [ -118.242151, 34.051344 ], [ -118.242382, 34.051511 ], [ -118.24334, 34.050273 ], [ 
-118.244519, 34.051003 ], [ -118.245067, 34.051354 ], [ -118.245606, 34.051703 ], [ -118.246677, 34.052395 ], [ -118.247754, 34.053091 ], [ -118.248466, 34.053552 ], [ -118.248818, 34.05378 ], [ -118.249888, 34.054472 ], [ -118.25095, 34.055158 ], [ -118.251081, 34.055241 ], [ -118.250895, 34.055373 ], [ -118.250712, 34.05553 ], [ -118.250052, 34.056232 ], [ -118.249838, 34.056391 ], [ -118.25165, 34.057561 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "13", "COUNTYFP10": "121", "TRACTCE10": "011900", "GEOID10_TRACT": "13121011900", "NAME10": "119", "NAMELSAD10": "Census Tract 119", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 1530847, "AWATER10": 0, "INTPTLAT10": "+33.7539369", "INTPTLON10": "-084.3826910" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -84.393243, 33.754604 ], [ -84.393434, 33.754711 ], [ -84.393836, 33.75492 ], [ -84.39376, 33.755141 ], [ -84.394037, 33.756265 ], [ -84.394411, 33.757235 ], [ -84.394982, 33.758491 ], [ -84.394325, 33.758955 ], [ -84.393831, 33.759308 ], [ -84.393459, 33.759573 ], [ -84.393366, 33.759627 ], [ -84.393273, 33.759663 ], [ -84.393187, 33.759685 ], [ -84.392783, 33.75973 ], [ -84.392071, 33.759729 ], [ -84.390564, 33.759722 ], [ -84.389801, 33.759719 ], [ -84.389083, 33.759716 ], [ -84.387584, 33.759709 ], [ -84.387539, 33.759708 ], [ -84.386062, 33.759685 ], [ -84.384198, 33.759666 ], [ -84.38422, 33.758392 ], [ -84.384242, 33.757117 ], [ -84.384268, 33.755571 ], [ -84.384283, 33.75473 ], [ -84.384287, 33.754521 ], [ -84.384305, 33.754462 ], [ -84.382272, 33.754439 ], [ -84.381907, 33.754434 ], [ -84.380277, 33.754417 ], [ -84.3802, 33.754414 ], [ -84.379455, 33.754397 ], [ -84.379157, 33.75439 ], [ -84.378673, 33.754379 ], [ -84.378332, 33.75438 ], [ -84.378297, 33.75437 ], [ -84.378044, 33.754368 ], [ -84.377363, 33.754378 ], [ -84.377298, 33.754379 ], [ -84.377099, 33.754376 ], [ -84.376604, 33.754371 ], [ -84.375544, 33.754355 ], [ -84.374384, 33.754337 ], [ -84.37336, 33.754322 ], [ -84.372422, 33.754309 ], [ -84.37215, 33.754305 ], [ -84.371286, 33.754295 ], [ -84.369769, 33.754278 ], [ -84.368828, 33.754282 ], [ -84.368562, 33.754283 ], [ -84.368027, 33.754285 ], [ -84.367498, 33.754287 ], [ -84.366551, 33.75429 ], [ -84.366444, 33.754291 ], [ -84.365863, 33.754297 ], [ -84.365599, 33.754312 ], [ -84.365617, 33.754242 ], [ -84.365791, 33.753851 ], [ -84.366268, 33.75328 ], [ -84.366323, 33.753215 
], [ -84.3666, 33.752984 ], [ -84.366842, 33.752754 ], [ -84.366935, 33.752666 ], [ -84.36698, 33.752629 ], [ -84.367086, 33.752523 ], [ -84.367248, 33.75237 ], [ -84.368362, 33.752078 ], [ -84.369133, 33.751836 ], [ -84.369871, 33.751612 ], [ -84.370491, 33.751434 ], [ -84.370976, 33.751284 ], [ -84.37217, 33.750916 ], [ -84.373348, 33.750533 ], [ -84.374128, 33.750253 ], [ -84.375093, 33.749926 ], [ -84.376294, 33.749564 ], [ -84.376636, 33.749461 ], [ -84.376945, 33.749372 ], [ -84.37768, 33.749186 ], [ -84.378404, 33.74904 ], [ -84.378835, 33.748964 ], [ -84.379047, 33.748935 ], [ -84.379541, 33.748892 ], [ -84.379663, 33.748881 ], [ -84.380133, 33.748853 ], [ -84.380525, 33.748853 ], [ -84.380758, 33.748868 ], [ -84.381016, 33.748884 ], [ -84.381506, 33.748923 ], [ -84.382132, 33.748903 ], [ -84.38251, 33.748886 ], [ -84.382727, 33.748877 ], [ -84.383153, 33.748907 ], [ -84.383313, 33.748923 ], [ -84.383493, 33.748941 ], [ -84.383746, 33.749 ], [ -84.383896, 33.749035 ], [ -84.384064, 33.749089 ], [ -84.384277, 33.749158 ], [ -84.384328, 33.74918 ], [ -84.384564, 33.749282 ], [ -84.38487, 33.749449 ], [ -84.385214, 33.749686 ], [ -84.385654, 33.749989 ], [ -84.386389, 33.750471 ], [ -84.387563, 33.75124 ], [ -84.387886, 33.751452 ], [ -84.388865, 33.752093 ], [ -84.389895, 33.752768 ], [ -84.390844, 33.753391 ], [ -84.39132, 33.753703 ], [ -84.391525, 33.753837 ], [ -84.392156, 33.754065 ], [ -84.392373, 33.754172 ], [ -84.392834, 33.754399 ], [ -84.39318, 33.754569 ], [ -84.393243, 33.754604 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "25", "COUNTYFP10": "025", "TRACTCE10": "030300", "GEOID10_TRACT": "25025030300", "NAME10": "303", "NAMELSAD10": "Census Tract 303", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 691377, "AWATER10": 234496, "INTPTLAT10": "+42.3600562", "INTPTLON10": "-071.0532861" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -71.045566, 42.359733 ], [ -71.049073, 42.354939 ], [ -71.049333, 42.354585 ], [ -71.049396, 42.354498 ], [ -71.049595, 42.354497 ], [ -71.050434, 42.354846 ], [ -71.050471, 42.354898 ], [ -71.050892, 42.35506 ], [ -71.05106, 42.355131 ], [ -71.050981, 42.355309 ], [ -71.050889, 42.355475 ], [ -71.050856, 42.355555 ], [ -71.050762, 42.356011 ], [ -71.050749, 42.356124 ], [ -71.050816, 42.35664 ], [ -71.051009, 42.356937 ], [ -71.051198, 42.357241 ], [ -71.05137, 42.357474 ], [ -71.051411, 42.357539 ], [ -71.051508, 42.357692 ], [ -71.051613, 42.357921 ], [ -71.051784, 42.358295 ], [ -71.051941, 42.358637 ], [ -71.051976, 42.358699 ], [ -71.052005, 42.358693 ], [ -71.052065, 42.358682 ], [ -71.052158, 42.358666 ], [ -71.052294, 42.358646 ], [ -71.052749, 42.358576 ], [ -71.053192, 42.358496 ], [ -71.053248, 42.358478 ], [ -71.053321, 42.358455 ], [ -71.053518, 42.358356 ], [ -71.053765, 42.358183 ], [ -71.053961, 42.358012 ], [ -71.054265, 42.357737 ], [ -71.05437, 42.357662 ], [ -71.054524, 42.357551 ], [ -71.054848, 42.35735 ], [ -71.05502, 42.357245 ], [ -71.05519, 42.357143 ], [ -71.055539, 42.356971 ], [ -71.055759, 42.356913 ], [ -71.056292, 42.356874 ], [ -71.05659, 42.356852 ], [ -71.057191, 42.356822 ], [ -71.05771, 42.356777 ], [ -71.057993, 42.356789 ], [ -71.058235, 42.356832 ], [ -71.058737, 42.356988 ], [ -71.058561, 42.357161 ], [ -71.05829, 42.35741 ], [ -71.058759, 42.357577 ], [ -71.059299, 42.357766 ], [ -71.059613, 42.357863 ], [ -71.060354, 42.358092 ], [ -71.061259, 42.358283 ], [ -71.06151, 42.358336 ], [ -71.061714, 42.358318 ], [ -71.061977, 42.358246 ], [ -71.062375, 
42.358095 ], [ -71.062642, 42.357977 ], [ -71.062727, 42.358311 ], [ -71.062817, 42.358665 ], [ -71.062823, 42.358714 ], [ -71.062846, 42.358889 ], [ -71.062862, 42.359204 ], [ -71.062875, 42.359483 ], [ -71.062864, 42.36009 ], [ -71.062911, 42.361229 ], [ -71.062762, 42.361642 ], [ -71.062626, 42.361842 ], [ -71.062499, 42.362001 ], [ -71.062354, 42.362143 ], [ -71.062268, 42.362205 ], [ -71.062195, 42.362258 ], [ -71.061856, 42.36243 ], [ -71.061669, 42.362493 ], [ -71.061223, 42.362633 ], [ -71.060878, 42.362731 ], [ -71.060042, 42.362967 ], [ -71.059606, 42.36307 ], [ -71.059491, 42.363104 ], [ -71.058769, 42.363318 ], [ -71.058559, 42.363381 ], [ -71.0584, 42.363412 ], [ -71.058216, 42.363431 ], [ -71.058037, 42.363481 ], [ -71.057979, 42.363511 ], [ -71.057882, 42.363546 ], [ -71.057776, 42.363542 ], [ -71.057709, 42.363543 ], [ -71.05757, 42.36342 ], [ -71.057332, 42.36318 ], [ -71.057051, 42.362987 ], [ -71.056227, 42.362386 ], [ -71.056176, 42.362357 ], [ -71.05525, 42.36183 ], [ -71.055228, 42.361869 ], [ -71.055183, 42.361919 ], [ -71.055187, 42.361941 ], [ -71.055159, 42.361989 ], [ -71.055123, 42.362045 ], [ -71.055026, 42.362149 ], [ -71.05489, 42.362265 ], [ -71.054661, 42.36238 ], [ -71.054626, 42.362404 ], [ -71.054581, 42.362434 ], [ -71.054494, 42.362511 ], [ -71.054407, 42.362634 ], [ -71.054311, 42.362802 ], [ -71.054296, 42.36283 ], [ -71.05419, 42.362973 ], [ -71.054061, 42.363108 ], [ -71.053826, 42.363303 ], [ -71.053709, 42.363367 ], [ -71.053585, 42.363405 ], [ -71.053549, 42.363416 ], [ -71.053199, 42.363474 ], [ -71.053043, 42.363495 ], [ -71.052769, 42.36353 ], [ -71.05246, 42.363586 ], [ -71.05224, 42.363626 ], [ -71.052061, 42.36371 ], [ -71.051895, 42.363501 ], [ -71.051661, 42.363192 ], [ -71.051647, 42.36311 ], [ -71.051414, 42.363386 ], [ -71.05135, 42.36347 ], [ -71.051195, 42.36372 ], [ -71.051115, 42.363979 ], [ -71.051088, 42.364065 ], [ -71.05109, 42.364175 ], [ -71.0496, 42.364044 ], [ -71.049409, 42.364045 ], [ -71.046389, 
42.363935 ], [ -71.045985, 42.362294 ], [ -71.045918, 42.361164 ], [ -71.0455, 42.359825 ], [ -71.045566, 42.359733 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "28", "COUNTYFP10": "047", "TRACTCE10": "003800", "GEOID10_TRACT": "28047003800", "NAME10": "38", "NAMELSAD10": "Census Tract 38", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 2304789, "AWATER10": 3104014, "INTPTLAT10": "+30.3577592", "INTPTLON10": "-089.1130708" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -89.101237, 30.347697 ], [ -89.117538, 30.342797 ], [ -89.124278, 30.343971 ], [ -89.124335, 30.353194 ], [ -89.124336, 30.353446 ], [ -89.124338, 30.353697 ], [ -89.124555, 30.354007 ], [ -89.124595, 30.353991 ], [ -89.124991, 30.354701 ], [ -89.125114, 30.354921 ], [ -89.125679, 30.355921 ], [ -89.127359, 30.358407 ], [ -89.127508, 30.358574 ], [ -89.127077, 30.35871 ], [ -89.124073, 30.359753 ], [ -89.12318, 30.360048 ], [ -89.122255, 30.360367 ], [ -89.121353, 30.360674 ], [ -89.120354, 30.36101 ], [ -89.117854, 30.36182 ], [ -89.116359, 30.362304 ], [ -89.11492, 30.362785 ], [ -89.113579, 30.363225 ], [ -89.112509, 30.363583 ], [ -89.11135, 30.363984 ], [ -89.11121, 30.364005 ], [ -89.110283, 30.364326 ], [ -89.109295, 30.364647 ], [ -89.108217, 30.365012 ], [ -89.107137, 30.365376 ], [ -89.105342, 30.365959 ], [ -89.102779, 30.36682 ], [ -89.101505, 30.367176 ], [ -89.100242, 30.367636 ], [ -89.098984, 30.368 ], [ -89.097738, 30.368327 ], [ -89.097572, 30.368365 ], [ -89.096742, 30.368555 ], [ -89.096574, 30.368614 ], [ -89.095317, 30.368959 ], [ -89.095334, 30.371183 ], [ -89.095338, 30.371317 ], [ -89.093988, 30.371319 ], [ -89.09397, 30.371327 ], [ -89.093034, 30.371329 ], [ -89.092869, 30.371322 ], [ -89.09153, 30.371326 ], [ -89.090312, 30.371327 ], [ -89.090136, 30.371327 ], [ -89.088809, 30.371327 ], [ -89.088797, 30.372373 ], [ -89.087557, 30.372377 ], [ -89.087432, 30.372371 ], [ -89.087429, 30.371074 ], [ -89.087429, 30.370979 ], [ -89.087431, 30.36924 ], [ -89.087424, 30.368559 ], [ -89.087394, 30.368228 ], [ -89.087398, 30.3681 ], [ -89.087408, 30.367653 ], [ -89.087405, 
30.367552 ], [ -89.088805, 30.367086 ], [ -89.090137, 30.366643 ], [ -89.090263, 30.366603 ], [ -89.091459, 30.366215 ], [ -89.092643, 30.365831 ], [ -89.092912, 30.365758 ], [ -89.093006, 30.365732 ], [ -89.093168, 30.365712 ], [ -89.094308, 30.36534 ], [ -89.094388, 30.365301 ], [ -89.094683, 30.365183 ], [ -89.094739, 30.365156 ], [ -89.094852, 30.365118 ], [ -89.095644, 30.364853 ], [ -89.096427, 30.364604 ], [ -89.096534, 30.364567 ], [ -89.097512, 30.364275 ], [ -89.097679, 30.364234 ], [ -89.098915, 30.363843 ], [ -89.10016, 30.363411 ], [ -89.100979, 30.363155 ], [ -89.101422, 30.362993 ], [ -89.101423, 30.362631 ], [ -89.101426, 30.36174 ], [ -89.101417, 30.361088 ], [ -89.101237, 30.347697 ] ] ] } }
|
||||
]
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
from pathlib import Path
|
||||
from collections import namedtuple
|
||||
import geopandas as gpd
|
||||
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
|
||||
|
||||
|
||||
def test_add_tracts_for_geometries():
    """Check that each sample point is assigned its expected census tract.

    Builds a small point GeoDataFrame (CRS ``epsg:4326``), runs
    ``add_tracts_for_geometries`` against the ``us.geojson`` fixture stored
    next to this file, and compares the resulting ``GEOID10_TRACT`` column
    with the expected GEOID recorded for every point.
    """
    field_names = ["latitude", "longitude", "expected_geoid"]
    DataPoint = namedtuple("DataPoint", field_names)
    # Expected tract IDs were pulled from the census geocoder
    records = [
        DataPoint(33.75649254612824, -84.39215035031984, "13121011900"),
        DataPoint(34.05289139656212, -118.2402117966315, "06037207400"),
        DataPoint(42.357500146415475, -71.0563146836545, "25025030300"),
        DataPoint(30.368185144529168, -89.0930992763473, "28047003800"),
    ]

    # First load the raw records, then attach point geometries built from
    # the longitude (x) and latitude (y) columns.
    raw_points = gpd.GeoDataFrame.from_records(records, columns=field_names)
    point_geometry = gpd.points_from_xy(
        x=raw_points["longitude"],
        y=raw_points["latitude"],
    )
    df = gpd.GeoDataFrame(raw_points, geometry=point_geometry, crs="epsg:4326")

    tract_data = Path(__file__).parent.joinpath("data", "us.geojson")
    enriched_df = add_tracts_for_geometries(df, _tract_data_path=tract_data)

    geoids_match = df["expected_geoid"] == enriched_df["GEOID10_TRACT"]
    assert geoids_match.all()
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Count of eligible Formerly Used Defense Site (FUDS) properties centroids,Count of ineligible Formerly Used Defense Site (FUDS) properties centroids,Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||
06027000800,3,14,True
|
||||
06061021322,1,2,True
|
||||
06069000802,1,0,True
|
||||
15001021010,1,2,True
|
||||
15001021101,0,1,False
|
||||
15001021402,1,2,True
|
||||
15001021800,1,2,True
|
||||
15003010201,2,1,True
|
||||
15007040603,0,2,False
|
||||
15007040604,1,2,True
|
||||
15007040700,1,2,True
|
||||
15009030100,0,1,False
|
||||
15009030201,1,2,True
|
||||
15009030402,1,2,True
|
||||
15009030800,1,2,True
|
|
|
@ -0,0 +1,16 @@
|
|||
GEOID10_TRACT,Count of eligible Formerly Used Defense Site (FUDS) properties centroids,Count of ineligible Formerly Used Defense Site (FUDS) properties centroids,Is there at least one Formerly Used Defense Site (FUDS) in the tract?
|
||||
06027000800,3,14,True
|
||||
06061021322,1,2,True
|
||||
06069000802,1,0,True
|
||||
15001021010,1,2,True
|
||||
15001021101,0,1,False
|
||||
15001021402,1,2,True
|
||||
15001021800,1,2,True
|
||||
15003010201,2,1,True
|
||||
15007040603,0,2,False
|
||||
15007040604,1,2,True
|
||||
15007040700,1,2,True
|
||||
15009030100,0,1,False
|
||||
15009030201,1,2,True
|
||||
15009030402,1,2,True
|
||||
15009030800,1,2,True
|
|
|
@ -0,0 +1,187 @@
|
|||
# pylint: disable=protected-access
|
||||
from unittest import mock
|
||||
import pathlib
|
||||
from data_pipeline.etl.base import ValidGeoLevel
|
||||
|
||||
from data_pipeline.etl.sources.us_army_fuds.etl import (
|
||||
USArmyFUDS,
|
||||
)
|
||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
def _fake_add_tracts_for_geometries(df):
    """Stand-in for the real geojoin, which is too slow for tests.

    Looks up each point of ``df.geometry`` by its exact ``(x, y)`` pair in a
    table of precomputed results, writes the matching tract ID into a
    ``GEOID10_TRACT`` column on ``df`` itself, and returns that same frame.
    A point that is not in the table raises ``KeyError``.
    """
    tract_by_coords = {
        (-121.39361572299998, 38.87463378900003): "06061021322",
        (-121.40020751999998, 38.897583008000026): "06061021322",
        (-121.40020751999998, 38.75158691400003): "06061021322",
        (-157.84301757799997, 21.53619384800004): "15003010201",
        (-157.85168456999997, 21.553405762000068): "15003010201",
        (-157.90679931599996, 21.554199219000054): "15003010201",
        (-159.52191162099996, 21.976623535000044): "15007040700",
        (-159.52996826199998, 21.93762207000003): "15007040700",
        (-159.52111816399997, 21.922607422000056): "15007040700",
        (-156.14270019499997, 20.840393066000047): "15009030100",
        (-155.85968017599998, 20.26519775400004): "15001021800",
        (-155.73327636699997, 20.166809082000043): "15001021800",
        (-155.89270019499997, 20.23522949200003): "15001021800",
        (-156.26019287099996, 20.899414062000062): "15009030201",
        (-156.22076415999996, 20.91241455100004): "15009030201",
        (-156.20739746099997, 20.890991211000028): "15009030201",
        (-159.46496581999997, 21.90460205100004): "15007040603",
        (-159.46441650399998, 21.905212402000075): "15007040603",
        (-154.82519531299997, 19.49182128900003): "15001021101",
        (-121.06768798799999, 36.61480712900004): "06069000802",
        (-117.391601563, 36.33343505900007): "06027000800",
        (-117.85546874999994, 36.46960449200003): "06027000800",
        (-117.23529052699996, 36.387634277000075): "06027000800",
        (-118.15270996099997, 36.725219727000024): "06027000800",
        (-118.13891601599994, 36.56683349600007): "06027000800",
        (-117.311096191, 36.783386230000076): "06027000800",
        (-118.00030517599998, 36.283813477000024): "06027000800",
        (-116.86248779299996, 36.46124267600004): "06027000800",
        (-117.16418456999997, 36.60681152300003): "06027000800",
        (-117.06939697299998, 36.158386230000076): "06027000800",
        (-117.873596191, 36.487609863000046): "06027000800",
        (-116.82971191399997, 36.283386230000076): "06027000800",
        (-117.21667480499997, 35.95843505900007): "06027000800",
        (-118.04998779299996, 36.59478759800004): "06027000800",
        (-117.03576660199997, 36.27801513700007): "06027000800",
        (-116.10028076199995, 35.83380127000004): "06027000800",
        (-117.86499023399995, 36.14422607400007): "06027000800",
        (-155.10320912843935, 19.497857096442765): "15001021010",
        (-155.91378674587037, 19.516632121497878): "15001021402",
        (-156.3306524489697, 20.825377142028497): "15009030402",
        (-156.5429023670438, 20.917074254751412): "15009030800",
        (-159.48416820625405, 21.907546119100093): "15007040604",
    }

    def _tract_for(point):
        # Exact float match on the coordinates; unknown points raise KeyError.
        coords = (point.x, point.y)
        return tract_by_coords[coords]

    geoids = df.geometry.apply(_tract_for)
    df["GEOID10_TRACT"] = geoids
    return df
|
||||
|
||||
|
||||
class TestUSArmyFUDSETL(TestETL):
    """Tests the FUDS ETL.

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
    data_pipeline/tests/sources/us_army_fuds/test_etl.py::TestUSArmyFUDSETL::<testname>
    --snapshot-update
    """

    _ETL_CLASS = USArmyFUDS

    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "fuds.geojson"
    _SAMPLE_DATA_ZIP_FILE_NAME = "fuds.geojson"
    _EXTRACT_TMP_FOLDER_NAME = "USArmyFUDS"

    @staticmethod
    def _patch_add_tracts_for_geometries():
        """Return a patcher that swaps the ETL's tract join for the test fake.

        The patch targets the name as looked up inside the FUDS ETL module
        and replaces it with ``_fake_add_tracts_for_geometries``, which uses
        precomputed results because the actual geojoin is too slow for tests.
        Use the returned object as a context manager.
        """
        return mock.patch(
            "data_pipeline.etl.sources.us_army_fuds.etl.add_tracts_for_geometries",
            new=_fake_add_tracts_for_geometries,
        )

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.

        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    def test_init(self, mock_etl, mock_paths):
        """Tests that the USArmyFUDS ETL instance was initialized correctly.

        Validates the following conditions:
        - GEOID_FIELD_NAME and GEOID_TRACT_FIELD_NAME have the expected values
        - NAME is "us_army_fuds"
        - GEO_LEVEL is the census-tract level
        - COLUMNS_TO_KEEP lists the tract ID followed by the three FUDS fields

        `mock_etl` and `mock_paths` are not used directly here; presumably
        they are fixtures requested for their setup effects (confirm against
        the parent test class).
        """
        # setup
        etl = self._ETL_CLASS()
        # validation
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.NAME == "us_army_fuds"
        assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
        assert etl.COLUMNS_TO_KEEP == [
            etl.GEOID_TRACT_FIELD_NAME,
            etl.ELIGIBLE_FUDS_COUNT_FIELD_NAME,
            etl.INELIGIBLE_FUDS_COUNT_FIELD_NAME,
            etl.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
        ]

    def test_get_output_file_path(self, mock_etl, mock_paths):
        """Tests the right file name is returned."""
        etl = self._ETL_CLASS()
        # Only the data path is needed; the second element is ignored.
        data_path, _ = mock_paths

        output_file_path = etl._get_output_file_path()
        expected_output_file_path = (
            data_path / "dataset" / self._ETL_CLASS.NAME / "usa.csv"
        )
        assert output_file_path == expected_output_file_path

    # The overrides below re-run the parent's tests unchanged, except that
    # the tract join is replaced by the precomputed fake while they execute.

    def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
        """Run the parent's test with the tract join faked."""
        with self._patch_add_tracts_for_geometries():
            super().test_fixtures_contain_shared_tract_ids_base(
                mock_etl, mock_paths
            )

    def test_transform_base(self, snapshot, mock_etl, mock_paths):
        """Run the parent's test with the tract join faked."""
        with self._patch_add_tracts_for_geometries():
            super().test_transform_base(
                snapshot=snapshot, mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_transform_sets_output_df_base(self, mock_etl, mock_paths):
        """Run the parent's test with the tract join faked."""
        with self._patch_add_tracts_for_geometries():
            super().test_transform_sets_output_df_base(
                mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_validate_base(self, mock_etl, mock_paths):
        """Run the parent's test with the tract join faked."""
        with self._patch_add_tracts_for_geometries():
            super().test_validate_base(mock_etl=mock_etl, mock_paths=mock_paths)

    def test_full_etl_base(self, mock_etl, mock_paths):
        """Run the parent's test with the tract join faked."""
        with self._patch_add_tracts_for_geometries():
            super().test_full_etl_base(mock_etl, mock_paths)

    def test_get_data_frame_base(self, mock_etl, mock_paths):
        """Run the parent's test with the tract join faked."""
        with self._patch_add_tracts_for_geometries():
            super().test_get_data_frame_base(mock_etl, mock_paths)

    def test_tracts_without_fuds_not_in_results(self, mock_etl, mock_paths):
        """Tests the output has exactly as many rows as shared fixture tracts.

        Runs extract, transform, validate and load with the tract join faked,
        then checks that the tract-ID column of the resulting data frame has
        the same length as `_FIXTURES_SHARED_TRACT_IDS`.
        """
        with self._patch_add_tracts_for_geometries():
            etl = self._setup_etl_instance_and_run_extract(
                mock_etl=mock_etl, mock_paths=mock_paths
            )
            etl.transform()
            etl.validate()
            etl.load()
            df = etl.get_data_frame()
            assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
                self._FIXTURES_SHARED_TRACT_IDS
            )
|
Loading…
Add table
Add a link
Reference in a new issue