diff --git a/data/data-pipeline/data_pipeline/ipython/eAMLIS_and_lat_long.ipynb b/data/data-pipeline/data_pipeline/ipython/eAMLIS_and_lat_long.ipynb
index f72a8c52..0dfb7cd4 100644
--- a/data/data-pipeline/data_pipeline/ipython/eAMLIS_and_lat_long.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/eAMLIS_and_lat_long.ipynb
@@ -10,13 +10,14 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "INFO: Pandarallel will run on 8 workers.\n",
+ "INFO: Pandarallel will run on 4 workers.\n",
"INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.\n"
]
}
],
"source": [
"# These are probably more imports than we need. Copied from comparison tool\n",
+ "import geopandas\n",
"import pandas as pd\n",
"import os\n",
"import sys\n",
@@ -29,7 +30,7 @@
"pandarallel.initialize(\n",
" progress_bar=True,\n",
" # If nb_workers is not set, it defaults to available cores.\n",
- " nb_workers=8,\n",
+ " # nb_workers=8,\n",
")\n",
"\n",
"\n",
@@ -53,49 +54,168 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 2,
"id": "4c907bdb",
- "metadata": {},
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "2022-07-11 19:42:16,399 [data_pipeline.etl.sources.census.etl_utils] INFO Downloading fips from S3 repository\n",
- "2022-07-11 19:42:16,403 [data_pipeline.utils] INFO Downloading https://justice40-data.s3.amazonaws.com/data-sources/fips_states_2010.zip\n",
- "2022-07-11 19:42:16,801 [data_pipeline.utils] INFO Extracting /Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/downloaded-b6413152-455e-47a5-817a-a96e3a892044.zip\n"
+ "2022-07-15 22:38:19,438 [data_pipeline.etl.sources.census.etl_utils] INFO Downloading fips from S3 repository\n",
+ "2022-07-15 22:38:19,443 [data_pipeline.utils] INFO Downloading https://justice40-data.s3.amazonaws.com/data-sources/fips_states_2010.zip\n",
+ "2022-07-15 22:38:19,867 [data_pipeline.utils] INFO Extracting /Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/downloaded-8d08eaa9-3c28-469c-8e52-f9ef049d9fce.zip\n"
]
}
],
"source": [
"# Define some input fields\n",
- "LAT_FIELD = \"Latitude\"\n",
- "LONG_FIELD = \"Longitude\"\n",
- "KEY_FIELD = \"AMLIS Key\"\n",
+ "EAMLIS_LAT_FIELD = \"Latitude\"\n",
+ "EAMLIS_LONG_FIELD = \"Longitude\"\n",
+ "EAMLIS_KEY_FIELD = \"AMLIS Key\"\n",
+ "\n",
+ "FUDS_LAT_FIELD = \"LATITUDE\"\n",
+ "FUDS_LONG_FIELD = \"LONGITUDE\"\n",
+ "FUDS_COLUMNS_TO_KEEP = [\n",
+ " FUDS_LAT_FIELD,\n",
+ " FUDS_LONG_FIELD,\n",
+ " \"FUDSUNIQUEPROPERTYNUMBER\",\n",
+ " \"CURRENTOWNER\",\n",
+ " \"ELIGIBILITY\",\n",
+ " \"EMSMGMTACTIONPLANLINK\",\n",
+ " \"FEATUREDESCRIPTION\",\n",
+ " \"FEATURENAME\",\n",
+ " \"FUDSINSTALLATIONID\",\n",
+ " \"HASPROJECTS\",\n",
+ " \"STATUS\",\n",
+ " \"PROPERTY_HISTORY\",\n",
+ "]\n",
"\n",
"\n",
- "# TODO: switch to whole US\n",
+ "# Geojson input fields\n",
"GEOJSON_PATH = CensusETL().GEOJSON_PATH / \"us.json\"\n",
- "# GEOJSON_PATH = CensusETL().GEOJSON_PATH / \"02.json\"\n",
"GEOJSON_TRACT_ID_FIELD = \"GEOID10\"\n",
"\n",
- "# Choose output directory:\n",
- "OUTPUT_DIR = ExtractTransformLoad.DATA_PATH / \"abandoned_mine_lands\"\n",
+ "\n",
+ "# Choose output directories:\n",
+ "FUDS_OUTPUT_DIR = ExtractTransformLoad.DATA_PATH / \"formerly_used_defense_sites\"\n",
"# Create directory if it doesn't exist\n",
- "OUTPUT_DIR.mkdir(parents=True, exist_ok=True)"
+ "FUDS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "# Choose output directory:\n",
+ "EAMLIS_OUTPUT_DIR = ExtractTransformLoad.DATA_PATH / \"abandoned_mine_lands\"\n",
+ "# Create directory if it doesn't exist\n",
+ "EAMLIS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "d760ddc5",
+ "execution_count": 10,
+ "id": "a3796020",
"metadata": {},
+ "outputs": [],
+ "source": [
+ "# METHOD DEFINITIONS\n",
+ "def get_census_tract_for_one_coordinate(\n",
+ "    geom_point: shapely.geometry.point.Point,\n",
+ "    census_tract_gdf: geopandas.geodataframe.GeoDataFrame,\n",
+ ") -> str:\n",
+ "    \"\"\"Return the ID of the one census tract whose polygon contains `geom_point`.\n",
+ "\n",
+ "    Returns None and emits a `UserWarning` when no tract, or more than one\n",
+ "    tract, contains the point. The ID is read from `GEOJSON_TRACT_ID_FIELD`.\n",
+ "    \"\"\"\n",
+ "    # TODO: consider switching from `all polygons CONTAIN single point` (e.g., all\n",
+ "    # tracts CONTAIN single mine) to `all points WITHIN single polygon`, such as a\n",
+ "    # left spatial join with predicate=\"within\". This requires refactoring the\n",
+ "    # method to iterate through each polygon rather than each point.\n",
+ "\n",
+ "    # Filter once and reuse the result for both the count and the ID lookup.\n",
+ "    matching_tracts_gdf = census_tract_gdf[census_tract_gdf.contains(geom_point)]\n",
+ "    count_of_census_tract_matches = len(matching_tracts_gdf)\n",
+ "\n",
+ "    if count_of_census_tract_matches == 0:\n",
+ "        # `UserWarning`: this reports a data problem, not a deprecated API.\n",
+ "        warnings.warn(\n",
+ "            f\"Warning: no tract matches for {geom_point}\",\n",
+ "            UserWarning,\n",
+ "            stacklevel=2,\n",
+ "        )\n",
+ "        census_tract_id = None\n",
+ "\n",
+ "    elif count_of_census_tract_matches > 1:\n",
+ "        warnings.warn(\n",
+ "            f\"Warning: too many tract matches for {geom_point}\",\n",
+ "            UserWarning,\n",
+ "            stacklevel=2,\n",
+ "        )\n",
+ "        census_tract_id = None\n",
+ "\n",
+ "    else:\n",
+ "        # With only one tract returned, extract the ID.\n",
+ "        census_tract_id = matching_tracts_gdf[GEOJSON_TRACT_ID_FIELD].values[0]\n",
+ "\n",
+ "    return census_tract_id\n",
+ "\n",
+ "\n",
+ "def get_census_tracts_for_geom_points(\n",
+ "    points_gdf: geopandas.geodataframe.GeoDataFrame,\n",
+ "    census_tract_gdf: geopandas.geodataframe.GeoDataFrame,\n",
+ ") -> geopandas.geodataframe.GeoDataFrame:\n",
+ "    \"\"\"Look up the census tract for every row of `points_gdf`.\n",
+ "\n",
+ "    Rows are processed with `parallel_apply`; each row's `geometry` value is\n",
+ "    passed to `get_census_tract_for_one_coordinate`.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def find_tract_for_row(row):\n",
+ "        return get_census_tract_for_one_coordinate(\n",
+ "            geom_point=row[\"geometry\"], census_tract_gdf=census_tract_gdf\n",
+ "        )\n",
+ "\n",
+ "    return points_gdf.parallel_apply(find_tract_for_row, axis=1)\n",
+ "\n",
+ "\n",
+ "def get_census_tracts_for_dataframe_with_lat_long(\n",
+ "    coordinates_df: pd.DataFrame,\n",
+ "    latitude_column: str,\n",
+ "    longitude_column: str,\n",
+ "    census_tract_gdf: geopandas.geodataframe.GeoDataFrame,\n",
+ ") -> pd.DataFrame:\n",
+ "    \"\"\"Return a copy of `coordinates_df` with a census tract ID column added.\n",
+ "\n",
+ "    The tract IDs are stored in `ExtractTransformLoad.GEOID_TRACT_FIELD_NAME`.\n",
+ "    The caller's dataframe is left untouched, and the result never carries a\n",
+ "    `geometry` column.\n",
+ "    \"\"\"\n",
+ "    # Build the points from a copy: constructing the `GeoDataFrame` directly on\n",
+ "    # `coordinates_df` added a `geometry` column to the caller's dataframe.\n",
+ "    coordinates_geopandas_gdf = geopandas.GeoDataFrame(\n",
+ "        coordinates_df.copy(),\n",
+ "        geometry=geopandas.points_from_xy(\n",
+ "            x=coordinates_df[longitude_column],\n",
+ "            y=coordinates_df[latitude_column],\n",
+ "        ),\n",
+ "    )\n",
+ "\n",
+ "    # Find the tract IDs for each point.\n",
+ "    tract_results = get_census_tracts_for_geom_points(\n",
+ "        points_gdf=coordinates_geopandas_gdf, census_tract_gdf=census_tract_gdf\n",
+ "    )\n",
+ "\n",
+ "    # Attach the tract IDs to a new, geometry-free dataframe (`drop` returns a new\n",
+ "    # object), so the input is never mutated.\n",
+ "    coordinates_with_tracts_df = coordinates_df.drop(columns=\"geometry\", errors=\"ignore\")\n",
+ "    tract_column = ExtractTransformLoad.GEOID_TRACT_FIELD_NAME\n",
+ "    coordinates_with_tracts_df[tract_column] = tract_results\n",
+ "\n",
+ "    return coordinates_with_tracts_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "d760ddc5",
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Code took 884.5575151443481 seconds.\n",
+ "Code took 308.0567150115967 seconds.\n",
" STATEFP10 COUNTYFP10 TRACTCE10 GEOID10 NAME10 \\\n",
"0 27 139 080202 27139080202 802.02 \n",
"1 27 139 080204 27139080204 802.04 \n",
@@ -153,10 +273,9 @@
}
],
"source": [
- "# Load geojson\n",
- "import geopandas\n",
- "\n",
"t1 = time.time()\n",
+ "\n",
+ "# Takes ~4 minutes with all of USA.\n",
"census_tract_gdf = geopandas.read_file(\n",
" GEOJSON_PATH,\n",
" # Use `pyogrio` because it's vectorized and faster.\n",
@@ -170,9 +289,824 @@
"print(census_tract_gdf)"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "02ae1a0c",
+ "metadata": {},
+ "source": [
+ "# Start work on FUDS"
+ ]
+ },
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 17,
+ "id": "f8badb15",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2022-07-15 22:52:44,802 [data_pipeline.utils] INFO Downloading https://justice40-data.s3.amazonaws.com/data-sources/fuds_all_fy2019.csv.zip\n",
+ "2022-07-15 22:53:12,094 [data_pipeline.utils] INFO Extracting /Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/abandoned_mine_lands/downloaded-17fdff8f-5857-40b5-8de7-c79944f99095.zip\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " LATITUDE | \n",
+ " LONGITUDE | \n",
+ " FUDSUNIQUEPROPERTYNUMBER | \n",
+ " CURRENTOWNER | \n",
+ " ELIGIBILITY | \n",
+ " EMSMGMTACTIONPLANLINK | \n",
+ " FEATUREDESCRIPTION | \n",
+ " FEATURENAME | \n",
+ " FUDSINSTALLATIONID | \n",
+ " HASPROJECTS | \n",
+ " STATUS | \n",
+ " PROPERTY_HISTORY | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 30.098611 | \n",
+ " -93.722222 | \n",
+ " K06TX0667 | \n",
+ " LOCAL: CITY INDIVIDUAL OWNERS | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " The site was initially acquired in 1946 and us... | \n",
+ " ORANGE PORT OF NAV SHIP STOR | \n",
+ " TX69799F675300 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " The site was initially acquired in 1946 and us... | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 33.809700 | \n",
+ " -95.628304 | \n",
+ " K06TX0305 | \n",
+ " DOD: USACE -AGRICULTURAL, RECREATIONAL, AND FL... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " Camp Maxey was activated in July 1942. It was ... | \n",
+ " CAMP MAXEY | \n",
+ " TX69799F668600 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " Camp Maxey was activated in July 1942. It was ... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 35.746111 | \n",
+ " -95.412778 | \n",
+ " K06OK0186 | \n",
+ " LOCAL: CITY CITY MUNICIPAL AIRFIELD\\n | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... | \n",
+ " MUSKOGEE AUX AF | \n",
+ " OK69799F639800 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 36.226944 | \n",
+ " -95.330000 | \n",
+ " K06OK0025 | \n",
+ " PRIV: PRIVATE CURRENTLY USED AS AN INDUSTRIAL ... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " The DoD began use in the early 1940s when the ... | \n",
+ " OKLAHOMA ORDNANCE WORKS | \n",
+ " OK69799F636200 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " The DoD began use in the early 1940s when the ... | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 36.023333 | \n",
+ " -102.541667 | \n",
+ " K06TX0268 | \n",
+ " LOCAL: CITY THE SITE IS NOW USED AS A MUNICIPA... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " In 1942, the DoD acquired 6,235.16 acres for u... | \n",
+ " DALHART AAF | \n",
+ " TX69799F665100 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " In 1942, the DoD acquired 6,235.16 acres for u... | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10080 | \n",
+ " 51.379444 | \n",
+ " 179.293889 | \n",
+ " F10AK0858 | \n",
+ " FWS: USFWSTRIBE: NATIVE AMERICAN ANCSA NATIVE ... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " AMCHITKA AF AUXILIARY FIELD | \n",
+ " AK09799F709900 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10100 | \n",
+ " 59.266111 | \n",
+ " -135.448889 | \n",
+ " F10AK1016 | \n",
+ " OTHER: Private Landowner has not been identifi... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " HAINES FAIRBANKS PIPELINE | \n",
+ " AK09799F980700 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10101 | \n",
+ " 61.200278 | \n",
+ " -149.900278 | \n",
+ " F10AK1023 | \n",
+ " STATE: STATE ALL BUILDINGS TURNED OVER TO THE ... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " CAMP ANCHORAGE ARMY | \n",
+ " AK09799FA25200 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10102 | \n",
+ " 60.555556 | \n",
+ " -151.267778 | \n",
+ " F10AK1024 | \n",
+ " NaN | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " FORT KENAI ARMY POST | \n",
+ " AK09799FA25300 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10105 | \n",
+ " 63.919444 | \n",
+ " -145.303889 | \n",
+ " F10AK1033 | \n",
+ " OTHER: Native AllotmentBLM: BLMPUBLC: AT&T STA... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " CANOL PIPELINE | \n",
+ " AK09799FA27900 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2866 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " LATITUDE LONGITUDE FUDSUNIQUEPROPERTYNUMBER \\\n",
+ "0 30.098611 -93.722222 K06TX0667 \n",
+ "1 33.809700 -95.628304 K06TX0305 \n",
+ "4 35.746111 -95.412778 K06OK0186 \n",
+ "6 36.226944 -95.330000 K06OK0025 \n",
+ "9 36.023333 -102.541667 K06TX0268 \n",
+ "... ... ... ... \n",
+ "10080 51.379444 179.293889 F10AK0858 \n",
+ "10100 59.266111 -135.448889 F10AK1016 \n",
+ "10101 61.200278 -149.900278 F10AK1023 \n",
+ "10102 60.555556 -151.267778 F10AK1024 \n",
+ "10105 63.919444 -145.303889 F10AK1033 \n",
+ "\n",
+ " CURRENTOWNER ELIGIBILITY \\\n",
+ "0 LOCAL: CITY INDIVIDUAL OWNERS Eligible \n",
+ "1 DOD: USACE -AGRICULTURAL, RECREATIONAL, AND FL... Eligible \n",
+ "4 LOCAL: CITY CITY MUNICIPAL AIRFIELD\\n Eligible \n",
+ "6 PRIV: PRIVATE CURRENTLY USED AS AN INDUSTRIAL ... Eligible \n",
+ "9 LOCAL: CITY THE SITE IS NOW USED AS A MUNICIPA... Eligible \n",
+ "... ... ... \n",
+ "10080 FWS: USFWSTRIBE: NATIVE AMERICAN ANCSA NATIVE ... Eligible \n",
+ "10100 OTHER: Private Landowner has not been identifi... Eligible \n",
+ "10101 STATE: STATE ALL BUILDINGS TURNED OVER TO THE ... Eligible \n",
+ "10102 NaN Eligible \n",
+ "10105 OTHER: Native AllotmentBLM: BLMPUBLC: AT&T STA... Eligible \n",
+ "\n",
+ " EMSMGMTACTIONPLANLINK \\\n",
+ "0 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "1 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "4 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "6 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "9 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "... ... \n",
+ "10080 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10100 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10101 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10102 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10105 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "\n",
+ " FEATUREDESCRIPTION \\\n",
+ "0 The site was initially acquired in 1946 and us... \n",
+ "1 Camp Maxey was activated in July 1942. It was ... \n",
+ "4 HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... \n",
+ "6 The DoD began use in the early 1940s when the ... \n",
+ "9 In 1942, the DoD acquired 6,235.16 acres for u... \n",
+ "... ... \n",
+ "10080 NaN \n",
+ "10100 NaN \n",
+ "10101 NaN \n",
+ "10102 NaN \n",
+ "10105 NaN \n",
+ "\n",
+ " FEATURENAME FUDSINSTALLATIONID HASPROJECTS \\\n",
+ "0 ORANGE PORT OF NAV SHIP STOR TX69799F675300 Yes \n",
+ "1 CAMP MAXEY TX69799F668600 Yes \n",
+ "4 MUSKOGEE AUX AF OK69799F639800 Yes \n",
+ "6 OKLAHOMA ORDNANCE WORKS OK69799F636200 Yes \n",
+ "9 DALHART AAF TX69799F665100 Yes \n",
+ "... ... ... ... \n",
+ "10080 AMCHITKA AF AUXILIARY FIELD AK09799F709900 Yes \n",
+ "10100 HAINES FAIRBANKS PIPELINE AK09799F980700 Yes \n",
+ "10101 CAMP ANCHORAGE ARMY AK09799FA25200 Yes \n",
+ "10102 FORT KENAI ARMY POST AK09799FA25300 Yes \n",
+ "10105 CANOL PIPELINE AK09799FA27900 Yes \n",
+ "\n",
+ " STATUS \\\n",
+ "0 Properties with projects \n",
+ "1 Properties with projects \n",
+ "4 Properties with all projects at site closeout \n",
+ "6 Properties with all projects at site closeout \n",
+ "9 Properties with projects \n",
+ "... ... \n",
+ "10080 Properties with projects \n",
+ "10100 Properties with projects \n",
+ "10101 Properties with all projects at site closeout \n",
+ "10102 Properties with all projects at site closeout \n",
+ "10105 Properties with projects \n",
+ "\n",
+ " PROPERTY_HISTORY \n",
+ "0 The site was initially acquired in 1946 and us... \n",
+ "1 Camp Maxey was activated in July 1942. It was ... \n",
+ "4 HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... \n",
+ "6 The DoD began use in the early 1940s when the ... \n",
+ "9 In 1942, the DoD acquired 6,235.16 acres for u... \n",
+ "... ... \n",
+ "10080 NaN \n",
+ "10100 NaN \n",
+ "10101 NaN \n",
+ "10102 NaN \n",
+ "10105 NaN \n",
+ "\n",
+ "[2866 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Data accessed from: \n",
+ "# \"https://opendata.arcgis.com/api/v3/datasets/3f8354667d5b4b1b8ad7a6e00c3cf3b1_1/downloads/data?format=csv&spatialRefId=4326&where=1%3D1\"\n",
+ "# The cell below downloads a zipped copy of that CSV from the project's data sources URL.\n",
+ "\n",
+ "# Create a FUDS-specific temporary path (previously this reused the\n",
+ "# \"abandoned_mine_lands\" directory that belongs to the eAMLIS data).\n",
+ "fuds_tmp_path = ExtractTransformLoad.DATA_PATH / \"tmp\" / \"formerly_used_defense_sites\"\n",
+ "# Create directory if it doesn't exist\n",
+ "fuds_tmp_path.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "fuds_path_in_s3 = (\n",
+ "    settings.AWS_JUSTICE40_DATASOURCES_URL + \"/fuds_all_fy2019.csv.zip\"\n",
+ ")\n",
+ "\n",
+ "unzip_file_from_url(\n",
+ "    file_url=fuds_path_in_s3,\n",
+ "    download_path=fuds_tmp_path,\n",
+ "    unzipped_file_path=fuds_tmp_path,\n",
+ ")\n",
+ "\n",
+ "fuds_path = fuds_tmp_path / \"fuds_all_fy2019.csv\"\n",
+ "\n",
+ "fuds_raw_df = pd.read_csv(filepath_or_buffer=fuds_path)\n",
+ "\n",
+ "# Only keep \"eligible\" sites with projects.\n",
+ "# TODO: confirm this is an appropriate interpretation of the eligible field.\n",
+ "is_eligible_with_projects = (fuds_raw_df[\"ELIGIBILITY\"] == \"Eligible\") & (\n",
+ "    fuds_raw_df[\"HASPROJECTS\"] == \"Yes\"\n",
+ ")\n",
+ "\n",
+ "# Select the rows and the meaningful columns in one step, and take an explicit\n",
+ "# copy so later column assignments do not act on a slice of the raw frame.\n",
+ "fuds_source_df = fuds_raw_df.loc[\n",
+ "    is_eligible_with_projects, FUDS_COLUMNS_TO_KEEP\n",
+ "].copy()\n",
+ "\n",
+ "fuds_source_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "700e5d7b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# TODO: delete! \n",
+ "# fuds_source_df_backup = fuds_source_df\n",
+ "\n",
+ "# fuds_source_df = fuds_source_df[0:100]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "5c47a326",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Users/lucas/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/array.py:275: ShapelyDeprecationWarning: The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.\n",
+ " return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "23c8c927612849cba95965eb6dc88f38",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=717), Label(value='0 / 717'))), HB…"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_31785/1938123.py:46: DeprecationWarning: Warning: no tract matches for POINT (-157.93194444 21.20222222)\n",
+ " lambda frame: get_census_tract_for_one_coordinate(\n",
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_31785/1938123.py:46: DeprecationWarning: Warning: no tract matches for POINT EMPTY\n",
+ " lambda frame: get_census_tract_for_one_coordinate(\n",
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_31785/1938123.py:46: DeprecationWarning: Warning: no tract matches for POINT (-162.082001 5.88724)\n",
+ " lambda frame: get_census_tract_for_one_coordinate(\n",
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_31785/1938123.py:46: DeprecationWarning: Warning: no tract matches for POINT (-157.93055556 21.24666667)\n",
+ " lambda frame: get_census_tract_for_one_coordinate(\n",
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_31785/1938123.py:46: DeprecationWarning: Warning: no tract matches for POINT EMPTY\n",
+ " lambda frame: get_census_tract_for_one_coordinate(\n",
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_31785/1938123.py:46: DeprecationWarning: Warning: no tract matches for POINT (76.454 38.33174)\n",
+ " lambda frame: get_census_tract_for_one_coordinate(\n",
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_31785/1938123.py:46: DeprecationWarning: Warning: no tract matches for POINT (-170.0581 -14.27345)\n",
+ " lambda frame: get_census_tract_for_one_coordinate(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Code took 2248.5825169086456 seconds.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " LATITUDE | \n",
+ " LONGITUDE | \n",
+ " FUDSUNIQUEPROPERTYNUMBER | \n",
+ " CURRENTOWNER | \n",
+ " ELIGIBILITY | \n",
+ " EMSMGMTACTIONPLANLINK | \n",
+ " FEATUREDESCRIPTION | \n",
+ " FEATURENAME | \n",
+ " FUDSINSTALLATIONID | \n",
+ " HASPROJECTS | \n",
+ " STATUS | \n",
+ " PROPERTY_HISTORY | \n",
+ " GEOID10_TRACT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 30.098611 | \n",
+ " -93.722222 | \n",
+ " K06TX0667 | \n",
+ " LOCAL: CITY INDIVIDUAL OWNERS | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " The site was initially acquired in 1946 and us... | \n",
+ " ORANGE PORT OF NAV SHIP STOR | \n",
+ " TX69799F675300 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " The site was initially acquired in 1946 and us... | \n",
+ " 48361020200 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 33.809700 | \n",
+ " -95.628304 | \n",
+ " K06TX0305 | \n",
+ " DOD: USACE -AGRICULTURAL, RECREATIONAL, AND FL... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " Camp Maxey was activated in July 1942. It was ... | \n",
+ " CAMP MAXEY | \n",
+ " TX69799F668600 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " Camp Maxey was activated in July 1942. It was ... | \n",
+ " 48277000102 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 35.746111 | \n",
+ " -95.412778 | \n",
+ " K06OK0186 | \n",
+ " LOCAL: CITY CITY MUNICIPAL AIRFIELD\\n | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... | \n",
+ " MUSKOGEE AUX AF | \n",
+ " OK69799F639800 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... | \n",
+ " 40101000100 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 36.226944 | \n",
+ " -95.330000 | \n",
+ " K06OK0025 | \n",
+ " PRIV: PRIVATE CURRENTLY USED AS AN INDUSTRIAL ... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " The DoD began use in the early 1940s when the ... | \n",
+ " OKLAHOMA ORDNANCE WORKS | \n",
+ " OK69799F636200 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " The DoD began use in the early 1940s when the ... | \n",
+ " 40097040400 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 36.023333 | \n",
+ " -102.541667 | \n",
+ " K06TX0268 | \n",
+ " LOCAL: CITY THE SITE IS NOW USED AS A MUNICIPA... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/inventor... | \n",
+ " In 1942, the DoD acquired 6,235.16 acres for u... | \n",
+ " DALHART AAF | \n",
+ " TX69799F665100 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " In 1942, the DoD acquired 6,235.16 acres for u... | \n",
+ " 48205950200 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10080 | \n",
+ " 51.379444 | \n",
+ " 179.293889 | \n",
+ " F10AK0858 | \n",
+ " FWS: USFWSTRIBE: NATIVE AMERICAN ANCSA NATIVE ... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " AMCHITKA AF AUXILIARY FIELD | \n",
+ " AK09799F709900 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " NaN | \n",
+ " 02016000100 | \n",
+ "
\n",
+ " \n",
+ " 10100 | \n",
+ " 59.266111 | \n",
+ " -135.448889 | \n",
+ " F10AK1016 | \n",
+ " OTHER: Private Landowner has not been identifi... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " HAINES FAIRBANKS PIPELINE | \n",
+ " AK09799F980700 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " NaN | \n",
+ " 02100000100 | \n",
+ "
\n",
+ " \n",
+ " 10101 | \n",
+ " 61.200278 | \n",
+ " -149.900278 | \n",
+ " F10AK1023 | \n",
+ " STATE: STATE ALL BUILDINGS TURNED OVER TO THE ... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " CAMP ANCHORAGE ARMY | \n",
+ " AK09799FA25200 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " NaN | \n",
+ " 02020001400 | \n",
+ "
\n",
+ " \n",
+ " 10102 | \n",
+ " 60.555556 | \n",
+ " -151.267778 | \n",
+ " F10AK1024 | \n",
+ " NaN | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " FORT KENAI ARMY POST | \n",
+ " AK09799FA25300 | \n",
+ " Yes | \n",
+ " Properties with all projects at site closeout | \n",
+ " NaN | \n",
+ " 02122000600 | \n",
+ "
\n",
+ " \n",
+ " 10105 | \n",
+ " 63.919444 | \n",
+ " -145.303889 | \n",
+ " F10AK1033 | \n",
+ " OTHER: Native AllotmentBLM: BLMPUBLC: AT&T STA... | \n",
+ " Eligible | \n",
+ " https://fudsportal.usace.army.mil/ems/ems/inve... | \n",
+ " NaN | \n",
+ " CANOL PIPELINE | \n",
+ " AK09799FA27900 | \n",
+ " Yes | \n",
+ " Properties with projects | \n",
+ " NaN | \n",
+ " 02240000400 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2866 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " LATITUDE LONGITUDE FUDSUNIQUEPROPERTYNUMBER \\\n",
+ "0 30.098611 -93.722222 K06TX0667 \n",
+ "1 33.809700 -95.628304 K06TX0305 \n",
+ "4 35.746111 -95.412778 K06OK0186 \n",
+ "6 36.226944 -95.330000 K06OK0025 \n",
+ "9 36.023333 -102.541667 K06TX0268 \n",
+ "... ... ... ... \n",
+ "10080 51.379444 179.293889 F10AK0858 \n",
+ "10100 59.266111 -135.448889 F10AK1016 \n",
+ "10101 61.200278 -149.900278 F10AK1023 \n",
+ "10102 60.555556 -151.267778 F10AK1024 \n",
+ "10105 63.919444 -145.303889 F10AK1033 \n",
+ "\n",
+ " CURRENTOWNER ELIGIBILITY \\\n",
+ "0 LOCAL: CITY INDIVIDUAL OWNERS Eligible \n",
+ "1 DOD: USACE -AGRICULTURAL, RECREATIONAL, AND FL... Eligible \n",
+ "4 LOCAL: CITY CITY MUNICIPAL AIRFIELD\\n Eligible \n",
+ "6 PRIV: PRIVATE CURRENTLY USED AS AN INDUSTRIAL ... Eligible \n",
+ "9 LOCAL: CITY THE SITE IS NOW USED AS A MUNICIPA... Eligible \n",
+ "... ... ... \n",
+ "10080 FWS: USFWSTRIBE: NATIVE AMERICAN ANCSA NATIVE ... Eligible \n",
+ "10100 OTHER: Private Landowner has not been identifi... Eligible \n",
+ "10101 STATE: STATE ALL BUILDINGS TURNED OVER TO THE ... Eligible \n",
+ "10102 NaN Eligible \n",
+ "10105 OTHER: Native AllotmentBLM: BLMPUBLC: AT&T STA... Eligible \n",
+ "\n",
+ " EMSMGMTACTIONPLANLINK \\\n",
+ "0 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "1 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "4 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "6 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "9 https://fudsportal.usace.army.mil/ems/inventor... \n",
+ "... ... \n",
+ "10080 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10100 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10101 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10102 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "10105 https://fudsportal.usace.army.mil/ems/ems/inve... \n",
+ "\n",
+ " FEATUREDESCRIPTION \\\n",
+ "0 The site was initially acquired in 1946 and us... \n",
+ "1 Camp Maxey was activated in July 1942. It was ... \n",
+ "4 HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... \n",
+ "6 The DoD began use in the early 1940s when the ... \n",
+ "9 In 1942, the DoD acquired 6,235.16 acres for u... \n",
+ "... ... \n",
+ "10080 NaN \n",
+ "10100 NaN \n",
+ "10101 NaN \n",
+ "10102 NaN \n",
+ "10105 NaN \n",
+ "\n",
+ " FEATURENAME FUDSINSTALLATIONID HASPROJECTS \\\n",
+ "0 ORANGE PORT OF NAV SHIP STOR TX69799F675300 Yes \n",
+ "1 CAMP MAXEY TX69799F668600 Yes \n",
+ "4 MUSKOGEE AUX AF OK69799F639800 Yes \n",
+ "6 OKLAHOMA ORDNANCE WORKS OK69799F636200 Yes \n",
+ "9 DALHART AAF TX69799F665100 Yes \n",
+ "... ... ... ... \n",
+ "10080 AMCHITKA AF AUXILIARY FIELD AK09799F709900 Yes \n",
+ "10100 HAINES FAIRBANKS PIPELINE AK09799F980700 Yes \n",
+ "10101 CAMP ANCHORAGE ARMY AK09799FA25200 Yes \n",
+ "10102 FORT KENAI ARMY POST AK09799FA25300 Yes \n",
+ "10105 CANOL PIPELINE AK09799FA27900 Yes \n",
+ "\n",
+ " STATUS \\\n",
+ "0 Properties with projects \n",
+ "1 Properties with projects \n",
+ "4 Properties with all projects at site closeout \n",
+ "6 Properties with all projects at site closeout \n",
+ "9 Properties with projects \n",
+ "... ... \n",
+ "10080 Properties with projects \n",
+ "10100 Properties with projects \n",
+ "10101 Properties with all projects at site closeout \n",
+ "10102 Properties with all projects at site closeout \n",
+ "10105 Properties with projects \n",
+ "\n",
+ " PROPERTY_HISTORY GEOID10_TRACT \n",
+ "0 The site was initially acquired in 1946 and us... 48361020200 \n",
+ "1 Camp Maxey was activated in July 1942. It was ... 48277000102 \n",
+ "4 HATBOX DOWNTOWN AIRPORT DATES BACK TO 1912 AND... 40101000100 \n",
+ "6 The DoD began use in the early 1940s when the ... 40097040400 \n",
+ "9 In 1942, the DoD acquired 6,235.16 acres for u... 48205950200 \n",
+ "... ... ... \n",
+ "10080 NaN 02016000100 \n",
+ "10100 NaN 02100000100 \n",
+ "10101 NaN 02020001400 \n",
+ "10102 NaN 02122000600 \n",
+ "10105 NaN 02240000400 \n",
+ "\n",
+ "[2866 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "t1 = time.time()\n",
+ "\n",
+ "# Slow: the recorded run on 2,866 rows took ~2,250 seconds (~37 minutes).\n",
+ "fuds_source_with_tracts_df = get_census_tracts_for_dataframe_with_lat_long(\n",
+ " coordinates_df=fuds_source_df,\n",
+ " longitude_column=FUDS_LONG_FIELD,\n",
+ " latitude_column=FUDS_LAT_FIELD,\n",
+ " census_tract_gdf=census_tract_gdf,\n",
+ ")\n",
+ "\n",
+ "t2 = time.time()\n",
+ "\n",
+ "print(f\"Code took {str(t2-t1)} seconds.\")\n",
+ "\n",
+ "fuds_source_with_tracts_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "feddd4f4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Save the tract-matched FUDS records as a CSV in the FUDS output directory.\n",
+ "fuds_source_with_tracts_df.to_csv(\n",
+ "    path_or_buf=FUDS_OUTPUT_DIR / \"formerly_used_defense_sites.csv\",\n",
+ "    index=False,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "04c51d58",
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2077"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Count distinct tract IDs; a missing ID counts as one value, as with `unique()`.\n",
+ "fuds_source_with_tracts_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME].nunique(dropna=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c92d7c00",
+ "metadata": {},
+ "source": [
+ "# Start work on eAMLIS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
"id": "99df4efd",
"metadata": {
"scrolled": true
@@ -182,7 +1116,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_21791/630537932.py:6: DtypeWarning: Columns (14) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+ "2022-07-15 22:26:56,037 [data_pipeline.utils] INFO Downloading https://justice40-data.s3.amazonaws.com/data-sources/eAMLIS export of all data.tsv.zip\n",
+ "2022-07-15 22:27:13,306 [data_pipeline.utils] INFO Extracting /Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/abandoned_mine_lands/downloaded-60f877ab-7ca0-4e7c-8422-b89d0442a30f.zip\n",
+ "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_30741/2097728537.py:18: DtypeWarning: Columns (14) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" eamlis_source_df = pd.read_csv(\n"
]
},
@@ -395,7 +1331,7 @@
"[5 rows x 41 columns]"
]
},
- "execution_count": 4,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -416,7 +1352,7 @@
" unzipped_file_path=tmp_path,\n",
")\n",
"\n",
- "eamlis_path = tmp_path + \"/eAMLIS export of all data.tsv\"\n",
+ "eamlis_path = tmp_path / \"eAMLIS export of all data.tsv\"\n",
"\n",
"eamlis_source_df = pd.read_csv(\n",
" filepath_or_buffer=eamlis_path,\n",
@@ -428,306 +1364,48 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "3a1fe6e8",
"metadata": {
"scrolled": true
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Index(['AMLIS Key', 'State/Tribe', 'County', 'Congressional District',\n",
- " 'Quadrangle Name', 'Watershed', 'HUC Code', 'FIPS Code', 'Latitude',\n",
- " 'Longitude', 'Funding Source / Program', 'Problem Area Name',\n",
- " 'Problem Area Number', 'Planning Unit Name', 'Planning Unit Number',\n",
- " 'Problem Priority', 'Problem Type', 'Mining Type', 'Ore Types',\n",
- " 'Date Prepared', 'Date Revised', 'Private Owner %', 'State Owner %',\n",
- " 'Other Federal Owner %', 'Park Service Owner %',\n",
- " 'Forest Service Owner %', 'Indian Owner %', 'BLM Owner %',\n",
- " 'Unfunded Standard Units', 'Unfunded Costs', 'Unfunded GPRA Acres',\n",
- " 'Unfunded Metric Units', 'Funded Standard Units', 'Funded Costs',\n",
- " 'Funded GPRA Acres', 'Funded Metric Units', 'Completed Standard Units',\n",
- " 'Completed Costs', 'Completed GPRA Acres', 'Completed Metric Units',\n",
- " 'Unnamed: 40'],\n",
- " dtype='object')\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " AMLIS Key | \n",
- " Latitude | \n",
- " Longitude | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 2 | \n",
- " AK000001 | \n",
- " 61.6 | \n",
- " -149.8 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " AK000003 | \n",
- " 61.6 | \n",
- " -144.0 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " AK000006 | \n",
- " 61.7 | \n",
- " -149.0 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " AK000012 | \n",
- " 61.6 | \n",
- " -148.9 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " AK000015 | \n",
- " 61.7 | \n",
- " -148.2 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " AMLIS Key Latitude Longitude\n",
- "2 AK000001 61.6 -149.8\n",
- "6 AK000003 61.6 -144.0\n",
- "12 AK000006 61.7 -149.0\n",
- "25 AK000012 61.6 -148.9\n",
- "30 AK000015 61.7 -148.2"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"mines_df = eamlis_source_df\n",
"\n",
"print(mines_df.columns)\n",
"\n",
"# TODO: investigate how to combine multiple rows for the same lat/long.\n",
+ "# A likely approach: groupby([lat, long])[value_of_interest].size(), which counts the rows at each coordinate.\n",
+ "# TODO: Investigate aggregating over mine severity.\n",
"# This just keeps one of the rows arbitrarily. We might need additional columns of information.\n",
- "mines_unique_df = mines_df.drop_duplicates(subset=[LAT_FIELD, LONG_FIELD], keep=\"last\")\n",
+ "mines_unique_df = mines_df.drop_duplicates(\n",
+ " subset=[EAMLIS_LAT_FIELD, EAMLIS_LONG_FIELD], keep=\"last\"\n",
+ ")\n",
"\n",
"# TODO: investigate whether other columns (such as mine problem severity) are needed.\n",
- "mines_unique_df = mines_unique_df[[KEY_FIELD, LAT_FIELD, LONG_FIELD]]\n",
+ "mines_unique_df = mines_unique_df[\n",
+ " [EAMLIS_KEY_FIELD, EAMLIS_LAT_FIELD, EAMLIS_LONG_FIELD]\n",
+ "]\n",
"\n",
"mines_unique_df.head()"
]
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "aa316a32",
- "metadata": {},
- "outputs": [],
- "source": [
- "# METHOD DEFINITIONS\n",
- "def get_census_tract_for_one_coordinate(\n",
- " geom_point: shapely.geometry.point.Point,\n",
- " census_tract_gdf: geopandas.geodataframe.GeoDataFrame,\n",
- ") -> str:\n",
- " # GEOJSON_TRACT_ID_FIELD\n",
- "\n",
- " # geopandas' contain method works row to row.\n",
- " # So create a duplicate row for the point across the length of the census tract gdf\n",
- " # number_of_census_tracts = len(census_tract_gdf)\n",
- " # point_as_gdf = geopandas.GeoDataFrame([[geom_point] * number_of_census_tracts])\n",
- "\n",
- " # Now run a row-to-row contains\n",
- " # print(point_as_gdf)\n",
- "\n",
- " contains_result = census_tract_gdf.contains(geom_point)\n",
- " count_of_census_tract_matches = len(census_tract_gdf[contains_result])\n",
- "\n",
- " if count_of_census_tract_matches == 0:\n",
- " warnings.warn(\n",
- " f\"Warning: no tract matches for {geom_point}\",\n",
- " DeprecationWarning,\n",
- " stacklevel=2,\n",
- " )\n",
- " census_tract_id = None\n",
- "\n",
- " elif count_of_census_tract_matches > 1:\n",
- " warnings.warn(\n",
- " f\"Warning: too many tract matches for {geom_point}\",\n",
- " DeprecationWarning,\n",
- " stacklevel=2,\n",
- " )\n",
- " census_tract_id = None\n",
- "\n",
- " else:\n",
- " # With only one tract returned, extract the ID.\n",
- " census_tract_id = census_tract_gdf[contains_result][\n",
- " GEOJSON_TRACT_ID_FIELD\n",
- " ].values[0]\n",
- "\n",
- " return census_tract_id\n",
- "\n",
- "\n",
- "def get_census_tracts_for_geom_points(\n",
- " points_gdf: geopandas.geodataframe.GeoDataFrame,\n",
- " census_tract_gdf: geopandas.geodataframe.GeoDataFrame,\n",
- ") -> geopandas.geodataframe.GeoDataFrame:\n",
- " geometry_column_name = \"geometry\"\n",
- " result_gdf = points_gdf.parallel_apply(\n",
- " lambda frame: get_census_tract_for_one_coordinate(\n",
- " geom_point=frame[geometry_column_name], census_tract_gdf=census_tract_gdf\n",
- " ),\n",
- " axis=1,\n",
- " )\n",
- " return result_gdf\n",
- "\n",
- "\n",
- "def get_census_tracts_for_dataframe_with_lat_long(\n",
- " coordinates_df: pd.DataFrame,\n",
- " latitude_column: str = LAT_FIELD,\n",
- " longitude_column: str = LONG_FIELD,\n",
- " census_tract_gdf: geopandas.geodataframe.GeoDataFrame = census_tract_gdf,\n",
- "):\n",
- " # Avoid these side-effects by creating a duplicate.\n",
- " coordinates_df_duplicate = coordinates_df\n",
- "\n",
- " # First, convert the plain DataFrame into a geopandas data frame with lat/long geometry points.\n",
- " coordinates_geopandas_gdf = geopandas.GeoDataFrame(\n",
- " coordinates_df_duplicate,\n",
- " geometry=geopandas.points_from_xy(\n",
- " x=coordinates_df_duplicate[longitude_column],\n",
- " y=coordinates_df_duplicate[latitude_column],\n",
- " ),\n",
- " )\n",
- "\n",
- " # Find the tract IDs for each point.\n",
- " tract_results = get_census_tracts_for_geom_points(\n",
- " points_gdf=coordinates_geopandas_gdf, census_tract_gdf=census_tract_gdf\n",
- " )\n",
- "\n",
- " # Join the tract IDs back on the original dataframe\n",
- " coordinates_with_tracts_df = coordinates_df\n",
- " coordinates_with_tracts_df[\n",
- " ExtractTransformLoad.GEOID_TRACT_FIELD_NAME\n",
- " ] = tract_results\n",
- "\n",
- " # Remove unnecessary `geometry` column\n",
- " # For unclear reasons, the initial `GeoDataFrame` creates a `geometry` column on the input dataframe that we don't want.\n",
- " coordinates_with_tracts_df = coordinates_with_tracts_df.drop(\"geometry\", axis=1)\n",
- "\n",
- " return coordinates_with_tracts_df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "a145f162",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " AMLIS Key Latitude Longitude\n",
- "2 AK000001 61.6 -149.8\n",
- "6 AK000003 61.6 -144.0\n",
- "12 AK000006 61.7 -149.0\n",
- "25 AK000012 61.6 -148.9\n",
- "30 AK000015 61.7 -148.2\n",
- "... ... ... ...\n",
- "57140 WY216747 42.9 -108.1\n",
- "57145 WY242429 41.8 -106.8\n",
- "57146 WY242431 42.5 -108.7\n",
- "57147 WY242441 42.8 -107.4\n",
- "57148 WY242444 42.6 -110.9\n",
- "\n",
- "[3977 rows x 3 columns]\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/lucas/.virtualenvs/scoring2/lib/python3.9/site-packages/geopandas/array.py:275: ShapelyDeprecationWarning: The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.\n",
- " return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "9a3b14332bc649d3a5f49e11373a9409",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=498), Label(value='0 / 498'))), HB…"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/lx/xmq8p65j71v9xq2bhsd2j5w40000gp/T/ipykernel_21791/3033776402.py:55: DeprecationWarning: Warning: no tract matches for POINT (-130 55.9)\n",
- " lambda frame: get_census_tract_for_one_coordinate(\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Code took 1550.5722029209137 seconds.\n",
- " AMLIS Key Latitude Longitude GEOID10_TRACT\n",
- "2 AK000001 61.6 -149.8 02170000401\n",
- "6 AK000003 61.6 -144.0 02261000100\n",
- "12 AK000006 61.7 -149.0 02170000200\n",
- "25 AK000012 61.6 -148.9 02170001300\n",
- "30 AK000015 61.7 -148.2 02170000200\n",
- "... ... ... ... ...\n",
- "57140 WY216747 42.9 -108.1 56013000300\n",
- "57145 WY242429 41.8 -106.8 56007968100\n",
- "57146 WY242431 42.5 -108.7 56013000300\n",
- "57147 WY242441 42.8 -107.4 56025001800\n",
- "57148 WY242444 42.6 -110.9 56023978100\n",
- "\n",
- "[3977 rows x 4 columns]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"t1 = time.time()\n",
"\n",
"# Takes ~26 minutes with 4,000 rows.\n",
"mines_unique_with_tracts_df = get_census_tracts_for_dataframe_with_lat_long(\n",
- " coordinates_df=mines_unique_df\n",
+ " coordinates_df=mines_unique_df,\n",
+ " longitude_column=EAMLIS_LONG_FIELD,\n",
+ " latitude_column=EAMLIS_LAT_FIELD,\n",
+ " census_tract_gdf=census_tract_gdf,\n",
")\n",
"\n",
"t2 = time.time()\n",
@@ -739,31 +1417,22 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "52aefca6",
"metadata": {},
"outputs": [],
"source": [
- "mines_unique_with_tracts_df.to_csv(OUTPUT_DIR / \"abandoned_mine_lands.csv\", index=False)"
+ "mines_unique_with_tracts_df.to_csv(\n",
+ " EAMLIS_OUTPUT_DIR / \"abandoned_mine_lands.csv\", index=False\n",
+ ")"
]
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"id": "d659ce7d",
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2035"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"len(mines_unique_with_tracts_df[ExtractTransformLoad.GEOID_TRACT_FIELD_NAME].unique())"
]