mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
* starting PR * completed feature * checkpoint * adding new fips and updating counties to 2010 * updated sources to 2010 - 2019 * more cleanup * creating tiles score csv
161 lines
4.1 KiB
Text
161 lines
4.1 KiB
Text
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7185e18d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import csv\n",
|
|
"from pathlib import Path\n",
|
|
"import os\n",
|
|
"import sys"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "174bbd09",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Put the parent of the working directory on sys.path so the `utils` and `etl` imports below can resolve.\n",
|
|
"module_path = os.path.abspath(os.pardir)\n",
|
|
"if module_path not in sys.path:\n",
|
|
"    sys.path.append(module_path)\n",
|
|
"\n",
|
|
"from utils import unzip_file_from_url\n",
|
|
"from etl.sources.census.etl_utils import get_state_fips_codes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dd090fcc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# All local paths are derived from the \"data\" directory under the parent of the working directory.\n",
|
|
"DATA_PATH = Path.cwd().parent.joinpath(\"data\")\n",
|
|
"TMP_PATH: Path = DATA_PATH.joinpath(\"tmp\")\n",
|
|
"STATE_CSV = DATA_PATH.joinpath(\"census\", \"csv\", \"fips_states_2010.csv\")\n",
|
|
"SCORE_CSV = DATA_PATH.joinpath(\"score\", \"csv\", \"usa.csv\")\n",
|
|
"COUNTY_SCORE_CSV = DATA_PATH.joinpath(\"score\", \"csv\", \"usa-county.csv\")\n",
|
|
"# NOTE(review): the county gazetteer is the 2020 vintage, while the state table file name and the\n",
|
|
"# score's GEOID10 column appear to be 2010-based - confirm the county codes line up.\n",
|
|
"CENSUS_COUNTIES_ZIP_URL = \"https://www2.census.gov/geo/docs/maps-data/data/gazetteer/2020_Gazetteer/2020_Gaz_counties_national.zip\"\n",
|
|
"CENSUS_COUNTIES_TXT = TMP_PATH.joinpath(\"2020_Gaz_counties_national.txt\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "cf2e266b",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# NOTE(review): both path arguments are TMP_PATH; presumably the archive is downloaded to and\n",
|
|
"# extracted in that directory - confirm against utils.unzip_file_from_url.\n",
|
|
"unzip_file_from_url(\n",
|
|
"    CENSUS_COUNTIES_ZIP_URL,\n",
|
|
"    TMP_PATH,\n",
|
|
"    TMP_PATH,\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "9ff96da8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Read the tab-separated county gazetteer with GEOID and USPS kept as strings,\n",
|
|
"# keep only the three columns used later, and give two of them display-style names.\n",
|
|
"counties_df = (\n",
|
|
"    pd.read_csv(\n",
|
|
"        CENSUS_COUNTIES_TXT,\n",
|
|
"        sep=\"\\t\",\n",
|
|
"        dtype={\"GEOID\": \"string\", \"USPS\": \"string\"},\n",
|
|
"        low_memory=False,\n",
|
|
"    )\n",
|
|
"    .loc[:, [\"USPS\", \"GEOID\", \"NAME\"]]\n",
|
|
"    .rename(columns={\"USPS\": \"State Abbreviation\", \"NAME\": \"County Name\"})\n",
|
|
")\n",
|
|
"counties_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "5af103da",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Read the state table with FIPS codes and abbreviations kept as strings,\n",
|
|
"# then switch the three columns to display-style names.\n",
|
|
"states_df = pd.read_csv(\n",
|
|
"    STATE_CSV,\n",
|
|
"    dtype={\"fips\": \"string\", \"state_abbreviation\": \"string\"},\n",
|
|
").rename(\n",
|
|
"    columns={\n",
|
|
"        \"fips\": \"State Code\",\n",
|
|
"        \"state_name\": \"State Name\",\n",
|
|
"        \"state_abbreviation\": \"State Abbreviation\",\n",
|
|
"    }\n",
|
|
")\n",
|
|
"states_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c8680258",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Attach the state code and state name to every county by matching on the state abbreviation.\n",
|
|
"# DataFrame.join without `on` aligns rows by index, which would pair county row i with\n",
|
|
"# state row i regardless of state, so merge on the shared key column instead.\n",
|
|
"county_state_merged = counties_df.merge(\n",
|
|
"    states_df,\n",
|
|
"    on=\"State Abbreviation\",\n",
|
|
"    how=\"left\",  # keep every county, even one whose abbreviation has no state row\n",
|
|
"    suffixes=(\"\", \" Other\"),  # any other clashing state column gets the \" Other\" suffix\n",
|
|
"    validate=\"many_to_one\",  # fail loudly if a state abbreviation appears twice in states_df\n",
|
|
")\n",
|
|
"county_state_merged.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "58dca55a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Read the score file with GEOID10 kept as a string and add GEOID, the first five characters\n",
|
|
"# of GEOID10 (presumably the state + county FIPS prefix - confirm).\n",
|
|
"score_df = pd.read_csv(SCORE_CSV, dtype={\"GEOID10\": \"string\"}).assign(\n",
|
|
"    GEOID=lambda scores: scores[\"GEOID10\"].str.slice(stop=5)\n",
|
|
")\n",
|
|
"score_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "45e04d42",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Attach the county and state columns to every score row by matching on the five-character GEOID.\n",
|
|
"# DataFrame.join without `on` aligns rows by index, which would pair score row i with\n",
|
|
"# county row i rather than with the row's own county, so merge on the GEOID column instead.\n",
|
|
"score_county_state_merged = score_df.merge(\n",
|
|
"    county_state_merged,\n",
|
|
"    on=\"GEOID\",\n",
|
|
"    how=\"left\",  # keep every score row, even one whose GEOID has no county match\n",
|
|
"    suffixes=(\"\", \"_OTHER\"),  # any other clashing county column gets the \"_OTHER\" suffix\n",
|
|
"    validate=\"many_to_one\",  # fail loudly if a county GEOID appears twice in the lookup\n",
|
|
")\n",
|
|
"score_county_state_merged.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a5a0b32b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Write the merged frame to COUNTY_SCORE_CSV, leaving the DataFrame index out of the file.\n",
|
|
"score_county_state_merged.to_csv(path_or_buf=COUNTY_SCORE_CSV, index=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b690937e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|