mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 01:31:25 -08:00
wip on lead score
This commit is contained in:
parent
678ab8c081
commit
4ed43226cf
3 changed files with 189 additions and 2 deletions
|
@ -2,7 +2,7 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 112,
|
||||
"id": "0491828b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -12,7 +12,7 @@
|
|||
"import csv\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"ACS_YEAR = 2019\n",
|
||||
"ACS_YEAR = 2016\n",
|
||||
"\n",
|
||||
"DATA_PATH = Path.cwd().parent / \"data\"\n",
|
||||
"FIPS_CSV_PATH = DATA_PATH / \"fips_states_2010.csv\"\n",
|
||||
|
@ -26,6 +26,49 @@
|
|||
"pd.set_option(\"display.precision\", 2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"id": "6ba65bff",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('B25040_005E',\n",
|
||||
" 'HOUSE HEATING FUEL',\n",
|
||||
" 'Estimate!!Total:!!Fuel oil, kerosene, etc.'),\n",
|
||||
" ('B25040_009E', 'HOUSE HEATING FUEL', 'Estimate!!Total:!!Other fuel'),\n",
|
||||
" ('B25040_010E', 'HOUSE HEATING FUEL', 'Estimate!!Total:!!No fuel used'),\n",
|
||||
" ('B25117_006E',\n",
|
||||
" 'TENURE BY HOUSE HEATING FUEL',\n",
|
||||
" 'Estimate!!Total:!!Owner occupied:!!Fuel oil, kerosene, etc.'),\n",
|
||||
" ('B25117_010E',\n",
|
||||
" 'TENURE BY HOUSE HEATING FUEL',\n",
|
||||
" 'Estimate!!Total:!!Owner occupied:!!Other fuel'),\n",
|
||||
" ('B25117_011E',\n",
|
||||
" 'TENURE BY HOUSE HEATING FUEL',\n",
|
||||
" 'Estimate!!Total:!!Owner occupied:!!No fuel used'),\n",
|
||||
" ('B25117_016E',\n",
|
||||
" 'TENURE BY HOUSE HEATING FUEL',\n",
|
||||
" 'Estimate!!Total:!!Renter occupied:!!Fuel oil, kerosene, etc.'),\n",
|
||||
" ('B25117_020E',\n",
|
||||
" 'TENURE BY HOUSE HEATING FUEL',\n",
|
||||
" 'Estimate!!Total:!!Renter occupied:!!Other fuel'),\n",
|
||||
" ('B25117_021E',\n",
|
||||
" 'TENURE BY HOUSE HEATING FUEL',\n",
|
||||
" 'Estimate!!Total:!!Renter occupied:!!No fuel used')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 116,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"censusdata.search('acs5', 2019, 'label', 'fuel')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
|
143
score/ipython/lead_score_etl.ipynb
Normal file
143
score/ipython/lead_score_etl.ipynb
Normal file
|
@ -0,0 +1,143 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "c21b63a3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import censusdata\n",
|
||||
"import csv\n",
|
||||
"import requests\n",
|
||||
"import us\n",
|
||||
"import zipfile\n",
|
||||
"\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"DATA_PATH = Path.cwd().parent / \"data\"\n",
|
||||
"FIPS_CSV_PATH = DATA_PATH / \"fips_states_2010.csv\"\n",
|
||||
"OUTPUT_PATH = DATA_PATH / \"dataset\" / \"lead_score\"\n",
|
||||
"\n",
|
||||
"GEOID_FIELD_NAME = \"GEOID10\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "6696bc66",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Downloading data for state/territory with abbreviation AL\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/Users/lucas/.virtualenvs/scoring/lib/python3.7/site-packages/urllib3/connectionpool.py:1020: InsecureRequestWarning: Unverified HTTPS request is being made to host 'data.openei.org'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n",
|
||||
" InsecureRequestWarning,\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "NameError",
|
||||
"evalue": "name 'fips' is not defined",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m<ipython-input-11-8867059d74b2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[0;31m# Make the directory if it doesn't exist\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0mzip_file_dir\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmkdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexist_ok\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 19\u001b[0;31m \u001b[0mzip_file_path\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mzip_file_dir\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;34mf\"{fips}-downloaded.zip\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 20\u001b[0m \u001b[0mzip_file\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzip_file_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"wb\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[0mzip_file\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_contents\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mNameError\u001b[0m: name 'fips' is not defined"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# See data at https://data.openei.org/submissions/573\n",
|
||||
"\n",
|
||||
"state_abbreviations = [state.abbr for state in us.states.STATES_AND_TERRITORIES]\n",
|
||||
"\n",
|
||||
"# Download each state / territory individually\n",
|
||||
"dfs = []\n",
|
||||
"for state_abbreviation in state_abbreviations:\n",
|
||||
" print(f\"Downloading data for state/territory with abbreviation {state_abbreviation}\")\n",
|
||||
"\n",
|
||||
" download = requests.get(\n",
|
||||
" f\"https://data.openei.org/files/573/{state_abbreviation}-2018-LEAD-data.zip\",\n",
|
||||
" verify=False,\n",
|
||||
" )\n",
|
||||
" file_contents = download.content\n",
|
||||
" zip_file_dir = DATA_PATH / \"tmp\" / \"lead_score\"\n",
|
||||
"\n",
|
||||
" # Make the directory if it doesn't exist\n",
|
||||
" zip_file_dir.mkdir(parents=True, exist_ok=True)\n",
|
||||
" zip_file_path = zip_file_dir / f\"{state_abbreviation}-downloaded.zip\"\n",
|
||||
" zip_file = open(zip_file_name, \"wb\")\n",
|
||||
" zip_file.write(file_contents)\n",
|
||||
" zip_file.close()\n",
|
||||
"\n",
|
||||
" with zipfile.ZipFile(zip_file_name, \"r\") as zip_ref:\n",
|
||||
" zip_ref.extractall(zip_file_dir)\n",
|
||||
"\n",
|
||||
" # New file name:\n",
|
||||
" tmp_csv_file_path = zip_file_dir / f\"htaindex_data_blkgrps_{fips}.csv\"\n",
|
||||
" tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)\n",
|
||||
"\n",
|
||||
" dfs.append(tmp_df)\n",
|
||||
"\n",
|
||||
"df = pd.concat(dfs)\n",
|
||||
"\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "244e0d03",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Rename and reformat block group ID\n",
|
||||
"df.rename(columns={\"blkgrp\": GEOID_FIELD_NAME}, inplace=True)\n",
|
||||
"df[GEOID_FIELD_NAME] = df[GEOID_FIELD_NAME].str.replace('\"', \"\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8275c1ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"df.to_csv(path_or_buf=OUTPUT_PATH / \"usa.csv\", index=False)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -5,3 +5,4 @@ jupyter_contrib_nbextensions
|
|||
numpy
|
||||
pandas
|
||||
requests
|
||||
us
|
||||
|
|
Loading…
Add table
Reference in a new issue