mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-27 22:01:16 -07:00
wip
This commit is contained in:
parent
cf13036d20
commit
ebe6180f7c
3 changed files with 180 additions and 4 deletions
135
data/data-pipeline/data_pipeline/ipython/score_explore.ipynb
Normal file
135
data/data-pipeline/data_pipeline/ipython/score_explore.ipynb
Normal file
|
@ -0,0 +1,135 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b0b3db8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import collections\n",
|
||||
"import functools\n",
|
||||
"import IPython\n",
|
||||
"import itertools\n",
|
||||
"import matplotlib\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"import pathlib\n",
|
||||
"import pypandoc\n",
|
||||
"import requests\n",
|
||||
"import string\n",
|
||||
"import sys\n",
|
||||
"import typing\n",
|
||||
"import us\n",
|
||||
"import zipfile\n",
|
||||
"\n",
|
||||
"from datetime import datetime\n",
|
||||
"from tqdm.notebook import tqdm_notebook\n",
|
||||
"\n",
|
||||
"module_path = os.path.abspath(os.path.join(\"../..\"))\n",
|
||||
"if module_path not in sys.path:\n",
|
||||
" sys.path.append(module_path)\n",
|
||||
"\n",
|
||||
"from data_pipeline.utils import remove_all_from_dir, get_excel_column_name\n",
|
||||
"from data_pipeline.etl.sources.census.etl_utils import get_state_information\n",
|
||||
"\n",
|
||||
"# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n",
|
||||
"tqdm_notebook.pandas()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "be9bff9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Suppress scientific notation in pandas (this shows up for census tract IDs)\n",
|
||||
"pd.options.display.float_format = \"{:.2f}\".format\n",
|
||||
"\n",
|
||||
"# Set some global parameters\n",
|
||||
"DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
|
||||
"\n",
|
||||
"GEOID_FIELD_NAME = \"GEOID10\"\n",
|
||||
"GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n",
|
||||
"GEOID_STATE_FIELD_NAME = \"GEOID10_STATE\"\n",
|
||||
"COUNTRY_FIELD_NAME = \"Country\"\n",
|
||||
"CENSUS_BLOCK_GROUP_POPULATION_FIELD = \"Total population\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "94407baa",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load CEJST score data\n",
|
||||
"cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa.csv\"\n",
|
||||
"cejst_df = pd.read_csv(cejst_data_path, dtype={GEOID_FIELD_NAME: \"string\"})\n",
|
||||
"\n",
|
||||
"cejst_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "29babd55",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"columns_to_plot = [\n",
|
||||
" \"Respiratory hazard index\",\n",
|
||||
" \"Particulate matter (PM2.5)\",\n",
|
||||
" \"Poverty (Less than 200% of federal poverty line)\",\n",
|
||||
" \"Percent individuals age 25 or over with less than high school degree\",\n",
|
||||
" \"Unemployed civilians (percent)\",\n",
|
||||
" \"Linguistic isolation (percent)\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"column_to_plot = columns_to_plot[0]\n",
|
||||
"print(f\"Plotting {column_to_plot}\")\n",
|
||||
"print(cejst_df[\n",
|
||||
" column_to_plot\n",
|
||||
"].hist())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3e5c8dcf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in cejst_df.columns:\n",
|
||||
" print(i)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue