diff --git a/data/data-pipeline/data_pipeline/data/needs_to_be_moved_to_s3/2014_to_2019_state_median_income.csv b/data/data-pipeline/data_pipeline/data/needs_to_be_moved_to_s3/2014_to_2019_state_median_income.csv new file mode 100644 index 00000000..6f0cea70 --- /dev/null +++ b/data/data-pipeline/data_pipeline/data/needs_to_be_moved_to_s3/2014_to_2019_state_median_income.csv @@ -0,0 +1,53 @@ +GEOID2,Median household income (State) +01,50536 +02,77640 +04,58945 +05,47597 +06,75235 +08,72331 +09,78444 +10,68287 +11,86420 +12,55660 +13,58700 +15,81275 +16,55785 +17,65886 +18,56303 +19,60523 +20,59597 +21,50589 +22,49469 +23,57918 +24,84805 +25,81215 +26,57144 +27,71306 +28,45081 +29,55461 +30,54970 +31,61439 +32,60365 +33,76768 +34,82545 +35,49754 +36,68486 +37,54602 +38,64894 +39,56602 +40,52919 +41,62818 +42,61744 +44,67167 +45,53199 +46,58275 +47,53320 +48,61874 +49,71621 +50,61973 +51,74222 +53,73775 +54,46711 +55,61747 +56,64049 +72,20539 diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 3bee9cb7..103e4572 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -24,7 +24,20 @@ class CensusACSETL(ExtractTransformLoad): ] self.MEDIAN_INCOME_FIELD = "B19013_001E" self.MEDIAN_INCOME_FIELD_NAME = "Median household income in the past 12 months" + self.MEDIAN_INCOME_STATE_FIELD_NAME = "Median household income (State)" + self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME = ( + "Median household income (% of state median household income)" + ) + self.STATE_GEOID_FIELD_NAME = "GEOID2" self.df: pd.DataFrame + self.state_median_income_df: pd.DataFrame + + # TODO: refactor this to put this file on s3 and download it from there + self.STATE_MEDIAN_INCOME_FILE_PATH = ( + self.DATA_PATH + / "needs_to_be_moved_to_s3" + / "2014_to_2019_state_median_income.csv" + ) def _fips_from_censusdata_censusgeo(self, censusgeo: censusdata.censusgeo) -> str: """Create a FIPS code from the proprietary censusgeo index.""" @@ -59,12 +72,36 @@ class CensusACSETL(ExtractTransformLoad): func=self._fips_from_censusdata_censusgeo ) + self.state_median_income_df = pd.read_csv( + # TODO: Replace with reading from S3. + filepath_or_buffer=self.STATE_MEDIAN_INCOME_FILE_PATH, + dtype={self.STATE_GEOID_FIELD_NAME: "string"}, + ) + def transform(self) -> None: logger.info("Starting Census ACS Transform") # Rename median income self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[self.MEDIAN_INCOME_FIELD] + # TODO: handle null values for CBG median income, which are `-666666666`. + + # Join state data on CBG data: + self.df[self.STATE_GEOID_FIELD_NAME] = ( + self.df[self.GEOID_FIELD_NAME].astype(str).str[0:2] + ) + self.df = self.df.merge( + self.state_median_income_df, + how="left", + on=self.STATE_GEOID_FIELD_NAME, + ) + + # Calculate the income of the block group as a fraction of the state income: + self.df[self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME] = ( + self.df[self.MEDIAN_INCOME_FIELD_NAME] + / self.df[self.MEDIAN_INCOME_STATE_FIELD_NAME] + ) + # Calculate percent unemployment. # TODO: remove small-sample data that should be `None` instead of a high-variance fraction. self.df[self.UNEMPLOYED_FIELD_NAME] = self.df.B23025_005E / self.df.B23025_003E @@ -98,6 +135,8 @@ class CensusACSETL(ExtractTransformLoad): self.UNEMPLOYED_FIELD_NAME, self.LINGUISTIC_ISOLATION_FIELD_NAME, self.MEDIAN_INCOME_FIELD_NAME, + self.MEDIAN_INCOME_STATE_FIELD_NAME, + self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME, ] self.df[columns_to_include].to_csv( diff --git a/data/data-pipeline/data_pipeline/ipython/census_explore.ipynb b/data/data-pipeline/data_pipeline/ipython/census_explore.ipynb new file mode 100644 index 00000000..76d22fd7 --- /dev/null +++ b/data/data-pipeline/data_pipeline/ipython/census_explore.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0491828b", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import censusdata\n", + "import csv\n", + "from pathlib import Path\n", + "import os\n", + "import sys\n", + "\n", + "module_path = os.path.abspath(os.path.join(\"../..\"))\n", + "if module_path not in sys.path:\n", + " sys.path.append(module_path)\n", + "\n", + "from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes\n", + "\n", + "\n", + "ACS_YEAR = 2019\n", + "\n", + "DATA_PATH = Path.cwd().parent / \"data\"\n", + "FIPS_CSV_PATH = DATA_PATH / \"fips_states_2010.csv\"\n", + "\n", + "GEOID_FIELD_NAME = \"GEOID10\"\n", + "UNEMPLOYED_FIELD_NAME = \"Unemployed Civilians (fraction)\"\n", + "\n", + "# Some display settings to make pandas outputs more readable.\n", + "pd.set_option(\"display.expand_frame_repr\", False)\n", + "pd.set_option(\"display.precision\", 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "654f25a1", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Following the tutorial at https://jtleider.github.io/censusdata/example1.html.\n", + "# Full list of fields is at https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx\n", + "censusdata.printtable(censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"B19013\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8999cea4", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:\n", + " \"\"\"Create a FIPS code from the proprietary censusgeo index.\"\"\"\n", + " fips = \"\".join([value for (key, value) in censusgeo.params()])\n", + " return fips\n", + "\n", + "\n", + "dfs = []\n", + "for fips in get_state_fips_codes(DATA_PATH):\n", + " print(f\"Fetching data for fips {fips}\")\n", + " dfs.append(\n", + " censusdata.download(\n", + " src=\"acs5\",\n", + " year=ACS_YEAR,\n", + " geo=censusdata.censusgeo(\n", + " [\n", + " (\"state\", fips) \n", + " #, (\"county\", \"*\"), (\"block group\", \"*\")\n", + " ]\n", + " ),\n", + " var=[\"B23025_005E\", \"B23025_003E\", \"B19013_001E\"],\n", + " )\n", + " )\n", + "\n", + "df = pd.concat(dfs)\n", + "\n", + "df[GEOID_FIELD_NAME] = df.index.to_series().apply(func=fips_from_censusdata_censusgeo)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a269bb1", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "columns_to_include = [\"GEOID2\", \"Median household income (State)\"]\n", + "\n", + "df.rename(columns={\"GEOID10\": \"GEOID2\", \"B19013_001E\": \"Median household income (State)\"}, inplace=True)\n", + "\n", + "df[columns_to_include].to_csv(path_or_buf= \"/Users/lucas/Documents/usds/repos/justice40-tool/data/data-pipeline/data_pipeline/data/needs_to_be_moved_to_s3/2014_to_2019_state_median_income.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91932af5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index 0e7bbdd1..7eb50ce0 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -3,6 +3,11 @@ { "cell_type": "code", "execution_count": null, + "id": "51412a14", + "metadata": { + "scrolled": true + }, + "outputs": [], "source": [ "import collections\n", "import functools\n", @@ -33,15 +38,16 @@ "\n", "# Turn on TQDM for pandas so that we can have progress bars when running `apply`.\n", "tqdm_notebook.pandas()" - ], - "outputs": [], - "metadata": { - "scrolled": true - } + ] }, { "cell_type": "code", "execution_count": null, + "id": "e3234c61", + "metadata": { + "scrolled": true + }, + "outputs": [], "source": [ "# Suppress scientific notation in pandas (this shows up for census tract IDs)\n", "pd.options.display.float_format = \"{:.2f}\".format\n", @@ -72,97 +78,26 @@ "\n", "# Define some suffixes\n", "POPULATION_SUFFIX = \" (priority population)\"" - ], - "outputs": [], - "metadata": { - "scrolled": true - } + ] }, { "cell_type": "code", "execution_count": null, - "source": [ - "# Load CEJST score data\n", - "cejst_data_path = DATA_DIR / \"score\" / \"csv\" / \"full\" / \"usa.csv\"\n", - "cejst_df = pd.read_csv(cejst_data_path, dtype={GEOID_FIELD_NAME: \"string\"})\n", - "\n", - "# Create the CBG's Census Tract ID by dropping the last number from the FIPS CODE of the CBG.\n", - "# The CBG ID is the last one character.\n", - "# For more information, see https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html.\n", - "cejst_df.loc[:, GEOID_TRACT_FIELD_NAME] = (\n", - " cejst_df.loc[:, GEOID_FIELD_NAME].astype(str).str[:-1]\n", - ")\n", - "\n", - "cejst_df.loc[:, GEOID_STATE_FIELD_NAME] = (\n", - " cejst_df.loc[:, GEOID_FIELD_NAME].astype(str).str[0:2]\n", - ")\n", - "\n", - "cejst_df.head()" - ], + "id": "3b1b5ccf", + "metadata": { + "scrolled": false + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "/usr/local/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3169: DtypeWarning: Columns (87,88,90) have mixed types.Specify dtype option on import or set low_memory=False.\n", " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " GEOID10 Housing burden (percent) Total population \\\n", - "0 010010201001 0.15 692 \n", - "1 010010201002 0.15 1153 \n", - "2 010010202001 0.25 1020 \n", - "3 010010202002 0.25 1152 \n", - "4 010010203001 0.21 2555 \n", - "\n", - " Air toxics cancer risk Respiratory hazard index \\\n", - "0 49.38 0.79 \n", - "1 49.38 0.79 \n", - "2 50.32 0.81 \n", - "3 50.32 0.81 \n", - "4 50.77 0.82 \n", - "\n", - " Diesel particulate matter Particulate matter (PM2.5) Ozone \\\n", - "0 0.28 10.00 40.12 \n", - "1 0.28 10.00 40.12 \n", - "2 0.30 10.07 40.22 \n", - "3 0.30 10.07 40.22 \n", - "4 0.36 10.12 40.31 \n", - "\n", - " Traffic proximity and volume Proximity to RMP sites ... \\\n", - "0 91.02 0.09 ... \n", - "1 2.62 0.07 ... \n", - "2 4.68 0.08 ... \n", - "3 218.65 0.09 ... \n", - "4 69.64 0.08 ... \n", - "\n", - " Score D (top 25th percentile) Score E (percentile) \\\n", - "0 False 0.35 \n", - "1 False 0.11 \n", - "2 False 0.51 \n", - "3 False 0.59 \n", - "4 False 0.47 \n", - "\n", - " Score E (top 25th percentile) GEOID State Abbreviation County Name \\\n", - "0 False 1001 AL Autauga County \n", - "1 False 1001 AL Baldwin County \n", - "2 False 1001 AL Barbour County \n", - "3 False 1001 AL Bibb County \n", - "4 False 1001 AL Blount County \n", - "\n", - " State Code State Name GEOID10_TRACT GEOID10_STATE \n", - "0 1.00 Alabama 01001020100 01 \n", - "1 2.00 Alaska 01001020100 01 \n", - "2 4.00 Arizona 01001020200 01 \n", - "3 5.00 Arkansas 01001020200 01 \n", - "4 6.00 California 01001020300 01 \n", - "\n", - "[5 rows x 93 columns]" - ], "text/html": [ "
\n", "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GEOID10_STATEState nameTotal CBGs in stateTotal population in stateScore A (top 25th percentile) (priority population)Score A (top 25th percentile) (total CBGs)Score A (top 25th percentile) (percent CBGs)Score A (top 25th percentile) (percent population)Score B (top 25th percentile) (priority population)Score B (top 25th percentile) (total CBGs)...Score E (top 25th percentile) (percent CBGs)Score E (top 25th percentile) (percent population)calenviroscreen_priority_community (priority population)calenviroscreen_priority_community (total CBGs)calenviroscreen_priority_community (percent CBGs)calenviroscreen_priority_community (percent population)hud_recap_priority_community (priority population)hud_recap_priority_community (total CBGs)hud_recap_priority_community (percent CBGs)hud_recap_priority_community (percent population)
GEOID10_STATE
01001Alabama34384850771154734513260.390.3215564171323...0.230.19000.000.002351172580.080.05
02002Alaska53473856563868570.110.096386857...0.140.12000.000.00653680.010.01
04004Arizona41786809946195605212300.290.2919608561231...0.300.30000.000.005603533780.090.08
05005Arkansas214729779449607998170.380.32975780826...0.200.18000.000.001012001060.050.03
06006California23212389828471261081071020.310.32125568467065...0.400.42961028756900.250.25174876510130.040.04
\n", + "

5 rows × 32 columns

\n", + "
" + ], + "text/plain": [ + " GEOID10_STATE State name Total CBGs in state \\\n", + "GEOID10_STATE \n", + "01 0 01 Alabama 3438 \n", + "02 0 02 Alaska 534 \n", + "04 0 04 Arizona 4178 \n", + "05 0 05 Arkansas 2147 \n", + "06 0 06 California 23212 \n", + "\n", + " Total population in state \\\n", + "GEOID10_STATE \n", + "01 0 4850771 \n", + "02 0 738565 \n", + "04 0 6809946 \n", + "05 0 2977944 \n", + "06 0 38982847 \n", + "\n", + " Score A (top 25th percentile) (priority population) \\\n", + "GEOID10_STATE \n", + "01 0 1547345 \n", + "02 0 63868 \n", + "04 0 1956052 \n", + "05 0 960799 \n", + "06 0 12610810 \n", + "\n", + " Score A (top 25th percentile) (total CBGs) \\\n", + "GEOID10_STATE \n", + "01 0 1326 \n", + "02 0 57 \n", + "04 0 1230 \n", + "05 0 817 \n", + "06 0 7102 \n", + "\n", + " Score A (top 25th percentile) (percent CBGs) \\\n", + "GEOID10_STATE \n", + "01 0 0.39 \n", + "02 0 0.11 \n", + "04 0 0.29 \n", + "05 0 0.38 \n", + "06 0 0.31 \n", + "\n", + " Score A (top 25th percentile) (percent population) \\\n", + "GEOID10_STATE \n", + "01 0 0.32 \n", + "02 0 0.09 \n", + "04 0 0.29 \n", + "05 0 0.32 \n", + "06 0 0.32 \n", + "\n", + " Score B (top 25th percentile) (priority population) \\\n", + "GEOID10_STATE \n", + "01 0 1556417 \n", + "02 0 63868 \n", + "04 0 1960856 \n", + "05 0 975780 \n", + "06 0 12556846 \n", + "\n", + " Score B (top 25th percentile) (total CBGs) ... \\\n", + "GEOID10_STATE ... \n", + "01 0 1323 ... \n", + "02 0 57 ... \n", + "04 0 1231 ... \n", + "05 0 826 ... \n", + "06 0 7065 ... \n", + "\n", + " Score E (top 25th percentile) (percent CBGs) \\\n", + "GEOID10_STATE \n", + "01 0 0.23 \n", + "02 0 0.14 \n", + "04 0 0.30 \n", + "05 0 0.20 \n", + "06 0 0.40 \n", + "\n", + " Score E (top 25th percentile) (percent population) \\\n", + "GEOID10_STATE \n", + "01 0 0.19 \n", + "02 0 0.12 \n", + "04 0 0.30 \n", + "05 0 0.18 \n", + "06 0 0.42 \n", + "\n", + " calenviroscreen_priority_community (priority population) \\\n", + "GEOID10_STATE \n", + "01 0 0 \n", + "02 0 0 \n", + "04 0 0 \n", + "05 0 0 \n", + "06 0 9610287 \n", + "\n", + " calenviroscreen_priority_community (total CBGs) \\\n", + "GEOID10_STATE \n", + "01 0 0 \n", + "02 0 0 \n", + "04 0 0 \n", + "05 0 0 \n", + "06 0 5690 \n", + "\n", + " calenviroscreen_priority_community (percent CBGs) \\\n", + "GEOID10_STATE \n", + "01 0 0.00 \n", + "02 0 0.00 \n", + "04 0 0.00 \n", + "05 0 0.00 \n", + "06 0 0.25 \n", + "\n", + " calenviroscreen_priority_community (percent population) \\\n", + "GEOID10_STATE \n", + "01 0 0.00 \n", + "02 0 0.00 \n", + "04 0 0.00 \n", + "05 0 0.00 \n", + "06 0 0.25 \n", + "\n", + " hud_recap_priority_community (priority population) \\\n", + "GEOID10_STATE \n", + "01 0 235117 \n", + "02 0 6536 \n", + "04 0 560353 \n", + "05 0 101200 \n", + "06 0 1748765 \n", + "\n", + " hud_recap_priority_community (total CBGs) \\\n", + "GEOID10_STATE \n", + "01 0 258 \n", + "02 0 8 \n", + "04 0 378 \n", + "05 0 106 \n", + "06 0 1013 \n", + "\n", + " hud_recap_priority_community (percent CBGs) \\\n", + "GEOID10_STATE \n", + "01 0 0.08 \n", + "02 0 0.01 \n", + "04 0 0.09 \n", + "05 0 0.05 \n", + "06 0 0.04 \n", + "\n", + " hud_recap_priority_community (percent population) \n", + "GEOID10_STATE \n", + "01 0 0.05 \n", + "02 0 0.01 \n", + "04 0 0.08 \n", + "05 0 0.03 \n", + "06 0 0.04 \n", + "\n", + "[5 rows x 32 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def get_state_distributions(\n", " df: pd.DataFrame, priority_communities_fields: typing.List[str]\n", @@ -1576,391 +1965,14 @@ ")\n", "\n", "state_distribution_df.head()" - ], - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Converting calenviroscreen_priority_community to boolean.\n", - "Converting hud_recap_priority_community to boolean.\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - " 0%| | 0/52 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
GEOID10_STATEState nameTotal CBGs in stateTotal population in stateScore A (top 25th percentile) (priority population)Score A (top 25th percentile) (total CBGs)Score A (top 25th percentile) (percent CBGs)Score A (top 25th percentile) (percent population)Score B (top 25th percentile) (priority population)Score B (top 25th percentile) (total CBGs)...Score E (top 25th percentile) (percent CBGs)Score E (top 25th percentile) (percent population)calenviroscreen_priority_community (priority population)calenviroscreen_priority_community (total CBGs)calenviroscreen_priority_community (percent CBGs)calenviroscreen_priority_community (percent population)hud_recap_priority_community (priority population)hud_recap_priority_community (total CBGs)hud_recap_priority_community (percent CBGs)hud_recap_priority_community (percent population)
GEOID10_STATE
01001Alabama34384850771154734513260.390.3215564171323...0.230.19000.000.002351172580.080.05
02002Alaska53473856563868570.110.096386857...0.140.12000.000.00653680.010.01
04004Arizona41786809946195605212300.290.2919608561231...0.300.30000.000.005603533780.090.08
05005Arkansas214729779449607998170.380.32975780826...0.200.18000.000.001012001060.050.03
06006California23212389828471261081071020.310.32125568467065...0.400.42961028756900.250.25174876510130.040.04
\n", - "

5 rows × 32 columns

\n", - "" - ] - }, - "metadata": {}, - "execution_count": 10 - } - ], - "metadata": { - "scrolled": true - } + ] }, { "cell_type": "code", "execution_count": null, + "id": "d7acf80d", + "metadata": {}, + "outputs": [], "source": [ "def write_markdown_and_docx_content(\n", " markdown_content: str, file_dir: pathlib.PosixPath, file_name_without_extension: str\n", @@ -2101,13 +2113,14 @@ " census_block_group_indices=census_block_group_indices,\n", " df=merged_with_state_information_df,\n", ")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "id": "777a4623", + "metadata": {}, + "outputs": [], "source": [ "# This cell defines a variety of comparison functions. It does not run them.\n", "\n", @@ -2543,13 +2556,59 @@ " comparison_docx_file_paths.append(comparison_docx_file_path)\n", "\n", " return comparison_docx_file_paths" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "id": "908e0ad4", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running comparisons for Score A against CalEnviroScreen 4.0...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "531ec4deb2f54c26ad0f5311fdea0e60", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/8057 [00:00