# %% [markdown]
# # Download codebook: merge sources and draft percentile notes
#
# Combines three frames that share a `score_name` key -- the codebook, the
# shapefile column codes and the per-field details -- into a single
# `download_codebook` frame, then prints one `column_name` / `notes` entry for
# every field whose name contains "(percentile)".
#
# Cells are ordered so the notebook runs top to bottom on a fresh kernel:
# `download_codebook` is built before any cell that reads it.

# %%
from pathlib import Path

import numpy as np
import pandas as pd

# Cell auto-formatter (IPython magic). Stored commented out, as the percent-cell
# script layout does for magics, so this file also parses as plain Python.
# %load_ext lab_black

# %% [markdown]
# ## Configuration

# %%
# Machine-specific location of the pipeline's score outputs. Both input files
# are derived from this one path, so it is the only line to edit when running
# on another machine.
SCORE_DIR = Path("/Users/emmausds/j40/data_pipeline/data/score")
CODEBOOK_CSV = SCORE_DIR / "downloadable" / "codebook.csv"
SHAPEFILE_COLUMNS_CSV = SCORE_DIR / "shapefile" / "columns.csv"

# Text printed under each percentile column.
# NOTE(review): the leading whitespace of these literals (and of the
# " - column_name:" line below) is reproduced exactly as it appears in the
# saved notebook -- confirm it matches the indentation the target file expects.
FLOORED_NOTES_LINE = " notes: all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field."
REVERSED_AND_FLOORED_NOTES_LINE = " notes: (1) this percentile is reversed, meaning the lowest raw numbers become the highest percentiles, and (2) all percentiles are floored (rounded down to the nearest percentile). For example, 89.7th percentile is rounded down to 89 for this field."

# %% [markdown]
# ## Load inputs

# %%
# Previously exported codebook, loaded for visual comparison only; nothing
# below reads `check`.
check = pd.read_csv(CODEBOOK_CSV)
check

# %%
# NOTE(review): `to_frame_dict` and `mapping_dictionary` are not defined in any
# cell of this notebook (presumably their cells were removed after being run).
# Restore those definitions above this cell; until then it raises NameError on
# a fresh kernel.
codebook = pd.DataFrame(to_frame_dict)
details_to_merge = pd.DataFrame(mapping_dictionary)

# %%
shapefile_codes = pd.read_csv(SHAPEFILE_COLUMNS_CSV)

# %% [markdown]
# ## Build the combined codebook

# %%
# Column-wise concat: the three frames are aligned on their shared
# `score_name` index, so each field ends up as one row carrying the columns of
# all three sources (NaN where a source has no entry for that field).
download_codebook = pd.concat(
    [
        codebook.set_index("score_name"),
        shapefile_codes.rename(
            columns={"meaning": "shapefile_column", "column": "score_name"}
        ).set_index("score_name"),
        details_to_merge.set_index("score_name"),
    ],
    axis=1,
)

# %%
# Names of the fields that have a `format` entry.
download_codebook.dropna(subset=["format"]).reset_index()["score_name"]

# %% [markdown]
# ## Which fields are percentiles?

# %%
# Rows where a format-like column equals "percentile". Comparing the filtered
# frame to a string yields a boolean *DataFrame*, which `.loc` rejects with
# "Cannot index with multidimensional key"; `.any(axis=1)` collapses it to one
# boolean per row first.
# The saved run of this query returned an empty frame, which is why percentile
# fields are picked by name in the next cell instead.
has_percentile_format = (
    download_codebook.filter(like="format") == "percentile"
).any(axis=1)
download_codebook.loc[has_percentile_format]

# %%
# Percentile fields are recognised by the "(percentile)" marker in their name.
percentile_columns = [
    name for name in download_codebook.index.to_list() if "(percentile)" in name
]
percentile_columns

# %% [markdown]
# ## Print the notes entries

# %%
def percentile_notes_line(column_name: str) -> str:
    """Return the ``notes`` line to print for one percentile column.

    Columns whose name contains the substring "Low" (case-sensitive) get the
    reversed-and-floored wording; every other column gets the floored-only
    wording.
    """
    if "Low" in column_name:
        return REVERSED_AND_FLOORED_NOTES_LINE
    return FLOORED_NOTES_LINE


# %%
for name in percentile_columns:
    print(f" - column_name: {name}")
    print(percentile_notes_line(name))