Generate Geo-aware scores for all zoom levels (#391)

* generate Geo-aware scores for all zoom levels

* usa high progress

* testing dissolve

* checkpoint

* changing type

* removing breakpoint

* validation notebooks

* quick update

* score validation

* fixes for county merge

* code completed
This commit is contained in:
Jorge Escobar 2021-07-28 16:07:28 -04:00 committed by GitHub
commit b404fdcc43
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 3023 additions and 270 deletions

View file

@ -44,7 +44,7 @@ TODO add mermaid diagram
#### Step 0: Set up your environment
1. After cloning the project locally, change to this directory: `cd score`
1. After cloning the project locally, change to this directory: `cd data/data-pipeline`
1. Choose whether you'd like to run this application using Docker or if you'd like to install the dependencies locally so you can contribute to the project.
- **With Docker:** Follow these [installation instructions](https://docs.docker.com/get-docker/) and skip down to the [Running with Docker section](#running-with-docker) for more information
- **For Local Development:** Skip down to the [Local Development section](#local-development) for more detailed installation instructions
@ -53,7 +53,7 @@ TODO add mermaid diagram
#### Step 1: Run the ETL script for each data source
1. Call the `etl-run` command using the application manager `application.py` **NOTE:** This may take several minutes to execute.
- With Docker: `docker run --rm -it j40_score /bin/sh -c "python3 application.py etl-run"`
- With Docker: `docker run --rm -it j40_data_pipeline /bin/sh -c "python3 application.py etl-run"`
- With Poetry: `poetry run python application.py etl-run`
1. The `etl-run` command will execute the corresponding ETL script for each data source in `etl/sources/`. For example, `etl/sources/ejscreen/etl.py` is the ETL script for EJSCREEN data.
1. Each ETL script will extract the data from its original source, then format the data into `.csv` files that get stored in the relevant folder in `data/dataset/`. For example, HUD Housing data is stored in `data/dataset/hud_housing/usa.csv`
@ -64,7 +64,7 @@ _For example: `poetry run python application.py etl-run ejscreen` would only run
#### Step 2: Calculate the Justice40 score experiments
1. Call the `score-run` command using the application manager `application.py` **NOTE:** This may take several minutes to execute.
- With Docker: `docker run --rm -it j40_score /bin/sh -c "python3 application.py score-run"`
- With Docker: `docker run --rm -it j40_data_pipeline /bin/sh -c "python3 application.py score-run"`
- With Poetry: `poetry run python application.py score-run`
1. The `score-run` command will execute the `etl/score/etl.py` script which loads the data from each of the source files added to the `data/dataset/` directory by the ETL scripts in Step 1.
1. These data sets are merged into a single dataframe using their Census Block Group GEOID as a common key, and the data in each of the columns is standardized in two ways:

View file

@ -9,7 +9,7 @@ from utils import (
temp_folder_cleanup,
)
from etl.sources.census.etl import download_census_csvs
from etl.runner import etl_runner, score_generate
from etl.runner import etl_runner, score_generate, score_geo
logger = get_module_logger(__name__)
@ -88,5 +88,13 @@ def score_run():
score_generate()
@cli.command(
    help="Generate Geojson files with scores baked in",
)
def geo_score():
    """CLI command to generate the GeoJSON files with the score baked in."""
    score_geo()
if __name__ == "__main__":
cli()

View file

@ -2,6 +2,7 @@ import importlib
from etl.score.etl_score import ScoreETL
from etl.score.etl_score_post import PostScoreETL
from etl.score.etl_score_geo import GeoScoreETL
def etl_runner(dataset_to_run: str = None) -> None:
@ -112,6 +113,23 @@ def score_generate() -> None:
score_post.cleanup()
def score_geo() -> None:
    """Generates the geojson files with score data baked in.

    Runs the full ETL cycle of GeoScoreETL: extract the national
    GeoJSON and score CSV, merge them, and write the output files.

    Args:
        None

    Returns:
        None
    """
    # Renamed the local from `score_geo` — it previously shadowed the
    # enclosing function's own name, which is confusing in tracebacks.
    geo_etl = GeoScoreETL()
    geo_etl.extract()
    geo_etl.transform()
    geo_etl.load()
def _find_dataset_index(dataset_list, key, value):
for i, element in enumerate(dataset_list):
if element[key] == value:

View file

@ -0,0 +1,168 @@
import pandas as pd
import geopandas as gpd
import math
from etl.base import ExtractTransformLoad
from utils import get_module_logger
logger = get_module_logger(__name__)
class GeoScoreETL(ExtractTransformLoad):
    """Generate per-state and national GeoJSON files with the score baked in.

    Produces two zoom-level variants:
      * ``usa-high.json`` — every census block group with its score (detailed)
      * ``usa-low.json``  — tracts dissolved into score buckets (coarse)
    """

    def __init__(self):
        self.SCORE_GEOJSON_PATH = self.DATA_PATH / "score" / "geojson"
        self.SCORE_LOW_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-low.json"
        self.SCORE_HIGH_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-high.json"

        self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
        self.TILE_SCORE_CSV = self.SCORE_CSV_PATH / "tiles" / "usa.csv"

        self.CENSUS_USA_GEOJSON = (
            self.DATA_PATH / "census" / "geojson" / "us.json"
        )

        # Score column to surface, and its short name in the output files.
        self.TARGET_SCORE_NAME = "Score E (percentile)"
        self.TARGET_SCORE_RENAME_TO = "E_SCORE"

        # Number of score buckets used for the low-detail (usa-low) output.
        self.NUMBER_OF_BUCKETS = 10

        self.geojson_usa_df: gpd.GeoDataFrame
        self.score_usa_df: pd.DataFrame
        self.geojson_score_usa_high: gpd.GeoDataFrame
        self.geojson_score_usa_low: gpd.GeoDataFrame

    def extract(self) -> None:
        """Read the national census GeoJSON and the tile score CSV."""
        logger.info("Reading US GeoJSON (~6 minutes)")
        # NOTE(review): dtype/usecols/low_memory are pandas.read_csv options;
        # geopandas.read_file forwards extra kwargs to the IO engine, so they
        # may be silently ignored here — confirm they have any effect.
        self.geojson_usa_df = gpd.read_file(
            self.CENSUS_USA_GEOJSON,
            dtype={"GEOID10": "string"},
            usecols=["GEOID10", "geometry"],
            low_memory=False,
        )

        logger.info("Reading score CSV")
        self.score_usa_df = pd.read_csv(
            self.TILE_SCORE_CSV,
            dtype={"GEOID10": "string"},
            low_memory=False,
        )

    def transform(self) -> None:
        """Merge scores onto geometries and build the high/low zoom frames."""
        logger.info("Pruning Census GeoJSON")
        fields = ["GEOID10", "geometry"]
        self.geojson_usa_df = self.geojson_usa_df[fields]

        logger.info("Merging and compressing score CSV with USA GeoJSON")
        self.geojson_score_usa_high = self.score_usa_df.merge(
            self.geojson_usa_df, on="GEOID10", how="left"
        )
        self.geojson_score_usa_high = gpd.GeoDataFrame(
            self.geojson_score_usa_high, crs="EPSG:4326"
        )

        usa_simplified = self.geojson_score_usa_high[
            ["GEOID10", self.TARGET_SCORE_NAME, "geometry"]
        ].reset_index(drop=True)
        usa_simplified.rename(
            columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
            inplace=True,
        )

        logger.info("Aggregating into tracts (~5 minutes)")
        usa_tracts = self._aggregate_to_tracts(usa_simplified)
        usa_tracts = gpd.GeoDataFrame(
            usa_tracts,
            columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
            crs="EPSG:4326",
        )

        logger.info("Creating buckets from tracts")
        usa_bucketed = self._create_buckets_from_tracts(
            usa_tracts, self.NUMBER_OF_BUCKETS
        )

        logger.info("Aggregating buckets")
        usa_aggregated = self._aggregate_buckets(usa_bucketed, agg_func="mean")

        compressed = self._breakup_multipolygons(
            usa_aggregated, self.NUMBER_OF_BUCKETS
        )
        self.geojson_score_usa_low = gpd.GeoDataFrame(
            compressed,
            columns=[self.TARGET_SCORE_RENAME_TO, "geometry"],
            crs="EPSG:4326",
        )

    def _aggregate_to_tracts(
        self, block_group_df: gpd.GeoDataFrame
    ) -> gpd.GeoDataFrame:
        """Dissolve block groups into tracts, averaging numeric columns."""
        # The tract identifier is the first 11 digits of the GEOID.
        # Vectorized .str slicing replaces the previous row-wise apply.
        block_group_df["tract"] = block_group_df["GEOID10"].str[0:11]
        state_tracts = block_group_df.dissolve(by="tract", aggfunc="mean")
        return state_tracts

    def _create_buckets_from_tracts(
        self, state_tracts: gpd.GeoDataFrame, num_buckets: int
    ) -> gpd.GeoDataFrame:
        """Assign each tract to one of ``num_buckets`` score-ordered buckets."""
        # Sort by score so consecutive rows fall into the same bucket.
        state_tracts.sort_values(self.TARGET_SCORE_RENAME_TO, inplace=True)
        # Fix: use the num_buckets parameter — previously this read
        # self.NUMBER_OF_BUCKETS and silently ignored the argument.
        bucket_size = math.ceil(len(state_tracts.index) / num_buckets)
        score_buckets = [
            math.floor(i / bucket_size)
            for i in range(len(state_tracts.index))
        ]
        state_tracts[f"{self.TARGET_SCORE_RENAME_TO}_bucket"] = score_buckets
        return state_tracts

    def _aggregate_buckets(self, state_tracts: gpd.GeoDataFrame, agg_func: str):
        """Dissolve tracts into one geometry per bucket using ``agg_func``."""
        state_attr = state_tracts[
            [
                self.TARGET_SCORE_RENAME_TO,
                f"{self.TARGET_SCORE_RENAME_TO}_bucket",
                "geometry",
            ]
        ].reset_index(drop=True)
        state_dissolve = state_attr.dissolve(
            by=f"{self.TARGET_SCORE_RENAME_TO}_bucket", aggfunc=agg_func
        )
        return state_dissolve

    def _breakup_multipolygons(
        self, state_bucketed_df: gpd.GeoDataFrame, num_buckets: int
    ) -> list:
        """Explode each bucket's MultiPolygon into [score, polygon] pairs.

        Return annotation corrected: this builds a plain list of two-item
        lists (score, single geometry), not a GeoDataFrame.
        """
        compressed = []
        for i in range(num_buckets):
            score_value = state_bucketed_df[self.TARGET_SCORE_RENAME_TO][i]
            for geom in state_bucketed_df["geometry"][i].geoms:
                compressed.append([score_value, geom])
        return compressed

    def load(self) -> None:
        """Write the high- and low-detail GeoJSON files to disk."""
        logger.info("Writing usa-high (~9 minutes)")
        self.geojson_score_usa_high.to_file(
            self.SCORE_HIGH_GEOJSON, driver="GeoJSON"
        )
        logger.info("Completed writing usa-high")

        logger.info("Writing usa-low (~9 minutes)")
        self.geojson_score_usa_low.to_file(
            self.SCORE_LOW_GEOJSON, driver="GeoJSON"
        )
        logger.info("Completed writing usa-low")

View file

@ -16,10 +16,13 @@ class PostScoreETL(ExtractTransformLoad):
self.CENSUS_COUNTIES_ZIP_URL = "https://www2.census.gov/geo/docs/maps-data/data/gazetteer/Gaz_counties_national.zip"
self.CENSUS_COUNTIES_TXT = self.TMP_PATH / "Gaz_counties_national.txt"
self.CENSUS_COUNTIES_COLS = ["USPS", "GEOID", "NAME"]
self.CENSUS_USA_CSV = self.DATA_PATH / "census" / "csv" / "us.csv"
self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
self.STATE_CSV = (
self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
)
self.FULL_SCORE_CSV = self.SCORE_CSV_PATH / "full" / "usa.csv"
self.TILR_SCORE_CSV = self.SCORE_CSV_PATH / "tile" / "usa.csv"
@ -87,17 +90,43 @@ class PostScoreETL(ExtractTransformLoad):
# add the tract level column
self.score_df["GEOID"] = self.score_df.GEOID10.str[:5]
# merge state and counties
county_state_merged = self.counties_df.join(
self.states_df, rsuffix=" Other"
# merge state with counties
county_state_merged = self.counties_df.merge(
self.states_df, on="State Abbreviation", how="left"
)
del county_state_merged["State Abbreviation Other"]
# merge county and score
self.score_county_state_merged = self.score_df.join(
county_state_merged, rsuffix="_OTHER"
# merge state + county with score
self.score_county_state_merged = self.score_df.merge(
county_state_merged, on="GEOID", how="left"
)
del self.score_county_state_merged["GEOID_OTHER"]
# check if there are census cbgs without score
logger.info(f"Removing CBG rows without score")
## load cbgs
cbg_usa_df = pd.read_csv(
self.CENSUS_USA_CSV,
names=["GEOID10"],
dtype={"GEOID10": "string"},
low_memory=False,
header=None,
)
# merge census cbgs with score
merged_df = cbg_usa_df.merge(
self.score_county_state_merged, on="GEOID10", how="left"
)
# list the null score cbgs
null_cbg_df = merged_df[merged_df["Score E (percentile)"].isnull()]
# subtract data sets
removed_df = pd.concat(
[merged_df, null_cbg_df, null_cbg_df]
).drop_duplicates(keep=False)
# set the score to the new df
self.score_county_state_merged = removed_df
def load(self) -> None:
logger.info(f"Saving Full Score CSV with County Information")

View file

@ -9,12 +9,16 @@ logger = get_module_logger(__name__)
class CalEnviroScreenETL(ExtractTransformLoad):
def __init__(self):
self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
self.CALENVIROSCREEN_CSV = (
self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
)
self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
# Definining some variable names
self.CALENVIROSCREEN_SCORE_FIELD_NAME = "calenviroscreen_score"
self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = "calenviroscreen_percentile"
self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = (
"calenviroscreen_percentile"
)
self.CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = (
"calenviroscreen_priority_community"
)

View file

@ -2,6 +2,7 @@ import csv
import os
import json
from pathlib import Path
import geopandas as gpd
from .etl_utils import get_state_fips_codes
from utils import unzip_file_from_url, get_module_logger
@ -11,7 +12,7 @@ logger = get_module_logger(__name__)
def download_census_csvs(data_path: Path) -> None:
"""Download all census shape files from the Census FTP and extract the geojson
to generate national and by state Census Block Group CSVs
to generate national and by state Census Block Group CSVs and GeoJSONs
Args:
data_path (pathlib.Path): Name of the directory where the files and directories will
@ -108,4 +109,17 @@ def download_census_csvs(data_path: Path) -> None:
]
)
## create national geojson
logger.info(f"Generating national geojson file")
usa_df = gpd.GeoDataFrame()
for file_name in geojson_dir_path.rglob("*.json"):
logger.info(f"Ingesting {file_name}")
state_gdf = gpd.read_file(file_name)
usa_df = usa_df.append(state_gdf)
usa_df = usa_df.to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
logger.info(f"Writing national geojson file")
usa_df.to_file(geojson_dir_path / "us.json", driver="GeoJSON")
logger.info("Census block groups downloading complete")

View file

@ -106,3 +106,8 @@ class CensusACSETL(ExtractTransformLoad):
self.df[columns_to_include].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
)
def validate(self) -> None:
    """Placeholder validation hook for Census ACS data.

    Currently only logs; no checks are performed yet.
    """
    # Dropped the needless f-string prefix (no placeholders) and the
    # redundant `pass` that followed a real statement.
    logger.info("Validating Census ACS Data")

View file

@ -0,0 +1,567 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "43c5dbee",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import csv\n",
"from pathlib import Path\n",
"import os\n",
"import sys"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f97c95f6",
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.abspath(os.path.join(\"..\"))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "b8a2b53e",
"metadata": {},
"outputs": [],
"source": [
"DATA_PATH = Path.cwd().parent / \"data\"\n",
"TMP_PATH: Path = DATA_PATH / \"tmp\"\n",
"ACS_YEAR = \"2019\"\n",
"OUTPUT_PATH = (\n",
" DATA_PATH / \"dataset\" / f\"census_acs_{ACS_YEAR}\"\n",
" )\n",
"CENSUS_USA_CSV = (\n",
" DATA_PATH / \"census\" / \"csv\" / \"us.csv\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "0d33e8db",
"metadata": {},
"outputs": [],
"source": [
"cbg_usa_df = pd.read_csv(\n",
" CENSUS_USA_CSV,\n",
" names=['GEOID10'],\n",
" dtype={\"GEOID10\": \"string\"},\n",
" low_memory=False,\n",
" header=None\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "01e6dbe3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100010414002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100010415002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100010417011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100010417012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100010422011</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID10\n",
"0 100010414002\n",
"1 100010415002\n",
"2 100010417011\n",
"3 100010417012\n",
"4 100010422011"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cbg_usa_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "341dbcb6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GEOID10 string\n",
"dtype: object"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cbg_usa_df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "eb25d4bf",
"metadata": {},
"outputs": [],
"source": [
"acs_df = pd.read_csv(\n",
" OUTPUT_PATH / \"usa.csv\",\n",
" dtype={\"GEOID10\": \"string\"},\n",
" low_memory=False,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "d4c9d010",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>Unemployed civilians (percent)</th>\n",
" <th>Linguistic isolation (percent)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>010399620002</td>\n",
" <td>0.077108</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>010399618002</td>\n",
" <td>0.126214</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>010399616004</td>\n",
" <td>0.133172</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>010399616002</td>\n",
" <td>0.028249</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>010399616001</td>\n",
" <td>0.063037</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID10 Unemployed civilians (percent) \\\n",
"0 010399620002 0.077108 \n",
"1 010399618002 0.126214 \n",
"2 010399616004 0.133172 \n",
"3 010399616002 0.028249 \n",
"4 010399616001 0.063037 \n",
"\n",
" Linguistic isolation (percent) \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 0.0 \n",
"4 0.0 "
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"acs_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "dd390179",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GEOID10 string\n",
"Unemployed civilians (percent) float64\n",
"Linguistic isolation (percent) float64\n",
"dtype: object"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"acs_df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "236eb093",
"metadata": {},
"outputs": [],
"source": [
"merged_df = cbg_usa_df.merge(\n",
" acs_df, on=\"GEOID10\", how=\"left\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "4fff1845",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>Unemployed civilians (percent)</th>\n",
" <th>Linguistic isolation (percent)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100010414002</td>\n",
" <td>0.030612</td>\n",
" <td>0.065963</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100010415002</td>\n",
" <td>0.118056</td>\n",
" <td>0.010283</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100010417011</td>\n",
" <td>0.042373</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100010417012</td>\n",
" <td>0.042473</td>\n",
" <td>0.010435</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100010422011</td>\n",
" <td>0.054358</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID10 Unemployed civilians (percent) \\\n",
"0 100010414002 0.030612 \n",
"1 100010415002 0.118056 \n",
"2 100010417011 0.042373 \n",
"3 100010417012 0.042473 \n",
"4 100010422011 0.054358 \n",
"\n",
" Linguistic isolation (percent) \n",
"0 0.065963 \n",
"1 0.010283 \n",
"2 0.000000 \n",
"3 0.010435 \n",
"4 0.000000 "
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "f8903557",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>Unemployed civilians (percent)</th>\n",
" <th>Linguistic isolation (percent)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>100019900000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>377</th>\n",
" <td>100030169041</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>392</th>\n",
" <td>100059900000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>400</th>\n",
" <td>100039901000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>416</th>\n",
" <td>100039801001</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>219505</th>\n",
" <td>340057048013</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>219508</th>\n",
" <td>340057048024</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>219758</th>\n",
" <td>340258047001</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>219807</th>\n",
" <td>340259900000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220134</th>\n",
" <td>340076113001</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1462 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" GEOID10 Unemployed civilians (percent) \\\n",
"34 100019900000 NaN \n",
"377 100030169041 NaN \n",
"392 100059900000 NaN \n",
"400 100039901000 NaN \n",
"416 100039801001 NaN \n",
"... ... ... \n",
"219505 340057048013 NaN \n",
"219508 340057048024 NaN \n",
"219758 340258047001 NaN \n",
"219807 340259900000 NaN \n",
"220134 340076113001 NaN \n",
"\n",
" Linguistic isolation (percent) \n",
"34 NaN \n",
"377 NaN \n",
"392 NaN \n",
"400 NaN \n",
"416 NaN \n",
"... ... \n",
"219505 NaN \n",
"219508 NaN \n",
"219758 NaN \n",
"219807 NaN \n",
"220134 0.0 \n",
"\n",
"[1462 rows x 3 columns]"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged_df[merged_df[\"Unemployed civilians (percent)\"].isnull()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b870a21f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,777 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3ab8f7c1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import csv\n",
"from pathlib import Path\n",
"import os\n",
"import sys"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "8c22494f",
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.abspath(os.path.join(\"..\"))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "eb31e9a1",
"metadata": {},
"outputs": [],
"source": [
"DATA_PATH = Path.cwd().parent / \"data\"\n",
"TMP_PATH: Path = DATA_PATH / \"tmp\"\n",
"OUTPUT_PATH = (\n",
" DATA_PATH / \"score\" / \"csv\" / \"tiles\"\n",
" )\n",
"CENSUS_USA_CSV = (\n",
" DATA_PATH / \"census\" / \"csv\" / \"us.csv\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "95a5f8d8",
"metadata": {},
"outputs": [],
"source": [
"cbg_usa_df = pd.read_csv(\n",
" CENSUS_USA_CSV,\n",
" names=['GEOID10'],\n",
" dtype={\"GEOID10\": \"string\"},\n",
" low_memory=False,\n",
" header=None\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "bdd9ab60",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100010414002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100010415002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100010417011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100010417012</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100010422011</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" GEOID10\n",
"0 100010414002\n",
"1 100010415002\n",
"2 100010417011\n",
"3 100010417012\n",
"4 100010422011"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cbg_usa_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "05a40080",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GEOID10 string\n",
"dtype: object"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cbg_usa_df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "114af777",
"metadata": {},
"outputs": [],
"source": [
"score_df = pd.read_csv(\n",
" OUTPUT_PATH / \"usa.csv\",\n",
" dtype={\"GEOID10\": \"string\"},\n",
" low_memory=False,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d5f3ebd4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>Score E (percentile)</th>\n",
" <th>Score E (top 25th percentile)</th>\n",
" <th>GEOID</th>\n",
" <th>State Abbreviation</th>\n",
" <th>County Name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100010414002</td>\n",
" <td>0.808889</td>\n",
" <td>True</td>\n",
" <td>10001</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100010415002</td>\n",
" <td>0.555160</td>\n",
" <td>False</td>\n",
" <td>10001</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100010417011</td>\n",
" <td>0.272392</td>\n",
" <td>False</td>\n",
" <td>10001</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100010417012</td>\n",
" <td>0.345686</td>\n",
" <td>False</td>\n",
" <td>10001</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100010422011</td>\n",
" <td>0.472567</td>\n",
" <td>False</td>\n",
" <td>10001</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220256</th>\n",
" <td>340076020004</td>\n",
" <td>0.921941</td>\n",
" <td>True</td>\n",
" <td>34007</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220257</th>\n",
" <td>340076017002</td>\n",
" <td>0.934490</td>\n",
" <td>True</td>\n",
" <td>34007</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220258</th>\n",
" <td>340076015005</td>\n",
" <td>0.889613</td>\n",
" <td>True</td>\n",
" <td>34007</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220259</th>\n",
" <td>340076091032</td>\n",
" <td>0.627822</td>\n",
" <td>False</td>\n",
" <td>34007</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220260</th>\n",
" <td>340076053002</td>\n",
" <td>0.762237</td>\n",
" <td>True</td>\n",
" <td>34007</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>220261 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" GEOID10 Score E (percentile) Score E (top 25th percentile) \\\n",
"0 100010414002 0.808889 True \n",
"1 100010415002 0.555160 False \n",
"2 100010417011 0.272392 False \n",
"3 100010417012 0.345686 False \n",
"4 100010422011 0.472567 False \n",
"... ... ... ... \n",
"220256 340076020004 0.921941 True \n",
"220257 340076017002 0.934490 True \n",
"220258 340076015005 0.889613 True \n",
"220259 340076091032 0.627822 False \n",
"220260 340076053002 0.762237 True \n",
"\n",
" GEOID State Abbreviation County Name \n",
"0 10001 DE Kent County \n",
"1 10001 DE Kent County \n",
"2 10001 DE Kent County \n",
"3 10001 DE Kent County \n",
"4 10001 DE Kent County \n",
"... ... ... ... \n",
"220256 34007 NJ Camden County \n",
"220257 34007 NJ Camden County \n",
"220258 34007 NJ Camden County \n",
"220259 34007 NJ Camden County \n",
"220260 34007 NJ Camden County \n",
"\n",
"[220261 rows x 6 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"score_df"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "f84f9e1d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GEOID10 string\n",
"Score E (percentile) float64\n",
"Score E (top 25th percentile) bool\n",
"GEOID int64\n",
"State Abbreviation object\n",
"County Name object\n",
"dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"score_df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "8d61e29e",
"metadata": {},
"outputs": [],
"source": [
"merged_df = cbg_usa_df.merge(\n",
" score_df, on=\"GEOID10\", how=\"left\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "7e8c2f2a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>Score E (percentile)</th>\n",
" <th>Score E (top 25th percentile)</th>\n",
" <th>GEOID</th>\n",
" <th>State Abbreviation</th>\n",
" <th>County Name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>100010414002</td>\n",
" <td>0.808889</td>\n",
" <td>True</td>\n",
" <td>10001.0</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>100010415002</td>\n",
" <td>0.555160</td>\n",
" <td>False</td>\n",
" <td>10001.0</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>100010417011</td>\n",
" <td>0.272392</td>\n",
" <td>False</td>\n",
" <td>10001.0</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>100010417012</td>\n",
" <td>0.345686</td>\n",
" <td>False</td>\n",
" <td>10001.0</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>100010422011</td>\n",
" <td>0.472567</td>\n",
" <td>False</td>\n",
" <td>10001.0</td>\n",
" <td>DE</td>\n",
" <td>Kent County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220329</th>\n",
" <td>340076020004</td>\n",
" <td>0.921941</td>\n",
" <td>True</td>\n",
" <td>34007.0</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220330</th>\n",
" <td>340076017002</td>\n",
" <td>0.934490</td>\n",
" <td>True</td>\n",
" <td>34007.0</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220331</th>\n",
" <td>340076015005</td>\n",
" <td>0.889613</td>\n",
" <td>True</td>\n",
" <td>34007.0</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220332</th>\n",
" <td>340076091032</td>\n",
" <td>0.627822</td>\n",
" <td>False</td>\n",
" <td>34007.0</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220333</th>\n",
" <td>340076053002</td>\n",
" <td>0.762237</td>\n",
" <td>True</td>\n",
" <td>34007.0</td>\n",
" <td>NJ</td>\n",
" <td>Camden County</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>220334 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" GEOID10 Score E (percentile) Score E (top 25th percentile) \\\n",
"0 100010414002 0.808889 True \n",
"1 100010415002 0.555160 False \n",
"2 100010417011 0.272392 False \n",
"3 100010417012 0.345686 False \n",
"4 100010422011 0.472567 False \n",
"... ... ... ... \n",
"220329 340076020004 0.921941 True \n",
"220330 340076017002 0.934490 True \n",
"220331 340076015005 0.889613 True \n",
"220332 340076091032 0.627822 False \n",
"220333 340076053002 0.762237 True \n",
"\n",
" GEOID State Abbreviation County Name \n",
"0 10001.0 DE Kent County \n",
"1 10001.0 DE Kent County \n",
"2 10001.0 DE Kent County \n",
"3 10001.0 DE Kent County \n",
"4 10001.0 DE Kent County \n",
"... ... ... ... \n",
"220329 34007.0 NJ Camden County \n",
"220330 34007.0 NJ Camden County \n",
"220331 34007.0 NJ Camden County \n",
"220332 34007.0 NJ Camden County \n",
"220333 34007.0 NJ Camden County \n",
"\n",
"[220334 rows x 6 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged_df"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "e81b1321",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>GEOID10</th>\n",
" <th>Score E (percentile)</th>\n",
" <th>Score E (top 25th percentile)</th>\n",
" <th>GEOID</th>\n",
" <th>State Abbreviation</th>\n",
" <th>County Name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10614</th>\n",
" <td>515150501002</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10615</th>\n",
" <td>515150501003</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10627</th>\n",
" <td>515150501001</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10628</th>\n",
" <td>515150501005</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10629</th>\n",
" <td>515150501004</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174140</th>\n",
" <td>040190029031</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174143</th>\n",
" <td>040190027012</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174184</th>\n",
" <td>040190027011</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174242</th>\n",
" <td>040194105021</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174243</th>\n",
" <td>040194105011</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>73 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" GEOID10 Score E (percentile) Score E (top 25th percentile) \\\n",
"10614 515150501002 NaN NaN \n",
"10615 515150501003 NaN NaN \n",
"10627 515150501001 NaN NaN \n",
"10628 515150501005 NaN NaN \n",
"10629 515150501004 NaN NaN \n",
"... ... ... ... \n",
"174140 040190029031 NaN NaN \n",
"174143 040190027012 NaN NaN \n",
"174184 040190027011 NaN NaN \n",
"174242 040194105021 NaN NaN \n",
"174243 040194105011 NaN NaN \n",
"\n",
" GEOID State Abbreviation County Name \n",
"10614 NaN NaN NaN \n",
"10615 NaN NaN NaN \n",
"10627 NaN NaN NaN \n",
"10628 NaN NaN NaN \n",
"10629 NaN NaN NaN \n",
"... ... ... ... \n",
"174140 NaN NaN NaN \n",
"174143 NaN NaN NaN \n",
"174184 NaN NaN NaN \n",
"174242 NaN NaN NaN \n",
"174243 NaN NaN NaN \n",
"\n",
"[73 rows x 6 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"merged_df[merged_df[\"Score E (percentile)\"].isnull()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1a7b71d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -2,7 +2,9 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import geopandas as gpd\n",
@ -10,24 +12,24 @@
"import pathlib\n",
"import os\n",
"import sys"
],
"outputs": [],
"metadata": {}
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"module_path = os.path.abspath(os.path.join(\"..\"))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)"
],
"outputs": [],
"metadata": {}
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def merge_and_simplify_file(file_name: str, usa_df: pd.DataFrame):\n",
" state_gdf = gpd.read_file(file_name)\n",
@ -100,104 +102,133 @@
" state_bucketed_df = aggregate_buckets(state_tracts, \"mean\")\n",
" compressed = breakup_multipolygons(state_bucketed_df, num_buckets)\n",
" write_to_file(compressed, file_name)"
],
"outputs": [],
"metadata": {}
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {
"id": "Ia5bqxS2LJqe"
},
"outputs": [],
"source": [
"DATA_DIR = pathlib.Path.cwd().parent / \"data\"\n",
"CENSUS_GEOJSON_DIR = DATA_DIR / \"census\" / \"geojson\"\n",
"CEJST_DATA_PATH = DATA_DIR / \"score\" / \"csv\" / \"usa.csv\"\n",
"score_df = pd.read_csv(CEJST_DATA_PATH, dtype={\"GEOID10\": \"object\"})"
],
"outputs": [],
"metadata": {
"id": "Ia5bqxS2LJqe"
}
"CEJST_DATA_PATH = DATA_DIR / \"score\" / \"csv\" / \"tiles\" / \"usa.csv\"\n",
"score_df = pd.read_csv(CEJST_DATA_PATH, dtype={\"GEOID10\": \"object\"}, low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"master_df = gpd.GeoDataFrame()"
],
"outputs": [],
"execution_count": 7,
"metadata": {
"id": "Dtf5qD50JvCw"
}
},
"outputs": [],
"source": [
"master_df = gpd.GeoDataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty GeoDataFrame\n",
"Columns: []\n",
"Index: []"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"master_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PNdw8bERJyKk"
},
"outputs": [],
"source": [
"for file_name in CENSUS_GEOJSON_DIR.rglob('*.json'):\n",
" state_gdf = gpd.read_file(file_name)\n",
" master_df = master_df.append(state_gdf)"
],
"outputs": [],
"metadata": {
"id": "PNdw8bERJyKk"
}
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"master_df = master_df.to_crs(\"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs\")"
],
"outputs": [],
"metadata": {
"id": "B5SS9y2pLwks"
}
},
"outputs": [],
"source": [
"master_df = master_df.to_crs(\"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"master_df.shape"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(220742, 13)"
]
},
"metadata": {
"tags": []
},
"execution_count": 68
}
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_C6vaR9HQeLa",
"outputId": "fab3bc7f-e716-431e-bc76-bd26289ea4a4"
}
},
"outputs": [],
"source": [
"master_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"master_df.head(2)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oMoubjqCQiw5",
"outputId": "6195ffbc-6275-40c6-bb6a-e0a6bd1e71f0"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" STATEFP10 ... geometry\n",
"0 01 ... POLYGON ((-85.17240 31.82508, -85.17334 31.824...\n",
"1 01 ... POLYGON ((-85.16283 31.81051, -85.16284 31.813...\n",
"\n",
"[2 rows x 13 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
@ -268,61 +299,61 @@
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" STATEFP10 ... geometry\n",
"0 01 ... POLYGON ((-85.17240 31.82508, -85.17334 31.824...\n",
"1 01 ... POLYGON ((-85.16283 31.81051, -85.16284 31.813...\n",
"\n",
"[2 rows x 13 columns]"
]
},
"execution_count": 69,
"metadata": {
"tags": []
},
"execution_count": 69
"output_type": "execute_result"
}
],
"source": [
"master_df.head(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "bAMmGSgzVml0"
},
"outputs": [],
"source": [
"usa_df = pd.read_csv(CEJST_DATA_PATH, dtype={\"GEOID10\": \"object\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "U7M7dExdV2Vh"
},
"outputs": [],
"source": [
"usa_merged = master_df.merge(usa_df, on=\"GEOID10\", how=\"left\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oMoubjqCQiw5",
"outputId": "6195ffbc-6275-40c6-bb6a-e0a6bd1e71f0"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_df = pd.read_csv(CEJST_DATA_PATH, dtype={\"GEOID10\": \"object\"})"
],
"outputs": [],
"metadata": {
"id": "bAMmGSgzVml0"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_merged = master_df.merge(usa_df, on=\"GEOID10\", how=\"left\")"
],
"outputs": [],
"metadata": {
"id": "U7M7dExdV2Vh"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_merged.head(2)"
],
"id": "Sr25DUkxWVhg",
"outputId": "1e804075-0f7d-4174-82d7-e21b8519c8bf"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" STATEFP10 COUNTYFP10 ... Score E (percentile) Score E (top 25th percentile)\n",
"0 01 005 ... 0.576986 False\n",
"1 01 005 ... 0.670349 False\n",
"\n",
"[2 rows x 98 columns]"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
@ -598,292 +629,296 @@
"</table>\n",
"<p>2 rows × 98 columns</p>\n",
"</div>"
],
"text/plain": [
" STATEFP10 COUNTYFP10 ... Score E (percentile) Score E (top 25th percentile)\n",
"0 01 005 ... 0.576986 False\n",
"1 01 005 ... 0.670349 False\n",
"\n",
"[2 rows x 98 columns]"
]
},
"execution_count": 72,
"metadata": {
"tags": []
},
"execution_count": 72
"output_type": "execute_result"
}
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Sr25DUkxWVhg",
"outputId": "1e804075-0f7d-4174-82d7-e21b8519c8bf"
}
"source": [
"usa_merged.head(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_merged_compressed = gpd.GeoDataFrame(usa_merged, crs=\"EPSG:4326\")"
],
"outputs": [],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ANMlAB8Qmtu8",
"outputId": "44934741-90a9-4664-fab5-2c39b348d2be"
}
},
"outputs": [],
"source": [
"usa_merged_compressed = gpd.GeoDataFrame(usa_merged, crs=\"EPSG:4326\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_merged_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_merged.geojson\", driver=\"GeoJSON\")"
],
"outputs": [],
"metadata": {
"id": "PBPD9LQctvPJ"
}
},
"outputs": [],
"source": [
"usa_merged_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_merged.geojson\", driver=\"GeoJSON\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "qAAEr1z-WZAT"
},
"outputs": [],
"source": [
"usa_simplified = usa_merged[\n",
" [\"GEOID10\", \"Score D (percentile)\", \"geometry\"]\n",
" ].reset_index(drop=True)"
],
"outputs": [],
"metadata": {
"id": "qAAEr1z-WZAT"
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "SCNUjEbzWg-o"
},
"outputs": [],
"source": [
"usa_simplified.rename(\n",
" columns={\"Score D (percentile)\": \"D_SCORE\"}, inplace=True\n",
" )"
],
"outputs": [],
"metadata": {
"id": "SCNUjEbzWg-o"
}
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_cbg_compressed = gpd.GeoDataFrame(\n",
" usa_simplified, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
" )"
],
"outputs": [],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ej70uX0AmW0J",
"outputId": "88908f5e-b62d-494f-f0ea-649089b6652a"
}
},
"outputs": [],
"source": [
"usa_cbg_compressed = gpd.GeoDataFrame(\n",
" usa_simplified, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_cbg_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_cbg_scoreD.geojson\", driver=\"GeoJSON\")"
],
"outputs": [],
"metadata": {
"id": "UE12dWmame3I"
}
},
"outputs": [],
"source": [
"usa_cbg_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_cbg_scoreD.geojson\", driver=\"GeoJSON\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_tracts = aggregate_to_tracts(usa_simplified)"
],
"outputs": [],
"metadata": {
"id": "wWFBduQQXGtM"
}
},
"outputs": [],
"source": [
"usa_tracts = aggregate_to_tracts(usa_simplified)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"source": [
"num_buckets = 10"
],
"outputs": [],
"metadata": {
"id": "L-PTnEWOpDtX"
}
},
"outputs": [],
"source": [
"num_buckets = 10"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"tracts_compressed = gpd.GeoDataFrame(\n",
" usa_tracts, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
" )"
],
"outputs": [],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kTJafXcqXC01",
"outputId": "bd197952-76b7-4f99-edef-983f20d7acfb"
}
},
"outputs": [],
"source": [
"tracts_compressed = gpd.GeoDataFrame(\n",
" usa_tracts, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"tracts_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_tracts_score.geojson\", driver=\"GeoJSON\")"
],
"outputs": [],
"metadata": {
"id": "E2Nh97IlYhCF"
}
},
"outputs": [],
"source": [
"tracts_compressed.to_file(CENSUS_GEOJSON_DIR / \"usa_tracts_score.geojson\", driver=\"GeoJSON\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_bucketed = create_buckets_from_tracts(usa_tracts)"
],
"outputs": [],
"metadata": {
"id": "557zPMWFZC8R"
}
},
"outputs": [],
"source": [
"usa_bucketed = create_buckets_from_tracts(usa_tracts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"usa_aggregated = aggregate_buckets(usa_bucketed, agg_func=\"mean\")"
],
"outputs": [],
"metadata": {
"id": "k6RRdKlsaO0a"
}
},
"outputs": [],
"source": [
"usa_aggregated = aggregate_buckets(usa_bucketed, agg_func=\"mean\")"
]
},
{
"cell_type": "code",
"execution_count": 80,
"source": [
"usa_aggregated.shape"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(10, 2)"
]
},
"metadata": {
"tags": []
},
"execution_count": 80
}
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-cm5eET2pA1Z",
"outputId": "8d5d2e80-ad62-41d5-f1b0-922345f92d62"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"compressed = breakup_multipolygons(usa_aggregated, num_buckets)"
],
"outputs": [],
"metadata": {
"id": "4ZvJra-RaZ4v"
}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"len(compressed)"
],
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"36836"
"(10, 2)"
]
},
"execution_count": 80,
"metadata": {
"tags": []
},
"execution_count": 82
"output_type": "execute_result"
}
],
"source": [
"usa_aggregated.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "4ZvJra-RaZ4v"
},
"outputs": [],
"source": [
"compressed = breakup_multipolygons(usa_aggregated, num_buckets)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RDS7Q2WAb4Rx",
"outputId": "dcd28a31-083d-482e-b000-b4cd1046d4c2"
}
},
"outputs": [
{
"data": {
"text/plain": [
"36836"
]
},
"execution_count": 82,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"len(compressed)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "VXTv8UuXb-qU"
},
"outputs": [],
"source": [
"gdf_compressed = gpd.GeoDataFrame(\n",
" compressed, columns=[\"D_SCORE\", \"geometry\"], crs=\"EPSG:4326\"\n",
" )"
],
"outputs": [],
"metadata": {
"id": "VXTv8UuXb-qU"
}
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"gdf_compressed.shape"
],
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(36836, 2)"
]
},
"metadata": {
"tags": []
},
"execution_count": 84
}
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5v7TyB_rcRgT",
"outputId": "997625cc-c57a-4335-9b27-a08e4f8ad117"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(36836, 2)"
]
},
"execution_count": 84,
"metadata": {
"tags": []
},
"output_type": "execute_result"
}
],
"source": [
"gdf_compressed.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"gdf_compressed.to_file(CENSUS_GEOJSON_DIR / f\"usa_low.geojson\", driver=\"GeoJSON\")"
],
"outputs": [],
"metadata": {
"id": "5eAnPL8McJpn"
}
},
"outputs": [],
"source": [
"gdf_compressed.to_file(CENSUS_GEOJSON_DIR / f\"usa_low.geojson\", driver=\"GeoJSON\")"
]
}
],
"metadata": {
@ -892,9 +927,9 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.9.5 ('.venv': venv)",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python395jvsc74a57bd0935cbd69f49565f763db1e6a6adc70b468d078eb4d5856e64428cea33b57a041"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@ -906,9 +941,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
}

View file

@ -153,7 +153,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.8.2"
}
},
"nbformat": 4,

View file

@ -1,20 +1,24 @@
appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7"
argon2-cffi==20.1.0; python_version >= "3.6"
async-generator==1.10; python_full_version >= "3.6.1" and python_version >= "3.7"
attrs==21.2.0; python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.5"
attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
backcall==0.2.0; python_version >= "3.7"
bleach==3.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
censusdata==1.13; python_version >= "2.7"
certifi==2021.5.30; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7"
certifi==2021.5.30; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
cffi==1.14.6; implementation_name == "pypy" and python_version >= "3.6"
chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7"
click-plugins==1.1.1; python_version >= "3.6"
click==8.0.1; python_version >= "3.6"
colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and sys_platform == "win32" and platform_system == "Windows" or sys_platform == "win32" and python_version >= "3.7" and python_full_version >= "3.5.0" and platform_system == "Windows"
cligj==0.7.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.6"
colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" and sys_platform == "win32" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0" and sys_platform == "win32"
debugpy==1.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
decorator==5.0.9; python_version >= "3.7"
defusedxml==0.7.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
dynaconf==3.1.4
entrypoints==0.3; python_version >= "3.7"
fiona==1.8.20; python_version >= "3.6"
geopandas==0.9.0; python_version >= "3.6"
idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7"
importlib-metadata==3.10.1; python_version < "3.8" and python_version >= "3.7"
ipykernel==6.0.1; python_version >= "3.7"
@ -39,6 +43,7 @@ lxml==4.6.3; python_version >= "2.7" and python_full_version < "3.0.0" or python
markupsafe==2.0.1; python_version >= "3.7"
matplotlib-inline==0.1.2; platform_system == "Darwin" and python_version >= "3.7"
mistune==0.8.4; python_version >= "3.7"
munch==2.5.0; python_version >= "3.6"
nbclient==0.5.3; python_full_version >= "3.6.1" and python_version >= "3.7"
nbconvert==6.1.0; python_version >= "3.7"
nbformat==5.1.3; python_full_version >= "3.6.1" and python_version >= "3.7"
@ -58,6 +63,7 @@ py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implem
pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pygments==2.9.0; python_version >= "3.7"
pyparsing==2.4.7; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
pyproj==3.1.0; python_version >= "3.7"
pyrsistent==0.18.0; python_version >= "3.6"
python-dateutil==2.8.1; python_full_version >= "3.7.1" and python_version >= "3.7"
pytz==2021.1; python_full_version >= "3.7.1" and python_version >= "2.7"
@ -69,7 +75,8 @@ qtconsole==5.1.1; python_version >= "3.6"
qtpy==1.9.0; python_version >= "3.6"
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
send2trash==1.7.1; python_version >= "3.6"
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.7" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6") and (python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.5")
shapely==1.7.1; python_version >= "3.6"
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "2.7" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6") and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7") and (python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.5")
terminado==0.10.1; python_version >= "3.6"
testpath==0.5.0; python_version >= "3.7"
tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.7"