diff --git a/.gitignore b/.gitignore index a6379f0b..ffb1f2ae 100644 --- a/.gitignore +++ b/.gitignore @@ -145,3 +145,6 @@ node_modules # serverless .serverless + +# pyenv +.python-version diff --git a/score/README.md b/score/README.md index 70b9b821..97df05ff 100644 --- a/score/README.md +++ b/score/README.md @@ -4,13 +4,16 @@ We use Docker to install the necessary libraries in a container that can be run in any operating system. -To build the docker container the first time, make sure you have Docker running on your machine and that you're in the root directory of the repository. Then run `docker-compose build`. +To build the Docker container the first time, make sure you're in the root directory of the repository and run `docker-compose build`. -You can then run the following commands. You probably want to run these in a new terminal tab or window, because they may take a long time to run, especially the last one that generates the census data. +After that, you can run the following commands: -- Get help: `docker run --rm -it j40_score /bin/sh -c "python3 application.py" python3 application.py --help"` +- Get help: `docker run --rm -it j40_score /bin/sh -c "python3 application.py --help"` +- Clean up the census data directories: `docker run --rm -it j40_score /bin/sh -c "python3 application.py census-cleanup"` - Clean up the data directories: `docker run --rm -it j40_score /bin/sh -c "python3 application.py data-cleanup"` -- Generate census data (this may take 2-3 hours): `docker run --rm -it j40_score /bin/sh -c "python3 application.py census-data-download"` +- Generate census data: `docker run --rm -it j40_score /bin/sh -c "python3 application.py census-data-download"` +- Run all ETL processes: `docker run --rm -it j40_score /bin/sh -c "python3 application.py etl-run"` +- Generate Score: `docker run --rm -it j40_score /bin/sh -c "python3 application.py score-run"` ## Log visualization @@ -24,6 +27,8 @@ If you want to visualize logs while running a command, the following temporary w You can run the Python code locally without Docker to develop, using Poetry. However, to generate the census data you will need the [GDAL library](https://github.com/OSGeo/gdal) installed locally. Also, to generate tiles for a local map, you will need [Mapbox tippecanoe](https://github.com/mapbox/tippecanoe). + +Note: If you are using Windows, please follow [these instructions](https://stackoverflow.com/questions/56958421/pip-install-geopandas-on-windows) to install Geopandas locally. If you want to install Tippecanoe, [follow these instructions](https://github.com/GISupportICRC/ArcGIS2Mapbox#installing-tippecanoe-on-windows). + - Start a terminal - Make sure you have Python 3.9 installed: `python -V` or `python3 -V` - We use [Poetry](https://python-poetry.org/) for managing dependencies and building the application. Please follow the instructions on their site to download.
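For reference, a minimal end-to-end sketch of a full local scoring run using the commands documented above (assuming Docker is running and the image builds under the `j40_score` tag used in this README):

```sh
# Build the container image once, from the repository root.
docker-compose build

# Download and prepare the census data (long-running).
docker run --rm -it j40_score /bin/sh -c "python3 application.py census-data-download"

# Run every ETL process, then generate the score.
docker run --rm -it j40_score /bin/sh -c "python3 application.py etl-run"
docker run --rm -it j40_score /bin/sh -c "python3 application.py score-run"
```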
diff --git a/score/application.py b/score/application.py index 2d5f71a6..89ee45bf 100644 --- a/score/application.py +++ b/score/application.py @@ -1,26 +1,31 @@ -from config import settings import click -from pathlib import Path -import sys +from config import settings from etl.sources.census.etl_utils import reset_data_directories as census_reset -from utils import remove_files_from_dir, remove_all_from_dir, get_module_logger +from utils import ( + get_module_logger, + data_folder_cleanup, + score_folder_cleanup, + temp_folder_cleanup, +) from etl.sources.census.etl import download_census_csvs +from etl.runner import etl_runner, score_generate - -settings.APP_ROOT = Path.cwd() logger = get_module_logger(__name__) @click.group() def cli(): + """Defines a click group for the commands below""" + pass @cli.command( - help="Clean up all data folders", + help="Clean up all census data folders", ) -def data_cleanup(): +def census_cleanup(): + """CLI command to clean up the census data folder""" + data_path = settings.APP_ROOT / "data" @@ -28,32 +33,59 @@ def data_cleanup(): logger.info(f"Initializing all census data") census_reset(data_path) - # dataset directory - logger.info(f"Initializing all dataset directoriees") - remove_all_from_dir(data_path / "dataset") + logger.info("Cleaned up all census data files") - # score directory - logger.info(f"Initializing all score data") - remove_files_from_dir(data_path / "score" / "csv", ".csv") - remove_files_from_dir(data_path / "score" / "geojson", ".json") - # cleanup tmp dir - logger.info(f"Initializing all temp directoriees") - remove_all_from_dir(data_path / "tmp") +@cli.command( + help="Clean up all data folders", +) +def data_cleanup(): + """CLI command to clean up all the data folders""" - logger.info("Cleaned up all data files") + data_folder_cleanup() + score_folder_cleanup() + temp_folder_cleanup() + + logger.info("Cleaned up all data folders") @cli.command( help="Census data download", ) def census_data_download(): + """CLI command to download all census shape files from the Census FTP and extract the geojson + to generate national and by state Census Block Group CSVs""" + logger.info("Downloading census data") data_path = settings.APP_ROOT / "data" download_census_csvs(data_path) logger.info("Completed downloading census data") - exit() + + +@cli.command( + help="Run all ETL processes or a specific one", +) +@click.option("-d", "--dataset", required=False, type=str) +def etl_run(dataset: str): + """Run a specific ETL process or all of them + + Args: + dataset (str): Name of the ETL module to be run (optional) + + Returns: + None + """ + + etl_runner(dataset) + + +@cli.command( + help="Generate Score", +) +def score_run(): + """CLI command to generate the score""" + score_generate() if __name__ == "__main__": diff --git a/score/config.py b/score/config.py index f3fac561..0b82bffd 100644 --- a/score/config.py +++ b/score/config.py @@ -1,4 +1,5 @@ from dynaconf import Dynaconf +from pathlib import Path settings = Dynaconf( envvar_prefix="DYNACONF", @@ -6,6 +7,9 @@ settings = Dynaconf( environments=True, ) +# set root dir +settings.APP_ROOT = Path.cwd() + # To set an environment use: # Linux/OSX: export ENV_FOR_DYNACONF=staging # Windows: set ENV_FOR_DYNACONF=staging diff --git a/score/etl/__init__.oy b/score/etl/__init__.py similarity index 100% rename from score/etl/__init__.oy rename to score/etl/__init__.py diff --git a/score/etl/base.py b/score/etl/base.py new file mode 100644 index 00000000..51c90ffd --- /dev/null +++ b/score/etl/base.py @@
-0,0 +1,63 @@ +from pathlib import Path +import pathlib + +from config import settings +from utils import unzip_file_from_url, remove_all_from_dir + + +class ExtractTransformLoad(object): + """ + A class used to instantiate an ETL object to retrieve and process data from + datasets. + + Attributes: + DATA_PATH (pathlib.Path): Local path where all data will be stored + TMP_PATH (pathlib.Path): Local path where temporary data will be stored + GEOID_FIELD_NAME (str): The common column name for a Census Block Group identifier + GEOID_TRACT_FIELD_NAME (str): The common column name for a Census Tract identifier + """ + + DATA_PATH: Path = settings.APP_ROOT / "data" + TMP_PATH: Path = DATA_PATH / "tmp" + GEOID_FIELD_NAME: str = "GEOID10" + GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT" + + def get_yaml_config(self) -> None: + """Reads the YAML configuration file for the dataset and stores + the properties in the instance (upcoming feature)""" + + pass + + def check_ttl(self) -> None: + """Checks if the ETL process can be run based on the TTL value in the + YAML config (upcoming feature)""" + + pass + + def extract( + self, source_url: str = None, extract_path: Path = None + ) -> None: + """Extracts the data from a remote source. By default, it downloads the file + from the source URL, unzips it, and stores it at the extract_path.""" + + # this can be accessed via super().extract() + if source_url and extract_path: + unzip_file_from_url(source_url, self.TMP_PATH, extract_path) + + def transform(self) -> None: + """Transforms the extracted data into a format that can be consumed by the + score generator""" + + raise NotImplementedError + + def load(self) -> None: + """Saves the transformed data in the specified local data folder or remote AWS S3 + bucket""" + + raise NotImplementedError + + def cleanup(self) -> None: + """Clears out any files stored in the TMP folder""" + + remove_all_from_dir(self.TMP_PATH) diff --git a/score/etl/runner.py b/score/etl/runner.py new file mode 100644 index 00000000..3da284f4 --- /dev/null +++ b/score/etl/runner.py @@ -0,0 +1,99 @@ +import importlib + +from etl.score.etl import ScoreETL + + +def etl_runner(dataset_to_run: str = None) -> None: + """Runs all ETL processes or a specific one + + Args: + dataset_to_run (str): Run a specific ETL process.
If missing, runs all processes (optional) + + Returns: + None + """ + + # this list comes from YAMLs + dataset_list = [ + { + "name": "census_acs", + "module_dir": "census_acs", + "class_name": "CensusACSETL", + }, + {"name": "ejscreen", "module_dir": "ejscreen", "class_name": "EJScreenETL"}, + { + "name": "housing_and_transportation", + "module_dir": "housing_and_transportation", + "class_name": "HousingTransportationETL", + }, + { + "name": "hud_housing", + "module_dir": "hud_housing", + "class_name": "HudHousingETL", + }, + { + "name": "calenviroscreen", + "module_dir": "calenviroscreen", + "class_name": "CalEnviroScreenETL", + }, + {"name": "hud_recap", "module_dir": "hud_recap", "class_name": "HudRecapETL"}, + ] + + if dataset_to_run: + dataset_element = next( + (item for item in dataset_list if item["name"] == dataset_to_run), None + ) + if not dataset_element: + raise ValueError("Invalid dataset name") + else: + # reset the list to just the dataset + dataset_list = [dataset_element] + + # Run the ETLs for the dataset_list + for dataset in dataset_list: + etl_module = importlib.import_module(f"etl.sources.{dataset['module_dir']}.etl") + etl_class = getattr(etl_module, dataset["class_name"]) + etl_instance = etl_class() + + # run extract + etl_instance.extract() + + # run transform + etl_instance.transform() + + # run load + etl_instance.load() + + # cleanup + etl_instance.cleanup() + + # update the front-end JSON/CSV list of data sources + pass + + +def score_generate() -> None: + """Generates the score and saves it in the local data directory + + Args: + None + + Returns: + None + """ + score = ScoreETL() + + # run extract + score.extract() + + # run transform + score.transform() + + # run load + score.load() + + +def _find_dataset_index(dataset_list, key, value): + for i, element in enumerate(dataset_list): + if element[key] == value: + return i + return -1 diff --git a/score/etl/score/__init__.py b/score/etl/score/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/etl/score/etl.py b/score/etl/score/etl.py new file mode 100644 index 00000000..a956888a --- /dev/null +++ b/score/etl/score/etl.py @@ -0,0 +1,389 @@ +import collections +import functools +import pandas as pd + +from etl.base import ExtractTransformLoad +from utils import get_module_logger +from etl.sources.census.etl_utils import get_state_fips_codes + +logger = get_module_logger(__name__) + + +class ScoreETL(ExtractTransformLoad): + def __init__(self): + # Define some global parameters + self.BUCKET_SOCIOECONOMIC = "Socioeconomic Factors" + self.BUCKET_SENSITIVE = "Sensitive populations" + self.BUCKET_ENVIRONMENTAL = "Environmental effects" + self.BUCKET_EXPOSURES = "Exposures" + self.BUCKETS = [ + self.BUCKET_SOCIOECONOMIC, + self.BUCKET_SENSITIVE, + self.BUCKET_ENVIRONMENTAL, + self.BUCKET_EXPOSURES, + ] + + # A few specific field names + # TODO: clean this up, I name some fields but not others. + self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)" + self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)" + self.HOUSING_BURDEN_FIELD_NAME = "Housing burden (percent)" + self.POVERTY_FIELD_NAME = "Poverty (Less than 200% of federal poverty line)" + self.HIGH_SCHOOL_FIELD_NAME = ( + "Percent individuals age 25 or over with less than high school degree" + ) + + # There's another aggregation level (a second level of "buckets").
+ self.AGGREGATION_POLLUTION = "Pollution Burden" + self.AGGREGATION_POPULATION = "Population Characteristics" + + self.PERCENTILE_FIELD_SUFFIX = " (percentile)" + self.MIN_MAX_FIELD_SUFFIX = " (min-max normalized)" + + self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv" + + # dataframes + self.df: pd.DataFrame + self.ejscreen_df: pd.DataFrame + self.census_df: pd.DataFrame + self.housing_and_transportation_df: pd.DataFrame + self.hud_housing_df: pd.DataFrame + + def extract(self) -> None: + # Load EJScreen CSV + ejscreen_csv = self.DATA_PATH / "dataset" / "ejscreen_2020" / "usa.csv" + self.ejscreen_df = pd.read_csv( + ejscreen_csv, dtype={"ID": "string"}, low_memory=False + ) + self.ejscreen_df.rename(columns={"ID": self.GEOID_FIELD_NAME}, inplace=True) + + # Load census data + census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv" + self.census_df = pd.read_csv( + census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False + ) + + # Load housing and transportation data + housing_and_transportation_index_csv = ( + self.DATA_PATH / "dataset" / "housing_and_transportation_index" / "usa.csv" + ) + self.housing_and_transportation_df = pd.read_csv( + housing_and_transportation_index_csv, + dtype={self.GEOID_FIELD_NAME: "string"}, + low_memory=False, + ) + + # Load HUD housing data + hud_housing_csv = self.DATA_PATH / "dataset" / "hud_housing" / "usa.csv" + self.hud_housing_df = pd.read_csv( + hud_housing_csv, + dtype={self.GEOID_TRACT_FIELD_NAME: "string"}, + low_memory=False, + ) + + def transform(self) -> None: + logger.info(f"Transforming Score Data") + + # Join all the data sources that use census block groups + census_block_group_dfs = [ + self.ejscreen_df, + self.census_df, + self.housing_and_transportation_df, + ] + + census_block_group_df = functools.reduce( + lambda left, right: pd.merge( + left=left, right=right, on=self.GEOID_FIELD_NAME, how="outer" + ), + census_block_group_dfs, + ) + + # Sanity check the join. + if len(census_block_group_df[self.GEOID_FIELD_NAME].str.len().unique()) != 1: + raise ValueError( + f"One of the input CSVs uses {self.GEOID_FIELD_NAME} with a different length." + ) + + # Join all the data sources that use census tracts + # TODO: when there's more than one data source using census tract, reduce/merge them here. + census_tract_df = self.hud_housing_df + + # Calculate the tract for the CBG data. + census_block_group_df[self.GEOID_TRACT_FIELD_NAME] = census_block_group_df[ + self.GEOID_FIELD_NAME + ].str[0:11] + + self.df = census_block_group_df.merge( + census_tract_df, on=self.GEOID_TRACT_FIELD_NAME + ) + + if len(census_block_group_df) > 220333: + raise ValueError("Too many rows in the join.") + + # Define a named tuple that will be used for each data set input. + DataSet = collections.namedtuple( + typename="DataSet", field_names=["input_field", "renamed_field", "bucket"] + ) + + data_sets = [ + # The following data sets have `bucket=None`, because it's not used in the bucket-based score ("Score C"). + DataSet( + input_field=self.GEOID_FIELD_NAME, + # Use the name `GEOID10` to enable geoplatform.gov's workflow.
+ renamed_field=self.GEOID_FIELD_NAME, + bucket=None, + ), + DataSet( + input_field=self.HOUSING_BURDEN_FIELD_NAME, + renamed_field=self.HOUSING_BURDEN_FIELD_NAME, + bucket=None, + ), + DataSet( + input_field="ACSTOTPOP", renamed_field="Total population", bucket=None + ), + # The following data sets have buckets, because they're used in the score + DataSet( + input_field="CANCER", + renamed_field="Air toxics cancer risk", + bucket=self.BUCKET_EXPOSURES, + ), + DataSet( + input_field="RESP", + renamed_field="Respiratory hazard index", + bucket=self.BUCKET_EXPOSURES, + ), + DataSet( + input_field="DSLPM", + renamed_field="Diesel particulate matter", + bucket=self.BUCKET_EXPOSURES, + ), + DataSet( + input_field="PM25", + renamed_field="Particulate matter (PM2.5)", + bucket=self.BUCKET_EXPOSURES, + ), + DataSet( + input_field="OZONE", renamed_field="Ozone", bucket=self.BUCKET_EXPOSURES + ), + DataSet( + input_field="PTRAF", + renamed_field="Traffic proximity and volume", + bucket=self.BUCKET_EXPOSURES, + ), + DataSet( + input_field="PRMP", + renamed_field="Proximity to RMP sites", + bucket=self.BUCKET_ENVIRONMENTAL, + ), + DataSet( + input_field="PTSDF", + renamed_field="Proximity to TSDF sites", + bucket=self.BUCKET_ENVIRONMENTAL, + ), + DataSet( + input_field="PNPL", + renamed_field="Proximity to NPL sites", + bucket=self.BUCKET_ENVIRONMENTAL, + ), + DataSet( + input_field="PWDIS", + renamed_field="Wastewater discharge", + bucket=self.BUCKET_ENVIRONMENTAL, + ), + DataSet( + input_field="PRE1960PCT", + renamed_field="Percent pre-1960s housing (lead paint indicator)", + bucket=self.BUCKET_ENVIRONMENTAL, + ), + DataSet( + input_field="UNDER5PCT", + renamed_field="Individuals under 5 years old", + bucket=self.BUCKET_SENSITIVE, + ), + DataSet( + input_field="OVER64PCT", + renamed_field="Individuals over 64 years old", + bucket=self.BUCKET_SENSITIVE, + ), + DataSet( + input_field=self.LINGUISTIC_ISOLATION_FIELD_NAME, + renamed_field=self.LINGUISTIC_ISOLATION_FIELD_NAME, + bucket=self.BUCKET_SENSITIVE, + ), + DataSet( + input_field="LINGISOPCT", + renamed_field="Percent of households in linguistic isolation", + bucket=self.BUCKET_SOCIOECONOMIC, + ), + DataSet( + input_field="LOWINCPCT", + renamed_field=self.POVERTY_FIELD_NAME, + bucket=self.BUCKET_SOCIOECONOMIC, + ), + DataSet( + input_field="LESSHSPCT", + renamed_field=self.HIGH_SCHOOL_FIELD_NAME, + bucket=self.BUCKET_SOCIOECONOMIC, + ), + DataSet( + input_field=self.UNEMPLOYED_FIELD_NAME, + renamed_field=self.UNEMPLOYED_FIELD_NAME, + bucket=self.BUCKET_SOCIOECONOMIC, + ), + DataSet( + input_field="ht_ami", + renamed_field="Housing + Transportation Costs % Income for the Regional Typical Household", + bucket=self.BUCKET_SOCIOECONOMIC, + ), + ] + + # Rename columns: + renaming_dict = { + data_set.input_field: data_set.renamed_field for data_set in data_sets + } + + self.df.rename( + columns=renaming_dict, + inplace=True, + errors="raise", + ) + + columns_to_keep = [data_set.renamed_field for data_set in data_sets] + self.df = self.df[columns_to_keep] + + # Convert all columns to numeric. + for data_set in data_sets: + # Skip GEOID_FIELD_NAME, because it's a string. 
+ if data_set.renamed_field == self.GEOID_FIELD_NAME: + continue + self.df[f"{data_set.renamed_field}"] = pd.to_numeric( + self.df[data_set.renamed_field] + ) + + # calculate percentiles + for data_set in data_sets: + self.df[ + f"{data_set.renamed_field}{self.PERCENTILE_FIELD_SUFFIX}" + ] = self.df[data_set.renamed_field].rank(pct=True) + + # Math: + # ( + # Observed value + # - minimum of all values + # ) + # divided by + # ( + # Maximum of all values + # - minimum of all values + # ) + for data_set in data_sets: + # Skip GEOID_FIELD_NAME, because it's a string. + if data_set.renamed_field == self.GEOID_FIELD_NAME: + continue + + min_value = self.df[data_set.renamed_field].min(skipna=True) + + max_value = self.df[data_set.renamed_field].max(skipna=True) + + logger.info( + f"For data set {data_set.renamed_field}, the min value is {min_value} and the max value is {max_value}." + ) + + self.df[f"{data_set.renamed_field}{self.MIN_MAX_FIELD_SUFFIX}"] = ( + self.df[data_set.renamed_field] - min_value + ) / (max_value - min_value) + + # Graph distributions and correlations. + min_max_fields = [ + f"{data_set.renamed_field}{self.MIN_MAX_FIELD_SUFFIX}" + for data_set in data_sets + if data_set.renamed_field != self.GEOID_FIELD_NAME + ] + + # Calculate score "A" and score "B" + self.df["Score A"] = self.df[ + [ + "Poverty (Less than 200% of federal poverty line) (percentile)", + "Percent individuals age 25 or over with less than high school degree (percentile)", + ] + ].mean(axis=1) + self.df["Score B"] = ( + self.df["Poverty (Less than 200% of federal poverty line) (percentile)"] + * self.df[ + "Percent individuals age 25 or over with less than high school degree (percentile)" + ] + ) + + # Calculate "CalEnviroScreen for the US" score + # Average all the percentile values in each bucket into a single score for each of the four buckets. + for bucket in self.BUCKETS: + fields_in_bucket = [ + f"{data_set.renamed_field}{self.PERCENTILE_FIELD_SUFFIX}" + for data_set in data_sets + if data_set.bucket == bucket + ] + self.df[f"{bucket}"] = self.df[fields_in_bucket].mean(axis=1) + + # Combine the score from the two Exposures and Environmental Effects buckets into a single score called "Pollution Burden". The math for this score is: (1.0 * Exposures Score + 0.5 * Environment Effects score) / 1.5. + self.df[self.AGGREGATION_POLLUTION] = ( + 1.0 * self.df[f"{self.BUCKET_EXPOSURES}"] + + 0.5 * self.df[f"{self.BUCKET_ENVIRONMENTAL}"] + ) / 1.5 + + # Average the score from the two Sensitive populations and Socioeconomic factors buckets into a single score called "Population Characteristics". + self.df[self.AGGREGATION_POPULATION] = self.df[ + [f"{self.BUCKET_SENSITIVE}", f"{self.BUCKET_SOCIOECONOMIC}"] + ].mean(axis=1) + + # Multiply the "Pollution Burden" score and the "Population Characteristics" together to produce the cumulative impact score. 
+ self.df["Score C"] = ( + self.df[self.AGGREGATION_POLLUTION] * self.df[self.AGGREGATION_POPULATION] + ) + + if len(census_block_group_df) > 220333: + raise ValueError("Too many rows in the join.") + + fields_to_use_in_score = [ + self.UNEMPLOYED_FIELD_NAME, + self.LINGUISTIC_ISOLATION_FIELD_NAME, + self.HOUSING_BURDEN_FIELD_NAME, + self.POVERTY_FIELD_NAME, + self.HIGH_SCHOOL_FIELD_NAME, + ] + + fields_min_max = [ + f"{field}{self.MIN_MAX_FIELD_SUFFIX}" for field in fields_to_use_in_score + ] + fields_percentile = [ + f"{field}{self.PERCENTILE_FIELD_SUFFIX}" for field in fields_to_use_in_score + ] + + # Calculate "Score D", which uses min-max normalization + # and calculate "Score E", which uses percentile normalization for the same fields + self.df["Score D"] = self.df[fields_min_max].mean(axis=1) + self.df["Score E"] = self.df[fields_percentile].mean(axis=1) + + # Calculate correlations + self.df[fields_min_max].corr() + + # Create percentiles for the scores + for score_field in ["Score A", "Score B", "Score C", "Score D", "Score E"]: + self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] = self.df[ + score_field + ].rank(pct=True) + self.df[f"{score_field} (top 25th percentile)"] = ( + self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] >= 0.75 + ) + + def load(self) -> None: + logger.info(f"Saving Score CSVs") + + # write nationwide csv + self.df.to_csv(self.SCORE_CSV_PATH / f"usa.csv", index=False) + + # write per state csvs + for states_fips in get_state_fips_codes(self.DATA_PATH): + logger.info(f"Generating data{states_fips} csv") + df1 = self.df[self.df["GEOID10"].str[:2] == states_fips] + # we need to name the file data01.csv for ogr2ogr csv merge to work + df1.to_csv(self.SCORE_CSV_PATH / f"data{states_fips}.csv", index=False) diff --git a/score/etl/sources/calenviroscreen/__init__.py b/score/etl/sources/calenviroscreen/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/etl/sources/calenviroscreen/etl.py b/score/etl/sources/calenviroscreen/etl.py new file mode 100644 index 00000000..6349deb6 --- /dev/null +++ b/score/etl/sources/calenviroscreen/etl.py @@ -0,0 +1,69 @@ +import pandas as pd + +from etl.base import ExtractTransformLoad +from utils import get_module_logger + +logger = get_module_logger(__name__) + + +class CalEnviroScreenETL(ExtractTransformLoad): + def __init__(self): + self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/CalEnviroScreen/CalEnviroScreen_4.0_2021.zip" + self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv" + self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4" + + # Defining some variable names + self.CALENVIROSCREEN_SCORE_FIELD_NAME = "calenviroscreen_score" + self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = "calenviroscreen_percentile" + self.CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = ( + "calenviroscreen_priority_community" + ) + + # Choosing constants. + # None of these numbers are final, but just for the purposes of comparison.
+ self.CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD = 75 + + self.df: pd.DataFrame + + def extract(self) -> None: + logger.info(f"Downloading CalEnviroScreen Data") + super().extract( + self.CALENVIROSCREEN_FTP_URL, + self.TMP_PATH, + ) + + def transform(self) -> None: + logger.info(f"Transforming CalEnviroScreen Data") + + # Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically: + # https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip + # Load comparison index (CalEnviroScreen 4) + self.df = pd.read_csv( + self.CALENVIROSCREEN_CSV, dtype={"Census Tract": "string"} + ) + + self.df.rename( + columns={ + "Census Tract": self.GEOID_TRACT_FIELD_NAME, + "DRAFT CES 4.0 Score": self.CALENVIROSCREEN_SCORE_FIELD_NAME, + "DRAFT CES 4.0 Percentile": self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME, + }, + inplace=True, + ) + + # Add a leading "0" to the Census Tract to match our format in other data frames. + self.df[self.GEOID_TRACT_FIELD_NAME] = ( + "0" + self.df[self.GEOID_TRACT_FIELD_NAME] + ) + + # Calculate the top K% of prioritized communities + self.df[self.CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME] = ( + self.df[self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME] + >= self.CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD + ) + + def load(self) -> None: + logger.info(f"Saving CalEnviroScreen CSV") + # write nationwide csv + self.CSV_PATH.mkdir(parents=True, exist_ok=True) + self.df.to_csv(self.CSV_PATH / f"data06.csv", index=False) diff --git a/score/etl/sources/census/etl.py b/score/etl/sources/census/etl.py index 888cc76e..c652700e 100644 --- a/score/etl/sources/census/etl.py +++ b/score/etl/sources/census/etl.py @@ -10,84 +10,102 @@ logger = get_module_logger(__name__) def download_census_csvs(data_path: Path) -> None: + """Download all census shape files from the Census FTP and extract the geojson + to generate national and by state Census Block Group CSVs + + Args: + data_path (pathlib.Path): Name of the directory where the files and directories will + be created + + Returns: + None + """ + # the fips_states_2010.csv is generated from data here # https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html state_fips_codes = get_state_fips_codes(data_path) + geojson_dir_path = data_path / "census" / "geojson" + for fips in state_fips_codes: # check if file exists - shp_file_path = data_path / "census" / "shp" / fips / f"tl_2010_{fips}_bg10.shp" + shp_file_path = ( + data_path / "census" / "shp" / fips / f"tl_2010_{fips}_bg10.shp" + ) + logger.info(f"Checking if {fips} file exists") if not os.path.isfile(shp_file_path): - logger.info(f"Downloading {fips}") - + logger.info(f"Downloading and extracting {fips} shape file") # 2020 tiger data is here: https://www2.census.gov/geo/tiger/TIGER2020/BG/ # But using 2010 for now cbg_state_url = f"https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{fips}_bg10.zip" unzip_file_from_url( - cbg_state_url, data_path / "tmp", data_path / "census" / "shp" / fips + cbg_state_url, + data_path / "tmp", + data_path / "census" / "shp" / fips, ) - geojson_dir_path = data_path / "census" / "geojson" + cmd = ( + "ogr2ogr -f GeoJSON data/census/geojson/" + + fips + + ".json data/census/shp/" + + fips + + "/tl_2010_" + + fips + + "_bg10.shp" + ) + os.system(cmd) - cmd = ( - "ogr2ogr -f GeoJSON data/census/geojson/" - + fips - + ".json data/census/shp/" - + fips - + "/tl_2010_" - + fips - + "_bg10.shp" - ) - os.system(cmd) + # generate CBG CSV table for pandas + ## load in 
memory + cbg_national = [] # in-memory global list + cbg_per_state: dict = {} # in-memory dict per state + for file in os.listdir(geojson_dir_path): + if file.endswith(".json"): + logger.info(f"Ingesting geoid10 for file {file}") + with open(geojson_dir_path / file) as f: + geojson = json.load(f) + for feature in geojson["features"]: + geoid10 = feature["properties"]["GEOID10"] + cbg_national.append(str(geoid10)) + geoid10_state_id = geoid10[:2] + if not cbg_per_state.get(geoid10_state_id): + cbg_per_state[geoid10_state_id] = [] + cbg_per_state[geoid10_state_id].append(geoid10) - # generate CBG CSV table for pandas - ## load in memory - cbg_national = [] # in-memory global list - cbg_per_state: dict = {} # in-memory dict per state - for file in os.listdir(geojson_dir_path): - if file.endswith(".json"): - logger.info(f"Ingesting geoid10 for file {file}") - with open(geojson_dir_path / file) as f: - geojson = json.load(f) - for feature in geojson["features"]: - geoid10 = feature["properties"]["GEOID10"] - cbg_national.append(str(geoid10)) - geoid10_state_id = geoid10[:2] - if not cbg_per_state.get(geoid10_state_id): - cbg_per_state[geoid10_state_id] = [] - cbg_per_state[geoid10_state_id].append(geoid10) - - csv_dir_path = data_path / "census" / "csv" - ## write to individual state csv - for state_id in cbg_per_state: - geoid10_list = cbg_per_state[state_id] - with open( - csv_dir_path / f"{state_id}.csv", mode="w", newline="" - ) as cbg_csv_file: - cbg_csv_file_writer = csv.writer( - cbg_csv_file, - delimiter=",", - quotechar='"', - quoting=csv.QUOTE_MINIMAL, - ) - - for geoid10 in geoid10_list: - cbg_csv_file_writer.writerow( - [ - geoid10, - ] - ) - - ## write US csv - with open(csv_dir_path / "us.csv", mode="w", newline="") as cbg_csv_file: + csv_dir_path = data_path / "census" / "csv" + ## write to individual state csv + for state_id in cbg_per_state: + geoid10_list = cbg_per_state[state_id] + with open( + csv_dir_path / f"{state_id}.csv", mode="w", newline="" + ) as cbg_csv_file: cbg_csv_file_writer = csv.writer( - cbg_csv_file, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL + cbg_csv_file, + delimiter=",", + quotechar='"', + quoting=csv.QUOTE_MINIMAL, ) - for geoid10 in cbg_national: + + for geoid10 in geoid10_list: cbg_csv_file_writer.writerow( [ geoid10, ] ) - logger.info("Census block groups downloading complete") + ## write US csv + with open(csv_dir_path / "us.csv", mode="w", newline="") as cbg_csv_file: + cbg_csv_file_writer = csv.writer( + cbg_csv_file, + delimiter=",", + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + ) + for geoid10 in cbg_national: + cbg_csv_file_writer.writerow( + [ + geoid10, + ] + ) + + logger.info("Census block groups downloading complete") diff --git a/score/etl/sources/census_acs/__init__.py b/score/etl/sources/census_acs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/etl/sources/census_acs/etl.py b/score/etl/sources/census_acs/etl.py new file mode 100644 index 00000000..a144c169 --- /dev/null +++ b/score/etl/sources/census_acs/etl.py @@ -0,0 +1,98 @@ +import pandas as pd +import censusdata + +from etl.base import ExtractTransformLoad +from etl.sources.census.etl_utils import get_state_fips_codes +from utils import get_module_logger + +logger = get_module_logger(__name__) + + +class CensusACSETL(ExtractTransformLoad): + def __init__(self): + self.ACS_YEAR = 2019 + self.OUTPUT_PATH = self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}" + self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)" + 
self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)" + self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = "Linguistic isolation (total)" + self.LINGUISTIC_ISOLATION_FIELDS = [ + "C16002_001E", + "C16002_004E", + "C16002_007E", + "C16002_010E", + "C16002_013E", + ] + self.df: pd.DataFrame + + def _fips_from_censusdata_censusgeo(self, censusgeo: censusdata.censusgeo) -> str: + """Create a FIPS code from the proprietary censusgeo index.""" + fips = "".join([value for (key, value) in censusgeo.params()]) + return fips + + def extract(self) -> None: + dfs = [] + for fips in get_state_fips_codes(self.DATA_PATH): + logger.info(f"Downloading data for state/territory with FIPS code {fips}") + + dfs.append( + censusdata.download( + src="acs5", + year=self.ACS_YEAR, + geo=censusdata.censusgeo( + [("state", fips), ("county", "*"), ("block group", "*")] + ), + var=[ + # Employment fields + "B23025_005E", + "B23025_003E", + ] + + self.LINGUISTIC_ISOLATION_FIELDS, + ) + ) + + self.df = pd.concat(dfs) + + self.df[self.GEOID_FIELD_NAME] = self.df.index.to_series().apply( + func=self._fips_from_censusdata_censusgeo + ) + + def transform(self) -> None: + logger.info(f"Starting Census ACS Transform") + + # Calculate percent unemployment. + # TODO: remove small-sample data that should be `None` instead of a high-variance fraction. + self.df[self.UNEMPLOYED_FIELD_NAME] = self.df.B23025_005E / self.df.B23025_003E + + # Calculate linguistic isolation. + individual_limited_english_fields = [ + "C16002_004E", + "C16002_007E", + "C16002_010E", + "C16002_013E", + ] + + self.df[self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME] = self.df[ + individual_limited_english_fields + ].sum(axis=1, skipna=True) + self.df[self.LINGUISTIC_ISOLATION_FIELD_NAME] = ( + self.df[self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME].astype(float) + / self.df["C16002_001E"] + ) + + self.df[self.LINGUISTIC_ISOLATION_FIELD_NAME].describe() + + def load(self) -> None: + logger.info(f"Saving Census ACS Data") + + # mkdir census + self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True) + + columns_to_include = [ + self.GEOID_FIELD_NAME, + self.UNEMPLOYED_FIELD_NAME, + self.LINGUISTIC_ISOLATION_FIELD_NAME, + ] + + self.df[columns_to_include].to_csv( + path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False + ) diff --git a/score/etl/sources/ejscreen/etl.py b/score/etl/sources/ejscreen/etl.py index 12f433f1..f9175551 100644 --- a/score/etl/sources/ejscreen/etl.py +++ b/score/etl/sources/ejscreen/etl.py @@ -1 +1,39 @@ -# https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip +import pandas as pd + +from etl.base import ExtractTransformLoad +from utils import get_module_logger + +logger = get_module_logger(__name__) + + +class EJScreenETL(ExtractTransformLoad): + def __init__(self): + self.EJSCREEN_FTP_URL = ( + "https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip" + ) + self.EJSCREEN_CSV = self.TMP_PATH / "EJSCREEN_2020_StatePctile.csv" + self.CSV_PATH = self.DATA_PATH / "dataset" / "ejscreen_2020" + self.df: pd.DataFrame + + def extract(self) -> None: + logger.info(f"Downloading EJScreen Data") + super().extract( + self.EJSCREEN_FTP_URL, + self.TMP_PATH, + ) + + def transform(self) -> None: + logger.info(f"Transforming EJScreen Data") + self.df = pd.read_csv( + self.EJSCREEN_CSV, + dtype={"ID": "string"}, + # EJSCREEN writes the word "None" for NA data.
+ na_values=["None"], + low_memory=False, + ) + + def load(self) -> None: + logger.info(f"Saving EJScreen CSV") + # write nationwide csv + self.CSV_PATH.mkdir(parents=True, exist_ok=True) + self.df.to_csv(self.CSV_PATH / f"usa.csv", index=False) diff --git a/score/etl/sources/housing_and_transportation/__init__.py b/score/etl/sources/housing_and_transportation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/etl/sources/housing_and_transportation/etl.py b/score/etl/sources/housing_and_transportation/etl.py new file mode 100644 index 00000000..cbe56cfa --- /dev/null +++ b/score/etl/sources/housing_and_transportation/etl.py @@ -0,0 +1,55 @@ +import pandas as pd + +from etl.base import ExtractTransformLoad +from etl.sources.census.etl_utils import get_state_fips_codes +from utils import get_module_logger, unzip_file_from_url + +logger = get_module_logger(__name__) + + +class HousingTransportationETL(ExtractTransformLoad): + def __init__(self): + self.HOUSING_FTP_URL = ( + "https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid=" + ) + self.OUTPUT_PATH = ( + self.DATA_PATH / "dataset" / "housing_and_transportation_index" + ) + self.df: pd.DataFrame + + def extract(self) -> None: + # Download each state / territory individually + dfs = [] + zip_file_dir = self.TMP_PATH / "housing_and_transportation_index" + for fips in get_state_fips_codes(self.DATA_PATH): + logger.info( + f"Downloading housing data for state/territory with FIPS code {fips}" + ) + unzip_file_from_url( + f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir + ) + + # New file name: + tmp_csv_file_path = zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv" + tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path) + + dfs.append(tmp_df) + + self.df = pd.concat(dfs) + + self.df.head() + + def transform(self) -> None: + logger.info(f"Transforming Housing and Transportation Data") + + # Rename and reformat block group ID + self.df.rename(columns={"blkgrp": self.GEOID_FIELD_NAME}, inplace=True) + self.df[self.GEOID_FIELD_NAME] = self.df[self.GEOID_FIELD_NAME].str.replace( + '"', "" + ) + + def load(self) -> None: + logger.info(f"Saving Housing and Transportation Data") + + self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True) + self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False) diff --git a/score/etl/sources/hud_housing/__init__.py b/score/etl/sources/hud_housing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/etl/sources/hud_housing/etl.py b/score/etl/sources/hud_housing/etl.py new file mode 100644 index 00000000..3dd3449a --- /dev/null +++ b/score/etl/sources/hud_housing/etl.py @@ -0,0 +1,180 @@ +import pandas as pd + +from etl.base import ExtractTransformLoad +from etl.sources.census.etl_utils import get_state_fips_codes +from utils import get_module_logger, unzip_file_from_url, remove_all_from_dir + +logger = get_module_logger(__name__) + + +class HudHousingETL(ExtractTransformLoad): + def __init__(self): + self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "hud_housing" + self.GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT" + self.HOUSING_FTP_URL = ( + "https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip" + ) + self.HOUSING_ZIP_FILE_DIR = self.TMP_PATH / "hud_housing" + + # We measure households earning less than 80% of HUD Area Median Family Income by county + # and paying greater than 30% of their income to housing costs. 
+ self.HOUSING_BURDEN_FIELD_NAME = "Housing burden (percent)" + self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME = "HOUSING_BURDEN_NUMERATOR" + self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = "HOUSING_BURDEN_DENOMINATOR" + + # Note: some variable definitions. + # HUD-adjusted median family income (HAMFI). + # The four housing problems are: incomplete kitchen facilities, incomplete plumbing facilities, more than 1 person per room, and cost burden greater than 30%. + # Table 8 is the desired table. + + self.df: pd.DataFrame + + def extract(self) -> None: + logger.info(f"Extracting HUD Housing Data") + super().extract( + self.HOUSING_FTP_URL, + self.HOUSING_ZIP_FILE_DIR, + ) + + def transform(self) -> None: + logger.info(f"Transforming HUD Housing Data") + + # New file name: + tmp_csv_file_path = ( + self.HOUSING_ZIP_FILE_DIR + / "2012thru2016-140-csv" + / "2012thru2016-140-csv" + / "140" + / "Table8.csv" + ) + self.df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path) + + # Rename and reformat block group ID + self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True) + + # The CHAS data has census tract ids such as `14000US01001020100`, + # whereas the rest of our data uses, for the same tract, `01001020100`. + # Strip the characters up to and including `US`: + self.df[self.GEOID_TRACT_FIELD_NAME] = self.df[ + self.GEOID_TRACT_FIELD_NAME + ].str.replace(r"^.*?US", "", regex=True) + + # Calculate housing burden + # This is quite a number of steps. It does not appear to be accessible nationally in a simpler format, though. + # See "CHAS data dictionary 12-16.xlsx" + + # Owner occupied numerator fields + OWNER_OCCUPIED_NUMERATOR_FIELDS = [ + # Key: Column Name Line_Type Tenure Household income Cost burden Facilities + # T8_est7 Subtotal Owner occupied less than or equal to 30% of HAMFI greater than 30% but less than or equal to 50% All + "T8_est7", + # T8_est10 Subtotal Owner occupied less than or equal to 30% of HAMFI greater than 50% All + "T8_est10", + # T8_est20 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI greater than 30% but less than or equal to 50% All + "T8_est20", + # T8_est23 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI greater than 50% All + "T8_est23", + # T8_est33 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI greater than 30% but less than or equal to 50% All + "T8_est33", + # T8_est36 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI greater than 50% All + "T8_est36", + ] + + # These rows have the values where HAMFI was not computed, b/c of no or negative income.
+ OWNER_OCCUPIED_NOT_COMPUTED_FIELDS = [ + # Key: Column Name Line_Type Tenure Household income Cost burden Facilities + # T8_est13 Subtotal Owner occupied less than or equal to 30% of HAMFI not computed (no/negative income) All + "T8_est13", + # T8_est26 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI not computed (no/negative income) All + "T8_est26", + # T8_est39 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI not computed (no/negative income) All + "T8_est39", + # T8_est52 Subtotal Owner occupied greater than 80% but less than or equal to 100% of HAMFI not computed (no/negative income) All + "T8_est52", + # T8_est65 Subtotal Owner occupied greater than 100% of HAMFI not computed (no/negative income) All + "T8_est65", + ] + + # T8_est2 Subtotal Owner occupied All All All + OWNER_OCCUPIED_POPULATION_FIELD = "T8_est2" + + # Renter occupied numerator fields + RENTER_OCCUPIED_NUMERATOR_FIELDS = [ + # Key: Column Name Line_Type Tenure Household income Cost burden Facilities + # T8_est73 Subtotal Renter occupied less than or equal to 30% of HAMFI greater than 30% but less than or equal to 50% All + "T8_est73", + # T8_est76 Subtotal Renter occupied less than or equal to 30% of HAMFI greater than 50% All + "T8_est76", + # T8_est86 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI greater than 30% but less than or equal to 50% All + "T8_est86", + # T8_est89 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI greater than 50% All + "T8_est89", + # T8_est99 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI greater than 30% but less than or equal to 50% All + "T8_est99", + # T8_est102 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI greater than 50% All + "T8_est102", + ] + + # These rows have the values where HAMFI was not computed, b/c of no or negative income. 
+ RENTER_OCCUPIED_NOT_COMPUTED_FIELDS = [ + # Key: Column Name Line_Type Tenure Household income Cost burden Facilities + # T8_est79 Subtotal Renter occupied less than or equal to 30% of HAMFI not computed (no/negative income) All + "T8_est79", + # T8_est92 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI not computed (no/negative income) All + "T8_est92", + # T8_est105 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI not computed (no/negative income) All + "T8_est105", + # T8_est118 Subtotal Renter occupied greater than 80% but less than or equal to 100% of HAMFI not computed (no/negative income) All + "T8_est118", + # T8_est131 Subtotal Renter occupied greater than 100% of HAMFI not computed (no/negative income) All + "T8_est131", + ] + + # T8_est68 Subtotal Renter occupied All All All + RENTER_OCCUPIED_POPULATION_FIELD = "T8_est68" + + # Math: + # ( + # # of Owner Occupied Units Meeting Criteria + # + # of Renter Occupied Units Meeting Criteria + # ) + # divided by + # ( + # Total # of Owner Occupied Units + # + Total # of Renter Occupied Units + # - # of Owner Occupied Units with HAMFI Not Computed + # - # of Renter Occupied Units with HAMFI Not Computed + # ) + + self.df[self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME] = self.df[ + OWNER_OCCUPIED_NUMERATOR_FIELDS + ].sum(axis=1) + self.df[RENTER_OCCUPIED_NUMERATOR_FIELDS].sum(axis=1) + + self.df[self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME] = ( + self.df[OWNER_OCCUPIED_POPULATION_FIELD] + + self.df[RENTER_OCCUPIED_POPULATION_FIELD] + - self.df[OWNER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1) + - self.df[RENTER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1) + ) + + # TODO: add small sample size checks + self.df[self.HOUSING_BURDEN_FIELD_NAME] = self.df[ + self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME + ].astype(float) / self.df[self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME].astype( + float + ) + + def load(self) -> None: + logger.info(f"Saving HUD Housing Data") + + self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True) + + # Drop unnecessary fields + self.df[ + [ + self.GEOID_TRACT_FIELD_NAME, + self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME, + self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME, + self.HOUSING_BURDEN_FIELD_NAME, + ] + ].to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False) diff --git a/score/etl/sources/hud_recap/__init__.py b/score/etl/sources/hud_recap/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/score/etl/sources/hud_recap/etl.py b/score/etl/sources/hud_recap/etl.py new file mode 100644 index 00000000..9fcafc23 --- /dev/null +++ b/score/etl/sources/hud_recap/etl.py @@ -0,0 +1,63 @@ +import pandas as pd +import requests + +from etl.base import ExtractTransformLoad +from utils import get_module_logger + +logger = get_module_logger(__name__) + + +class HudRecapETL(ExtractTransformLoad): + def __init__(self): + self.HUD_RECAP_CSV_URL = "https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326" + self.HUD_RECAP_CSV = ( + self.TMP_PATH + / "Racially_or_Ethnically_Concentrated_Areas_of_Poverty__R_ECAPs_.csv" + ) + self.CSV_PATH = self.DATA_PATH / "dataset" / "hud_recap" + + # Defining some variable names + self.HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME = "hud_recap_priority_community" + + self.df: pd.DataFrame + + def extract(self) -> None: + logger.info(f"Downloading HUD Recap Data") + download = requests.get(self.HUD_RECAP_CSV_URL, verify=None) + file_contents = download.content + with open(self.HUD_RECAP_CSV, "wb") as csv_file:
+ csv_file.write(file_contents) + + def transform(self) -> None: + logger.info(f"Transforming HUD Recap Data") + + # Load the downloaded HUD RECAP CSV + self.df = pd.read_csv(self.HUD_RECAP_CSV, dtype={"Census Tract": "string"}) + + self.df.rename( + columns={ + "GEOID": self.GEOID_TRACT_FIELD_NAME, + # Interestingly, there's no data dictionary for the RECAP data that I could find. + # However, this site (http://www.schousing.com/library/Tax%20Credit/2020/QAP%20Instructions%20(2).pdf) + # suggests: + # "If RCAP_Current for the tract in which the site is located is 1, the tract is an R/ECAP. If RCAP_Current is 0, it is not." + "RCAP_Current": self.HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME, + }, + inplace=True, + ) + + # Convert to boolean + self.df[self.HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME] = self.df[ + self.HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME + ].astype("bool") + + self.df[self.HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME].value_counts() + + self.df.sort_values(by=self.GEOID_TRACT_FIELD_NAME, inplace=True) + + def load(self) -> None: + logger.info(f"Saving HUD Recap CSV") + # write nationwide csv + self.CSV_PATH.mkdir(parents=True, exist_ok=True) + self.df.to_csv(self.CSV_PATH / f"usa.csv", index=False) diff --git a/score/ipython/calenviroscreen_etl.ipynb b/score/ipython/calenviroscreen_etl.ipynb deleted file mode 100644 index 0333deef..00000000 --- a/score/ipython/calenviroscreen_etl.ipynb +++ /dev/null @@ -1,141 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "20aa3891", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "import numpy as np\n", - "import pandas as pd\n", - "import csv\n", - "import sys\n", - "import os\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - "\n", - "from etl.sources.census.etl_utils import get_state_fips_codes\n", - "from utils import unzip_file_from_url, remove_all_from_dir\n", - "\n", - "DATA_PATH = Path.cwd().parent / \"data\"\n", - "TMP_PATH = DATA_PATH / \"tmp\"\n", - "CALENVIROSCREEN_FTP_URL = \"https://justice40-data.s3.amazonaws.com/CalEnviroScreen/CalEnviroScreen_4.0_2021.zip\"\n", - "CSV_PATH = DATA_PATH / \"dataset\" / \"calenviroscreen4\"\n", - "\n", - "# Definining some variable names\n", - "CALENVIROSCREEN_SCORE_FIELD_NAME = \"calenviroscreen_score\"\n", - "CALENVIROSCREEN_PERCENTILE_FIELD_NAME = \"calenviroscreen_percentile\"\n", - "CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = \"calenviroscreen_priority_community\"\n", - "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n", - "\n", - "# Choosing constants.\n", - "# None of these numbers are final, but just for the purposes of comparison.\n", - "CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD = 75\n", - "\n", - "print(DATA_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc3fb9ec", - "metadata": {}, - "outputs": [], - "source": [ - "# download file from ejscreen ftp\n", - "unzip_file_from_url(CALENVIROSCREEN_FTP_URL, TMP_PATH, TMP_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15f66756", - "metadata": {}, - "outputs": [], - "source": [ - "# Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:\n", - "# https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip\n", - "calenviroscreen_4_csv_name = \"CalEnviroScreen_4.0_2021.csv\"\n", -
"calenviroscreen_data_path = TMP_PATH.joinpath(calenviroscreen_4_csv_name)\n", - "\n", - "# Load comparison index (CalEnviroScreen 4)\n", - "calenviroscreen_df = pd.read_csv(\n", - " calenviroscreen_data_path, dtype={\"Census Tract\": \"string\"}\n", - ")\n", - "\n", - "calenviroscreen_df.rename(\n", - " columns={\n", - " \"Census Tract\": GEOID_TRACT_FIELD_NAME,\n", - " \"DRAFT CES 4.0 Score\": CALENVIROSCREEN_SCORE_FIELD_NAME,\n", - " \"DRAFT CES 4.0 Percentile\": CALENVIROSCREEN_PERCENTILE_FIELD_NAME,\n", - " },\n", - " inplace=True,\n", - ")\n", - "\n", - "# Add a leading \"0\" to the Census Tract to match our format in other data frames.\n", - "\n", - "calenviroscreen_df[GEOID_TRACT_FIELD_NAME] = (\n", - " \"0\" + calenviroscreen_df[GEOID_TRACT_FIELD_NAME]\n", - ")\n", - "\n", - "# Calculate the top K% of prioritized communities\n", - "calenviroscreen_df[CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME] = (\n", - " calenviroscreen_df[CALENVIROSCREEN_PERCENTILE_FIELD_NAME]\n", - " >= CALENVIROSCREEN_PRIORITY_COMMUNITY_THRESHOLD\n", - ")\n", - "\n", - "calenviroscreen_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9fa2077a", - "metadata": {}, - "outputs": [], - "source": [ - "# write csv\n", - "CSV_PATH.mkdir(parents=True, exist_ok=True)\n", - "\n", - "# Matching other conventions in the ETL scripts, write only for the state (FIPS code 06).\n", - "calenviroscreen_df.to_csv(CSV_PATH / \"data06.csv\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81b977f8", - "metadata": {}, - "outputs": [], - "source": [ - "# cleanup\n", - "remove_all_from_dir(TMP_PATH)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/score/ipython/census_etl.ipynb b/score/ipython/census_etl.ipynb deleted file mode 100644 index 8f2ab4b6..00000000 --- a/score/ipython/census_etl.ipynb +++ /dev/null @@ -1,203 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0491828b", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import censusdata\n", - "import csv\n", - "from pathlib import Path\n", - "import os\n", - "import sys\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - "\n", - "from etl.sources.census.etl_utils import get_state_fips_codes\n", - "\n", - "ACS_YEAR = 2019\n", - "\n", - "DATA_PATH = Path.cwd().parent / \"data\"\n", - "OUTPUT_PATH = DATA_PATH / \"dataset\" / f\"census_acs_{ACS_YEAR}\"\n", - "\n", - "GEOID_FIELD_NAME = \"GEOID10\"\n", - "UNEMPLOYED_FIELD_NAME = \"Unemployed civilians (percent)\"\n", - "LINGUISTIC_ISOLATION_FIELD_NAME = \"Linguistic isolation (percent)\"\n", - "LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = \"Linguistic isolation (total)\"\n", - "\n", - "LINGUISTIC_ISOLATION_FIELDS = [\n", - " \"C16002_001E\",\n", - " \"C16002_004E\",\n", - " \"C16002_007E\",\n", - " \"C16002_010E\",\n", - " \"C16002_013E\",\n", - "]\n", - "\n", - "# Some display settings to make pandas outputs more readable.\n", - "pd.set_option(\"display.expand_frame_repr\", False)\n", - 
"pd.set_option(\"display.precision\", 2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64df0b63", - "metadata": {}, - "outputs": [], - "source": [ - "# For variable discovery, if necessary.\n", - "# censusdata.search(\n", - "# \"acs5\", 2019, \"label\", \"Limited English speaking\"\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "654f25a1", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Following the tutorial at https://jtleider.github.io/censusdata/example1.html.\n", - "# Full list of fields is at https://www2.census.gov/programs-surveys/acs/summary_file/2019/documentation/user_tools/ACS2019_Table_Shells.xlsx\n", - "censusdata.printtable(censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"B23025\"))\n", - "censusdata.printtable(censusdata.censustable(src=\"acs5\", year=ACS_YEAR, table=\"C16002\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8999cea4", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "def fips_from_censusdata_censusgeo(censusgeo: censusdata.censusgeo) -> str:\n", - " \"\"\"Create a FIPS code from the proprietary censusgeo index.\"\"\"\n", - " fips = \"\".join([value for (key, value) in censusgeo.params()])\n", - " return fips\n", - "\n", - "\n", - "dfs = []\n", - "for fips in get_state_fips_codes(DATA_PATH):\n", - " print(f\"Downloading data for state/territory with FIPS code {fips}\")\n", - "\n", - " dfs.append(\n", - " censusdata.download(\n", - " src=\"acs5\",\n", - " year=ACS_YEAR,\n", - " geo=censusdata.censusgeo(\n", - " [(\"state\", fips), (\"county\", \"*\"), (\"block group\", \"*\")]\n", - " ),\n", - " var=[\n", - " # Emploment fields\n", - " \"B23025_005E\",\n", - " \"B23025_003E\",\n", - " ]\n", - " + LINGUISTIC_ISOLATION_FIELDS,\n", - " )\n", - " )\n", - "\n", - "\n", - "df = pd.concat(dfs)\n", - "\n", - "df[GEOID_FIELD_NAME] = df.index.to_series().apply(func=fips_from_censusdata_censusgeo)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "803cce31", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Calculate percent unemployment.\n", - "# TODO: remove small-sample data that should be `None` instead of a high-variance fraction.\n", - "df[UNEMPLOYED_FIELD_NAME] = df.B23025_005E / df.B23025_003E\n", - "\n", - "df[UNEMPLOYED_FIELD_NAME].describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e475472c", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Calculate linguistic isolation.\n", - "individual_limited_english_fields = [\n", - " \"C16002_004E\",\n", - " \"C16002_007E\",\n", - " \"C16002_010E\",\n", - " \"C16002_013E\",\n", - "]\n", - "\n", - "df[LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME] = df[individual_limited_english_fields].sum(\n", - " axis=1, skipna=True\n", - ")\n", - "df[LINGUISTIC_ISOLATION_FIELD_NAME] = (\n", - " df[LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME].astype(float) / df[\"C16002_001E\"]\n", - ")\n", - "\n", - "df[LINGUISTIC_ISOLATION_FIELD_NAME].describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a269bb1", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# mkdir census\n", - "OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n", - "\n", - "columns_to_include = [\n", - " GEOID_FIELD_NAME,\n", - " UNEMPLOYED_FIELD_NAME,\n", - " LINGUISTIC_ISOLATION_FIELD_NAME,\n", - "]\n", - "\n", - 
"df[columns_to_include].to_csv(path_or_buf=OUTPUT_PATH / \"usa.csv\", index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/score/ipython/ejscreen_etl.ipynb b/score/ipython/ejscreen_etl.ipynb deleted file mode 100644 index 37d81d6c..00000000 --- a/score/ipython/ejscreen_etl.ipynb +++ /dev/null @@ -1,123 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "20aa3891", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "import numpy as np\n", - "import pandas as pd\n", - "import csv\n", - "import sys\n", - "import os\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - "\n", - "from etl.sources.census.etl_utils import get_state_fips_codes\n", - "from utils import unzip_file_from_url, remove_all_from_dir\n", - "\n", - "DATA_PATH = Path.cwd().parent / \"data\"\n", - "TMP_PATH = DATA_PATH / \"tmp\"\n", - "EJSCREEN_FTP_URL = (\n", - " \"https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip\"\n", - ")\n", - "EJSCREEN_CSV = TMP_PATH / \"EJSCREEN_2020_StatePctile.csv\"\n", - "CSV_PATH = DATA_PATH / \"dataset\" / \"ejscreen_2020\"\n", - "print(DATA_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc3fb9ec", - "metadata": {}, - "outputs": [], - "source": [ - "# download file from ejscreen ftp\n", - "unzip_file_from_url(EJSCREEN_FTP_URL, TMP_PATH, TMP_PATH)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b25738bb", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "df = pd.read_csv(\n", - " EJSCREEN_CSV,\n", - " dtype={\"ID\": \"string\"},\n", - " # EJSCREEN writes the word \"None\" for NA data.\n", - " na_values=[\"None\"],\n", - " low_memory=False,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9fa2077a", - "metadata": {}, - "outputs": [], - "source": [ - "# write nationwide csv\n", - "CSV_PATH.mkdir(parents=True, exist_ok=True)\n", - "df.to_csv(CSV_PATH / f\"usa.csv\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5e5cc12a", - "metadata": {}, - "outputs": [], - "source": [ - "# write per state csvs\n", - "for fips in get_state_fips_codes(DATA_PATH):\n", - " print(f\"Generating data{fips} csv\")\n", - " df1 = df[df.ID.str[:2] == fips]\n", - " # we need to name the file data01.csv for ogr2ogr csv merge to work\n", - " df1.to_csv(CSV_PATH / f\"data{fips}.csv\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81b977f8", - "metadata": {}, - "outputs": [], - "source": [ - "# cleanup\n", - "remove_all_from_dir(TMP_PATH)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git 
a/score/ipython/housing_and_transportation_etl.ipynb b/score/ipython/housing_and_transportation_etl.ipynb deleted file mode 100644 index ed3f888a..00000000 --- a/score/ipython/housing_and_transportation_etl.ipynb +++ /dev/null @@ -1,116 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "c21b63a3", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import censusdata\n", - "import csv\n", - "from pathlib import Path\n", - "import os\n", - "import sys\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - "\n", - "from etl.sources.census.etl_utils import get_state_fips_codes\n", - "from utils import unzip_file_from_url, remove_all_from_dir\n", - "\n", - "ACS_YEAR = 2019\n", - "\n", - "DATA_PATH = Path.cwd().parent / \"data\"\n", - "TMP_PATH = DATA_PATH / \"tmp\"\n", - "HOUSING_FTP_URL = \"https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid=\"\n", - "OUTPUT_PATH = DATA_PATH / \"dataset\" / \"housing_and_transportation_index\"\n", - "\n", - "GEOID_FIELD_NAME = \"GEOID10\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6696bc66", - "metadata": {}, - "outputs": [], - "source": [ - "# Download each state / territory individually\n", - "dfs = []\n", - "zip_file_dir = TMP_PATH / \"housing_and_transportation_index\"\n", - "for fips in get_state_fips_codes(DATA_PATH):\n", - " print(f\"Downloading housing data for state/territory with FIPS code {fips}\")\n", - " unzip_file_from_url(f\"{HOUSING_FTP_URL}{fips}\", TMP_PATH, zip_file_dir)\n", - "\n", - " # New file name:\n", - " tmp_csv_file_path = zip_file_dir / f\"htaindex_data_blkgrps_{fips}.csv\"\n", - " tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)\n", - "\n", - " dfs.append(tmp_df)\n", - "\n", - "df = pd.concat(dfs)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "244e0d03", - "metadata": {}, - "outputs": [], - "source": [ - "# Rename and reformat block group ID\n", - "df.rename(columns={\"blkgrp\": GEOID_FIELD_NAME}, inplace=True)\n", - "df[GEOID_FIELD_NAME] = df[GEOID_FIELD_NAME].str.replace('\"', \"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8275c1ef", - "metadata": {}, - "outputs": [], - "source": [ - "OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n", - "\n", - "df.to_csv(path_or_buf=OUTPUT_PATH / \"usa.csv\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef5bb862", - "metadata": {}, - "outputs": [], - "source": [ - "# cleanup\n", - "remove_all_from_dir(TMP_PATH)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/score/ipython/hud_housing_etl.ipynb b/score/ipython/hud_housing_etl.ipynb deleted file mode 100644 index c8647b5c..00000000 --- a/score/ipython/hud_housing_etl.ipynb +++ /dev/null @@ -1,274 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "c21b63a3", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import censusdata\n", - "import csv\n", - "from pathlib import 
Path\n", - "import os\n", - "import re\n", - "import sys\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - "\n", - "from etl.sources.census.etl_utils import get_state_fips_codes\n", - "from utils import unzip_file_from_url, remove_all_from_dir\n", - "\n", - "DATA_PATH = Path.cwd().parent / \"data\"\n", - "TMP_PATH = DATA_PATH / \"tmp\"\n", - "OUTPUT_PATH = DATA_PATH / \"dataset\" / \"hud_housing\"\n", - "\n", - "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n", - "\n", - "# We measure households earning less than 80% of HUD Area Median Family Income by county\n", - "# and paying greater than 30% of their income to housing costs.\n", - "HOUSING_BURDEN_FIELD_NAME = \"Housing burden (percent)\"\n", - "HOUSING_BURDEN_NUMERATOR_FIELD_NAME = \"HOUSING_BURDEN_NUMERATOR\"\n", - "HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = \"HOUSING_BURDEN_DENOMINATOR\"\n", - "\n", - "# Note: some variable definitions.\n", - "# HUD-adjusted median family income (HAMFI).\n", - "# The four housing problems are: incomplete kitchen facilities, incomplete plumbing facilities, more than 1 person per room, and cost burden greater than 30%.\n", - "# Table 8 is the desired table." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6696bc66", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Download the data.\n", - "dfs = []\n", - "zip_file_dir = TMP_PATH / \"hud_housing\"\n", - "\n", - "print(f\"Downloading 225MB housing data\")\n", - "unzip_file_from_url(\n", - " \"https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip\",\n", - " TMP_PATH,\n", - " zip_file_dir,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3e954589", - "metadata": {}, - "outputs": [], - "source": [ - "# New file name:\n", - "tmp_csv_file_path = (\n", - " zip_file_dir\n", - " / \"2012thru2016-140-csv\"\n", - " / \"2012thru2016-140-csv\"\n", - " / \"140\"\n", - " / \"Table8.csv\"\n", - ")\n", - "df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "244e0d03", - "metadata": {}, - "outputs": [], - "source": [ - "# Rename and reformat block group ID\n", - "df.rename(columns={\"geoid\": GEOID_TRACT_FIELD_NAME}, inplace=True)\n", - "\n", - "# The CHAS data has census tract ids such as `14000US01001020100`\n", - "# Whereas the rest of our data uses, for the same tract, `01001020100`.\n", - "# the characters before `US`:\n", - "df[GEOID_TRACT_FIELD_NAME] = df[GEOID_TRACT_FIELD_NAME].str.replace(\n", - " r\"^.*?US\", \"\", regex=True\n", - ")\n", - "\n", - "df[GEOID_TRACT_FIELD_NAME].head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "03250026", - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate housing burden\n", - "# This is quite a number of steps. 
It does not appear to be accessible nationally in a simpler format, though.\n", - "# See \"CHAS data dictionary 12-16.xlsx\"\n", - "\n", - "# Owner occupied numerator fields\n", - "OWNER_OCCUPIED_NUMERATOR_FIELDS = [\n", - " # Key: Column Name\tLine_Type\tTenure\tHousehold income\tCost burden\tFacilities\n", - " # T8_est7\tSubtotal\tOwner occupied\tless than or equal to 30% of HAMFI\tgreater than 30% but less than or equal to 50%\tAll\n", - " \"T8_est7\",\n", - " # T8_est10\tSubtotal\tOwner occupied\tless than or equal to 30% of HAMFI\tgreater than 50%\tAll\n", - " \"T8_est10\",\n", - " # T8_est20\tSubtotal\tOwner occupied\tgreater than 30% but less than or equal to 50% of HAMFI\tgreater than 30% but less than or equal to 50%\tAll\n", - " \"T8_est20\",\n", - " # T8_est23\tSubtotal\tOwner occupied\tgreater than 30% but less than or equal to 50% of HAMFI\tgreater than 50%\tAll\n", - " \"T8_est23\",\n", - " # T8_est33\tSubtotal\tOwner occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tgreater than 30% but less than or equal to 50%\tAll\n", - " \"T8_est33\",\n", - " # T8_est36\tSubtotal\tOwner occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tgreater than 50%\tAll\n", - " \"T8_est36\",\n", - "]\n", - "\n", - "# These rows have the values where HAMFI was not computed, b/c of no or negative income.\n", - "OWNER_OCCUPIED_NOT_COMPUTED_FIELDS = [\n", - " # Key: Column Name\tLine_Type\tTenure\tHousehold income\tCost burden\tFacilities\n", - " # T8_est13\tSubtotal\tOwner occupied\tless than or equal to 30% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est13\",\n", - " # T8_est26\tSubtotal\tOwner occupied\tgreater than 30% but less than or equal to 50% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est26\",\n", - " # T8_est39\tSubtotal\tOwner occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est39\",\n", - " # T8_est52\tSubtotal\tOwner occupied\tgreater than 80% but less than or equal to 100% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est52\",\n", - " # T8_est65\tSubtotal\tOwner occupied\tgreater than 100% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est65\",\n", - "]\n", - "\n", - "# T8_est2\tSubtotal\tOwner occupied\tAll\tAll\tAll\n", - "OWNER_OCCUPIED_POPULATION_FIELD = \"T8_est2\"\n", - "\n", - "# Renter occupied numerator fields\n", - "RENTER_OCCUPIED_NUMERATOR_FIELDS = [\n", - " # Key: Column Name\tLine_Type\tTenure\tHousehold income\tCost burden\tFacilities\n", - " # T8_est73\tSubtotal\tRenter occupied\tless than or equal to 30% of HAMFI\tgreater than 30% but less than or equal to 50%\tAll\n", - " \"T8_est73\",\n", - " # T8_est76\tSubtotal\tRenter occupied\tless than or equal to 30% of HAMFI\tgreater than 50%\tAll\n", - " \"T8_est76\",\n", - " # T8_est86\tSubtotal\tRenter occupied\tgreater than 30% but less than or equal to 50% of HAMFI\tgreater than 30% but less than or equal to 50%\tAll\n", - " \"T8_est86\",\n", - " # T8_est89\tSubtotal\tRenter occupied\tgreater than 30% but less than or equal to 50% of HAMFI\tgreater than 50%\tAll\n", - " \"T8_est89\",\n", - " # T8_est99\tSubtotal\tRenter occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tgreater than 30% but less than or equal to 50%\tAll\n", - " \"T8_est99\",\n", - " # T8_est102\tSubtotal\tRenter occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tgreater than 50%\tAll\n", - " \"T8_est102\",\n", - "]\n", - "\n", - "# These rows have the values 
where HAMFI was not computed, b/c of no or negative income.\n", - "RENTER_OCCUPIED_NOT_COMPUTED_FIELDS = [\n", - " # Key: Column Name\tLine_Type\tTenure\tHousehold income\tCost burden\tFacilities\n", - " # T8_est79\tSubtotal\tRenter occupied\tless than or equal to 30% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est79\",\n", - " # T8_est92\tSubtotal\tRenter occupied\tgreater than 30% but less than or equal to 50% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est92\",\n", - " # T8_est105\tSubtotal\tRenter occupied\tgreater than 50% but less than or equal to 80% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est105\",\n", - " # T8_est118\tSubtotal\tRenter occupied\tgreater than 80% but less than or equal to 100% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est118\",\n", - " # T8_est131\tSubtotal\tRenter occupied\tgreater than 100% of HAMFI\tnot computed (no/negative income)\tAll\n", - " \"T8_est131\",\n", - "]\n", - "\n", - "\n", - "# T8_est68\tSubtotal\tRenter occupied\tAll\tAll\tAll\n", - "RENTER_OCCUPIED_POPULATION_FIELD = \"T8_est68\"\n", - "\n", - "\n", - "# Math:\n", - "# (\n", - "# # of Owner Occupied Units Meeting Criteria\n", - "# + # of Renter Occupied Units Meeting Criteria\n", - "# )\n", - "# divided by\n", - "# (\n", - "# Total # of Owner Occupied Units\n", - "# + Total # of Renter Occupied Units\n", - "# - # of Owner Occupied Units with HAMFI Not Computed\n", - "# - # of Renter Occupied Units with HAMFI Not Computed\n", - "# )\n", - "\n", - "df[HOUSING_BURDEN_NUMERATOR_FIELD_NAME] = df[OWNER_OCCUPIED_NUMERATOR_FIELDS].sum(\n", - " axis=1\n", - ") + df[RENTER_OCCUPIED_NUMERATOR_FIELDS].sum(axis=1)\n", - "\n", - "df[HOUSING_BURDEN_DENOMINATOR_FIELD_NAME] = (\n", - " df[OWNER_OCCUPIED_POPULATION_FIELD]\n", - " + df[RENTER_OCCUPIED_POPULATION_FIELD]\n", - " - df[OWNER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1)\n", - " - df[RENTER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1)\n", - ")\n", - "\n", - "# TODO: add small sample size checks\n", - "df[HOUSING_BURDEN_FIELD_NAME] = df[HOUSING_BURDEN_NUMERATOR_FIELD_NAME].astype(\n", - " float\n", - ") / df[HOUSING_BURDEN_DENOMINATOR_FIELD_NAME].astype(float)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8275c1ef", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "OUTPUT_PATH.mkdir(parents=True, exist_ok=True)\n", - "\n", - "# Drop unnecessary fields\n", - "df[\n", - " [\n", - " GEOID_TRACT_FIELD_NAME,\n", - " HOUSING_BURDEN_NUMERATOR_FIELD_NAME,\n", - " HOUSING_BURDEN_DENOMINATOR_FIELD_NAME,\n", - " HOUSING_BURDEN_FIELD_NAME,\n", - " ]\n", - "].to_csv(path_or_buf=OUTPUT_PATH / \"usa.csv\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ef5bb862", - "metadata": {}, - "outputs": [], - "source": [ - "# cleanup\n", - "remove_all_from_dir(TMP_PATH)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/score/ipython/hud_recap_etl.ipynb b/score/ipython/hud_recap_etl.ipynb deleted file mode 100644 index 7d4df434..00000000 --- a/score/ipython/hud_recap_etl.ipynb +++ /dev/null @@ -1,115 +0,0 @@ -{ - 
"cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "20aa3891", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "import numpy as np\n", - "import pandas as pd\n", - "import csv\n", - "import sys\n", - "import os\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - "\n", - "from etl.sources.census.etl_utils import get_state_fips_codes\n", - "from utils import unzip_file_from_url, remove_all_from_dir\n", - "\n", - "DATA_PATH = Path.cwd().parent / \"data\"\n", - "TMP_PATH = DATA_PATH / \"tmp\"\n", - "HUD_RECAP_CSV_URL = \"https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326\"\n", - "CSV_PATH = DATA_PATH / \"dataset\" / \"hud_recap\"\n", - "\n", - "# Definining some variable names\n", - "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n", - "HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME = \"hud_recap_priority_community\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9455da5", - "metadata": {}, - "outputs": [], - "source": [ - "# Data from https://hudgis-hud.opendata.arcgis.com/datasets/HUD::racially-or-ethnically-concentrated-areas-of-poverty-r-ecaps/about\n", - "df = pd.read_csv(HUD_RECAP_CSV_URL, dtype={\"GEOID\": \"string\"})\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ca63e66c", - "metadata": {}, - "outputs": [], - "source": [ - "# Rename some fields\n", - "df.rename(\n", - " columns={\n", - " \"GEOID\": GEOID_TRACT_FIELD_NAME,\n", - " # Interestingly, there's no data dictionary for the RECAP data that I could find.\n", - " # However, this site (http://www.schousing.com/library/Tax%20Credit/2020/QAP%20Instructions%20(2).pdf)\n", - " # suggests:\n", - " # \"If RCAP_Current for the tract in which the site is located is 1, the tract is an R/ECAP. 
If RCAP_Current is 0, it is not.\"\n", - " \"RCAP_Current\": HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME,\n", - " },\n", - " inplace=True,\n", - ")\n", - "\n", - "# Convert to boolean\n", - "df[HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME] = df[\n", - " HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME\n", - "].astype(\"bool\")\n", - "\n", - "df[HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME].value_counts()\n", - "\n", - "df.sort_values(by=GEOID_TRACT_FIELD_NAME, inplace=True)\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9fa2077a", - "metadata": {}, - "outputs": [], - "source": [ - "# write csv\n", - "CSV_PATH.mkdir(parents=True, exist_ok=True)\n", - "\n", - "# Drop unnecessary columns.\n", - "df[[GEOID_TRACT_FIELD_NAME, HUD_RECAP_PRIORITY_COMMUNITY_FIELD_NAME]].to_csv(\n", - " CSV_PATH / \"usa.csv\", index=False\n", - ")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/score/ipython/score_calc.ipynb b/score/ipython/score_calc.ipynb deleted file mode 100644 index e1eec406..00000000 --- a/score/ipython/score_calc.ipynb +++ /dev/null @@ -1,619 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "a664f981", - "metadata": {}, - "outputs": [], - "source": [ - "# Before running this notebook, you must run the following notebooks (in any order):\n", - "# 1. `ejscreen_etl.ipynb`\n", - "# 2. `census_etl.ipynb`\n", - "# 3. `housing_and_transportation_etl.ipynb`\n", - "# 4. 
`hud_housing_etl.ipynb`\n", - "\n", - "import collections\n", - "import functools\n", - "from pathlib import Path\n", - "import matplotlib.pyplot as plt\n", - "import pandas as pd\n", - "import csv\n", - "import os\n", - "import sys\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n", - "\n", - "from etl.sources.census.etl_utils import get_state_fips_codes\n", - "\n", - "# Define some global parameters\n", - "GEOID_FIELD_NAME = \"GEOID10\"\n", - "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n", - "BUCKET_SOCIOECONOMIC = \"Socioeconomic Factors\"\n", - "BUCKET_SENSITIVE = \"Sensitive populations\"\n", - "BUCKET_ENVIRONMENTAL = \"Environmental effects\"\n", - "BUCKET_EXPOSURES = \"Exposures\"\n", - "BUCKETS = [\n", - " BUCKET_SOCIOECONOMIC,\n", - " BUCKET_SENSITIVE,\n", - " BUCKET_ENVIRONMENTAL,\n", - " BUCKET_EXPOSURES,\n", - "]\n", - "\n", - "# A few specific field names\n", - "# TODO: clean this up, I name some fields but not others.\n", - "UNEMPLOYED_FIELD_NAME = \"Unemployed civilians (percent)\"\n", - "LINGUISTIC_ISOLATION_FIELD_NAME = \"Linguistic isolation (percent)\"\n", - "HOUSING_BURDEN_FIELD_NAME = \"Housing burden (percent)\"\n", - "POVERTY_FIELD_NAME = \"Poverty (Less than 200% of federal poverty line)\"\n", - "HIGH_SCHOOL_FIELD_NAME = (\n", - " \"Percent individuals age 25 or over with less than high school degree\"\n", - ")\n", - "\n", - "# There's another aggregation level (a second level of \"buckets\").\n", - "AGGREGATION_POLLUTION = \"Pollution Burden\"\n", - "AGGREGATION_POPULATION = \"Population Characteristics\"\n", - "\n", - "PERCENTILE_FIELD_SUFFIX = \" (percentile)\"\n", - "MIN_MAX_FIELD_SUFFIX = \" (min-max normalized)\"\n", - "\n", - "DATA_PATH = Path.cwd().parent / \"data\"\n", - "SCORE_CSV_PATH = DATA_PATH / \"score\" / \"csv\"\n", - "\n", - "# Tell pandas to display all columns\n", - "pd.set_option(\"display.max_columns\", None)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7df430cb", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# EJSCreen csv Load\n", - "ejscreen_csv = DATA_PATH / \"dataset\" / \"ejscreen_2020\" / \"usa.csv\"\n", - "ejscreen_df = pd.read_csv(ejscreen_csv, dtype={\"ID\": \"string\"}, low_memory=False)\n", - "ejscreen_df.rename(columns={\"ID\": GEOID_FIELD_NAME}, inplace=True)\n", - "ejscreen_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "daba69fb", - "metadata": {}, - "outputs": [], - "source": [ - "# Load census data\n", - "census_csv = DATA_PATH / \"dataset\" / \"census_acs_2019\" / \"usa.csv\"\n", - "census_df = pd.read_csv(\n", - " census_csv, dtype={GEOID_FIELD_NAME: \"string\"}, low_memory=False\n", - ")\n", - "census_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "144bdde2", - "metadata": {}, - "outputs": [], - "source": [ - "# Load housing and transportation data\n", - "housing_and_transportation_index_csv = (\n", - " DATA_PATH / \"dataset\" / \"housing_and_transportation_index\" / \"usa.csv\"\n", - ")\n", - "housing_and_transportation_df = pd.read_csv(\n", - " housing_and_transportation_index_csv,\n", - " dtype={GEOID_FIELD_NAME: \"string\"},\n", - " low_memory=False,\n", - ")\n", - "housing_and_transportation_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a9202e5d", - "metadata": {}, - "outputs": [], - "source": [ - "# Load HUD housing data\n", - "hud_housing_csv = DATA_PATH / 
\"dataset\" / \"hud_housing\" / \"usa.csv\"\n", - "hud_housing_df = pd.read_csv(\n", - " hud_housing_csv,\n", - " dtype={GEOID_TRACT_FIELD_NAME: \"string\"},\n", - " low_memory=False,\n", - ")\n", - "hud_housing_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf89efd8", - "metadata": {}, - "outputs": [], - "source": [ - "# Join all the data sources that use census block groups\n", - "census_block_group_dfs = [ejscreen_df, census_df, housing_and_transportation_df]\n", - "\n", - "census_block_group_df = functools.reduce(\n", - " lambda left, right: pd.merge(\n", - " left=left, right=right, on=GEOID_FIELD_NAME, how=\"outer\"\n", - " ),\n", - " census_block_group_dfs,\n", - ")\n", - "\n", - "\n", - "if len(census_block_group_df) > 220333:\n", - " raise ValueError(\"Too many rows in the join.\")\n", - "\n", - "census_block_group_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e79ec27a", - "metadata": {}, - "outputs": [], - "source": [ - "# Sanity check the join.\n", - "if len(census_block_group_df[GEOID_FIELD_NAME].str.len().unique()) != 1:\n", - " raise ValueError(\n", - " f\"One of the input CSVs uses {GEOID_FIELD_NAME} with a different length.\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3d0d2915", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Join all the data sources that use census tracts\n", - "# TODO: when there's more than one data source using census tract, reduce/merge them here.\n", - "census_tract_df = hud_housing_df\n", - "\n", - "# Calculate the tract for the CBG data.\n", - "census_block_group_df[GEOID_TRACT_FIELD_NAME] = census_block_group_df[\n", - " GEOID_FIELD_NAME\n", - "].str[0:11]\n", - "\n", - "df = census_block_group_df.merge(census_tract_df, on=GEOID_TRACT_FIELD_NAME)\n", - "\n", - "if len(census_block_group_df) > 220333:\n", - " raise ValueError(\"Too many rows in the join.\")\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b8567900", - "metadata": {}, - "outputs": [], - "source": [ - "# Define a named tuple that will be used for each data set input.\n", - "DataSet = collections.namedtuple(\n", - " typename=\"DataSet\", field_names=[\"input_field\", \"renamed_field\", \"bucket\"]\n", - ")\n", - "\n", - "data_sets = [\n", - " # The following data sets have `bucket=None`, because it's not used in the bucket based score (\"Score C\").\n", - " DataSet(\n", - " input_field=GEOID_FIELD_NAME,\n", - " # Use the name `GEOID10` to enable geoplatform.gov's workflow.\n", - " renamed_field=GEOID_FIELD_NAME,\n", - " bucket=None,\n", - " ),\n", - " DataSet(\n", - " input_field=HOUSING_BURDEN_FIELD_NAME,\n", - " renamed_field=HOUSING_BURDEN_FIELD_NAME,\n", - " bucket=None,\n", - " ),\n", - " DataSet(input_field=\"ACSTOTPOP\", renamed_field=\"Total population\", bucket=None),\n", - " # The following data sets have buckets, because they're used in the score\n", - " DataSet(\n", - " input_field=\"CANCER\",\n", - " renamed_field=\"Air toxics cancer risk\",\n", - " bucket=BUCKET_EXPOSURES,\n", - " ),\n", - " DataSet(\n", - " input_field=\"RESP\",\n", - " renamed_field=\"Respiratory hazard index\",\n", - " bucket=BUCKET_EXPOSURES,\n", - " ),\n", - " DataSet(\n", - " input_field=\"DSLPM\",\n", - " renamed_field=\"Diesel particulate matter\",\n", - " bucket=BUCKET_EXPOSURES,\n", - " ),\n", - " DataSet(\n", - " input_field=\"PM25\",\n", - " renamed_field=\"Particulate matter (PM2.5)\",\n", - " 
bucket=BUCKET_EXPOSURES,\n", - " ),\n", - " DataSet(input_field=\"OZONE\", renamed_field=\"Ozone\", bucket=BUCKET_EXPOSURES),\n", - " DataSet(\n", - " input_field=\"PTRAF\",\n", - " renamed_field=\"Traffic proximity and volume\",\n", - " bucket=BUCKET_EXPOSURES,\n", - " ),\n", - " DataSet(\n", - " input_field=\"PRMP\",\n", - " renamed_field=\"Proximity to RMP sites\",\n", - " bucket=BUCKET_ENVIRONMENTAL,\n", - " ),\n", - " DataSet(\n", - " input_field=\"PTSDF\",\n", - " renamed_field=\"Proximity to TSDF sites\",\n", - " bucket=BUCKET_ENVIRONMENTAL,\n", - " ),\n", - " DataSet(\n", - " input_field=\"PNPL\",\n", - " renamed_field=\"Proximity to NPL sites\",\n", - " bucket=BUCKET_ENVIRONMENTAL,\n", - " ),\n", - " DataSet(\n", - " input_field=\"PWDIS\",\n", - " renamed_field=\"Wastewater discharge\",\n", - " bucket=BUCKET_ENVIRONMENTAL,\n", - " ),\n", - " DataSet(\n", - " input_field=\"PRE1960PCT\",\n", - " renamed_field=\"Percent pre-1960s housing (lead paint indicator)\",\n", - " bucket=BUCKET_ENVIRONMENTAL,\n", - " ),\n", - " DataSet(\n", - " input_field=\"UNDER5PCT\",\n", - " renamed_field=\"Individuals under 5 years old\",\n", - " bucket=BUCKET_SENSITIVE,\n", - " ),\n", - " DataSet(\n", - " input_field=\"OVER64PCT\",\n", - " renamed_field=\"Individuals over 64 years old\",\n", - " bucket=BUCKET_SENSITIVE,\n", - " ),\n", - " DataSet(\n", - " input_field=LINGUISTIC_ISOLATION_FIELD_NAME,\n", - " renamed_field=LINGUISTIC_ISOLATION_FIELD_NAME,\n", - " bucket=BUCKET_SENSITIVE,\n", - " ),\n", - " DataSet(\n", - " input_field=\"LINGISOPCT\",\n", - " renamed_field=\"Percent of households in linguistic isolation\",\n", - " bucket=BUCKET_SOCIOECONOMIC,\n", - " ),\n", - " DataSet(\n", - " input_field=\"LOWINCPCT\",\n", - " renamed_field=POVERTY_FIELD_NAME,\n", - " bucket=BUCKET_SOCIOECONOMIC,\n", - " ),\n", - " DataSet(\n", - " input_field=\"LESSHSPCT\",\n", - " renamed_field=HIGH_SCHOOL_FIELD_NAME,\n", - " bucket=BUCKET_SOCIOECONOMIC,\n", - " ),\n", - " DataSet(\n", - " input_field=UNEMPLOYED_FIELD_NAME,\n", - " renamed_field=UNEMPLOYED_FIELD_NAME,\n", - " bucket=BUCKET_SOCIOECONOMIC,\n", - " ),\n", - " DataSet(\n", - " input_field=\"ht_ami\",\n", - " renamed_field=\"Housing + Transportation Costs % Income for the Regional Typical Household\",\n", - " bucket=BUCKET_SOCIOECONOMIC,\n", - " ),\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e152a655", - "metadata": {}, - "outputs": [], - "source": [ - "# Rename columns:\n", - "renaming_dict = {data_set.input_field: data_set.renamed_field for data_set in data_sets}\n", - "\n", - "df.rename(\n", - " columns=renaming_dict,\n", - " inplace=True,\n", - " errors=\"raise\",\n", - ")\n", - "\n", - "columns_to_keep = [data_set.renamed_field for data_set in data_sets]\n", - "df = df[columns_to_keep]\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1280cbd4", - "metadata": {}, - "outputs": [], - "source": [ - "# Convert all columns to numeric.\n", - "for data_set in data_sets:\n", - " # Skip GEOID_FIELD_NAME, because it's a string.\n", - " if data_set.renamed_field == GEOID_FIELD_NAME:\n", - " continue\n", - " df[f\"{data_set.renamed_field}\"] = pd.to_numeric(df[data_set.renamed_field])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "27677132", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Calculate percentiles for each data set.\n", - "for data_set in data_sets:\n", - " df[f\"{data_set.renamed_field}{PERCENTILE_FIELD_SUFFIX}\"] 
= df[\n", - " data_set.renamed_field\n", - " ].rank(pct=True)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f2088013", - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate min-max for each data set.\n", - "# Math:\n", - "# (\n", - "# Observed value\n", - "# - minimum of all values\n", - "# )\n", - "# divided by\n", - "# (\n", - "# Maximum of all values\n", - "# - minimum of all values\n", - "# )\n", - "for data_set in data_sets:\n", - " # Skip GEOID_FIELD_NAME, because it's a string.\n", - " if data_set.renamed_field == GEOID_FIELD_NAME:\n", - " continue\n", - "\n", - " min_value = df[data_set.renamed_field].min(skipna=True)\n", - "\n", - " max_value = df[data_set.renamed_field].max(skipna=True)\n", - "\n", - " print(\n", - " f\"For data set {data_set.renamed_field}, the min value is {min_value} and the max value is {max_value}.\"\n", - " )\n", - "\n", - " df[f\"{data_set.renamed_field}{MIN_MAX_FIELD_SUFFIX}\"] = (\n", - " df[data_set.renamed_field] - min_value\n", - " ) / (max_value - min_value)\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f4eec326", - "metadata": {}, - "outputs": [], - "source": [ - "# Graph distributions and correlations.\n", - "min_max_fields = [\n", - " f\"{data_set.renamed_field}{MIN_MAX_FIELD_SUFFIX}\"\n", - " for data_set in data_sets\n", - " if data_set.renamed_field != GEOID_FIELD_NAME\n", - "]\n", - "df.hist(\n", - " column=min_max_fields, layout=(len(min_max_fields), 1), figsize=(10, 30), bins=30\n", - ")\n", - "\n", - "plt.tight_layout()\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f7b864f", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Calculate score \"A\" and score \"B\"\n", - "df[\"Score A\"] = df[\n", - " [\n", - " \"Poverty (Less than 200% of federal poverty line) (percentile)\",\n", - " \"Percent individuals age 25 or over with less than high school degree (percentile)\",\n", - " ]\n", - "].mean(axis=1)\n", - "df[\"Score B\"] = (\n", - " df[\"Poverty (Less than 200% of federal poverty line) (percentile)\"]\n", - " * df[\n", - " \"Percent individuals age 25 or over with less than high school degree (percentile)\"\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0c107baf", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Calculate \"CalEnviroScreen for the US\" score\n", - "# Average all the percentile values in each bucket into a single score for each of the four buckets.\n", - "for bucket in BUCKETS:\n", - " fields_in_bucket = [\n", - " f\"{data_set.renamed_field}{PERCENTILE_FIELD_SUFFIX}\"\n", - " for data_set in data_sets\n", - " if data_set.bucket == bucket\n", - " ]\n", - " df[f\"{bucket}\"] = df[fields_in_bucket].mean(axis=1)\n", - "\n", - "# Combine the score from the two Exposures and Environmental Effects buckets into a single score called \"Pollution Burden\". 
The math for this score is: (1.0 * Exposures Score + 0.5 * Environment Effects score) / 1.5.\n", - "df[AGGREGATION_POLLUTION] = (\n", - " 1.0 * df[f\"{BUCKET_EXPOSURES}\"] + 0.5 * df[f\"{BUCKET_ENVIRONMENTAL}\"]\n", - ") / 1.5\n", - "\n", - "# Average the score from the two Sensitive populations and Socioeconomic factors buckets into a single score called \"Population Characteristics\".\n", - "df[AGGREGATION_POPULATION] = df[\n", - " [f\"{BUCKET_SENSITIVE}\", f\"{BUCKET_SOCIOECONOMIC}\"]\n", - "].mean(axis=1)\n", - "\n", - "# Multiply the \"Pollution Burden\" score and the \"Population Characteristics\" together to produce the cumulative impact score.\n", - "df[\"Score C\"] = df[AGGREGATION_POLLUTION] * df[AGGREGATION_POPULATION]\n", - "\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f70106f5", - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate scores D and E.\n", - "fields_to_use_in_score_d_and_e = [\n", - " UNEMPLOYED_FIELD_NAME,\n", - " LINGUISTIC_ISOLATION_FIELD_NAME,\n", - " HOUSING_BURDEN_FIELD_NAME,\n", - " POVERTY_FIELD_NAME,\n", - " HIGH_SCHOOL_FIELD_NAME,\n", - "]\n", - "\n", - "fields_min_max = [\n", - " f\"{field}{MIN_MAX_FIELD_SUFFIX}\" for field in fields_to_use_in_score_d_and_e\n", - "]\n", - "fields_percentile = [\n", - " f\"{field}{PERCENTILE_FIELD_SUFFIX}\" for field in fields_to_use_in_score_d_and_e\n", - "]\n", - "\n", - "# Calculate \"Score D\", which uses min-max normalization\n", - "# and calculate \"Score E\", which uses percentile normalization for the same fields\n", - "df[\"Score D\"] = df[fields_min_max].mean(axis=1)\n", - "df[\"Score E\"] = df[fields_percentile].mean(axis=1)\n", - "\n", - "print(df[\"Score D\"].describe())\n", - "print(df[\"Score E\"].describe())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a02e5bac", - "metadata": {}, - "outputs": [], - "source": [ - "# Graph distributions\n", - "df.hist(\n", - " column=fields_min_max, layout=(len(fields_min_max), 1), figsize=(10, 30), bins=30\n", - ")\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0e608c8", - "metadata": {}, - "outputs": [], - "source": [ - "# Calculate correlations\n", - "df[fields_min_max].corr()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "729aed12", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Create percentiles for the scores\n", - "for score_field in [\"Score A\", \"Score B\", \"Score C\", \"Score D\", \"Score E\"]:\n", - " df[f\"{score_field}{PERCENTILE_FIELD_SUFFIX}\"] = df[score_field].rank(pct=True)\n", - " df[f\"{score_field} (top 25th percentile)\"] = (\n", - " df[f\"{score_field}{PERCENTILE_FIELD_SUFFIX}\"] >= 0.75\n", - " )\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3a65af4", - "metadata": {}, - "outputs": [], - "source": [ - "# write nationwide csv\n", - "df.to_csv(SCORE_CSV_PATH / f\"usa.csv\", index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "58ddd8b3", - "metadata": {}, - "outputs": [], - "source": [ - "# write per state csvs\n", - "for states_fips in get_state_fips_codes(DATA_PATH):\n", - " print(f\"Generating data{states_fips} csv\")\n", - " df1 = df[df[\"GEOID10\"].str[:2] == states_fips]\n", - " # we need to name the file data01.csv for ogr2ogr csv merge to work\n", - " df1.to_csv(SCORE_CSV_PATH / f\"data{states_fips}.csv\", index=False)" - ] - } - ], - "metadata": { - 
"kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.1" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/score/poetry.lock b/score/poetry.lock index 08ffd4cf..6b12b2e3 100644 --- a/score/poetry.lock +++ b/score/poetry.lock @@ -76,6 +76,8 @@ mypy-extensions = ">=0.4.3" pathspec = ">=0.8.1,<1" regex = ">=2020.1.8" toml = ">=0.10.1" +typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\""} +typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} [package.extras] colorama = ["colorama (>=0.4.3)"] @@ -118,7 +120,7 @@ python-versions = "*" [[package]] name = "cffi" -version = "1.14.5" +version = "1.14.6" description = "Foreign Function Interface for Python calling C code." category = "main" optional = false @@ -145,6 +147,7 @@ python-versions = ">=3.6" [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} [[package]] name = "colorama" @@ -154,6 +157,14 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "debugpy" +version = "1.3.0" +description = "An implementation of the Debug Adapter Protocol for Python" +category = "main" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" + [[package]] name = "decorator" version = "5.0.9" @@ -203,27 +214,45 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "importlib-metadata" +version = "3.10.1" +description = "Read metadata from Python packages" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] + [[package]] name = "ipykernel" -version = "5.5.5" +version = "6.0.1" description = "IPython Kernel for Jupyter" category = "main" optional = false -python-versions = ">=3.5" +python-versions = ">=3.7" [package.dependencies] -appnope = {version = "*", markers = "platform_system == \"Darwin\""} -ipython = ">=5.0.0" +debugpy = ">=1.0.0" +importlib-metadata = {version = "<4", markers = "python_version < \"3.8.0\""} +ipython = ">=7.23.1" jupyter-client = "*" +matplotlib-inline = {version = ">=0.1.0,<0.2.0appnope", markers = "platform_system == \"Darwin\""} tornado = ">=4.2" traitlets = ">=4.1.0" [package.extras] -test = ["pytest (!=5.3.4)", "pytest-cov", "flaky", "nose", "jedi (<=0.17.2)"] +test = ["pytest (!=5.3.4)", "pytest-cov", "flaky", "nose", "ipyparallel"] [[package]] name = "ipython" -version = "7.24.1" +version = "7.25.0" description = "IPython: Productive Interactive Computing" category = "main" optional = false @@ -319,6 +348,7 @@ python-versions = "*" [package.dependencies] attrs = ">=17.4.0" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} 
pyrsistent = ">=0.14.0" six = ">=1.11.0" @@ -547,6 +577,7 @@ python-versions = ">=3.5" [package.dependencies] mypy-extensions = ">=0.4.3,<0.5.0" toml = "*" +typed-ast = {version = ">=1.4.0,<1.5.0", markers = "python_version < \"3.8\""} typing-extensions = ">=3.7.4" [package.extras] @@ -676,30 +707,30 @@ python-versions = ">=3.7" [[package]] name = "packaging" -version = "20.9" +version = "21.0" description = "Core utilities for Python packages" category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.6" [package.dependencies] pyparsing = ">=2.0.2" [[package]] name = "pandas" -version = "1.2.5" +version = "1.3.0" description = "Powerful data structures for data analysis, time series, and statistics" category = "main" optional = false python-versions = ">=3.7.1" [package.dependencies] -numpy = ">=1.16.5" +numpy = ">=1.17.3" python-dateutil = ">=2.7.3" pytz = ">=2017.3" [package.extras] -test = ["pytest (>=5.0.1)", "pytest-xdist", "hypothesis (>=3.58)"] +test = ["hypothesis (>=3.58)", "pytest (>=6.0)", "pytest-xdist"] [[package]] name = "pandocfilters" @@ -812,11 +843,11 @@ python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" [[package]] name = "pyrsistent" -version = "0.17.3" +version = "0.18.0" description = "Persistent/Functional/Immutable data structures" category = "main" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" [[package]] name = "python-dateutil" @@ -847,7 +878,7 @@ python-versions = "*" [[package]] name = "pywinpty" -version = "1.1.2" +version = "1.1.3" description = "Pseudo terminal support for Windows from Python." category = "main" optional = false @@ -875,7 +906,7 @@ py = {version = "*", markers = "implementation_name == \"pypy\""} [[package]] name = "qtconsole" -version = "5.1.0" +version = "5.1.1" description = "Jupyter Qt console" category = "main" optional = false @@ -905,7 +936,7 @@ python-versions = "*" [[package]] name = "regex" -version = "2021.4.4" +version = "2021.7.6" description = "Alternative regular expression module, to replace re." category = "dev" optional = false @@ -1005,6 +1036,14 @@ ipython-genutils = "*" [package.extras] test = ["pytest"] +[[package]] +name = "typed-ast" +version = "1.4.3" +description = "a fork of Python 2 and 3 ast modules with type comment support" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "types-requests" version = "2.25.0" @@ -1017,13 +1056,13 @@ python-versions = "*" name = "typing-extensions" version = "3.10.0.0" description = "Backported and Experimental Type Hints for Python 3.5+" -category = "dev" +category = "main" optional = false python-versions = "*" [[package]] name = "urllib3" -version = "1.26.5" +version = "1.26.6" description = "HTTP library with thread-safe connection pooling, file post, and more." 
category = "main" optional = false @@ -1061,10 +1100,22 @@ python-versions = "*" [package.dependencies] notebook = ">=4.4.1" +[[package]] +name = "zipp" +version = "3.5.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"] + [metadata] lock-version = "1.1" -python-versions = "^3.9" -content-hash = "93144f580490e6f83370e233e8259e884973752d42460e632eff508240339db8" +python-versions = "^3.7.1" +content-hash = "52ad5ba35e7d93fa44ce1b0040073618f990a30bdd90b8828e7b3270bbaf8385" [metadata.files] appdirs = [ @@ -1127,55 +1178,46 @@ certifi = [ {file = "certifi-2021.5.30.tar.gz", hash = "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee"}, ] cffi = [ - {file = "cffi-1.14.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:bb89f306e5da99f4d922728ddcd6f7fcebb3241fc40edebcb7284d7514741991"}, - {file = "cffi-1.14.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:34eff4b97f3d982fb93e2831e6750127d1355a923ebaeeb565407b3d2f8d41a1"}, - {file = "cffi-1.14.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:99cd03ae7988a93dd00bcd9d0b75e1f6c426063d6f03d2f90b89e29b25b82dfa"}, - {file = "cffi-1.14.5-cp27-cp27m-win32.whl", hash = "sha256:65fa59693c62cf06e45ddbb822165394a288edce9e276647f0046e1ec26920f3"}, - {file = "cffi-1.14.5-cp27-cp27m-win_amd64.whl", hash = "sha256:51182f8927c5af975fece87b1b369f722c570fe169f9880764b1ee3bca8347b5"}, - {file = "cffi-1.14.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:43e0b9d9e2c9e5d152946b9c5fe062c151614b262fda2e7b201204de0b99e482"}, - {file = "cffi-1.14.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:cbde590d4faaa07c72bf979734738f328d239913ba3e043b1e98fe9a39f8b2b6"}, - {file = "cffi-1.14.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:5de7970188bb46b7bf9858eb6890aad302577a5f6f75091fd7cdd3ef13ef3045"}, - {file = "cffi-1.14.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:a465da611f6fa124963b91bf432d960a555563efe4ed1cc403ba5077b15370aa"}, - {file = "cffi-1.14.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:d42b11d692e11b6634f7613ad8df5d6d5f8875f5d48939520d351007b3c13406"}, - {file = "cffi-1.14.5-cp35-cp35m-win32.whl", hash = "sha256:72d8d3ef52c208ee1c7b2e341f7d71c6fd3157138abf1a95166e6165dd5d4369"}, - {file = "cffi-1.14.5-cp35-cp35m-win_amd64.whl", hash = "sha256:29314480e958fd8aab22e4a58b355b629c59bf5f2ac2492b61e3dc06d8c7a315"}, - {file = "cffi-1.14.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3d3dd4c9e559eb172ecf00a2a7517e97d1e96de2a5e610bd9b68cea3925b4892"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24ec4ff2c5c0c8f9c6b87d5bb53555bf267e1e6f70e52e5a9740d32861d36b6f"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:3c3f39fa737542161d8b0d680df2ec249334cd70a8f420f71c9304bd83c3cbed"}, - {file = "cffi-1.14.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:681d07b0d1e3c462dd15585ef5e33cb021321588bebd910124ef4f4fb71aef55"}, - {file = "cffi-1.14.5-cp36-cp36m-win32.whl", hash = "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53"}, - {file = "cffi-1.14.5-cp36-cp36m-win_amd64.whl", hash = "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813"}, - {file = "cffi-1.14.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06d7cd1abac2ffd92e65c0609661866709b4b2d82dd15f611e602b9b188b0b69"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f861a89e0043afec2a51fd177a567005847973be86f709bbb044d7f42fc4e05"}, - {file = "cffi-1.14.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc5a8e069b9ebfa22e26d0e6b97d6f9781302fe7f4f2b8776c3e1daea35f1adc"}, - {file = "cffi-1.14.5-cp37-cp37m-win32.whl", hash = "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62"}, - {file = "cffi-1.14.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4"}, - {file = "cffi-1.14.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:04c468b622ed31d408fea2346bec5bbffba2cc44226302a0de1ade9f5ea3d373"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:06db6321b7a68b2bd6df96d08a5adadc1fa0e8f419226e25b2a5fbf6ccc7350f"}, - {file = "cffi-1.14.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:293e7ea41280cb28c6fcaaa0b1aa1f533b8ce060b9e701d78511e1e6c4a1de76"}, - {file = "cffi-1.14.5-cp38-cp38-win32.whl", hash = "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e"}, - {file = "cffi-1.14.5-cp38-cp38-win_amd64.whl", hash = "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396"}, - {file = "cffi-1.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux1_x86_64.whl", hash = 
"sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bf1ac1984eaa7675ca8d5745a8cb87ef7abecb5592178406e55858d411eadc0"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:df5052c5d867c1ea0b311fb7c3cd28b19df469c056f7fdcfe88c7473aa63e333"}, - {file = "cffi-1.14.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24a570cd11895b60829e941f2613a4f79df1a27344cbbb82164ef2e0116f09c7"}, - {file = "cffi-1.14.5-cp39-cp39-win32.whl", hash = "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396"}, - {file = "cffi-1.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d"}, - {file = "cffi-1.14.5.tar.gz", hash = "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"}, + {file = "cffi-1.14.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:22b9c3c320171c108e903d61a3723b51e37aaa8c81255b5e7ce102775bd01e2c"}, + {file = "cffi-1.14.6-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:f0c5d1acbfca6ebdd6b1e3eded8d261affb6ddcf2186205518f1428b8569bb99"}, + {file = "cffi-1.14.6-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:99f27fefe34c37ba9875f224a8f36e31d744d8083e00f520f133cab79ad5e819"}, + {file = "cffi-1.14.6-cp27-cp27m-win32.whl", hash = "sha256:55af55e32ae468e9946f741a5d51f9896da6b9bf0bbdd326843fec05c730eb20"}, + {file = "cffi-1.14.6-cp27-cp27m-win_amd64.whl", hash = "sha256:7bcac9a2b4fdbed2c16fa5681356d7121ecabf041f18d97ed5b8e0dd38a80224"}, + {file = "cffi-1.14.6-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:ed38b924ce794e505647f7c331b22a693bee1538fdf46b0222c4717b42f744e7"}, + {file = "cffi-1.14.6-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:e22dcb48709fc51a7b58a927391b23ab37eb3737a98ac4338e2448bef8559b33"}, + {file = "cffi-1.14.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e8c6a99be100371dbb046880e7a282152aa5d6127ae01783e37662ef73850d8f"}, + {file = "cffi-1.14.6-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:19ca0dbdeda3b2615421d54bef8985f72af6e0c47082a8d26122adac81a95872"}, + {file = "cffi-1.14.6-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d950695ae4381ecd856bcaf2b1e866720e4ab9a1498cba61c602e56630ca7195"}, + {file = "cffi-1.14.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9dc245e3ac69c92ee4c167fbdd7428ec1956d4e754223124991ef29eb57a09d"}, + {file = "cffi-1.14.6-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8661b2ce9694ca01c529bfa204dbb144b275a31685a075ce123f12331be790b"}, + {file = "cffi-1.14.6-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b315d709717a99f4b27b59b021e6207c64620790ca3e0bde636a6c7f14618abb"}, + {file = "cffi-1.14.6-cp36-cp36m-win32.whl", hash = "sha256:80b06212075346b5546b0417b9f2bf467fea3bfe7352f781ffc05a8ab24ba14a"}, + {file = "cffi-1.14.6-cp36-cp36m-win_amd64.whl", hash = "sha256:a9da7010cec5a12193d1af9872a00888f396aba3dc79186604a09ea3ee7c029e"}, + {file = "cffi-1.14.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4373612d59c404baeb7cbd788a18b2b2a8331abcc84c3ba40051fcd18b17a4d5"}, + {file = "cffi-1.14.6-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:f10afb1004f102c7868ebfe91c28f4a712227fe4cb24974350ace1f90e1febbf"}, + {file = 
"cffi-1.14.6-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:fd4305f86f53dfd8cd3522269ed7fc34856a8ee3709a5e28b2836b2db9d4cd69"}, + {file = "cffi-1.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d6169cb3c6c2ad50db5b868db6491a790300ade1ed5d1da29289d73bbe40b56"}, + {file = "cffi-1.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d4b68e216fc65e9fe4f524c177b54964af043dde734807586cf5435af84045c"}, + {file = "cffi-1.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33791e8a2dc2953f28b8d8d300dde42dd929ac28f974c4b4c6272cb2955cb762"}, + {file = "cffi-1.14.6-cp37-cp37m-win32.whl", hash = "sha256:0c0591bee64e438883b0c92a7bed78f6290d40bf02e54c5bf0978eaf36061771"}, + {file = "cffi-1.14.6-cp37-cp37m-win_amd64.whl", hash = "sha256:8eb687582ed7cd8c4bdbff3df6c0da443eb89c3c72e6e5dcdd9c81729712791a"}, + {file = "cffi-1.14.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba6f2b3f452e150945d58f4badd92310449876c4c954836cfb1803bdd7b422f0"}, + {file = "cffi-1.14.6-cp38-cp38-manylinux1_i686.whl", hash = "sha256:64fda793737bc4037521d4899be780534b9aea552eb673b9833b01f945904c2e"}, + {file = "cffi-1.14.6-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9f3e33c28cd39d1b655ed1ba7247133b6f7fc16fa16887b120c0c670e35ce346"}, + {file = "cffi-1.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26bb2549b72708c833f5abe62b756176022a7b9a7f689b571e74c8478ead51dc"}, + {file = "cffi-1.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb687a11f0a7a1839719edd80f41e459cc5366857ecbed383ff376c4e3cc6afd"}, + {file = "cffi-1.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d2ad4d668a5c0645d281dcd17aff2be3212bc109b33814bbb15c4939f44181cc"}, + {file = "cffi-1.14.6-cp38-cp38-win32.whl", hash = "sha256:487d63e1454627c8e47dd230025780e91869cfba4c753a74fda196a1f6ad6548"}, + {file = "cffi-1.14.6-cp38-cp38-win_amd64.whl", hash = "sha256:c33d18eb6e6bc36f09d793c0dc58b0211fccc6ae5149b808da4a62660678b156"}, + {file = "cffi-1.14.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:06c54a68935738d206570b20da5ef2b6b6d92b38ef3ec45c5422c0ebaf338d4d"}, + {file = "cffi-1.14.6-cp39-cp39-manylinux1_i686.whl", hash = "sha256:f174135f5609428cc6e1b9090f9268f5c8935fddb1b25ccb8255a2d50de6789e"}, + {file = "cffi-1.14.6-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:f3ebe6e73c319340830a9b2825d32eb6d8475c1dac020b4f0aa774ee3b898d1c"}, + {file = "cffi-1.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c8d896becff2fa653dc4438b54a5a25a971d1f4110b32bd3068db3722c80202"}, + {file = "cffi-1.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4922cd707b25e623b902c86188aca466d3620892db76c0bdd7b99a3d5e61d35f"}, + {file = "cffi-1.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9e005e9bd57bc987764c32a1bee4364c44fdc11a3cc20a40b93b444984f2b87"}, + {file = "cffi-1.14.6-cp39-cp39-win32.whl", hash = "sha256:eb9e2a346c5238a30a746893f23a9535e700f8192a68c07c0258e7ece6ff3728"}, + {file = "cffi-1.14.6-cp39-cp39-win_amd64.whl", hash = "sha256:818014c754cd3dba7229c0f5884396264d51ffb87ec86e927ef0be140bfdb0d2"}, + {file = "cffi-1.14.6.tar.gz", hash = "sha256:c9a875ce9d7fe32887784274dd533c57909b7b1dcadcc128a2ac21331a9765dd"}, ] chardet = [ {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, @@ -1189,6 +1231,64 @@ colorama = [ {file = 
"colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] +debugpy = [ + {file = "debugpy-1.3.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:54109c9cbce8e96986a943812de8536d001130bce27d1a370b0c39bc7d6ef619"}, + {file = "debugpy-1.3.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:cd3e74f465bb71122481c27688cf09a3dd13fae18df30abfd51e513811fc7873"}, + {file = "debugpy-1.3.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:98c76193a924baddfbffd329a03d9d5722b0ea86a777db40263f257555ab0dba"}, + {file = "debugpy-1.3.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:313bcd88d40a65a6a9032ecd3aa83099f759839ec80677bac70285aa025112ba"}, + {file = "debugpy-1.3.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:1478532ed5d29626cf2acbe58213a22ce6d86af9b57716d2e4824a5ae750418b"}, + {file = "debugpy-1.3.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:0c017a26a489cf6c57fd9a51ec33718275d15cbb19cc29097e7efb0492a1def4"}, + {file = "debugpy-1.3.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d53e7b8dba67b390b43d891fd5459c49499fb274748ced89cada1f7dad95c414"}, + {file = "debugpy-1.3.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:dd1f907b2ea8b57dd26c315bd5c907a147f9b5f28ffec092c2572cab6d57e332"}, + {file = "debugpy-1.3.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:8ca653751aa728cf620c8fddc9c6200511fcc2e7d0a6ed615d246fdca1df5201"}, + {file = "debugpy-1.3.0-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:de92459af4b0079437fae79f10469488ef1566942028847e4bac780e079a5a88"}, + {file = "debugpy-1.3.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:996439d56a0a2f38ea2c0a4d88874a56815585120a3dedd03422b1e3678875f1"}, + {file = "debugpy-1.3.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:a696ac566adc8b6aca3e7eb3bd2bd7b71d61f4721f42bf2e504f4166769ea4d3"}, + {file = "debugpy-1.3.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:04b6730cc4149d3fd947e351e8a2cf18cd31fd4c8ba46872921dd54c4eee2acc"}, + {file = "debugpy-1.3.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:81cfd83a911b454c36b677d0bc722c35acd978e1856d5550e71c1226af9c143c"}, + {file = "debugpy-1.3.0-cp35-cp35m-manylinux2014_i686.whl", hash = "sha256:e6f344db72fa9773ab52a1f527bb1b517e8426a13611a68aae5db587d1996bc1"}, + {file = "debugpy-1.3.0-cp35-cp35m-manylinux2014_x86_64.whl", hash = "sha256:c28a4a74082bf7c06553e5002ad505d4119d0b4425a70570368082bcb222d8f2"}, + {file = "debugpy-1.3.0-cp35-cp35m-win32.whl", hash = "sha256:37d06369b46d2013768494cf18e0568834d89ba52698a695358d12411ac9cf65"}, + {file = "debugpy-1.3.0-cp35-cp35m-win_amd64.whl", hash = "sha256:0777fff5d8ce086383bbb6017ab7a4300f29c02565aa72a4533f0c815898d44b"}, + {file = "debugpy-1.3.0-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:e658b89c9e3eab39bbbe56d3e086ffc0b3266817788cb5aa6669f194620b3951"}, + {file = "debugpy-1.3.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:61c6c77b3ea3098dfd78f2ff4ce27565145a293af995f817f2475d02a2145b6d"}, + {file = "debugpy-1.3.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fab455f6c811f98f3d669b23eb99623200929eef9c0a8a8f1052aeba89346f93"}, + {file = "debugpy-1.3.0-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:91ff6c5ea619a0a3bfdc49587d2f05198c1849d8888632f96d2f855e4e88a21a"}, + {file = "debugpy-1.3.0-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:709bc213b0b31665e00a3547cb92b2760b948b6473dbd56fe0a5ff1fa1202e80"}, + {file = 
"debugpy-1.3.0-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:813075f9ff6795187417109fff11819b23a92169b98b56837d2a9c06eb81f15e"}, + {file = "debugpy-1.3.0-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:d15b0be81c9a448346ed0a7c19d9c88f60ccfb53f66e5e4ec99320d9dcd4fe4e"}, + {file = "debugpy-1.3.0-cp36-cp36m-win32.whl", hash = "sha256:1b7929baf506d897d170adbb9a99b83b6453acb2d7b10780eb46cb697522529c"}, + {file = "debugpy-1.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:062b87923f78636217617c8de2c16c9846612f30d12f3b51c0eb194739963003"}, + {file = "debugpy-1.3.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:231851bec777e210cebb247b8a57ae35d4bc213b190b05d95556e52a0a765ccf"}, + {file = "debugpy-1.3.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:a332717a0778d55ca4629fb0b4a016affa06151a9822af940552497a77aac7ce"}, + {file = "debugpy-1.3.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:bcdffa215de49033aac273facbc4c2413a137b6e2b6694ac7ae04a88f38e4eba"}, + {file = "debugpy-1.3.0-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:33ce42e58977d811d974a1f30352d2822a0f2e7160f0e6211753da3027fcf442"}, + {file = "debugpy-1.3.0-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:71f634cf1eb52c825a000300e031c52e789337754237745a4d31560ce0041c9c"}, + {file = "debugpy-1.3.0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:8e26ce355631f80f044bf0c97fd2d8db0b83b43b6fa8abac956108e58c79f522"}, + {file = "debugpy-1.3.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:72c3cb415cdf42c7ff26ee2aebe3095bc136ed3065d1f60d76feebe47b1980a6"}, + {file = "debugpy-1.3.0-cp37-cp37m-win32.whl", hash = "sha256:9b4304cc2ddedcefdc7ac0d6499a246aff6c981b58bfbd89f4103c0584e200e5"}, + {file = "debugpy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:bd307ceabb2b17328e84cc0416bd6c0181de78d4f920510017f4fc7590afc2d9"}, + {file = "debugpy-1.3.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:b3e2d0256736e77acfa1c05c35ed0f7b00a17a7d7da45e47d0705c5a2fc31256"}, + {file = "debugpy-1.3.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:de28c434abb8179b05afaa8a0447fff36980f397ef6c64a6c825a26c5258b67f"}, + {file = "debugpy-1.3.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:9c858b3bc1a28b30d06df0bdb02a7a5e7a146f986b0d5e4c438cc1940d121bce"}, + {file = "debugpy-1.3.0-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:028fd23004a4f86e37767efa1c285ee74ee2c5cd9b02f9dff62be0ce17429ad9"}, + {file = "debugpy-1.3.0-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:068db6d85b69500f76fb28ac2b8d6dcedb6d9e405fbffb39489651eb56e793f0"}, + {file = "debugpy-1.3.0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:0ba4dd246588740f17725841be08c7368c1f2df706bb65dd85998c5809809c8e"}, + {file = "debugpy-1.3.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:2a8246403058457e8f777853af52a61402cf8596d6b9442de1112038495b5603"}, + {file = "debugpy-1.3.0-cp38-cp38-win32.whl", hash = "sha256:d678f48f2fd14716839e7e5b560eacbebddb0cc95832998dd020010e20a1cd9e"}, + {file = "debugpy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:7a1df03e909e8b3f9eb45e2d3495e290df8fe9df1b903957b144125635b5ecf6"}, + {file = "debugpy-1.3.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:7cd804d531e6c932ffb87766746bca111c9470b6c7877340df9ed3edd66d7c7c"}, + {file = "debugpy-1.3.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:5f7aeae9c8d7b77d8bad23d82723585949d4ef32fc4eb769e28f1d33319a28b0"}, + {file = "debugpy-1.3.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:a24d65a295875d6f7b063bbc100240523537aff3380d33c1205819ebf213e340"}, + {file = 
"debugpy-1.3.0-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:63acc9e755c1ae426c223b0596ac098b773a633091121c997086b7bd50faa1e0"}, + {file = "debugpy-1.3.0-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:4558ac356f3a6d46d3b3fb92bf4c053b87fd3903cf4022f10425e811c62a0514"}, + {file = "debugpy-1.3.0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:d1254de50f25623df4ff90512f4dd5734874438680f6ad284daa9af1c622f504"}, + {file = "debugpy-1.3.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:29252f8253b1cbd5a4786d41d0d44835bd8152f910af109a48eebf1d0b66a40c"}, + {file = "debugpy-1.3.0-cp39-cp39-win32.whl", hash = "sha256:9c3cb1f0324dcaf5e1dcc64013dbe959112724c8f58a558fc804741a54a90f14"}, + {file = "debugpy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:68905f3bc59b7d903724e040f80bd89c9d649d67473f09d6912908a4c46f971e"}, + {file = "debugpy-1.3.0-py2.py3-none-any.whl", hash = "sha256:8e3002cfb2ebf570f19fd060950e459a071630f6767f7e44804ac5a67ef57baf"}, + {file = "debugpy-1.3.0.zip", hash = "sha256:71ab9068e87a28cfbb7a7db041a946ac5493d45d0c61280021af038e14a64232"}, +] decorator = [ {file = "decorator-5.0.9-py3-none-any.whl", hash = "sha256:6e5c199c16f7a9f0e3a61a4a54b3d27e7dad0dbdde92b944426cb20914376323"}, {file = "decorator-5.0.9.tar.gz", hash = "sha256:72ecfba4320a893c53f9706bebb2d55c270c1e51a28789361aa93e4a21319ed5"}, @@ -1209,13 +1309,17 @@ idna = [ {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, ] +importlib-metadata = [ + {file = "importlib_metadata-3.10.1-py3-none-any.whl", hash = "sha256:2ec0faae539743ae6aaa84b49a169670a465f7f5d64e6add98388cc29fd1f2f6"}, + {file = "importlib_metadata-3.10.1.tar.gz", hash = "sha256:c9356b657de65c53744046fa8f7358afe0714a1af7d570c00c3835c2d724a7c1"}, +] ipykernel = [ - {file = "ipykernel-5.5.5-py3-none-any.whl", hash = "sha256:29eee66548ee7c2edb7941de60c0ccf0a7a8dd957341db0a49c5e8e6a0fcb712"}, - {file = "ipykernel-5.5.5.tar.gz", hash = "sha256:e976751336b51082a89fc2099fb7f96ef20f535837c398df6eab1283c2070884"}, + {file = "ipykernel-6.0.1-py3-none-any.whl", hash = "sha256:9a8576cb70a70cc8c63b0b6671e5f4767917071204653a5934e9b2c8680cec74"}, + {file = "ipykernel-6.0.1.tar.gz", hash = "sha256:a4f51c53c7be3f93d75c25839183fa2dfa24908fc650dfd023b276c7a080dc73"}, ] ipython = [ - {file = "ipython-7.24.1-py3-none-any.whl", hash = "sha256:d513e93327cf8657d6467c81f1f894adc125334ffe0e4ddd1abbb1c78d828703"}, - {file = "ipython-7.24.1.tar.gz", hash = "sha256:9bc24a99f5d19721fb8a2d1408908e9c0520a17fff2233ffe82620847f17f1b6"}, + {file = "ipython-7.25.0-py3-none-any.whl", hash = "sha256:aa21412f2b04ad1a652e30564fff6b4de04726ce875eab222c8430edc6db383a"}, + {file = "ipython-7.25.0.tar.gz", hash = "sha256:54bbd1fe3882457aaf28ae060a5ccdef97f212a741754e420028d4ec5c2291dc"}, ] ipython-genutils = [ {file = "ipython_genutils-0.2.0-py2.py3-none-any.whl", hash = "sha256:72dd37233799e619666c9f639a9da83c34013a73e8bbc79a7a6348d93c61fab8"}, @@ -1452,28 +1556,29 @@ numpy = [ {file = "numpy-1.21.0.zip", hash = "sha256:e80fe25cba41c124d04c662f33f6364909b985f2eb5998aaa5ae4b9587242cce"}, ] packaging = [ - {file = "packaging-20.9-py2.py3-none-any.whl", hash = "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"}, - {file = "packaging-20.9.tar.gz", hash = "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5"}, + {file = "packaging-21.0-py3-none-any.whl", hash = 
"sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"}, + {file = "packaging-21.0.tar.gz", hash = "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7"}, ] pandas = [ - {file = "pandas-1.2.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1102d719038e134e648e7920672188a00375f3908f0383fd3b202fbb9d2c3a95"}, - {file = "pandas-1.2.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:38e7486410de23069392bdf1dc7297ae75d2d67531750753f3149c871cd1c6e3"}, - {file = "pandas-1.2.5-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:94ca6ea3f46f44a979a38a4d5a70a88cee734f7248d7aeeed202e6b3ba485af1"}, - {file = "pandas-1.2.5-cp37-cp37m-win32.whl", hash = "sha256:821d92466fcd2826656374a9b6fe4f2ec2ba5e370cce71d5a990577929d948df"}, - {file = "pandas-1.2.5-cp37-cp37m-win_amd64.whl", hash = "sha256:0dbd125b0e44e5068163cbc9080a00db1756a5e36309329ae14fd259747f2300"}, - {file = "pandas-1.2.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7b09293c7119ab22ab3f7f086f813ac2acbfa3bcaaaeb650f4cddfb5b9fa9be4"}, - {file = "pandas-1.2.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc9215dd1dd836ff26b896654e66b2dfcf4bbb18aa4c1089a79bab527b665a90"}, - {file = "pandas-1.2.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e36515163829e0e95a6af10820f178dd8768102482c01872bff8ae592e508e58"}, - {file = "pandas-1.2.5-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0c34b89215f984a9e4956446e0a29330d720085efa08ea72022387ee37d8b373"}, - {file = "pandas-1.2.5-cp38-cp38-win32.whl", hash = "sha256:f20e4b8a7909f5a0c0a9e745091e3ea18b45af9f73496a4d498688badbdac7ea"}, - {file = "pandas-1.2.5-cp38-cp38-win_amd64.whl", hash = "sha256:9244fb0904512b074d8c6362fb13aac1da6c4db94372760ddb2565c620240264"}, - {file = "pandas-1.2.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c862cd72353921c102166784fc4db749f1c3b691dd017fc36d9df2c67a9afe4e"}, - {file = "pandas-1.2.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e6edddeac9a8e473391d2d2067bb3c9dc7ad79fd137af26a39ee425c2b4c78"}, - {file = "pandas-1.2.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a67227e17236442c6bc31c02cb713b5277b26eee204eac14b5aecba52492e3a3"}, - {file = "pandas-1.2.5-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4bfbf62b00460f78a8bc4407112965c5ab44324f34551e8e1f4cac271a07706c"}, - {file = "pandas-1.2.5-cp39-cp39-win32.whl", hash = "sha256:25fc8ef6c6beb51c9224284a1ad89dfb591832f23ceff78845f182de35c52356"}, - {file = "pandas-1.2.5-cp39-cp39-win_amd64.whl", hash = "sha256:78de96c1174bcfdbe8dece9c38c2d7994e407fd8bb62146bb46c61294bcc06ef"}, - {file = "pandas-1.2.5.tar.gz", hash = "sha256:14abb8ea73fce8aebbb1fb44bec809163f1c55241bcc1db91c2c780e97265033"}, + {file = "pandas-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c81b8d91e9ae861eb4406b4e0f8d4dabbc105b9c479b3d1e921fba1d35b5b62a"}, + {file = "pandas-1.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08eeff3da6a188e24db7f292b39a8ca9e073bf841fbbeadb946b3ad5c19d843e"}, + {file = "pandas-1.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:88864c1e28353b958b1f30e4193818519624ad9a1776921622a6a2a016d5d807"}, + {file = "pandas-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:872aa91e0f9ca913046ab639d4181a899f5e592030d954d28c2529b88756a736"}, + {file = "pandas-1.3.0-cp37-cp37m-win32.whl", hash = 
"sha256:92835113a67cbd34747c198d41f09f4b63f6fe11ca5643baebc7ab1e30e89e95"}, + {file = "pandas-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:7d3cd2c99faa94d717ca00ea489264a291ad7209453dffbf059bfb7971fd3a61"}, + {file = "pandas-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:823737830364d0e2af8c3912a28ba971296181a07950873492ed94e12d28c405"}, + {file = "pandas-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c746876cdd8380be0c3e70966d4566855901ac9aaa5e4b9ccaa5ca5311457d11"}, + {file = "pandas-1.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe7a549d10ca534797095586883a5c17d140d606747591258869c56e14d1b457"}, + {file = "pandas-1.3.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:f058c786e7b0a9e7fa5e0b9f4422e0ccdd3bf3aa3053c18d77ed2a459bd9a45a"}, + {file = "pandas-1.3.0-cp38-cp38-win32.whl", hash = "sha256:98efc2d4983d5bb47662fe2d97b2c81b91566cb08b266490918b9c7d74a5ef64"}, + {file = "pandas-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e6b75091fa54a53db3927b4d1bc997c23c5ba6f87acdfe1ee5a92c38c6b2ed6a"}, + {file = "pandas-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1ff13eed501e07e7fb26a4ea18a846b6e5d7de549b497025601fd9ccb7c1d123"}, + {file = "pandas-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:798675317d0e4863a92a9a6bc5bd2490b5f6fef8c17b95f29e2e33f28bef9eca"}, + {file = "pandas-1.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ed4fc66f23fe17c93a5d439230ca2d6b5f8eac7154198d327dbe8a16d98f3f10"}, + {file = "pandas-1.3.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:522bfea92f3ef6207cadc7428bda1e7605dae0383b8065030e7b5d0266717b48"}, + {file = "pandas-1.3.0-cp39-cp39-win32.whl", hash = "sha256:7897326cae660eee69d501cbfa950281a193fcf407393965e1bc07448e1cc35a"}, + {file = "pandas-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:b10d7910ae9d7920a5ff7816d794d99acbc361f7b16a0f017d4fa83ced8cb55e"}, + {file = "pandas-1.3.0.tar.gz", hash = "sha256:c554e6c9cf2d5ea1aba5979cc837b3649539ced0e18ece186f055450c86622e2"}, ] pandocfilters = [ {file = "pandocfilters-1.4.3.tar.gz", hash = "sha256:bc63fbb50534b4b1f8ebe1860889289e8af94a23bff7445259592df25a3906eb"}, @@ -1523,7 +1628,27 @@ pyparsing = [ {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, ] pyrsistent = [ - {file = "pyrsistent-0.17.3.tar.gz", hash = "sha256:2e636185d9eb976a18a8a8e96efce62f2905fea90041958d8cc2a189756ebf3e"}, + {file = "pyrsistent-0.18.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f4c8cabb46ff8e5d61f56a037974228e978f26bfefce4f61a4b1ac0ba7a2ab72"}, + {file = "pyrsistent-0.18.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:da6e5e818d18459fa46fac0a4a4e543507fe1110e808101277c5a2b5bab0cd2d"}, + {file = "pyrsistent-0.18.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:5e4395bbf841693eaebaa5bb5c8f5cdbb1d139e07c975c682ec4e4f8126e03d2"}, + {file = "pyrsistent-0.18.0-cp36-cp36m-win32.whl", hash = "sha256:527be2bfa8dc80f6f8ddd65242ba476a6c4fb4e3aedbf281dfbac1b1ed4165b1"}, + {file = "pyrsistent-0.18.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2aaf19dc8ce517a8653746d98e962ef480ff34b6bc563fc067be6401ffb457c7"}, + {file = "pyrsistent-0.18.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:58a70d93fb79dc585b21f9d72487b929a6fe58da0754fa4cb9f279bb92369396"}, + {file = "pyrsistent-0.18.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:4916c10896721e472ee12c95cdc2891ce5890898d2f9907b1b4ae0f53588b710"}, + 
{file = "pyrsistent-0.18.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:73ff61b1411e3fb0ba144b8f08d6749749775fe89688093e1efef9839d2dcc35"}, + {file = "pyrsistent-0.18.0-cp37-cp37m-win32.whl", hash = "sha256:b29b869cf58412ca5738d23691e96d8aff535e17390128a1a52717c9a109da4f"}, + {file = "pyrsistent-0.18.0-cp37-cp37m-win_amd64.whl", hash = "sha256:097b96f129dd36a8c9e33594e7ebb151b1515eb52cceb08474c10a5479e799f2"}, + {file = "pyrsistent-0.18.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:772e94c2c6864f2cd2ffbe58bb3bdefbe2a32afa0acb1a77e472aac831f83427"}, + {file = "pyrsistent-0.18.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:c1a9ff320fa699337e05edcaae79ef8c2880b52720bc031b219e5b5008ebbdef"}, + {file = "pyrsistent-0.18.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cd3caef37a415fd0dae6148a1b6957a8c5f275a62cca02e18474608cb263640c"}, + {file = "pyrsistent-0.18.0-cp38-cp38-win32.whl", hash = "sha256:e79d94ca58fcafef6395f6352383fa1a76922268fa02caa2272fff501c2fdc78"}, + {file = "pyrsistent-0.18.0-cp38-cp38-win_amd64.whl", hash = "sha256:a0c772d791c38bbc77be659af29bb14c38ced151433592e326361610250c605b"}, + {file = "pyrsistent-0.18.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d5ec194c9c573aafaceebf05fc400656722793dac57f254cd4741f3c27ae57b4"}, + {file = "pyrsistent-0.18.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:6b5eed00e597b5b5773b4ca30bd48a5774ef1e96f2a45d105db5b4ebb4bca680"}, + {file = "pyrsistent-0.18.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:48578680353f41dca1ca3dc48629fb77dfc745128b56fc01096b2530c13fd426"}, + {file = "pyrsistent-0.18.0-cp39-cp39-win32.whl", hash = "sha256:f3ef98d7b76da5eb19c37fda834d50262ff9167c65658d1d8f974d2e4d90676b"}, + {file = "pyrsistent-0.18.0-cp39-cp39-win_amd64.whl", hash = "sha256:404e1f1d254d314d55adb8d87f4f465c8693d6f902f67eb6ef5b4526dc58e6ea"}, + {file = "pyrsistent-0.18.0.tar.gz", hash = "sha256:773c781216f8c2900b42a7b638d5b517bb134ae1acbebe4d1e8f1f41ea60eb4b"}, ] python-dateutil = [ {file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"}, @@ -1546,11 +1671,11 @@ pywin32 = [ {file = "pywin32-301-cp39-cp39-win_amd64.whl", hash = "sha256:87604a4087434cd814ad8973bd47d6524bd1fa9e971ce428e76b62a5e0860fdf"}, ] pywinpty = [ - {file = "pywinpty-1.1.2-cp36-none-win_amd64.whl", hash = "sha256:7bb1b8380bc71bf04a983e803746b1ea7b8a91765723a82e108df81538b258c1"}, - {file = "pywinpty-1.1.2-cp37-none-win_amd64.whl", hash = "sha256:951f1b988c2407e9bd0c5c9b199f588673769abf0c8cb4724a01bc0666b97b0a"}, - {file = "pywinpty-1.1.2-cp38-none-win_amd64.whl", hash = "sha256:b3a38a0afb63b639ca4f78f67f4f8caa78ca470bd71b146480ef37d86cc99823"}, - {file = "pywinpty-1.1.2-cp39-none-win_amd64.whl", hash = "sha256:eac78a3ff69ce443ad9f67620bc60469f6354b18388570c63af6fc643beae498"}, - {file = "pywinpty-1.1.2.tar.gz", hash = "sha256:f1718838e1c7c700e5f0b79d5d5e05243ff583313ff88e47bb94318ba303e565"}, + {file = "pywinpty-1.1.3-cp36-none-win_amd64.whl", hash = "sha256:81dc6f16d917b756e06fc58943e9750d59dbefc0ffd2086871d3fa5f33824446"}, + {file = "pywinpty-1.1.3-cp37-none-win_amd64.whl", hash = "sha256:54557887e712ea3215ab0d9f089ed55a6cc8d826cd5d1e340d75300654c9663f"}, + {file = "pywinpty-1.1.3-cp38-none-win_amd64.whl", hash = "sha256:f5e25197397f1fef0362caf3eb89f25441827a1e48bf15827c27021592fd2160"}, + {file = "pywinpty-1.1.3-cp39-none-win_amd64.whl", hash = "sha256:b767276224f86b7560eb9173ba7956758cafcdfab97bb33837d42d2a0f1dbf67"}, + {file = "pywinpty-1.1.3.tar.gz", hash = 
"sha256:3a1d57b338390333812a5eed31c93c7d8ba82b131078063703e731946d90c9f2"}, ] pyyaml = [ {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"}, @@ -1618,55 +1743,55 @@ pyzmq = [ {file = "pyzmq-22.1.0.tar.gz", hash = "sha256:7040d6dd85ea65703904d023d7f57fab793d7ffee9ba9e14f3b897f34ff2415d"}, ] qtconsole = [ - {file = "qtconsole-5.1.0-py3-none-any.whl", hash = "sha256:3a2adecc43ff201a08972fb2179df22e7b3a08d71b9ed680f46ad1bfd4fb9132"}, - {file = "qtconsole-5.1.0.tar.gz", hash = "sha256:12c734494901658787339dea9bbd82f3dc0d5e394071377a1c77b4a0954d7d8b"}, + {file = "qtconsole-5.1.1-py3-none-any.whl", hash = "sha256:73994105b0369bb99f4164df4a131010f3c7b33a7b5169c37366358d8744675b"}, + {file = "qtconsole-5.1.1.tar.gz", hash = "sha256:bbc34bca14f65535afcb401bc74b752bac955e5313001ba640383f7e5857dc49"}, ] qtpy = [ {file = "QtPy-1.9.0-py2.py3-none-any.whl", hash = "sha256:fa0b8363b363e89b2a6f49eddc162a04c0699ae95e109a6be3bb145a913190ea"}, {file = "QtPy-1.9.0.tar.gz", hash = "sha256:2db72c44b55d0fe1407be8fba35c838ad0d6d3bb81f23007886dc1fc0f459c8d"}, ] regex = [ - {file = "regex-2021.4.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:619d71c59a78b84d7f18891fe914446d07edd48dc8328c8e149cbe0929b4e000"}, - {file = "regex-2021.4.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:47bf5bf60cf04d72bf6055ae5927a0bd9016096bf3d742fa50d9bf9f45aa0711"}, - {file = "regex-2021.4.4-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:281d2fd05555079448537fe108d79eb031b403dac622621c78944c235f3fcf11"}, - {file = "regex-2021.4.4-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:bd28bc2e3a772acbb07787c6308e00d9626ff89e3bfcdebe87fa5afbfdedf968"}, - {file = "regex-2021.4.4-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:7c2a1af393fcc09e898beba5dd59196edaa3116191cc7257f9224beaed3e1aa0"}, - {file = "regex-2021.4.4-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:c38c71df845e2aabb7fb0b920d11a1b5ac8526005e533a8920aea97efb8ec6a4"}, - {file = "regex-2021.4.4-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:96fcd1888ab4d03adfc9303a7b3c0bd78c5412b2bfbe76db5b56d9eae004907a"}, - {file = "regex-2021.4.4-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:ade17eb5d643b7fead300a1641e9f45401c98eee23763e9ed66a43f92f20b4a7"}, - {file = "regex-2021.4.4-cp36-cp36m-win32.whl", hash = "sha256:e8e5b509d5c2ff12f8418006d5a90e9436766133b564db0abaec92fd27fcee29"}, - {file = "regex-2021.4.4-cp36-cp36m-win_amd64.whl", hash = "sha256:11d773d75fa650cd36f68d7ca936e3c7afaae41b863b8c387a22aaa78d3c5c79"}, - {file = "regex-2021.4.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d3029c340cfbb3ac0a71798100ccc13b97dddf373a4ae56b6a72cf70dfd53bc8"}, - {file = "regex-2021.4.4-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:18c071c3eb09c30a264879f0d310d37fe5d3a3111662438889ae2eb6fc570c31"}, - {file = "regex-2021.4.4-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:4c557a7b470908b1712fe27fb1ef20772b78079808c87d20a90d051660b1d69a"}, - {file = "regex-2021.4.4-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:01afaf2ec48e196ba91b37451aa353cb7eda77efe518e481707e0515025f0cd5"}, - {file = "regex-2021.4.4-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:3a9cd17e6e5c7eb328517969e0cb0c3d31fd329298dd0c04af99ebf42e904f82"}, - {file = "regex-2021.4.4-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:90f11ff637fe8798933fb29f5ae1148c978cccb0452005bf4c69e13db951e765"}, - {file = "regex-2021.4.4-cp37-cp37m-manylinux2014_i686.whl", hash = 
"sha256:919859aa909429fb5aa9cf8807f6045592c85ef56fdd30a9a3747e513db2536e"}, - {file = "regex-2021.4.4-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:339456e7d8c06dd36a22e451d58ef72cef293112b559010db3d054d5560ef439"}, - {file = "regex-2021.4.4-cp37-cp37m-win32.whl", hash = "sha256:67bdb9702427ceddc6ef3dc382455e90f785af4c13d495f9626861763ee13f9d"}, - {file = "regex-2021.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:32e65442138b7b76dd8173ffa2cf67356b7bc1768851dded39a7a13bf9223da3"}, - {file = "regex-2021.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1e1c20e29358165242928c2de1482fb2cf4ea54a6a6dea2bd7a0e0d8ee321500"}, - {file = "regex-2021.4.4-cp38-cp38-manylinux1_i686.whl", hash = "sha256:314d66636c494ed9c148a42731b3834496cc9a2c4251b1661e40936814542b14"}, - {file = "regex-2021.4.4-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6d1b01031dedf2503631d0903cb563743f397ccaf6607a5e3b19a3d76fc10480"}, - {file = "regex-2021.4.4-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:741a9647fcf2e45f3a1cf0e24f5e17febf3efe8d4ba1281dcc3aa0459ef424dc"}, - {file = "regex-2021.4.4-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:4c46e22a0933dd783467cf32b3516299fb98cfebd895817d685130cc50cd1093"}, - {file = "regex-2021.4.4-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e512d8ef5ad7b898cdb2d8ee1cb09a8339e4f8be706d27eaa180c2f177248a10"}, - {file = "regex-2021.4.4-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:980d7be47c84979d9136328d882f67ec5e50008681d94ecc8afa8a65ed1f4a6f"}, - {file = "regex-2021.4.4-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:ce15b6d103daff8e9fee13cf7f0add05245a05d866e73926c358e871221eae87"}, - {file = "regex-2021.4.4-cp38-cp38-win32.whl", hash = "sha256:a91aa8619b23b79bcbeb37abe286f2f408d2f2d6f29a17237afda55bb54e7aac"}, - {file = "regex-2021.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:c0502c0fadef0d23b128605d69b58edb2c681c25d44574fc673b0e52dce71ee2"}, - {file = "regex-2021.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:598585c9f0af8374c28edd609eb291b5726d7cbce16be6a8b95aa074d252ee17"}, - {file = "regex-2021.4.4-cp39-cp39-manylinux1_i686.whl", hash = "sha256:ee54ff27bf0afaf4c3b3a62bcd016c12c3fdb4ec4f413391a90bd38bc3624605"}, - {file = "regex-2021.4.4-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:7d9884d86dd4dd489e981d94a65cd30d6f07203d90e98f6f657f05170f6324c9"}, - {file = "regex-2021.4.4-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:bf5824bfac591ddb2c1f0a5f4ab72da28994548c708d2191e3b87dd207eb3ad7"}, - {file = "regex-2021.4.4-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:563085e55b0d4fb8f746f6a335893bda5c2cef43b2f0258fe1020ab1dd874df8"}, - {file = "regex-2021.4.4-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b9c3db21af35e3b3c05764461b262d6f05bbca08a71a7849fd79d47ba7bc33ed"}, - {file = "regex-2021.4.4-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:3916d08be28a1149fb97f7728fca1f7c15d309a9f9682d89d79db75d5e52091c"}, - {file = "regex-2021.4.4-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:fd45ff9293d9274c5008a2054ecef86a9bfe819a67c7be1afb65e69b405b3042"}, - {file = "regex-2021.4.4-cp39-cp39-win32.whl", hash = "sha256:fa4537fb4a98fe8fde99626e4681cc644bdcf2a795038533f9f711513a862ae6"}, - {file = "regex-2021.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:97f29f57d5b84e73fbaf99ab3e26134e6687348e95ef6b48cfd2c06807005a07"}, - {file = "regex-2021.4.4.tar.gz", hash = "sha256:52ba3d3f9b942c49d7e4bc105bb28551c44065f139a65062ab7912bef10c9afb"}, + {file = "regex-2021.7.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:e6a1e5ca97d411a461041d057348e578dc344ecd2add3555aedba3b408c9f874"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:6afe6a627888c9a6cfbb603d1d017ce204cebd589d66e0703309b8048c3b0854"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:ccb3d2190476d00414aab36cca453e4596e8f70a206e2aa8db3d495a109153d2"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:ed693137a9187052fc46eedfafdcb74e09917166362af4cc4fddc3b31560e93d"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:99d8ab206a5270c1002bfcf25c51bf329ca951e5a169f3b43214fdda1f0b5f0d"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:b85ac458354165405c8a84725de7bbd07b00d9f72c31a60ffbf96bb38d3e25fa"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:3f5716923d3d0bfb27048242a6e0f14eecdb2e2a7fac47eda1d055288595f222"}, + {file = "regex-2021.7.6-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5983c19d0beb6af88cb4d47afb92d96751fb3fa1784d8785b1cdf14c6519407"}, + {file = "regex-2021.7.6-cp36-cp36m-win32.whl", hash = "sha256:c92831dac113a6e0ab28bc98f33781383fe294df1a2c3dfd1e850114da35fd5b"}, + {file = "regex-2021.7.6-cp36-cp36m-win_amd64.whl", hash = "sha256:791aa1b300e5b6e5d597c37c346fb4d66422178566bbb426dd87eaae475053fb"}, + {file = "regex-2021.7.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:59506c6e8bd9306cd8a41511e32d16d5d1194110b8cfe5a11d102d8b63cf945d"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:564a4c8a29435d1f2256ba247a0315325ea63335508ad8ed938a4f14c4116a5d"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:59c00bb8dd8775473cbfb967925ad2c3ecc8886b3b2d0c90a8e2707e06c743f0"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:9a854b916806c7e3b40e6616ac9e85d3cdb7649d9e6590653deb5b341a736cec"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:db2b7df831c3187a37f3bb80ec095f249fa276dbe09abd3d35297fc250385694"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:173bc44ff95bc1e96398c38f3629d86fa72e539c79900283afa895694229fe6a"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:15dddb19823f5147e7517bb12635b3c82e6f2a3a6b696cc3e321522e8b9308ad"}, + {file = "regex-2021.7.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ddeabc7652024803666ea09f32dd1ed40a0579b6fbb2a213eba590683025895"}, + {file = "regex-2021.7.6-cp37-cp37m-win32.whl", hash = "sha256:f080248b3e029d052bf74a897b9d74cfb7643537fbde97fe8225a6467fb559b5"}, + {file = "regex-2021.7.6-cp37-cp37m-win_amd64.whl", hash = "sha256:d8bbce0c96462dbceaa7ac4a7dfbbee92745b801b24bce10a98d2f2b1ea9432f"}, + {file = "regex-2021.7.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:edd1a68f79b89b0c57339bce297ad5d5ffcc6ae7e1afdb10f1947706ed066c9c"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux1_i686.whl", hash = "sha256:422dec1e7cbb2efbbe50e3f1de36b82906def93ed48da12d1714cabcd993d7f0"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:cbe23b323988a04c3e5b0c387fe3f8f363bf06c0680daf775875d979e376bd26"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:0eb2c6e0fcec5e0f1d3bcc1133556563222a2ffd2211945d7b1480c1b1a42a6f"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2010_x86_64.whl", hash = 
"sha256:1c78780bf46d620ff4fff40728f98b8afd8b8e35c3efd638c7df67be2d5cddbf"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:bc84fb254a875a9f66616ed4538542fb7965db6356f3df571d783f7c8d256edd"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:598c0a79b4b851b922f504f9f39a863d83ebdfff787261a5ed061c21e67dd761"}, + {file = "regex-2021.7.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875c355360d0f8d3d827e462b29ea7682bf52327d500a4f837e934e9e4656068"}, + {file = "regex-2021.7.6-cp38-cp38-win32.whl", hash = "sha256:e586f448df2bbc37dfadccdb7ccd125c62b4348cb90c10840d695592aa1b29e0"}, + {file = "regex-2021.7.6-cp38-cp38-win_amd64.whl", hash = "sha256:2fe5e71e11a54e3355fa272137d521a40aace5d937d08b494bed4529964c19c4"}, + {file = "regex-2021.7.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6110bab7eab6566492618540c70edd4d2a18f40ca1d51d704f1d81c52d245026"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux1_i686.whl", hash = "sha256:4f64fc59fd5b10557f6cd0937e1597af022ad9b27d454e182485f1db3008f417"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:89e5528803566af4df368df2d6f503c84fbfb8249e6631c7b025fe23e6bd0cde"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2366fe0479ca0e9afa534174faa2beae87847d208d457d200183f28c74eaea59"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:f9392a4555f3e4cb45310a65b403d86b589adc773898c25a39184b1ba4db8985"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:2bceeb491b38225b1fee4517107b8491ba54fba77cf22a12e996d96a3c55613d"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:f98dc35ab9a749276f1a4a38ab3e0e2ba1662ce710f6530f5b0a6656f1c32b58"}, + {file = "regex-2021.7.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:319eb2a8d0888fa6f1d9177705f341bc9455a2c8aca130016e52c7fe8d6c37a3"}, + {file = "regex-2021.7.6-cp39-cp39-win32.whl", hash = "sha256:eaf58b9e30e0e546cdc3ac06cf9165a1ca5b3de8221e9df679416ca667972035"}, + {file = "regex-2021.7.6-cp39-cp39-win_amd64.whl", hash = "sha256:4c9c3155fe74269f61e27617529b7f09552fbb12e44b1189cebbdb24294e6e1c"}, + {file = "regex-2021.7.6.tar.gz", hash = "sha256:8394e266005f2d8c6f0bc6780001f7afa3ef81a7a2111fa35058ded6fce79e4d"}, ] requests = [ {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"}, @@ -1739,6 +1864,38 @@ traitlets = [ {file = "traitlets-5.0.5-py3-none-any.whl", hash = "sha256:69ff3f9d5351f31a7ad80443c2674b7099df13cc41fc5fa6e2f6d3b0330b0426"}, {file = "traitlets-5.0.5.tar.gz", hash = "sha256:178f4ce988f69189f7e523337a3e11d91c786ded9360174a3d9ca83e79bc5396"}, ] +typed-ast = [ + {file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6"}, + {file = "typed_ast-1.4.3-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075"}, + {file = "typed_ast-1.4.3-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528"}, + {file = "typed_ast-1.4.3-cp35-cp35m-win32.whl", hash = "sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428"}, + {file = "typed_ast-1.4.3-cp35-cp35m-win_amd64.whl", hash = "sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3"}, + {file = 
"typed_ast-1.4.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f"}, + {file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341"}, + {file = "typed_ast-1.4.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace"}, + {file = "typed_ast-1.4.3-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f"}, + {file = "typed_ast-1.4.3-cp36-cp36m-win32.whl", hash = "sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363"}, + {file = "typed_ast-1.4.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7"}, + {file = "typed_ast-1.4.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266"}, + {file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e"}, + {file = "typed_ast-1.4.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04"}, + {file = "typed_ast-1.4.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899"}, + {file = "typed_ast-1.4.3-cp37-cp37m-win32.whl", hash = "sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c"}, + {file = "typed_ast-1.4.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805"}, + {file = "typed_ast-1.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a"}, + {file = "typed_ast-1.4.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff"}, + {file = "typed_ast-1.4.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41"}, + {file = "typed_ast-1.4.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39"}, + {file = "typed_ast-1.4.3-cp38-cp38-win32.whl", hash = "sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927"}, + {file = "typed_ast-1.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40"}, + {file = "typed_ast-1.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3"}, + {file = "typed_ast-1.4.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4"}, + {file = "typed_ast-1.4.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0"}, + {file = "typed_ast-1.4.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3"}, + {file = "typed_ast-1.4.3-cp39-cp39-win32.whl", hash = "sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808"}, + {file = "typed_ast-1.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c"}, + {file = "typed_ast-1.4.3.tar.gz", hash = "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"}, +] types-requests = [ {file = 
"types-requests-2.25.0.tar.gz", hash = "sha256:ee0d0c507210141b7d5b8639cc43eaa726084178775db2a5fb06fbf85c185808"}, {file = "types_requests-2.25.0-py3-none-any.whl", hash = "sha256:fa5c1e5e832ff6193507d8da7e1159281383908ee193a2f4b37bc08140b51844"}, @@ -1749,8 +1906,8 @@ typing-extensions = [ {file = "typing_extensions-3.10.0.0.tar.gz", hash = "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342"}, ] urllib3 = [ - {file = "urllib3-1.26.5-py2.py3-none-any.whl", hash = "sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c"}, - {file = "urllib3-1.26.5.tar.gz", hash = "sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098"}, + {file = "urllib3-1.26.6-py2.py3-none-any.whl", hash = "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4"}, + {file = "urllib3-1.26.6.tar.gz", hash = "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"}, ] wcwidth = [ {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, @@ -1764,3 +1921,7 @@ widgetsnbextension = [ {file = "widgetsnbextension-3.5.1-py2.py3-none-any.whl", hash = "sha256:bd314f8ceb488571a5ffea6cc5b9fc6cba0adaf88a9d2386b93a489751938bcd"}, {file = "widgetsnbextension-3.5.1.tar.gz", hash = "sha256:079f87d87270bce047512400efd70238820751a11d2d8cb137a5a5bdbaf255c7"}, ] +zipp = [ + {file = "zipp-3.5.0-py3-none-any.whl", hash = "sha256:957cfda87797e389580cb8b9e3870841ca991e2125350677b2ca83a0e99390a3"}, + {file = "zipp-3.5.0.tar.gz", hash = "sha256:f5812b1e007e48cff63449a5e9f4e7ebea716b4111f9c4f9a645f91d579bf0c4"}, +] diff --git a/score/pyproject.toml b/score/pyproject.toml index 3f418822..2f1a0588 100644 --- a/score/pyproject.toml +++ b/score/pyproject.toml @@ -5,7 +5,7 @@ description = "ETL and Generation of Justice 40 Score" authors = ["Your Name "] [tool.poetry.dependencies] -python = "^3.9" +python = "^3.7.1" ipython = "^7.24.1" jupyter = "^1.0.0" jupyter-contrib-nbextensions = "^0.5.1" diff --git a/score/requirements.txt b/score/requirements.txt index 3fd8170b..a03b7f6c 100644 --- a/score/requirements.txt +++ b/score/requirements.txt @@ -1,4 +1,4 @@ -appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7" and platform_system == "Darwin" +appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7" argon2-cffi==20.1.0; python_version >= "3.6" async-generator==1.10; python_full_version >= "3.6.1" and python_version >= "3.7" attrs==21.2.0; python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.5" @@ -6,18 +6,20 @@ backcall==0.2.0; python_version >= "3.7" bleach==3.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" censusdata==1.13; python_version >= "2.7" certifi==2021.5.30; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" -cffi==1.14.5; implementation_name == "pypy" and python_version >= "3.6" +cffi==1.14.6; implementation_name == "pypy" and python_version >= "3.6" chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" click==8.0.1; python_version >= "3.6" colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and sys_platform == "win32" and platform_system == "Windows" or sys_platform == "win32" and python_version >= "3.7" and python_full_version >= "3.5.0" and 
platform_system == "Windows" +debugpy==1.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" decorator==5.0.9; python_version >= "3.7" defusedxml==0.7.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" dynaconf==3.1.4 entrypoints==0.3; python_version >= "3.7" idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" -ipykernel==5.5.5; python_version >= "3.6" +importlib-metadata==3.10.1; python_version < "3.8" and python_version >= "3.7" +ipykernel==6.0.1; python_version >= "3.7" ipython-genutils==0.2.0; python_version >= "3.7" -ipython==7.24.1; python_version >= "3.7" +ipython==7.25.0; python_version >= "3.7" ipywidgets==7.6.3 jedi==0.18.0; python_version >= "3.7" jinja2==3.0.1; python_version >= "3.7" @@ -35,7 +37,7 @@ jupyterlab-pygments==0.1.2; python_version >= "3.7" jupyterlab-widgets==1.0.0; python_version >= "3.6" lxml==4.6.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" markupsafe==2.0.1; python_version >= "3.7" -matplotlib-inline==0.1.2; python_version >= "3.7" +matplotlib-inline==0.1.2; platform_system == "Darwin" and python_version >= "3.7" mistune==0.8.4; python_version >= "3.7" nbclient==0.5.3; python_full_version >= "3.6.1" and python_version >= "3.7" nbconvert==6.1.0; python_version >= "3.7" @@ -43,8 +45,8 @@ nbformat==5.1.3; python_full_version >= "3.6.1" and python_version >= "3.7" nest-asyncio==1.5.1; python_full_version >= "3.6.1" and python_version >= "3.7" notebook==6.4.0; python_version >= "3.6" numpy==1.21.0; python_version >= "3.7" -packaging==20.9; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" -pandas==1.2.5; python_full_version >= "3.7.1" +packaging==21.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" +pandas==1.3.0; python_full_version >= "3.7.1" pandocfilters==1.4.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.7" parso==0.8.2; python_version >= "3.7" pexpect==4.8.0; sys_platform != "win32" and python_version >= "3.7" @@ -56,24 +58,26 @@ py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implem pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" pygments==2.9.0; python_version >= "3.7" pyparsing==2.4.7; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" -pyrsistent==0.17.3; python_version >= "3.5" -python-dateutil==2.8.1; python_full_version >= "3.7.1" and python_version >= "3.5" +pyrsistent==0.18.0; python_version >= "3.6" +python-dateutil==2.8.1; python_full_version >= "3.7.1" and python_version >= "3.7" pytz==2021.1; python_full_version >= "3.7.1" and python_version >= "2.7" pywin32==301; sys_platform == "win32" and python_version >= "3.6" -pywinpty==1.1.2; os_name == "nt" and python_version >= "3.6" +pywinpty==1.1.3; os_name == "nt" and python_version >= "3.6" pyyaml==5.4.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" -pyzmq==22.1.0; python_full_version >= "3.6.1" and python_version >= "3.6" -qtconsole==5.1.0; python_version >= "3.6" +pyzmq==22.1.0; 
python_full_version >= "3.6.1" and python_version >= "3.7" +qtconsole==5.1.1; python_version >= "3.6" qtpy==1.9.0; python_version >= "3.6" requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") send2trash==1.7.1; python_version >= "3.6" six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.7" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6") and (python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.5") terminado==0.10.1; python_version >= "3.6" testpath==0.5.0; python_version >= "3.7" -tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.6" +tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.7" traitlets==5.0.5; python_full_version >= "3.6.1" and python_version >= "3.7" types-requests==2.25.0 -urllib3==1.26.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" and python_version >= "2.7" +typing-extensions==3.10.0.0; python_version < "3.8" and python_version >= "3.6" +urllib3==1.26.6; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" and python_version >= "2.7" wcwidth==0.2.5; python_full_version >= "3.6.1" and python_version >= "3.7" webencodings==0.5.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" widgetsnbextension==3.5.1 +zipp==3.5.0; python_version < "3.8" and python_version >= "3.6" diff --git a/score/tile/generate.py b/score/tile/generate.py index 2762e399..8c6964fc 100644 --- a/score/tile/generate.py +++ b/score/tile/generate.py @@ -17,13 +17,6 @@ def generate_tiles(data_path: Path) -> None: if os.path.exists(mvt_tiles_path): shutil.rmtree(mvt_tiles_path) - # Merge scores into json - - if os.name == "nt": - pwd = "%cd%" - else: - pwd = "${PWD}" - # remove existing score json files score_geojson_dir = data_path / "score" / "geojson" files_in_directory = os.listdir(score_geojson_dir) @@ -36,12 +29,9 @@ def generate_tiles(data_path: Path) -> None: state_fips_codes = get_state_fips_codes() for fips in state_fips_codes: cmd = ( - 'docker run --rm -v "' - + pwd - + '"/:/home ' - + "osgeo/gdal:alpine-small-latest ogr2ogr -f GeoJSON " - + f"-sql \"SELECT * FROM tl_2010_{fips}_bg10 LEFT JOIN '/home/data/score/csv/data{fips}.csv'.data{fips} ON tl_2010_{fips}_bg10.GEOID10 = data{fips}.ID\" " - + f"/home/data/score/geojson/{fips}.json /home/data/census/shp/{fips}/tl_2010_{fips}_bg10.dbf" + "ogr2ogr -f GeoJSON " + + f"-sql \"SELECT * FROM tl_2010_{fips}_bg10 LEFT JOIN 'data/score/csv/data{fips}.csv'.data{fips} ON tl_2010_{fips}_bg10.GEOID10 = data{fips}.ID\" " + + f"data/score/geojson/{fips}.json data/census/shp/{fips}/tl_2010_{fips}_bg10.dbf" ) os.system(cmd) @@ -51,7 +41,7 @@ def generate_tiles(data_path: Path) -> None: geojson_path = data_path / "score" / "geojson" for file in os.listdir(geojson_path): if file.endswith(".json"): - geojson_list += f"/home/data/score/geojson/{file} " + geojson_list += f"data/score/geojson/{file} " if geojson_list == "": logging.error( @@ -59,28 +49,15 @@ def generate_tiles(data_path: Path) -> None: ) # generate mbtiles file - # PWD is different for Windows - if os.name == "nt": - pwd = "%cd%" - else: - pwd = "${PWD}" cmd = ( - 'docker run --rm -it -v "' - + pwd - + '"/:/home klokantech/tippecanoe tippecanoe 
--drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 ' + "tippecanoe --drop-densest-as-needed -zg -o data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 " + geojson_list ) os.system(cmd) - # PWD is different for Windows - if os.name == "nt": - pwd = "%cd%" - else: - pwd = "${PWD}" + + # generate MVTs cmd = ( - 'docker run --rm -it -v "' - + pwd - + '"/:/home klokantech/tippecanoe tippecanoe --drop-densest-as-needed --no-tile-compression -zg -e /home/data/tiles/mvt ' + "tippecanoe --drop-densest-as-needed --no-tile-compression -zg -e data/tiles/mvt " + geojson_list ) os.system(cmd) diff --git a/score/utils.py b/score/utils.py index dea5a3a6..bb915003 100644 --- a/score/utils.py +++ b/score/utils.py @@ -4,11 +4,20 @@ import logging import shutil import requests import zipfile +import urllib3 + +from config import settings -def get_module_logger(module_name): - """ - To use this, do logger = get_module_logger(__name__) +def get_module_logger(module_name: str) -> logging.Logger: + """Instantiates a logger object that logs to the console + + Args: + module_name (str): Name of the module outputting the logs + + Returns: + logger (logging.Logger): A logger object + """ logger = logging.getLogger(module_name) handler = logging.StreamHandler() @@ -25,6 +34,17 @@ logger = get_module_logger(__name__) def remove_files_from_dir(files_path: Path, extension: str = None) -> None: + """Removes all files from a specific directory with the exception of __init__.py + files or files with a specific extension + + Args: + files_path (pathlib.Path): Name of the directory where the files will be deleted + extension (str): Extension of the file pattern to delete, example ".json" (optional) + + Returns: + None + + """ for file in os.listdir(files_path): if extension: if not file.endswith(extension): @@ -38,6 +58,15 @@ def remove_files_from_dir(files_path: Path, extension: str = None) -> None: def remove_all_from_dir(files_path: Path) -> None: + """Removes all files and directories from a specific directory, except __init__.py files + + Args: + files_path (pathlib.Path): Name of the directory where the files and directories will be deleted + + Returns: + None + + """ for file in os.listdir(files_path): # don't remove __init__ files as they conserve dir structure if file == "__init__.py": continue @@ -50,6 +79,15 @@ def remove_all_from_dir(files_path: Path) -> None: def remove_all_dirs_from_dir(dir_path: Path) -> None: + """Removes all directories from a specific directory + + Args: + dir_path (pathlib.Path): Name of the directory where the directories will be deleted + + Returns: + None + + """ for filename in os.listdir(dir_path): file_path = os.path.join(dir_path, filename) if os.path.isdir(file_path): @@ -58,8 +96,27 @@ def remove_all_dirs_from_dir(dir_path: Path) -> None: def unzip_file_from_url( - file_url: str, download_path: Path, zip_file_directory: Path, verify: bool = False + file_url: str, + download_path: Path, + unzipped_file_path: Path, + verify: bool = False, ) -> None: + """Downloads a zip file from a remote URL location and unzips it in a specific directory, removing the temporary file afterwards + + Args: + file_url (str): URL where the zip file is located + download_path (pathlib.Path): directory where the temporary file will be downloaded (called downloaded.zip by default) + unzipped_file_path (pathlib.Path): directory and name of the extracted file + verify (bool): A
flag to check if the certificate is valid. If truthy, an invalid certificate will raise an error (optional, defaults to False) + + Returns: + None + + """ + + # suppress urllib3's warning about unverified HTTPS requests + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + logger.info(f"Downloading {file_url}") download = requests.get(file_url, verify=verify) file_contents = download.content @@ -70,1018 +127,1050 @@ def unzip_file_from_url( logger.info(f"Extracting {zip_file_path}") with zipfile.ZipFile(zip_file_path, "r") as zip_ref: - zip_ref.extractall(zip_file_directory) + zip_ref.extractall(unzipped_file_path) # cleanup temporary file os.remove(zip_file_path) + +def data_folder_cleanup() -> None: + """Remove all files and directories from the local data/dataset path""" + + data_path = settings.APP_ROOT / "data" + + logger.info(f"Initializing all dataset directories") + remove_all_from_dir(data_path / "dataset") + + +def score_folder_cleanup() -> None: + """Remove all files and directories from the local data/score path""" + + data_path = settings.APP_ROOT / "data" + + logger.info(f"Initializing all score data") + remove_files_from_dir(data_path / "score" / "csv", ".csv") + remove_files_from_dir(data_path / "score" / "geojson", ".json") + + +def temp_folder_cleanup() -> None: + """Remove all files and directories from the local data/tmp temporary path""" + + data_path = settings.APP_ROOT / "data" + + logger.info(f"Initializing all temp directories") + remove_all_from_dir(data_path / "tmp") + + def get_excel_column_name(index: int) -> str: - """This is used to map a numeric index to the appropriate column in Excel. - - E.g., column #95 is "CR". - + """Map a numeric index to the appropriate column in Excel. E.g., column #95 is "CR". Only works for the first 1000 columns.
+ + Args: + index (int): the index of the column + + Returns: + str: the excel column name """ excel_column_names = [ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "AA", - "AB", - "AC", - "AD", - "AE", - "AF", - "AG", - "AH", - "AI", - "AJ", - "AK", - "AL", - "AM", - "AN", - "AO", - "AP", - "AQ", - "AR", - "AS", - "AT", - "AU", - "AV", - "AW", - "AX", - "AY", - "AZ", - "BA", - "BB", - "BC", - "BD", - "BE", - "BF", - "BG", - "BH", - "BI", - "BJ", - "BK", - "BL", - "BM", - "BN", - "BO", - "BP", - "BQ", - "BR", - "BS", - "BT", - "BU", - "BV", - "BW", - "BX", - "BY", - "BZ", - "CA", - "CB", - "CC", - "CD", - "CE", - "CF", - "CG", - "CH", - "CI", - "CJ", - "CK", - "CL", - "CM", - "CN", - "CO", - "CP", - "CQ", - "CR", - "CS", - "CT", - "CU", - "CV", - "CW", - "CX", - "CY", - "CZ", - "DA", - "DB", - "DC", - "DD", - "DE", - "DF", - "DG", - "DH", - "DI", - "DJ", - "DK", - "DL", - "DM", - "DN", - "DO", - "DP", - "DQ", - "DR", - "DS", - "DT", - "DU", - "DV", - "DW", - "DX", - "DY", - "DZ", - "EA", - "EB", - "EC", - "ED", - "EE", - "EF", - "EG", - "EH", - "EI", - "EJ", - "EK", - "EL", - "EM", - "EN", - "EO", - "EP", - "EQ", - "ER", - "ES", - "ET", - "EU", - "EV", - "EW", - "EX", - "EY", - "EZ", - "FA", - "FB", - "FC", - "FD", - "FE", - "FF", - "FG", - "FH", - "FI", - "FJ", - "FK", - "FL", - "FM", - "FN", - "FO", - "FP", - "FQ", - "FR", - "FS", - "FT", - "FU", - "FV", - "FW", - "FX", - "FY", - "FZ", - "GA", - "GB", - "GC", - "GD", - "GE", - "GF", - "GG", - "GH", - "GI", - "GJ", - "GK", - "GL", - "GM", - "GN", - "GO", - "GP", - "GQ", - "GR", - "GS", - "GT", - "GU", - "GV", - "GW", - "GX", - "GY", - "GZ", - "HA", - "HB", - "HC", - "HD", - "HE", - "HF", - "HG", - "HH", - "HI", - "HJ", - "HK", - "HL", - "HM", - "HN", - "HO", - "HP", - "HQ", - "HR", - "HS", - "HT", - "HU", - "HV", - "HW", - "HX", - "HY", - "HZ", - "IA", - "IB", - "IC", - "ID", - "IE", - "IF", - "IG", - "IH", - "II", - "IJ", - "IK", - "IL", - "IM", - "IN", - "IO", - "IP", - "IQ", - "IR", - "IS", - "IT", - "IU", - "IV", - "IW", - "IX", - "IY", - "IZ", - "JA", - "JB", - "JC", - "JD", - "JE", - "JF", - "JG", - "JH", - "JI", - "JJ", - "JK", - "JL", - "JM", - "JN", - "JO", - "JP", - "JQ", - "JR", - "JS", - "JT", - "JU", - "JV", - "JW", - "JX", - "JY", - "JZ", - "KA", - "KB", - "KC", - "KD", - "KE", - "KF", - "KG", - "KH", - "KI", - "KJ", - "KK", - "KL", - "KM", - "KN", - "KO", - "KP", - "KQ", - "KR", - "KS", - "KT", - "KU", - "KV", - "KW", - "KX", - "KY", - "KZ", - "LA", - "LB", - "LC", - "LD", - "LE", - "LF", - "LG", - "LH", - "LI", - "LJ", - "LK", - "LL", - "LM", - "LN", - "LO", - "LP", - "LQ", - "LR", - "LS", - "LT", - "LU", - "LV", - "LW", - "LX", - "LY", - "LZ", - "MA", - "MB", - "MC", - "MD", - "ME", - "MF", - "MG", - "MH", - "MI", - "MJ", - "MK", - "ML", - "MM", - "MN", - "MO", - "MP", - "MQ", - "MR", - "MS", - "MT", - "MU", - "MV", - "MW", - "MX", - "MY", - "MZ", - "NA", - "NB", - "NC", - "ND", - "NE", - "NF", - "NG", - "NH", - "NI", - "NJ", - "NK", - "NL", - "NM", - "NN", - "NO", - "NP", - "NQ", - "NR", - "NS", - "NT", - "NU", - "NV", - "NW", - "NX", - "NY", - "NZ", - "OA", - "OB", - "OC", - "OD", - "OE", - "OF", - "OG", - "OH", - "OI", - "OJ", - "OK", - "OL", - "OM", - "ON", - "OO", - "OP", - "OQ", - "OR", - "OS", - "OT", - "OU", - "OV", - "OW", - "OX", - "OY", - "OZ", - "PA", - "PB", - "PC", - "PD", - "PE", - "PF", - "PG", - "PH", - "PI", - "PJ", - "PK", - "PL", - "PM", - "PN", - "PO", - "PP", - 
"PQ", - "PR", - "PS", - "PT", - "PU", - "PV", - "PW", - "PX", - "PY", - "PZ", - "QA", - "QB", - "QC", - "QD", - "QE", - "QF", - "QG", - "QH", - "QI", - "QJ", - "QK", - "QL", - "QM", - "QN", - "QO", - "QP", - "QQ", - "QR", - "QS", - "QT", - "QU", - "QV", - "QW", - "QX", - "QY", - "QZ", - "RA", - "RB", - "RC", - "RD", - "RE", - "RF", - "RG", - "RH", - "RI", - "RJ", - "RK", - "RL", - "RM", - "RN", - "RO", - "RP", - "RQ", - "RR", - "RS", - "RT", - "RU", - "RV", - "RW", - "RX", - "RY", - "RZ", - "SA", - "SB", - "SC", - "SD", - "SE", - "SF", - "SG", - "SH", - "SI", - "SJ", - "SK", - "SL", - "SM", - "SN", - "SO", - "SP", - "SQ", - "SR", - "SS", - "ST", - "SU", - "SV", - "SW", - "SX", - "SY", - "SZ", - "TA", - "TB", - "TC", - "TD", - "TE", - "TF", - "TG", - "TH", - "TI", - "TJ", - "TK", - "TL", - "TM", - "TN", - "TO", - "TP", - "TQ", - "TR", - "TS", - "TT", - "TU", - "TV", - "TW", - "TX", - "TY", - "TZ", - "UA", - "UB", - "UC", - "UD", - "UE", - "UF", - "UG", - "UH", - "UI", - "UJ", - "UK", - "UL", - "UM", - "UN", - "UO", - "UP", - "UQ", - "UR", - "US", - "UT", - "UU", - "UV", - "UW", - "UX", - "UY", - "UZ", - "VA", - "VB", - "VC", - "VD", - "VE", - "VF", - "VG", - "VH", - "VI", - "VJ", - "VK", - "VL", - "VM", - "VN", - "VO", - "VP", - "VQ", - "VR", - "VS", - "VT", - "VU", - "VV", - "VW", - "VX", - "VY", - "VZ", - "WA", - "WB", - "WC", - "WD", - "WE", - "WF", - "WG", - "WH", - "WI", - "WJ", - "WK", - "WL", - "WM", - "WN", - "WO", - "WP", - "WQ", - "WR", - "WS", - "WT", - "WU", - "WV", - "WW", - "WX", - "WY", - "WZ", - "XA", - "XB", - "XC", - "XD", - "XE", - "XF", - "XG", - "XH", - "XI", - "XJ", - "XK", - "XL", - "XM", - "XN", - "XO", - "XP", - "XQ", - "XR", - "XS", - "XT", - "XU", - "XV", - "XW", - "XX", - "XY", - "XZ", - "YA", - "YB", - "YC", - "YD", - "YE", - "YF", - "YG", - "YH", - "YI", - "YJ", - "YK", - "YL", - "YM", - "YN", - "YO", - "YP", - "YQ", - "YR", - "YS", - "YT", - "YU", - "YV", - "YW", - "YX", - "YY", - "YZ", - "ZA", - "ZB", - "ZC", - "ZD", - "ZE", - "ZF", - "ZG", - "ZH", - "ZI", - "ZJ", - "ZK", - "ZL", - "ZM", - "ZN", - "ZO", - "ZP", - "ZQ", - "ZR", - "ZS", - "ZT", - "ZU", - "ZV", - "ZW", - "ZX", - "ZY", - "ZZ", - "AAA", - "AAB", - "AAC", - "AAD", - "AAE", - "AAF", - "AAG", - "AAH", - "AAI", - "AAJ", - "AAK", - "AAL", - "AAM", - "AAN", - "AAO", - "AAP", - "AAQ", - "AAR", - "AAS", - "AAT", - "AAU", - "AAV", - "AAW", - "AAX", - "AAY", - "AAZ", - "ABA", - "ABB", - "ABC", - "ABD", - "ABE", - "ABF", - "ABG", - "ABH", - "ABI", - "ABJ", - "ABK", - "ABL", - "ABM", - "ABN", - "ABO", - "ABP", - "ABQ", - "ABR", - "ABS", - "ABT", - "ABU", - "ABV", - "ABW", - "ABX", - "ABY", - "ABZ", - "ACA", - "ACB", - "ACC", - "ACD", - "ACE", - "ACF", - "ACG", - "ACH", - "ACI", - "ACJ", - "ACK", - "ACL", - "ACM", - "ACN", - "ACO", - "ACP", - "ACQ", - "ACR", - "ACS", - "ACT", - "ACU", - "ACV", - "ACW", - "ACX", - "ACY", - "ACZ", - "ADA", - "ADB", - "ADC", - "ADD", - "ADE", - "ADF", - "ADG", - "ADH", - "ADI", - "ADJ", - "ADK", - "ADL", - "ADM", - "ADN", - "ADO", - "ADP", - "ADQ", - "ADR", - "ADS", - "ADT", - "ADU", - "ADV", - "ADW", - "ADX", - "ADY", - "ADZ", - "AEA", - "AEB", - "AEC", - "AED", - "AEE", - "AEF", - "AEG", - "AEH", - "AEI", - "AEJ", - "AEK", - "AEL", - "AEM", - "AEN", - "AEO", - "AEP", - "AEQ", - "AER", - "AES", - "AET", - "AEU", - "AEV", - "AEW", - "AEX", - "AEY", - "AEZ", - "AFA", - "AFB", - "AFC", - "AFD", - "AFE", - "AFF", - "AFG", - "AFH", - "AFI", - "AFJ", - "AFK", - "AFL", - "AFM", - "AFN", - "AFO", - "AFP", - "AFQ", - "AFR", - "AFS", - "AFT", - "AFU", - "AFV", - "AFW", - "AFX", - "AFY", - 
"AFZ", - "AGA", - "AGB", - "AGC", - "AGD", - "AGE", - "AGF", - "AGG", - "AGH", - "AGI", - "AGJ", - "AGK", - "AGL", - "AGM", - "AGN", - "AGO", - "AGP", - "AGQ", - "AGR", - "AGS", - "AGT", - "AGU", - "AGV", - "AGW", - "AGX", - "AGY", - "AGZ", - "AHA", - "AHB", - "AHC", - "AHD", - "AHE", - "AHF", - "AHG", - "AHH", - "AHI", - "AHJ", - "AHK", - "AHL", - "AHM", - "AHN", - "AHO", - "AHP", - "AHQ", - "AHR", - "AHS", - "AHT", - "AHU", - "AHV", - "AHW", - "AHX", - "AHY", - "AHZ", - "AIA", - "AIB", - "AIC", - "AID", - "AIE", - "AIF", - "AIG", - "AIH", - "AII", - "AIJ", - "AIK", - "AIL", - "AIM", - "AIN", - "AIO", - "AIP", - "AIQ", - "AIR", - "AIS", - "AIT", - "AIU", - "AIV", - "AIW", - "AIX", - "AIY", - "AIZ", - "AJA", - "AJB", - "AJC", - "AJD", - "AJE", - "AJF", - "AJG", - "AJH", - "AJI", - "AJJ", - "AJK", - "AJL", - "AJM", - "AJN", - "AJO", - "AJP", - "AJQ", - "AJR", - "AJS", - "AJT", - "AJU", - "AJV", - "AJW", - "AJX", - "AJY", - "AJZ", - "AKA", - "AKB", - "AKC", - "AKD", - "AKE", - "AKF", - "AKG", - "AKH", - "AKI", - "AKJ", - "AKK", - "AKL", - "AKM", - "AKN", - "AKO", - "AKP", - "AKQ", - "AKR", - "AKS", - "AKT", - "AKU", - "AKV", - "AKW", - "AKX", - "AKY", - "AKZ", - "ALA", - "ALB", - "ALC", - "ALD", - "ALE", - "ALF", - "ALG", - "ALH", - "ALI", - "ALJ", - "ALK", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "AA", + "AB", + "AC", + "AD", + "AE", + "AF", + "AG", + "AH", + "AI", + "AJ", + "AK", + "AL", + "AM", + "AN", + "AO", + "AP", + "AQ", + "AR", + "AS", + "AT", + "AU", + "AV", + "AW", + "AX", + "AY", + "AZ", + "BA", + "BB", + "BC", + "BD", + "BE", + "BF", + "BG", + "BH", + "BI", + "BJ", + "BK", + "BL", + "BM", + "BN", + "BO", + "BP", + "BQ", + "BR", + "BS", + "BT", + "BU", + "BV", + "BW", + "BX", + "BY", + "BZ", + "CA", + "CB", + "CC", + "CD", + "CE", + "CF", + "CG", + "CH", + "CI", + "CJ", + "CK", + "CL", + "CM", + "CN", + "CO", + "CP", + "CQ", + "CR", + "CS", + "CT", + "CU", + "CV", + "CW", + "CX", + "CY", + "CZ", + "DA", + "DB", + "DC", + "DD", + "DE", + "DF", + "DG", + "DH", + "DI", + "DJ", + "DK", + "DL", + "DM", + "DN", + "DO", + "DP", + "DQ", + "DR", + "DS", + "DT", + "DU", + "DV", + "DW", + "DX", + "DY", + "DZ", + "EA", + "EB", + "EC", + "ED", + "EE", + "EF", + "EG", + "EH", + "EI", + "EJ", + "EK", + "EL", + "EM", + "EN", + "EO", + "EP", + "EQ", + "ER", + "ES", + "ET", + "EU", + "EV", + "EW", + "EX", + "EY", + "EZ", + "FA", + "FB", + "FC", + "FD", + "FE", + "FF", + "FG", + "FH", + "FI", + "FJ", + "FK", + "FL", + "FM", + "FN", + "FO", + "FP", + "FQ", + "FR", + "FS", + "FT", + "FU", + "FV", + "FW", + "FX", + "FY", + "FZ", + "GA", + "GB", + "GC", + "GD", + "GE", + "GF", + "GG", + "GH", + "GI", + "GJ", + "GK", + "GL", + "GM", + "GN", + "GO", + "GP", + "GQ", + "GR", + "GS", + "GT", + "GU", + "GV", + "GW", + "GX", + "GY", + "GZ", + "HA", + "HB", + "HC", + "HD", + "HE", + "HF", + "HG", + "HH", + "HI", + "HJ", + "HK", + "HL", + "HM", + "HN", + "HO", + "HP", + "HQ", + "HR", + "HS", + "HT", + "HU", + "HV", + "HW", + "HX", + "HY", + "HZ", + "IA", + "IB", + "IC", + "ID", + "IE", + "IF", + "IG", + "IH", + "II", + "IJ", + "IK", + "IL", + "IM", + "IN", + "IO", + "IP", + "IQ", + "IR", + "IS", + "IT", + "IU", + "IV", + "IW", + "IX", + "IY", + "IZ", + "JA", + "JB", + "JC", + "JD", + "JE", + "JF", + "JG", + "JH", + "JI", + "JJ", + "JK", + "JL", + "JM", + "JN", + "JO", + "JP", + "JQ", + "JR", + "JS", + "JT", + "JU", + "JV", + "JW", + "JX", + "JY", + "JZ", + "KA", + "KB", 
+ "KC", + "KD", + "KE", + "KF", + "KG", + "KH", + "KI", + "KJ", + "KK", + "KL", + "KM", + "KN", + "KO", + "KP", + "KQ", + "KR", + "KS", + "KT", + "KU", + "KV", + "KW", + "KX", + "KY", + "KZ", + "LA", + "LB", + "LC", + "LD", + "LE", + "LF", + "LG", + "LH", + "LI", + "LJ", + "LK", + "LL", + "LM", + "LN", + "LO", + "LP", + "LQ", + "LR", + "LS", + "LT", + "LU", + "LV", + "LW", + "LX", + "LY", + "LZ", + "MA", + "MB", + "MC", + "MD", + "ME", + "MF", + "MG", + "MH", + "MI", + "MJ", + "MK", + "ML", + "MM", + "MN", + "MO", + "MP", + "MQ", + "MR", + "MS", + "MT", + "MU", + "MV", + "MW", + "MX", + "MY", + "MZ", + "NA", + "NB", + "NC", + "ND", + "NE", + "NF", + "NG", + "NH", + "NI", + "NJ", + "NK", + "NL", + "NM", + "NN", + "NO", + "NP", + "NQ", + "NR", + "NS", + "NT", + "NU", + "NV", + "NW", + "NX", + "NY", + "NZ", + "OA", + "OB", + "OC", + "OD", + "OE", + "OF", + "OG", + "OH", + "OI", + "OJ", + "OK", + "OL", + "OM", + "ON", + "OO", + "OP", + "OQ", + "OR", + "OS", + "OT", + "OU", + "OV", + "OW", + "OX", + "OY", + "OZ", + "PA", + "PB", + "PC", + "PD", + "PE", + "PF", + "PG", + "PH", + "PI", + "PJ", + "PK", + "PL", + "PM", + "PN", + "PO", + "PP", + "PQ", + "PR", + "PS", + "PT", + "PU", + "PV", + "PW", + "PX", + "PY", + "PZ", + "QA", + "QB", + "QC", + "QD", + "QE", + "QF", + "QG", + "QH", + "QI", + "QJ", + "QK", + "QL", + "QM", + "QN", + "QO", + "QP", + "QQ", + "QR", + "QS", + "QT", + "QU", + "QV", + "QW", + "QX", + "QY", + "QZ", + "RA", + "RB", + "RC", + "RD", + "RE", + "RF", + "RG", + "RH", + "RI", + "RJ", + "RK", + "RL", + "RM", + "RN", + "RO", + "RP", + "RQ", + "RR", + "RS", + "RT", + "RU", + "RV", + "RW", + "RX", + "RY", + "RZ", + "SA", + "SB", + "SC", + "SD", + "SE", + "SF", + "SG", + "SH", + "SI", + "SJ", + "SK", + "SL", + "SM", + "SN", + "SO", + "SP", + "SQ", + "SR", + "SS", + "ST", + "SU", + "SV", + "SW", + "SX", + "SY", + "SZ", + "TA", + "TB", + "TC", + "TD", + "TE", + "TF", + "TG", + "TH", + "TI", + "TJ", + "TK", + "TL", + "TM", + "TN", + "TO", + "TP", + "TQ", + "TR", + "TS", + "TT", + "TU", + "TV", + "TW", + "TX", + "TY", + "TZ", + "UA", + "UB", + "UC", + "UD", + "UE", + "UF", + "UG", + "UH", + "UI", + "UJ", + "UK", + "UL", + "UM", + "UN", + "UO", + "UP", + "UQ", + "UR", + "US", + "UT", + "UU", + "UV", + "UW", + "UX", + "UY", + "UZ", + "VA", + "VB", + "VC", + "VD", + "VE", + "VF", + "VG", + "VH", + "VI", + "VJ", + "VK", + "VL", + "VM", + "VN", + "VO", + "VP", + "VQ", + "VR", + "VS", + "VT", + "VU", + "VV", + "VW", + "VX", + "VY", + "VZ", + "WA", + "WB", + "WC", + "WD", + "WE", + "WF", + "WG", + "WH", + "WI", + "WJ", + "WK", + "WL", + "WM", + "WN", + "WO", + "WP", + "WQ", + "WR", + "WS", + "WT", + "WU", + "WV", + "WW", + "WX", + "WY", + "WZ", + "XA", + "XB", + "XC", + "XD", + "XE", + "XF", + "XG", + "XH", + "XI", + "XJ", + "XK", + "XL", + "XM", + "XN", + "XO", + "XP", + "XQ", + "XR", + "XS", + "XT", + "XU", + "XV", + "XW", + "XX", + "XY", + "XZ", + "YA", + "YB", + "YC", + "YD", + "YE", + "YF", + "YG", + "YH", + "YI", + "YJ", + "YK", + "YL", + "YM", + "YN", + "YO", + "YP", + "YQ", + "YR", + "YS", + "YT", + "YU", + "YV", + "YW", + "YX", + "YY", + "YZ", + "ZA", + "ZB", + "ZC", + "ZD", + "ZE", + "ZF", + "ZG", + "ZH", + "ZI", + "ZJ", + "ZK", + "ZL", + "ZM", + "ZN", + "ZO", + "ZP", + "ZQ", + "ZR", + "ZS", + "ZT", + "ZU", + "ZV", + "ZW", + "ZX", + "ZY", + "ZZ", + "AAA", + "AAB", + "AAC", + "AAD", + "AAE", + "AAF", + "AAG", + "AAH", + "AAI", + "AAJ", + "AAK", + "AAL", + "AAM", + "AAN", + "AAO", + "AAP", + "AAQ", + "AAR", + "AAS", + "AAT", + "AAU", + "AAV", + "AAW", + "AAX", + "AAY", + "AAZ", + "ABA", 
+ "ABB", + "ABC", + "ABD", + "ABE", + "ABF", + "ABG", + "ABH", + "ABI", + "ABJ", + "ABK", + "ABL", + "ABM", + "ABN", + "ABO", + "ABP", + "ABQ", + "ABR", + "ABS", + "ABT", + "ABU", + "ABV", + "ABW", + "ABX", + "ABY", + "ABZ", + "ACA", + "ACB", + "ACC", + "ACD", + "ACE", + "ACF", + "ACG", + "ACH", + "ACI", + "ACJ", + "ACK", + "ACL", + "ACM", + "ACN", + "ACO", + "ACP", + "ACQ", + "ACR", + "ACS", + "ACT", + "ACU", + "ACV", + "ACW", + "ACX", + "ACY", + "ACZ", + "ADA", + "ADB", + "ADC", + "ADD", + "ADE", + "ADF", + "ADG", + "ADH", + "ADI", + "ADJ", + "ADK", + "ADL", + "ADM", + "ADN", + "ADO", + "ADP", + "ADQ", + "ADR", + "ADS", + "ADT", + "ADU", + "ADV", + "ADW", + "ADX", + "ADY", + "ADZ", + "AEA", + "AEB", + "AEC", + "AED", + "AEE", + "AEF", + "AEG", + "AEH", + "AEI", + "AEJ", + "AEK", + "AEL", + "AEM", + "AEN", + "AEO", + "AEP", + "AEQ", + "AER", + "AES", + "AET", + "AEU", + "AEV", + "AEW", + "AEX", + "AEY", + "AEZ", + "AFA", + "AFB", + "AFC", + "AFD", + "AFE", + "AFF", + "AFG", + "AFH", + "AFI", + "AFJ", + "AFK", + "AFL", + "AFM", + "AFN", + "AFO", + "AFP", + "AFQ", + "AFR", + "AFS", + "AFT", + "AFU", + "AFV", + "AFW", + "AFX", + "AFY", + "AFZ", + "AGA", + "AGB", + "AGC", + "AGD", + "AGE", + "AGF", + "AGG", + "AGH", + "AGI", + "AGJ", + "AGK", + "AGL", + "AGM", + "AGN", + "AGO", + "AGP", + "AGQ", + "AGR", + "AGS", + "AGT", + "AGU", + "AGV", + "AGW", + "AGX", + "AGY", + "AGZ", + "AHA", + "AHB", + "AHC", + "AHD", + "AHE", + "AHF", + "AHG", + "AHH", + "AHI", + "AHJ", + "AHK", + "AHL", + "AHM", + "AHN", + "AHO", + "AHP", + "AHQ", + "AHR", + "AHS", + "AHT", + "AHU", + "AHV", + "AHW", + "AHX", + "AHY", + "AHZ", + "AIA", + "AIB", + "AIC", + "AID", + "AIE", + "AIF", + "AIG", + "AIH", + "AII", + "AIJ", + "AIK", + "AIL", + "AIM", + "AIN", + "AIO", + "AIP", + "AIQ", + "AIR", + "AIS", + "AIT", + "AIU", + "AIV", + "AIW", + "AIX", + "AIY", + "AIZ", + "AJA", + "AJB", + "AJC", + "AJD", + "AJE", + "AJF", + "AJG", + "AJH", + "AJI", + "AJJ", + "AJK", + "AJL", + "AJM", + "AJN", + "AJO", + "AJP", + "AJQ", + "AJR", + "AJS", + "AJT", + "AJU", + "AJV", + "AJW", + "AJX", + "AJY", + "AJZ", + "AKA", + "AKB", + "AKC", + "AKD", + "AKE", + "AKF", + "AKG", + "AKH", + "AKI", + "AKJ", + "AKK", + "AKL", + "AKM", + "AKN", + "AKO", + "AKP", + "AKQ", + "AKR", + "AKS", + "AKT", + "AKU", + "AKV", + "AKW", + "AKX", + "AKY", + "AKZ", + "ALA", + "ALB", + "ALC", + "ALD", + "ALE", + "ALF", + "ALG", + "ALH", + "ALI", + "ALJ", + "ALK", ] return excel_column_names[index]