Imputing income using geographic neighbors (#1559)
Imputes the income field, with a light refactor. Needs more refactoring and more tests (I spot-checked). The next ticket will check and address that, but a lot of the "narwhal" architecture is here.
This commit is contained in: parent 485a9a8316 · commit f047ca9d83
16 changed files with 1245 additions and 81 deletions
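Before the file-by-file diff, here is a minimal, hedged sketch of the strategy the new etl_imputations.py module (further down in this diff) implements. The function name and the single-column signature are illustrative only; the committed code imputes several fields at once via named tuples. For a tract missing a value, the idea is to average that value over touching neighbor tracts, falling back to tracts in the same county (first five GEOID digits) and then the same state (first two digits).

import geopandas as gpd


def impute_by_geographic_neighbors(
    geo_df: gpd.GeoDataFrame, column: str, geoid_field: str = "GEOID10_TRACT"
) -> gpd.GeoDataFrame:
    # Illustrative helper (not the committed code): fill "<column>, imputed"
    # from the closest group of tracts that has any data, in priority order:
    # adjacent tracts, then same county, then same state.
    imputed_column = f"{column}, imputed"
    geo_df[imputed_column] = geo_df[column].copy()

    for index, row in geo_df[geo_df[column].isna()].iterrows():
        masks_in_priority_order = [
            geo_df["geometry"].touches(row["geometry"]),          # adjacent tracts
            geo_df[geoid_field].str[:5] == row[geoid_field][:5],  # same county
            geo_df[geoid_field].str[:2] == row[geoid_field][:2],  # same state
        ]
        for mask in masks_in_priority_order:
            if geo_df[mask][column].notna().any():
                geo_df.loc[index, imputed_column] = geo_df[mask][column].mean()
                break

    return geo_df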
Binary file not shown.

@@ -40,7 +40,7 @@ def validate_new_data(
    assert (
        checking_df[score_col].nunique() <= 3
    ), f"Error: there are too many values possible in {score_col}"
    assert (True in checking_df[score_col].unique()) & (
    assert (True in checking_df[score_col].unique()) | (
        False in checking_df[score_col].unique()
    ), f"Error: {score_col} should be a boolean"
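The only change in the hunk above appears to relax the boolean sanity check from requiring both True and False to requiring either one, which matters when a score column is legitimately uniform. A tiny illustration (toy data, not from the pipeline):

import pandas as pd

col = pd.Series([True, True, True])  # e.g. a threshold that no tract fails
print((True in col.unique()) & (False in col.unique()))  # False: the "&" form would trip the assert
print((True in col.unique()) | (False in col.unique()))  # True: the "|" form accepts an all-True column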
@@ -26,6 +26,9 @@ fields:
  - score_name: Total population
    label: Total population
    format: float
  - score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted
    label: Adjusted percent of individuals below 200% Federal Poverty Line
    format: float
  - score_name: Is low income and has a low percent of higher ed students?
    label: Is low income and high percent of residents that are not higher ed students?
    format: bool
@@ -30,6 +30,9 @@ sheets:
      - score_name: Total population
        label: Total population
        format: float
      - score_name: Percent of individuals below 200% Federal Poverty Line, imputed and adjusted
        label: Adjusted percent of individuals below 200% Federal Poverty Line
        format: float
      - score_name: Is low income and has a low percent of higher ed students?
        label: Is low income and high percent of residents that are not higher ed students?
        format: bool
@@ -14,16 +14,6 @@ DATASET_LIST = [
        "module_dir": "tree_equity_score",
        "class_name": "TreeEquityScoreETL",
    },
    {
        "name": "census_acs",
        "module_dir": "census_acs",
        "class_name": "CensusACSETL",
    },
    {
        "name": "census_acs_2010",
        "module_dir": "census_acs_2010",
        "class_name": "CensusACS2010ETL",
    },
    {
        "name": "census_decennial",
        "module_dir": "census_decennial",
@@ -124,6 +114,17 @@ DATASET_LIST = [
        "module_dir": "maryland_ejscreen",
        "class_name": "MarylandEJScreenETL",
    },
    # This has to come after us.json exists
    {
        "name": "census_acs",
        "module_dir": "census_acs",
        "class_name": "CensusACSETL",
    },
    {
        "name": "census_acs_2010",
        "module_dir": "census_acs_2010",
        "class_name": "CensusACS2010ETL",
    },
]

CENSUS_INFO = {
@@ -5,6 +5,9 @@ from data_pipeline.config import settings

from data_pipeline.score import field_names

## note: to keep map porting "right" fields, keeping descriptors the same.


# Base Paths
DATA_PATH = Path(settings.APP_ROOT) / "data"
TMP_PATH = DATA_PATH / "tmp"
@@ -179,6 +182,8 @@ TILES_SCORE_COLUMNS = {
    + field_names.PERCENTILE_FIELD_SUFFIX: "P100_PFS",
    field_names.POVERTY_LESS_THAN_200_FPL_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX: "P200_PFS",
    field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX: "P200_I_PFS",
    field_names.LEAD_PAINT_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX: "LPF_PFS",
    field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX: "NPL_PFS",
@@ -198,7 +203,8 @@ TILES_SCORE_COLUMNS = {
    field_names.M_HOUSING: "M_HSG",
    field_names.M_POLLUTION: "M_PLN",
    field_names.M_HEALTH: "M_HLTH",
    field_names.SCORE_M_COMMUNITIES: "SM_C",
    # temporarily update this so that it's the Narwhal score that gets visualized on the map
    field_names.SCORE_N_COMMUNITIES: "SM_C",
    field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
    field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
    field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
@@ -283,7 +289,7 @@ TILES_SCORE_COLUMNS = {
    ## Low high school and low higher ed for t&wd
    field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
    ## FPL 200 and low higher ed for all others
    field_names.FPL_200_AND_COLLEGE_ATTENDANCE_SERIES: "M_EBSI",
    field_names.FPL_200_SERIES: "M_EBSI",
}

# columns to round floats to 2 decimals
@@ -311,6 +317,8 @@ TILES_SCORE_FLOAT_COLUMNS = [
    + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.POVERTY_LESS_THAN_200_FPL_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
    + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
@@ -332,7 +340,6 @@ TILES_SCORE_FLOAT_COLUMNS = [
    field_names.LOW_HS_EDUCATION_LOW_HIGHER_ED_FIELD,
    field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
    field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX,
    field_names.COLLEGE_NON_ATTENDANCE_FIELD,
    field_names.COLLEGE_ATTENDANCE_FIELD,
]
@@ -405,6 +405,7 @@ class ScoreETL(ExtractTransformLoad):
            df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
        )

        # QQ: why don't we just filter to the numeric columns by type?
        numeric_columns = [
            field_names.HOUSING_BURDEN_FIELD,
            field_names.TOTAL_POP_FIELD,
@@ -458,6 +459,7 @@ class ScoreETL(ExtractTransformLoad):
            field_names.IMPENETRABLE_SURFACES_FIELD,
            # We have to pass this boolean here in order to include it in ag value loss percentiles.
            field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
            field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
        ]

        non_numeric_columns = [
@@ -29,7 +29,7 @@ from . import constants
logger = get_module_logger(__name__)

# Define the DAC variable
DISADVANTAGED_COMMUNITIES_FIELD = field_names.SCORE_M_COMMUNITIES
DISADVANTAGED_COMMUNITIES_FIELD = field_names.SCORE_N_COMMUNITIES


class PostScoreETL(ExtractTransformLoad):
@@ -1,14 +1,26 @@
from collections import namedtuple
import os
import pandas as pd
import geopandas as gpd

from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.sources.census_acs.etl_utils import (
    retrieve_census_acs_data,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.etl.sources.census_acs.etl_imputations import (
    calculate_income_measures,
)

from data_pipeline.utils import get_module_logger, unzip_file_from_url
from data_pipeline.score import field_names

logger = get_module_logger(__name__)

# because now there is a requirement for the us.json, this will port from
# AWS when a local copy does not exist.
CENSUS_DATA_S3_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/census.zip"


class CensusACSETL(ExtractTransformLoad):
    def __init__(self):
@@ -59,6 +71,23 @@ class CensusACSETL(ExtractTransformLoad):
        self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
            "Percent of individuals < 200% Federal Poverty Line"
        )
        self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
            "Percent of individuals < 200% Federal Poverty Line, imputed"
        )

        self.ADJUSTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
            "Adjusted percent of individuals < 200% Federal Poverty Line"
        )

        self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME_PRELIMINARY = (
            "Preliminary adjusted percent of individuals < 200% Federal Poverty Line,"
            + " imputed"
        )

        self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
            "Adjusted percent of individuals < 200% Federal Poverty Line,"
            + " imputed"
        )

        self.MEDIAN_HOUSE_VALUE_FIELD = "B25077_001E"
        self.MEDIAN_HOUSE_VALUE_FIELD_NAME = (
@@ -136,6 +165,10 @@ class CensusACSETL(ExtractTransformLoad):
            "Percent enrollment in college or graduate school"
        )

        self.IMPUTED_COLLEGE_ATTENDANCE_FIELD = (
            "Percent enrollment in college or graduate school, imputed"
        )

        self.COLLEGE_NON_ATTENDANCE_FIELD = "Percent of population not currently enrolled in college or graduate school"

        self.RE_FIELDS = [
@@ -188,18 +221,50 @@ class CensusACSETL(ExtractTransformLoad):
                self.MEDIAN_INCOME_FIELD_NAME,
                self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
                self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
                self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
                self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
                self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
                self.HIGH_SCHOOL_ED_FIELD,
                self.COLLEGE_ATTENDANCE_FIELD,
                self.COLLEGE_NON_ATTENDANCE_FIELD,
                self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
            ]
            + self.RE_OUTPUT_FIELDS
            + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
            + [
                field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
                field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
            ]
        )

        self.df: pd.DataFrame

    def _merge_geojson(
        self,
        df: pd.DataFrame,
        usa_geo_df: gpd.GeoDataFrame,
        geoid_field: str = "GEOID10",
        geometry_field: str = "geometry",
        state_code_field: str = "STATEFP10",
        county_code_field: str = "COUNTYFP10",
    ) -> gpd.GeoDataFrame:
        usa_geo_df[geoid_field] = (
            usa_geo_df[geoid_field].astype(str).str.zfill(11)
        )
        return gpd.GeoDataFrame(
            df.merge(
                usa_geo_df[
                    [
                        geoid_field,
                        geometry_field,
                        state_code_field,
                        county_code_field,
                    ]
                ],
                left_on=[self.GEOID_TRACT_FIELD_NAME],
                right_on=[geoid_field],
            )
        )

    def extract(self) -> None:
        # Define the variables to retrieve
        variables = (
@@ -227,6 +292,27 @@ class CensusACSETL(ExtractTransformLoad):

        df = self.df

        # Here we join the geometry of the US to the dataframe so that we can impute
        # the income of neighbors. First this looks locally; if there's no local
        # geojson file for all of the US, this will read it off of S3.
        logger.info("Reading in geojson for the country")
        if not os.path.exists(
            self.DATA_PATH / "census" / "geojson" / "us.json"
        ):
            logger.info("Fetching Census data from AWS S3")
            unzip_file_from_url(
                CENSUS_DATA_S3_URL,
                self.DATA_PATH / "tmp",
                self.DATA_PATH,
            )

        geo_df = gpd.read_file(
            self.DATA_PATH / "census" / "geojson" / "us.json"
        )
        df = self._merge_geojson(
            df=df,
            usa_geo_df=geo_df,
        )
        # Rename two fields.
        df = df.rename(
            columns={
@@ -349,7 +435,7 @@ class CensusACSETL(ExtractTransformLoad):
            df["B03003_003E"] / df["B03003_001E"]
        )

        # Calculate college attendance:
        # Calculate college attendance and adjust low income
        df[self.COLLEGE_ATTENDANCE_FIELD] = (
            df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC]
            + df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE]
|
@ -361,22 +447,64 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
1 - df[self.COLLEGE_ATTENDANCE_FIELD]
|
||||
)
|
||||
|
||||
# strip columns
|
||||
df = df[self.COLUMNS_TO_KEEP]
|
||||
|
||||
# Save results to self.
|
||||
self.df = df
|
||||
|
||||
# rename columns to be used in score
|
||||
rename_fields = {
|
||||
"Percent of individuals < 200% Federal Poverty Line": field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||
}
|
||||
self.df.rename(
|
||||
columns=rename_fields,
|
||||
inplace=True,
|
||||
errors="raise",
|
||||
# we impute income for both income measures
|
||||
## TODO: Convert to pydantic for clarity
|
||||
logger.info("Imputing income information")
|
||||
ImputeVariables = namedtuple(
|
||||
"ImputeVariables", ["raw_field_name", "imputed_field_name"]
|
||||
)
|
||||
|
||||
df = calculate_income_measures(
|
||||
impute_var_named_tup_list=[
|
||||
ImputeVariables(
|
||||
raw_field_name=self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
|
||||
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
|
||||
),
|
||||
ImputeVariables(
|
||||
raw_field_name=self.COLLEGE_ATTENDANCE_FIELD,
|
||||
imputed_field_name=self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
|
||||
),
|
||||
],
|
||||
geo_df=df,
|
||||
geoid_field=self.GEOID_TRACT_FIELD_NAME,
|
||||
)
|
||||
|
||||
logger.info("Calculating with imputed values")
|
||||
|
||||
df[
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
|
||||
] = (
|
||||
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME].fillna(
|
||||
df[self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME]
|
||||
)
|
||||
- df[self.COLLEGE_ATTENDANCE_FIELD].fillna(
|
||||
df[self.IMPUTED_COLLEGE_ATTENDANCE_FIELD]
|
||||
)
|
||||
).clip(
|
||||
lower=0
|
||||
)
|
||||
|
||||
# All values should have a value at this point
|
||||
assert (
|
||||
df[
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
|
||||
]
|
||||
.isna()
|
||||
.sum()
|
||||
== 0
|
||||
), "Error: not all values were filled..."
|
||||
|
||||
logger.info("Renaming columns...")
|
||||
df = df.rename(
|
||||
columns={
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME: field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
|
||||
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME: field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||
}
|
||||
)
|
||||
|
||||
# Strip columns and save results to self.
|
||||
self.df = df[self.COLUMNS_TO_KEEP]
|
||||
|
||||
def load(self) -> None:
|
||||
logger.info("Saving Census ACS Data")
|
||||
|
||||
|
|
|
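A toy illustration (hypothetical numbers, not pipeline data) of the "imputed and adjusted" poverty measure computed in the hunk above: fall back to the neighbor-imputed share where the raw share is missing, subtract the (possibly imputed) college-attendance share, and clip at zero so the result never goes negative.

import pandas as pd

raw_fpl_200 = pd.Series([0.55, None])      # raw share below 200% FPL; second tract is missing
imputed_fpl_200 = pd.Series([0.55, 0.40])  # after neighbor-mean imputation
college = pd.Series([0.10, 0.10])          # share enrolled in college or graduate school

adjusted = (raw_fpl_200.fillna(imputed_fpl_200) - college).clip(lower=0)
print(adjusted.round(2).tolist())  # [0.45, 0.3]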
@@ -0,0 +1,127 @@
from typing import List, NamedTuple
import pandas as pd
import geopandas as gpd
import numpy as np

from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


def _get_fips_mask(
    geo_df: gpd.GeoDataFrame,
    row: gpd.GeoSeries,
    fips_digits: int,
    geoid_field: str = "GEOID10_TRACT",
) -> pd.Series:
    return (
        geo_df[geoid_field].str[:fips_digits] == row[geoid_field][:fips_digits]
    )


def _get_neighbor_mask(
    geo_df: gpd.GeoDataFrame, row: gpd.GeoSeries
) -> pd.Series:
    return geo_df["geometry"].touches(row["geometry"])


def _choose_best_mask(
    geo_df: gpd.GeoDataFrame,
    masks_in_priority_order: List[pd.Series],
    column_to_impute: str,
) -> pd.Series:
    for mask in masks_in_priority_order:
        if any(geo_df[mask][column_to_impute].notna()):
            return mask
    raise Exception("No mask found")


def _prepare_dataframe_for_imputation(
    impute_var_named_tup_list: List[NamedTuple],
    geo_df: gpd.GeoDataFrame,
    geoid_field: str = "GEOID10_TRACT",
) -> tuple[list, gpd.GeoDataFrame]:
    imputing_cols = [
        impute_var_pair.raw_field_name
        for impute_var_pair in impute_var_named_tup_list
    ]

    # prime column to exist
    for impute_var_pair in impute_var_named_tup_list:
        geo_df[impute_var_pair.imputed_field_name] = geo_df[
            impute_var_pair.raw_field_name
        ].copy()

    # generate a list of tracts for which at least one of the imputation
    # columns is null
    tract_list = geo_df[geo_df[imputing_cols].isna().any(axis=1)][
        geoid_field
    ].unique()

    # Check that imputation is a valid choice for this set of fields
    logger.info(f"Imputing values for {len(tract_list)} unique tracts.")
    assert len(tract_list) > 0, "Error: No missing values to impute"

    return tract_list, geo_df


def calculate_income_measures(
    impute_var_named_tup_list: list,
    geo_df: gpd.GeoDataFrame,
    geoid_field: str,
) -> pd.DataFrame:
    """Impute values based on geographic neighbors

    We only want to check neighbors a single time, so all variables
    that we impute get imputed here.

    Takes in:
        required:
            impute_var_named_tup_list: list of named tuples (imputed field, raw field)
            geo_df: geo dataframe that already has the census shapefiles merged
            geoid field: tract level ID

    Returns: non-geometry pd.DataFrame
    """
    # Determine where to impute variables and fill a column with nulls
    tract_list, geo_df = _prepare_dataframe_for_imputation(
        impute_var_named_tup_list=impute_var_named_tup_list,
        geo_df=geo_df,
        geoid_field=geoid_field,
    )

    # Iterate through the dataframe to impute in place
    for index, row in geo_df.iterrows():
        if row[geoid_field] in tract_list:
            neighbor_mask = _get_neighbor_mask(geo_df, row)
            county_mask = _get_fips_mask(
                geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field
            )
            state_mask = _get_fips_mask(
                geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field
            )

            # Impute fields for every row missing at least one value using the best possible set of neighbors
            # Note that later, we will pull raw.fillna(imputed), so the mechanics of this step aren't critical
            for impute_var_pair in impute_var_named_tup_list:
                mask_to_use = _choose_best_mask(
                    geo_df=geo_df,
                    masks_in_priority_order=[
                        neighbor_mask,
                        county_mask,
                        state_mask,
                    ],
                    column_to_impute=impute_var_pair.raw_field_name,
                )
                geo_df.loc[index, impute_var_pair.imputed_field_name] = geo_df[
                    mask_to_use
                ][impute_var_pair.raw_field_name].mean()

    logger.info("Casting geodataframe as a typical dataframe")
    # get rid of the geometry column and cast as a typical df
    df = pd.DataFrame(
        geo_df[[col for col in geo_df.columns if col != "geometry"]]
    )

    # finally, return the df
    return df
@@ -4,6 +4,7 @@ from typing import List
import censusdata
import pandas as pd

from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
from data_pipeline.utils import get_module_logger
@@ -56,6 +56,19 @@ M_HEALTH = "Health Factor (Definition M)"
M_WORKFORCE = "Workforce Factor (Definition M)"
M_NON_WORKFORCE = "Any Non-Workforce Factor (Definition M)"

# Definition Narwhal fields
SCORE_N = "Definition N"
SCORE_N_COMMUNITIES = "Definition N (communities)"
N_CLIMATE = "Climate Factor (Definition N)"
N_ENERGY = "Energy Factor (Definition N)"
N_TRANSPORTATION = "Transportation Factor (Definition N)"
N_HOUSING = "Housing Factor (Definition N)"
N_POLLUTION = "Pollution Factor (Definition N)"
N_WATER = "Water Factor (Definition N)"
N_HEALTH = "Health Factor (Definition N)"
N_WORKFORCE = "Workforce Factor (Definition N)"
N_NON_WORKFORCE = "Any Non-Workforce Factor (Definition N)"

PERCENTILE = 90
MEDIAN_HOUSE_VALUE_PERCENTILE = 90
@@ -93,9 +106,19 @@ HEALTH_SOCIO_INDICATORS_EXCEEDED = (

# Poverty / Income
POVERTY_FIELD = "Poverty (Less than 200% of federal poverty line)"

# this is the raw, unadjusted variable
POVERTY_LESS_THAN_200_FPL_FIELD = (
    "Percent of individuals below 200% Federal Poverty Line"
)

# this is for use in the donuts
ADJUSTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
    "Adjusted percent of individuals < 200% Federal Poverty Line"
)

# this is what gets used in the score
POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD = "Percent of individuals below 200% Federal Poverty Line, imputed and adjusted"
POVERTY_LESS_THAN_150_FPL_FIELD = (
    "Percent of individuals < 150% Federal Poverty Line"
)
@@ -412,6 +435,7 @@ SCORE_M_LOW_INCOME_SUFFIX = (
    ", is low income, and has a low percent of higher ed students"
)


COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD = (
    "Percent higher ed enrollment rate is less than 20%"
)
@@ -651,6 +675,7 @@ THRESHOLD_COUNT = "Total threshold criteria exceeded"
CATEGORY_COUNT = "Total categories exceeded"

FPL_200_SERIES = "Is low income?"
FPL_200_SERIES_IMPUTED_AND_ADJUSTED = "Is low income (imputed and adjusted)?"
FPL_200_AND_COLLEGE_ATTENDANCE_SERIES = (
    "Is low income and has a low percent of higher ed students?"
)
data/data-pipeline/data_pipeline/score/score_narwhal.py (new file, +808 lines)
@@ -0,0 +1,808 @@
from typing import Tuple
import numpy as np
import pandas as pd

from data_pipeline.score.score import Score
import data_pipeline.score.field_names as field_names
from data_pipeline.utils import get_module_logger
import data_pipeline.etl.score.constants as constants

logger = get_module_logger(__name__)


class ScoreNarwhal(Score):
    """Very similar to Score M, at present."""

    def __init__(self, df: pd.DataFrame) -> None:
        self.LOW_INCOME_THRESHOLD: float = 0.65
        self.MAX_COLLEGE_ATTENDANCE_THRESHOLD: float = 0.20
        self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
        self.MEDIAN_HOUSE_VALUE_THRESHOLD: float = 0.90
        self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD: float = 0.10

        super().__init__(df)

    def _combine_island_areas_with_states_and_set_thresholds(
        self,
        df: pd.DataFrame,
        column_from_island_areas: str,
        column_from_decennial_census: str,
        combined_column_name: str,
        threshold_cutoff_for_island_areas: float,
    ) -> Tuple[pd.DataFrame, str]:
        """Steps to set thresholds for island areas.

        This function is fairly logically complicated. It takes the following steps:

        1. Combine the two different fields into a single field.
        2. Calculate the 90th percentile for the combined field.
        3. Create a boolean series that is true for any census tract in the island
           areas (and only the island areas) that exceeds this percentile.

        For step one, it combines data that is either the island area's Decennial Census
        value in 2009 or the state's value in 5-year ACS ending in 2010.

        This will be used to generate the percentile cutoff for the 90th percentile.

        The stateside decennial census stopped asking economic comparisons,
        so this is as close to apples-to-apples as we get. We use 5-year ACS for data
        robustness over 1-year ACS.
        """
        # Create the combined field.
        # TODO: move this combined field percentile calculation to `etl_score`,
        # since most other percentile logic is there.
        # There should only be one entry in either 2009 or 2019 fields, not one in both.
        # But just to be safe, we take the mean and ignore null values so if there
        # *were* entries in both, this result would make sense.
        df[combined_column_name] = df[
            [column_from_island_areas, column_from_decennial_census]
        ].mean(axis=1, skipna=True)

        # Create a percentile field for use in the Islands / PR visualization
        # TODO: move this code
        # In the code below, percentiles are constructed based on the combined column
        # of census and island data, but only reported for the island areas (where there
        # is no other comprehensive percentile information)
        return_series_name = (
            column_from_island_areas
            + field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD
            + field_names.PERCENTILE_FIELD_SUFFIX
        )
        df[return_series_name] = np.where(
            df[column_from_decennial_census].isna(),
            df[combined_column_name].rank(pct=True),
            np.nan,
        )

        threshold_column_name = (
            f"{column_from_island_areas} exceeds "
            f"{threshold_cutoff_for_island_areas*100:.0f}th percentile"
        )

        df[threshold_column_name] = (
            df[return_series_name] >= threshold_cutoff_for_island_areas
        )

        return df, threshold_column_name

    def _increment_total_eligibility_exceeded(
        self, columns_for_subset: list, skip_fips: tuple = ()
    ) -> None:
        """
        Increments the total eligible factors for a given tract

        The new skip_fips argument specifies which (if any) fips codes to
        skip over for incrementing.
        This allows us to essentially skip data we think is of limited veracity,
        without overriding any values in the data.
        THIS IS A TEMPORARY FIX.
        """
        if skip_fips:
            self.df[field_names.THRESHOLD_COUNT] += np.where(
                self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
                    skip_fips
                ),
                0,
                self.df[columns_for_subset].sum(axis=1, skipna=True),
            )
        else:
            self.df[field_names.THRESHOLD_COUNT] += self.df[
                columns_for_subset
            ].sum(axis=1, skipna=True)

    def _climate_factor(self) -> bool:
        # In Xth percentile or above for FEMA's Risk Index (Source: FEMA
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and there is low higher ed attendance
        # Source: Census's American Community Survey

        climate_eligibility_columns = [
            field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD,
            field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD,
            field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD,
        ]

        self.df[
            field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
        ] = (
            self.df[
                field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[
            field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD
        ] = (
            self.df[
                field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD] = (
            self.df[
                field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.CLIMATE_THRESHOLD_EXCEEDED] = (
            self.df[
                field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
            ]
            | self.df[
                field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD
            ]
            | self.df[
                field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD
            ]
        )

        self.df[field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD] = (
            self.df[
                field_names.EXPECTED_POPULATION_LOSS_EXCEEDS_PCTILE_THRESHOLD
            ]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self.df[field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD] = (
            self.df[
                field_names.EXPECTED_AGRICULTURAL_LOSS_EXCEEDS_PCTILE_THRESHOLD
            ]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self.df[field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD] = (
            self.df[field_names.EXPECTED_BUILDING_LOSS_EXCEEDS_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self._increment_total_eligibility_exceeded(
            climate_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[climate_eligibility_columns].any(axis="columns")

    def _energy_factor(self) -> bool:
        # In Xth percentile or above for DOE's energy cost burden score (Source: LEAD Score)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has low higher ed attendance.
        # Source: Census's American Community Survey

        energy_eligibility_columns = [
            field_names.PM25_EXPOSURE_LOW_INCOME_FIELD,
            field_names.ENERGY_BURDEN_LOW_INCOME_FIELD,
        ]

        self.df[field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD] = (
            self.df[
                field_names.ENERGY_BURDEN_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.PM25_EXCEEDS_PCTILE_THRESHOLD] = (
            self.df[
                field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.ENERGY_THRESHOLD_EXCEEDED] = (
            self.df[field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD]
            | self.df[field_names.PM25_EXCEEDS_PCTILE_THRESHOLD]
        )

        self.df[field_names.PM25_EXPOSURE_LOW_INCOME_FIELD] = (
            self.df[field_names.PM25_EXCEEDS_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self.df[field_names.ENERGY_BURDEN_LOW_INCOME_FIELD] = (
            self.df[field_names.ENERGY_BURDEN_EXCEEDS_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self._increment_total_eligibility_exceeded(
            energy_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[energy_eligibility_columns].any(axis="columns")

    def _transportation_factor(self) -> bool:
        # In Xth percentile or above for diesel particulate matter (Source: EPA National Air Toxics Assessment (NATA)
        # or
        # In Xth percentile or above for PM 2.5 (Source: EPA, Office of Air and Radiation (OAR) fusion of model and monitor data)]
        # or
        # In Xth percentile or above traffic proximity and volume (Source: 2017 U.S. Department of Transportation (DOT) traffic data
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students.
        # Source: Census's American Community Survey

        transportion_eligibility_columns = [
            field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD,
            field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD,
        ]

        self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD] = (
            self.df[
                field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.TRAFFIC_THRESHOLD_EXCEEDED] = (
            self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD]
            | self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD]
        )

        self.df[field_names.DIESEL_PARTICULATE_MATTER_LOW_INCOME_FIELD] = (
            self.df[field_names.DIESEL_EXCEEDS_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self.df[field_names.TRAFFIC_PROXIMITY_LOW_INCOME_FIELD] = (
            self.df[field_names.TRAFFIC_PROXIMITY_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self._increment_total_eligibility_exceeded(
            transportion_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[transportion_eligibility_columns].any(axis="columns")

    def _housing_factor(self) -> bool:
        # (
        # In Xth percentile or above for lead paint (Source: Census's American Community Survey's
        # percent of housing units built pre-1960, used as an indicator of potential lead paint exposure in homes)
        # AND
        # In Yth percentile or below for Median House Value (Source: Census's American Community Survey)
        # )
        # or
        # In Xth percentile or above for housing cost burden (Source: HUD's Comprehensive Housing Affordability Strategy dataset
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students.
        # Source: Census's American Community Survey

        housing_eligibility_columns = [
            field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD,
            field_names.HOUSING_BURDEN_LOW_INCOME_FIELD,
        ]

        self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LEAD_PAINT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        ) & (
            self.df[
                field_names.MEDIAN_HOUSE_VALUE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            <= self.MEDIAN_HOUSE_VALUE_THRESHOLD
        )

        self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD] = (
            self.df[
                field_names.HOUSING_BURDEN_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] = (
            self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD]
            | self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD]
        )

        # series by series indicators
        self.df[field_names.LEAD_PAINT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD] = (
            self.df[field_names.LEAD_PAINT_PROXY_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self.df[field_names.HOUSING_BURDEN_LOW_INCOME_FIELD] = (
            self.df[field_names.HOUSING_BURDEN_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self._increment_total_eligibility_exceeded(
            housing_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[housing_eligibility_columns].any(axis="columns")

    def _pollution_factor(self) -> bool:
        # Proximity to Risk Management Plan sites is > X
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students.
        # Source: Census's American Community Survey

        pollution_eligibility_columns = [
            field_names.RMP_LOW_INCOME_FIELD,
            field_names.SUPERFUND_LOW_INCOME_FIELD,
            field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD,
        ]

        self.df[field_names.RMP_PCTILE_THRESHOLD] = (
            self.df[field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.NPL_PCTILE_THRESHOLD] = (
            self.df[field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.TSDF_PCTILE_THRESHOLD] = (
            self.df[
                field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.POLLUTION_THRESHOLD_EXCEEDED] = (
            self.df[field_names.RMP_PCTILE_THRESHOLD]
            | self.df[field_names.NPL_PCTILE_THRESHOLD]
        ) | self.df[field_names.TSDF_PCTILE_THRESHOLD]

        # individual series-by-series
        self.df[field_names.RMP_LOW_INCOME_FIELD] = (
            self.df[field_names.RMP_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )
        self.df[field_names.SUPERFUND_LOW_INCOME_FIELD] = (
            self.df[field_names.NPL_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )
        self.df[field_names.HAZARDOUS_WASTE_LOW_INCOME_FIELD] = (
            self.df[field_names.TSDF_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self._increment_total_eligibility_exceeded(
            pollution_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[pollution_eligibility_columns].any(axis="columns")

    def _water_factor(self) -> bool:
        # In Xth percentile or above for wastewater discharge (Source: EPA Risk-Screening Environmental Indicators (RSEI) Model)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students
        # Source: Census's American Community Survey

        self.df[field_names.WASTEWATER_PCTILE_THRESHOLD] = (
            self.df[
                field_names.WASTEWATER_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        # Straight copy here in case we add additional water fields.
        self.df[field_names.WATER_THRESHOLD_EXCEEDED] = self.df[
            field_names.WASTEWATER_PCTILE_THRESHOLD
        ].copy()

        self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD] = (
            self.df[field_names.WASTEWATER_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self._increment_total_eligibility_exceeded(
            [field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD],
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[field_names.WASTEWATER_DISCHARGE_LOW_INCOME_FIELD]

    def _health_factor(self) -> bool:
        # In Xth percentile or above for diabetes (Source: CDC Places)
        # or
        # In Xth percentile or above for asthma (Source: CDC Places)
        # or
        # In Xth percentile or above for heart disease
        # or
        # In Xth percentile or above for low life expectancy (Source: CDC Places)
        # AND
        # Low income: In Nth percentile or above for percent of block group population
        # of households where household income is less than or equal to twice the federal
        # poverty level and has a low percent of higher ed students
        # Source: Census's American Community Survey

        health_eligibility_columns = [
            field_names.DIABETES_LOW_INCOME_FIELD,
            field_names.ASTHMA_LOW_INCOME_FIELD,
            field_names.HEART_DISEASE_LOW_INCOME_FIELD,
            field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD,
        ]

        self.df[field_names.DIABETES_PCTILE_THRESHOLD] = (
            self.df[
                field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.ASTHMA_PCTILE_THRESHOLD] = (
            self.df[
                field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.HEART_DISEASE_PCTILE_THRESHOLD] = (
            self.df[
                field_names.HEART_DISEASE_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LOW_LIFE_EXPECTANCY_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.HEALTH_THRESHOLD_EXCEEDED] = (
            (
                self.df[field_names.DIABETES_PCTILE_THRESHOLD]
                | self.df[field_names.ASTHMA_PCTILE_THRESHOLD]
            )
            | self.df[field_names.HEART_DISEASE_PCTILE_THRESHOLD]
        ) | self.df[field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD]

        self.df[field_names.DIABETES_LOW_INCOME_FIELD] = (
            self.df[field_names.DIABETES_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )
        self.df[field_names.ASTHMA_LOW_INCOME_FIELD] = (
            self.df[field_names.ASTHMA_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )
        self.df[field_names.HEART_DISEASE_LOW_INCOME_FIELD] = (
            self.df[field_names.HEART_DISEASE_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )
        self.df[field_names.LOW_LIFE_EXPECTANCY_LOW_INCOME_FIELD] = (
            self.df[field_names.LOW_LIFE_EXPECTANCY_PCTILE_THRESHOLD]
            & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED]
        )

        self._increment_total_eligibility_exceeded(
            health_eligibility_columns,
            skip_fips=constants.DROP_FIPS_FROM_NON_WTD_THRESHOLDS,
        )

        return self.df[health_eligibility_columns].any(axis="columns")

    def _workforce_factor(self) -> bool:
        # Where unemployment is above Xth percentile
        # or
        # Where median income as a percent of area median income is above Xth percentile
        # or
        # Where the percent of households at or below 100% of the federal poverty level
        # is above Xth percentile
        # or
        # Where linguistic isolation is above Xth percentile
        # AND
        # Where the high school degree achievement rates for adults 25 years and older
        # is less than Y%
        # AND the higher ed attendance rates are under Z%
        # (necessary to screen out university tracts)

        # Workforce criteria for states fields.
        workforce_eligibility_columns = [
            field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            field_names.POVERTY_LOW_HS_EDUCATION_FIELD,
            field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD,
            field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
        ]

        self.df[field_names.LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.HIGH_SCHOOL_ED_FIELD]
            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        )
        self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD] = (
            self.df[
                field_names.UNEMPLOYMENT_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LINGUISTIC_ISO_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.POVERTY_PCTILE_THRESHOLD] = (
            self.df[
                field_names.POVERTY_LESS_THAN_100_FPL_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.LINGUISTIC_ISOLATION_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.POVERTY_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.POVERTY_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD]
            & self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        workforce_combined_criteria_for_states = self.df[
            workforce_eligibility_columns
        ].any(axis="columns")

        self._increment_total_eligibility_exceeded(
            workforce_eligibility_columns
        )

        # Now, calculate workforce criteria for island territories.
        island_areas_workforce_eligibility_columns = [
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD,
            field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD,
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD,
        ]

        # First, combine unemployment.
        # This will include an adjusted percentile column for the island areas
        # to be used by the front end.
        (
            self.df,
            island_areas_unemployment_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009,
            column_from_decennial_census=field_names.CENSUS_UNEMPLOYMENT_FIELD_2010,
            combined_column_name=field_names.COMBINED_UNEMPLOYMENT_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )

        # TODO: Remove this, it's for checking only
        assert (
            island_areas_unemployment_criteria_field_name
            == field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD
        ), "Error combining island columns"

        # Next, combine poverty.
        # This will include an adjusted percentile column for the island areas
        # to be used by the front end.
        (
            self.df,
            island_areas_poverty_criteria_field_name,
        ) = self._combine_island_areas_with_states_and_set_thresholds(
            df=self.df,
            column_from_island_areas=field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009,
            column_from_decennial_census=field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
            threshold_cutoff_for_island_areas=self.ENVIRONMENTAL_BURDEN_THRESHOLD,
        )

        # TODO: Remove this, it's for checking only
        assert (
            island_areas_poverty_criteria_field_name
            == field_names.ISLAND_POVERTY_PCTILE_THRESHOLD
        ), "Error combining island columns"

        # Also check whether low area median income is 90th percentile or higher
        # within the islands.

        # Note that because the field for low median does not have to be combined,
        # unlike the other fields, we do not need to create a new percentile
        # column. This code should probably be refactored when (TODO) we do the big
        # refactor.
        self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] = (
            self.df[
                field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
        )

        self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD] = (
            self.df[field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009]
            >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD
        )

        self.df[
            field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD
        ] = (
            self.df[island_areas_unemployment_criteria_field_name]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        self.df[field_names.ISLAND_AREAS_POVERTY_LOW_HS_EDUCATION_FIELD] = (
            self.df[island_areas_poverty_criteria_field_name]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        self.df[
            field_names.ISLAND_AREAS_LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD
        ] = (
            self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
            & self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
        )

        workforce_combined_criteria_for_island_areas = self.df[
            island_areas_workforce_eligibility_columns
        ].any(axis="columns")

        self._increment_total_eligibility_exceeded(
            island_areas_workforce_eligibility_columns
        )

        percent_of_island_tracts_highlighted = (
            100
            * workforce_combined_criteria_for_island_areas.sum()
            # Choosing a random column from island areas to calculate the denominator.
            / self.df[field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009]
            .notnull()
            .sum()
        )

        logger.info(
            f"For workforce criteria in island areas, "
            f"{workforce_combined_criteria_for_island_areas.sum()} ("
            f"{percent_of_island_tracts_highlighted:.2f}% of tracts that have non-null data "
            f"in the column) have a value of TRUE."
        )

        # Because these criteria are calculated differently for the islands, we also calculate the
        # thresholds to pass to the FE slightly differently

        self.df[field_names.WORKFORCE_THRESHOLD_EXCEEDED] = (
            ## First we calculate for the non-island areas
            (
                (
                    self.df[field_names.POVERTY_PCTILE_THRESHOLD]
                    | self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD]
                )
                | self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
            )
            | self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD]
        ) | (
            ## then we calculate just for the island areas
            (
                self.df[field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD]
                | self.df[field_names.ISLAND_POVERTY_PCTILE_THRESHOLD]
            )
            | self.df[field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD]
        )

        # Because of the island complications, we also have to separately calculate the threshold for
        # socioeconomic thresholds
        self.df[field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED] = (
            self.df[field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD]
            | self.df[field_names.LOW_HS_EDUCATION_FIELD]
        )

        # A tract is included if it meets either the states tract criteria or the
        # island areas tract criteria.
        return (
            workforce_combined_criteria_for_states
            | workforce_combined_criteria_for_island_areas
        )

    def add_columns(self) -> pd.DataFrame:
        logger.info("Adding Score M")

        self.df[field_names.THRESHOLD_COUNT] = 0

        # TODO: move this inside of
        # `_create_low_income_and_low_college_attendance_threshold`
        # and change the return signature of that method.
        # Create a standalone field that captures the college attendance boolean
        # threshold.
        self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (
            self.df[
                # UPDATE: Pull the imputed poverty statistic
                field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            >= self.LOW_INCOME_THRESHOLD
        )

        self.df[field_names.N_CLIMATE] = self._climate_factor()
        self.df[field_names.N_ENERGY] = self._energy_factor()
        self.df[field_names.N_TRANSPORTATION] = self._transportation_factor()
        self.df[field_names.N_HOUSING] = self._housing_factor()
        self.df[field_names.N_POLLUTION] = self._pollution_factor()
        self.df[field_names.N_WATER] = self._water_factor()
        self.df[field_names.N_HEALTH] = self._health_factor()
        self.df[field_names.N_WORKFORCE] = self._workforce_factor()

        factors = [
            field_names.N_CLIMATE,
            field_names.N_ENERGY,
            field_names.N_TRANSPORTATION,
            field_names.N_HOUSING,
            field_names.N_POLLUTION,
            field_names.N_WATER,
            field_names.N_HEALTH,
            field_names.N_WORKFORCE,
        ]
        self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
        self.df[field_names.SCORE_N_COMMUNITIES] = self.df[factors].any(axis=1)

        return self.df
@@ -10,6 +10,7 @@ from data_pipeline.score.score_i import ScoreI
from data_pipeline.score.score_k import ScoreK
from data_pipeline.score.score_l import ScoreL
from data_pipeline.score.score_m import ScoreM
from data_pipeline.score.score_narwhal import ScoreNarwhal
from data_pipeline.score import field_names

from data_pipeline.utils import get_module_logger
@@ -35,6 +36,7 @@ class ScoreRunner:
        self.df = ScoreK(df=self.df).add_columns()
        self.df = ScoreL(df=self.df).add_columns()
        self.df = ScoreM(df=self.df).add_columns()
        self.df = ScoreNarwhal(df=self.df).add_columns()

        # TODO do this with each score instead of in a bundle
        # Create percentiles for these index scores
data/data-pipeline/poetry.lock (generated, 154 lines changed)
@@ -50,7 +50,7 @@ tests = ["pytest"]

[[package]]
name = "astroid"
version = "2.11.2"
version = "2.11.3"
description = "An abstract syntax tree for Python with inference support."
category = "main"
optional = false
@@ -411,19 +411,20 @@ pyflakes = ">=2.3.0,<2.4.0"

[[package]]
name = "fonttools"
version = "4.32.0"
version = "4.33.3"
description = "Tools to manipulate font files"
category = "main"
optional = false
python-versions = ">=3.7"

[package.extras]
all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "skia-pathops (>=0.5.0)", "brotlicffi (>=0.8.0)", "scipy", "brotli (>=1.0.1)", "munkres", "unicodedata2 (>=14.0.0)", "xattr"]
all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "skia-pathops (>=0.5.0)", "uharfbuzz (>=0.23.0)", "brotlicffi (>=0.8.0)", "scipy", "brotli (>=1.0.1)", "munkres", "unicodedata2 (>=14.0.0)", "xattr"]
graphite = ["lz4 (>=1.7.4.2)"]
interpolatable = ["scipy", "munkres"]
lxml = ["lxml (>=4.0,<5)"]
pathops = ["skia-pathops (>=0.5.0)"]
plot = ["matplotlib"]
repacker = ["uharfbuzz (>=0.23.0)"]
symfont = ["sympy"]
type1 = ["xattr"]
ufo = ["fs (>=2.2.0,<3)"]
@@ -657,7 +658,7 @@ qtconsole = "*"

[[package]]
name = "jupyter-client"
version = "7.2.2"
version = "7.3.0"
description = "Jupyter protocol implementation and client libraries"
category = "main"
optional = false
@@ -879,7 +880,7 @@ tests = ["pytest", "pytz", "simplejson"]

[[package]]
name = "marshmallow-dataclass"
version = "8.5.3"
version = "8.5.7"
description = "Python library to convert dataclasses into marshmallow schemas."
category = "main"
optional = false
@@ -890,11 +891,11 @@ marshmallow = ">=3.13.0,<4.0"
typing-inspect = ">=0.7.1"

[package.extras]
dev = ["marshmallow-enum", "typeguard", "pre-commit (>=1.18,<2.0)", "sphinx", "pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2,<3.8.0)"]
dev = ["marshmallow-enum", "typeguard", "pre-commit (>=2.17,<3.0)", "sphinx", "pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2)"]
docs = ["sphinx"]
enum = ["marshmallow-enum"]
lint = ["pre-commit (>=1.18,<2.0)"]
tests = ["pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2,<3.8.0)"]
lint = ["pre-commit (>=2.17,<3.0)"]
tests = ["pytest (>=5.4)", "pytest-mypy-plugins (>=1.2.0)", "typing-extensions (>=3.7.2)"]
union = ["typeguard"]

[[package]]
@@ -1348,6 +1349,21 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"

[[package]]
name = "pydantic"
version = "1.9.0"
description = "Data validation and settings management using python 3.6 type hinting"
category = "main"
optional = false
python-versions = ">=3.6.1"

[package.dependencies]
typing-extensions = ">=3.7.4.3"

[package.extras]
dotenv = ["python-dotenv (>=0.10.4)"]
email = ["email-validator (>=1.0.3)"]

[[package]]
name = "pyflakes"
version = "2.3.1"
@@ -1358,22 +1374,22 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"

[[package]]
name = "pygments"
version = "2.11.2"
version = "2.12.0"
description = "Pygments is a syntax highlighting package written in Python."
category = "main"
optional = false
python-versions = ">=3.5"
python-versions = ">=3.6"

[[package]]
name = "pylint"
version = "2.13.5"
version = "2.13.7"
description = "python code static checker"
category = "main"
optional = false
python-versions = ">=3.6.2"

[package.dependencies]
astroid = ">=2.11.2,<=2.12.0-dev0"
astroid = ">=2.11.3,<=2.12.0-dev0"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
dill = ">=0.2"
isort = ">=4.2.5,<6"
@@ -1406,7 +1422,7 @@ diagrams = ["railroad-diagrams", "jinja2"]

[[package]]
name = "pyproj"
version = "3.3.0"
version = "3.3.1"
description = "Python interface to PROJ (cartographic projections and coordinate transformations library)"
category = "main"
optional = false
@@ -1828,7 +1844,7 @@ test = ["pytest"]

[[package]]
name = "types-requests"
version = "2.27.19"
version = "2.27.22"
description = "Typing stubs for requests"
category = "main"
optional = false
@@ -1965,7 +1981,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
[metadata]
lock-version = "1.1"
python-versions = "^3.8"
content-hash = "120a7d23ab8c6bb5f17e226f844627d124e7e3a986d1b7fe72b41ce5b45bbb78"
content-hash = "e4462f3e9a5d1cf2449ac9ad0d9ed250a5fda5d03d04e2845e4be3526d943b2b"

[metadata.files]
ansiwrap = [
@@ -2004,8 +2020,8 @@ argon2-cffi-bindings = [
    {file = "argon2_cffi_bindings-21.2.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5e00316dabdaea0b2dd82d141cc66889ced0cdcbfa599e8b471cf22c620c329a"},
]
astroid = [
    {file = "astroid-2.11.2-py3-none-any.whl", hash = "sha256:cc8cc0d2d916c42d0a7c476c57550a4557a083081976bf42a73414322a6411d9"},
    {file = "astroid-2.11.2.tar.gz", hash = "sha256:8d0a30fe6481ce919f56690076eafbb2fb649142a89dc874f1ec0e7a011492d0"},
    {file = "astroid-2.11.3-py3-none-any.whl", hash = "sha256:f1af57483cd17e963b2eddce8361e00fc593d1520fe19948488e94ff6476bd71"},
    {file = "astroid-2.11.3.tar.gz", hash = "sha256:4e5ba10571e197785e312966ea5efb2f5783176d4c1a73fa922d474ae2be59f7"},
]
atomicwrites = [
    {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"},
@@ -2196,8 +2212,8 @@ flake8 = [
    {file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"},
]
fonttools = [
    {file = "fonttools-4.32.0-py3-none-any.whl", hash = "sha256:b038d1a0dee0079de7ade57071e2e2aced6e35bd697de244ac62938b2b1628c1"},
    {file = "fonttools-4.32.0.zip", hash = "sha256:59a90de72149893167e3d552ae2402c6874e006b9adc3feaf5f6d706fe20d392"},
    {file = "fonttools-4.33.3-py3-none-any.whl", hash = "sha256:f829c579a8678fa939a1d9e9894d01941db869de44390adb49ce67055a06cc2a"},
    {file = "fonttools-4.33.3.zip", hash = "sha256:c0fdcfa8ceebd7c1b2021240bd46ef77aa8e7408cf10434be55df52384865f8e"},
]
geopandas = [
    {file = "geopandas-0.9.0-py2.py3-none-any.whl", hash = "sha256:79f6e557ba0dba76eec44f8351b1c6b42a17c38f5f08fef347e98fe4dae563c7"},
@@ -2259,8 +2275,8 @@ jupyter = [
    {file = "jupyter-1.0.0.zip", hash = "sha256:3e1f86076bbb7c8c207829390305a2b1fe836d471ed54be66a3b8c41e7f46cc7"},
]
jupyter-client = [
    {file = "jupyter_client-7.2.2-py3-none-any.whl", hash = "sha256:44045448eadc12493d819d965eb1dc9d10d1927698adbb9b14eb9a3a4a45ba53"},
    {file = "jupyter_client-7.2.2.tar.gz", hash = "sha256:8fdbad344a8baa6a413d86d25bbf87ce21cb2b4aa5a8e0413863b9754eb8eb8a"},
    {file = "jupyter_client-7.3.0-py3-none-any.whl", hash = "sha256:671dd2d90d03f41716b09627a4eb06bb37875f92bf6563cc2ce4fe71c61c5cda"},
    {file = "jupyter_client-7.3.0.tar.gz", hash = "sha256:3bcc8e08a294d0fa9406e48cfe17e11ef0efdb7c504fe8cc335128e3ef8f3dac"},
]
jupyter-console = [
    {file = "jupyter_console-6.4.3-py3-none-any.whl", hash = "sha256:e630bcb682c0088dda45688ad7c2424d4a825c8acf494cb036ced03ed0424841"},
@@ -2503,8 +2519,8 @@ marshmallow = [
    {file = "marshmallow-3.15.0.tar.gz", hash = "sha256:2aaaab4f01ef4f5a011a21319af9fce17ab13bf28a026d1252adab0e035648d5"},
]
marshmallow-dataclass = [
    {file = "marshmallow_dataclass-8.5.3-py3-none-any.whl", hash = "sha256:eefeff62ee975c64d293d2db9370e7e748a2ff83dcb5109416b75e087a2ac02e"},
    {file = "marshmallow_dataclass-8.5.3.tar.gz", hash = "sha256:c0c5e1ea8d0e557b6fa00343799a9a9e60757b948fb096076beb6aa76bd68d30"},
    {file = "marshmallow_dataclass-8.5.7-py3-none-any.whl", hash = "sha256:da530f92f806673b9f40d8dc671ca18848b6cebded0eaecef720e256b5143e69"},
    {file = "marshmallow_dataclass-8.5.7.tar.gz", hash = "sha256:0bdb779939b4656a40430a6a8390af698676eef89c2e583deb06e3585bf81bba"},
]
marshmallow-enum = [
    {file = "marshmallow-enum-1.5.1.tar.gz", hash = "sha256:38e697e11f45a8e64b4a1e664000897c659b60aa57bfa18d44e226a9920b6e58"},
@@ -2799,17 +2815,54 @@ pycparser = [
    {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"},
    {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"},
]
pydantic = [
    {file = "pydantic-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb23bcc093697cdea2708baae4f9ba0e972960a835af22560f6ae4e7e47d33f5"},
    {file = "pydantic-1.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d5278bd9f0eee04a44c712982343103bba63507480bfd2fc2790fa70cd64cf4"},
    {file = "pydantic-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab624700dc145aa809e6f3ec93fb8e7d0f99d9023b713f6a953637429b437d37"},
    {file = "pydantic-1.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8d7da6f1c1049eefb718d43d99ad73100c958a5367d30b9321b092771e96c25"},
    {file = "pydantic-1.9.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3c3b035103bd4e2e4a28da9da7ef2fa47b00ee4a9cf4f1a735214c1bcd05e0f6"},
    {file = "pydantic-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3011b975c973819883842c5ab925a4e4298dffccf7782c55ec3580ed17dc464c"},
    {file = "pydantic-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:086254884d10d3ba16da0588604ffdc5aab3f7f09557b998373e885c690dd398"},
    {file = "pydantic-1.9.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0fe476769acaa7fcddd17cadd172b156b53546ec3614a4d880e5d29ea5fbce65"},
    {file = "pydantic-1.9.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8e9dcf1ac499679aceedac7e7ca6d8641f0193c591a2d090282aaf8e9445a46"},
    {file = "pydantic-1.9.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1e4c28f30e767fd07f2ddc6f74f41f034d1dd6bc526cd59e63a82fe8bb9ef4c"},
    {file = "pydantic-1.9.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:c86229333cabaaa8c51cf971496f10318c4734cf7b641f08af0a6fbf17ca3054"},
    {file = "pydantic-1.9.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:c0727bda6e38144d464daec31dff936a82917f431d9c39c39c60a26567eae3ed"},
    {file = "pydantic-1.9.0-cp36-cp36m-win_amd64.whl", hash = "sha256:dee5ef83a76ac31ab0c78c10bd7d5437bfdb6358c95b91f1ba7ff7b76f9996a1"},
    {file = "pydantic-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9c9bdb3af48e242838f9f6e6127de9be7063aad17b32215ccc36a09c5cf1070"},
    {file = "pydantic-1.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ee7e3209db1e468341ef41fe263eb655f67f5c5a76c924044314e139a1103a2"},
    {file = "pydantic-1.9.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b6037175234850ffd094ca77bf60fb54b08b5b22bc85865331dd3bda7a02fa1"},
    {file = "pydantic-1.9.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b2571db88c636d862b35090ccf92bf24004393f85c8870a37f42d9f23d13e032"},
    {file = "pydantic-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8b5ac0f1c83d31b324e57a273da59197c83d1bb18171e512908fe5dc7278a1d6"},
    {file = "pydantic-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:bbbc94d0c94dd80b3340fc4f04fd4d701f4b038ebad72c39693c794fd3bc2d9d"},
    {file = "pydantic-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e0896200b6a40197405af18828da49f067c2fa1f821491bc8f5bde241ef3f7d7"},
    {file = "pydantic-1.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7bdfdadb5994b44bd5579cfa7c9b0e1b0e540c952d56f627eb227851cda9db77"},
    {file = "pydantic-1.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:574936363cd4b9eed8acdd6b80d0143162f2eb654d96cb3a8ee91d3e64bf4cf9"},
    {file = "pydantic-1.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c556695b699f648c58373b542534308922c46a1cda06ea47bc9ca45ef5b39ae6"},
    {file = "pydantic-1.9.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f947352c3434e8b937e3aa8f96f47bdfe6d92779e44bb3f41e4c213ba6a32145"},
    {file = "pydantic-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5e48ef4a8b8c066c4a31409d91d7ca372a774d0212da2787c0d32f8045b1e034"},
    {file = "pydantic-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:96f240bce182ca7fe045c76bcebfa0b0534a1bf402ed05914a6f1dadff91877f"},
    {file = "pydantic-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:815ddebb2792efd4bba5488bc8fde09c29e8ca3227d27cf1c6990fc830fd292b"},
    {file = "pydantic-1.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c5b77947b9e85a54848343928b597b4f74fc364b70926b3c4441ff52620640c"},
    {file = "pydantic-1.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c68c3bc88dbda2a6805e9a142ce84782d3930f8fdd9655430d8576315ad97ce"},
    {file = "pydantic-1.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a79330f8571faf71bf93667d3ee054609816f10a259a109a0738dac983b23c3"},
    {file = "pydantic-1.9.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f5a64b64ddf4c99fe201ac2724daada8595ada0d102ab96d019c1555c2d6441d"},
    {file = "pydantic-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a733965f1a2b4090a5238d40d983dcd78f3ecea221c7af1497b845a9709c1721"},
    {file = "pydantic-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cc6a4cb8a118ffec2ca5fcb47afbacb4f16d0ab8b7350ddea5e8ef7bcc53a16"},
    {file = "pydantic-1.9.0-py3-none-any.whl", hash = "sha256:085ca1de245782e9b46cefcf99deecc67d418737a1fd3f6a4f511344b613a5b3"},
    {file = "pydantic-1.9.0.tar.gz", hash = "sha256:742645059757a56ecd886faf4ed2441b9c0cd406079c2b4bee51bcc3fbcd510a"},
]
pyflakes = [
    {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"},
    {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"},
]
pygments = [
    {file = "Pygments-2.11.2-py3-none-any.whl", hash = "sha256:44238f1b60a76d78fc8ca0528ee429702aae011c265fe6a8dd8b63049ae41c65"},
    {file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"},
    {file = "Pygments-2.12.0-py3-none-any.whl", hash = "sha256:dc9c10fb40944260f6ed4c688ece0cd2048414940f1cea51b8b226318411c519"},
    {file = "Pygments-2.12.0.tar.gz", hash = "sha256:5eb116118f9612ff1ee89ac96437bb6b49e8f04d8a13b514ba26f620208e26eb"},
]
pylint = [
    {file = "pylint-2.13.5-py3-none-any.whl", hash = "sha256:c149694cfdeaee1aa2465e6eaab84c87a881a7d55e6e93e09466be7164764d1e"},
    {file = "pylint-2.13.5.tar.gz", hash = "sha256:dab221658368c7a05242e673c275c488670144123f4bd262b2777249c1c0de9b"},
    {file = "pylint-2.13.7-py3-none-any.whl", hash = "sha256:13ddbbd8872c804574149e81197c28877eba75224ba6b76cd8652fc31df55c1c"},
    {file = "pylint-2.13.7.tar.gz", hash = "sha256:911d3a97c808f7554643bcc5416028cfdc42eae34ed129b150741888c688d5d5"},
]
pypandoc = [
    {file = "pypandoc-1.7.5.tar.gz", hash = "sha256:802c26aae17b64136c6d006949d8ce183a7d4d9fbd4f2d051e66f4fb9f45ca50"},
@@ -2819,26 +2872,29 @@ pyparsing = [
    {file = "pyparsing-3.0.8.tar.gz", hash = "sha256:7bf433498c016c4314268d95df76c81b842a4cb2b276fa3312cfb1e1d85f6954"},
]
pyproj = [
    {file = "pyproj-3.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2c41c9b7b5e1a1b0acc2b7b2f5de65b226f7b96c870888e4f679ff96322b1ed0"},
    {file = "pyproj-3.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0e1fd560b509b722db6566fa9685252f25640e93464d09e13d5190ed7ab491ba"},
    {file = "pyproj-3.3.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277072176a17471c0b1d25d6cae75401d81e9b50ea625ba546f5b79acd757dfc"},
    {file = "pyproj-3.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eca8ecf2b6b3225d93c723e6a2f51143d9195ac407f69e979363cdde344b93bb"},
    {file = "pyproj-3.3.0-cp310-cp310-win32.whl", hash = "sha256:4d2fc49c73d9f34e932bf37926d56916ba1b6f2f693cd4d8cc1d0d9eacc0e537"},
    {file = "pyproj-3.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:ce1adec823738e2d7c6af019fc38f58b4204bacfc782e4430373578c672f3833"},
    {file = "pyproj-3.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e70a1ea6f198cace1a492397bdd0a46e640201120973293d6c48031e370d6a87"},
    {file = "pyproj-3.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:99f171da5f885efeec8d7fb2e2557175ffa8834eeb488842b1f52ac78a9a98e5"},
    {file = "pyproj-3.3.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:3d28b84913cd849832a8f154c0e0c2ee4618057f7389ee68bfdb2145e7ed78cc"},
    {file = "pyproj-3.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab4baf781721640659db83a6b4da636fc403008f4978c668275754284c946778"},
    {file = "pyproj-3.3.0-cp38-cp38-win32.whl", hash = "sha256:4125e6704751d0e82d8d912d9851da097e8d38599d4c45f9944faaeb21771938"},
    {file = "pyproj-3.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:b15e199c1da8fd132e11dfa68d8cf65d4812dedabc776b308df778ecd0d07658"},
    {file = "pyproj-3.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fcceb6736085bf19291b707bc67c8cebe05330bd02268e9b8eba6d28a1905fce"},
    {file = "pyproj-3.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:dbf479bd481774ad217e9db5674868eee8f01dfe3868f61753328895ae7da61a"},
    {file = "pyproj-3.3.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:067a5c6099949edd66e9a10b139af4e2f65ebadb9f59583923a1d3feefac749a"},
    {file = "pyproj-3.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:235b52d8700ffb6be1f3638b1e25d83a9c13edcdb793236d8a98fd39227c5c27"},
    {file = "pyproj-3.3.0-cp39-cp39-win32.whl", hash = "sha256:44b5590c0b8dd002154916e170ef88f57abf91005b34bcb23faef97abb4d42c2"},
    {file = "pyproj-3.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:b48dd9e5736957707fce1d9253fb0772bcf80480198c7790e21fed73fee61240"},
    {file = "pyproj-3.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:5a105bfe37c78416d2641cd5d3368c99057d041f15f8d51ea3898953b21395c9"},
    {file = "pyproj-3.3.0.tar.gz", hash = "sha256:ce8bfbc212729e9a643f5f5d77f7a93394e032eda1e2d8799ae902d08add747e"},
    {file = "pyproj-3.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:473961faef7a9fd723c5d432f65220ea6ab3854e606bf84b4d409a75a4261c78"},
    {file = "pyproj-3.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fef9c1e339f25c57f6ae0558b5ab1bbdf7994529a30d8d7504fc6302ea51c03"},
    {file = "pyproj-3.3.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:140fa649fedd04f680a39f8ad339799a55cb1c49f6a84e1b32b97e49646647aa"},
    {file = "pyproj-3.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b59c08aea13ee428cf8a919212d55c036cc94784805ed77c8f31a4d1f541058c"},
    {file = "pyproj-3.3.1-cp310-cp310-win32.whl", hash = "sha256:1adc9ccd1bf04998493b6a2e87e60656c75ab790653b36cfe351e9ef214828ed"},
    {file = "pyproj-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:42eea10afc750fccd1c5c4ba56de29ab791ab4d83c1f7db72705566282ac5396"},
    {file = "pyproj-3.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:531ea36519fa7b581466d4b6ab32f66ae4dadd9499d726352f71ee5e19c3d1c5"},
    {file = "pyproj-3.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67025e37598a6bbed2c9c6c9e4c911f6dd39315d3e1148ead935a5c4d64309d5"},
    {file = "pyproj-3.3.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aed1a3c0cd4182425f91b48d5db39f459bc2fe0d88017ead6425a1bc85faee33"},
    {file = "pyproj-3.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cc4771403db54494e1e55bca8e6d33cde322f8cf0ed39f1557ff109c66d2cd1"},
    {file = "pyproj-3.3.1-cp38-cp38-win32.whl", hash = "sha256:c99f7b5757a28040a2dd4a28c9805fdf13eef79a796f4a566ab5cb362d10630d"},
    {file = "pyproj-3.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:5dac03d4338a4c8bd0f69144c527474f517b4cbd7d2d8c532cd8937799723248"},
    {file = "pyproj-3.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:56b0f9ee2c5b2520b18db30a393a7b86130cf527ddbb8c96e7f3c837474a9d79"},
    {file = "pyproj-3.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f92d8f6514516124abb714dce912b20867831162cfff9fae2678ef07b6fcf0f"},
    {file = "pyproj-3.3.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ef1bfbe2dcc558c7a98e2f1836abdcd630390f3160724a6f4f5c818b2be0ad5"},
    {file = "pyproj-3.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ca5f32b56210429b367ca4f9a57ffe67975c487af82e179a24370879a3daf68"},
    {file = "pyproj-3.3.1-cp39-cp39-win32.whl", hash = "sha256:aba199704c824fb84ab64927e7bc9ef71e603e483130ec0f7e09e97259b8f61f"},
    {file = "pyproj-3.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:120d45ed73144c65e9677dc73ba8a531c495d179dd9f9f0471ac5acc02d7ac4b"},
    {file = "pyproj-3.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:52efb681647dfac185cc655a709bc0caaf910031a0390f816f5fc8ce150cbedc"},
    {file = "pyproj-3.3.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ab0d6e38fda7c13726afacaf62e9f9dd858089d67910471758afd9cb24e0ecd"},
    {file = "pyproj-3.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45487942c19c5a8b09c91964ea3201f4e094518e34743cae373889a36e3d9260"},
    {file = "pyproj-3.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:797ad5655d484feac14b0fbb4a4efeaac0cf780a223046e2465494c767fd1c3b"},
    {file = "pyproj-3.3.1.tar.gz", hash = "sha256:b3d8e14d91cc95fb3dbc03a9d0588ac58326803eefa5bbb0978d109de3304fbe"},
]
pyrsistent = [
    {file = "pyrsistent-0.18.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1"},
@@ -3161,8 +3217,8 @@ traitlets = [
    {file = "traitlets-5.1.1.tar.gz", hash = "sha256:059f456c5a7c1c82b98c2e8c799f39c9b8128f6d0d46941ee118daace9eb70c7"},
]
types-requests = [
    {file = "types-requests-2.27.19.tar.gz", hash = "sha256:795e378117088d1e4bf41a2c01a153b73d6ea40aa9d7c0ac753abde84c0d3a8f"},
    {file = "types_requests-2.27.19-py3-none-any.whl", hash = "sha256:c6c5384677d98f212516de50c4b2c38ef659b93008fbc5bb4b81726138bc8485"},
    {file = "types-requests-2.27.22.tar.gz", hash = "sha256:2e81a74d2db1e6d06baa4a9e1896720543739297a23daac0436a34e2fc732574"},
    {file = "types_requests-2.27.22-py3-none-any.whl", hash = "sha256:58730c31469fb959a21496d97d2e59c06ca6de2ccdfecb583cb924b83cb0811e"},
]
types-urllib3 = [
    {file = "types-urllib3-1.26.13.tar.gz", hash = "sha256:40f8fb5e8cd7d57e8aefdee3fdd5e930aa1a1bb4179cdadd55226cea588af790"},
@@ -39,6 +39,7 @@ tqdm = "4.62.0"
types-requests = "^2.25.0"
us = "^2.0.2"
xlsxwriter = "^2.0.0"
pydantic = "^1.9.0"

[tool.poetry.dev-dependencies]
black = {version = "^21.6b0", allow-prereleases = true}
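pydantic ^1.9.0 joins the runtime dependencies here, matching the new lockfile entry above. A small, purely illustrative example of the kind of typed validation pydantic provides (not code from this repository; the model and fields are hypothetical):

from pydantic import BaseModel, validator

class ExampleConfig(BaseModel):
    # Hypothetical fields, for illustration only.
    name: str
    year: int

    @validator("name")
    def name_must_not_be_empty(cls, value: str) -> str:
        if not value:
            raise ValueError("name must be non-empty")
        return value

# Raises a ValidationError if a field is missing or the wrong type.
config = ExampleConfig(name="example", year=2019)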