Issue 308 python linting (#443)

* Adds flake8, pylint, and liccheck to dependencies for data-pipeline

* Sets up and runs black autoformatting

* Adds flake8 to tox linting

* Fixes flake8 error F541 f string missing placeholders

* Fixes flake8 E501 line too long

* Fixes flake8 F401 imported but not used

* Adds pylint to tox and disables the following pylint errors:
- C0114: module docstrings
- R0201: method could have been a function
- R0903: too few public methods
- C0103: name case styling
- W0511: fix me
- W1203: f-string interpolation in logging

* Adds utils.py to tox.ini linting, runs black on utils.py

* Fixes import related pylint errors: C0411 and C0412

* Fixes or ignores remaining pylint errors (for discussion later)

* Adds safety and liccheck to tox.ini
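The tox.ini changes themselves are not reproduced in the hunks below. As a rough, hypothetical sketch only (the environment name, target paths, and flags are assumptions, not the repository's actual configuration), the lint environment described by the bullets above could be wired up along these lines:

[testenv:lint]
# Illustrative sketch; the real tox.ini from this commit is not shown in this diff.
deps =
    black
    flake8
    pylint
    safety
    liccheck
commands =
    black --check .
    flake8
    # The --disable list mirrors the pylint checks listed above.
    pylint --disable=C0114,R0201,R0903,C0103,W0511,W1203 etl utils.py config.py
    safety check
    liccheck -s liccheck.ini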
Billy Daly, 2021-08-02 12:16:38 -04:00, committed by GitHub
parent 51f7666062
commit 5504528fdf
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 709 additions and 228 deletions

View file

@@ -0,0 +1,7 @@
+[flake8]
+ignore =
+    E266,  # too many leading '#' for block comment
+    W503  # line break before binary operator
+max-line-length = 150
+max-complexity = 18
+select = B,C,E,F,W,T4,B9

View file

@@ -1,7 +1,6 @@
 import click
 from config import settings
-from etl.sources.census.etl_utils import reset_data_directories as census_reset
 from utils import (
     get_module_logger,
     data_folder_cleanup,
@@ -9,6 +8,7 @@ from utils import (
     temp_folder_cleanup,
 )
 from etl.sources.census.etl import download_census_csvs
+from etl.sources.census.etl_utils import reset_data_directories as census_reset
 from etl.runner import etl_runner, score_generate, score_geo
 logger = get_module_logger(__name__)
@@ -30,7 +30,7 @@ def census_cleanup():
     data_path = settings.APP_ROOT / "data"
     # census directories
-    logger.info(f"Initializing all census data")
+    logger.info("Initializing all census data")
     census_reset(data_path)
     logger.info("Cleaned up all census data files")

View file

@@ -1,6 +1,7 @@
-from dynaconf import Dynaconf
 from pathlib import Path
+from dynaconf import Dynaconf
 settings = Dynaconf(
     envvar_prefix="DYNACONF",
     settings_files=["settings.toml", ".secrets.toml"],

View file

@@ -1,11 +1,10 @@
 from pathlib import Path
-import pathlib
 from config import settings
 from utils import unzip_file_from_url, remove_all_from_dir
-class ExtractTransformLoad(object):
+class ExtractTransformLoad:
     """
     A class used to instantiate an ETL object to retrieve and process data from
     datasets.
@@ -34,9 +33,7 @@ class ExtractTransformLoad(object):
         pass
-    def extract(
-        self, source_url: str = None, extract_path: Path = None
-    ) -> None:
+    def extract(self, source_url: str = None, extract_path: Path = None) -> None:
         """Extract the data from
         a remote source. By default it provides code to get the file from a source url,
         unzips it and stores it on an extract_path."""

View file

@@ -67,9 +67,7 @@ def etl_runner(dataset_to_run: str = None) -> None:
     # Run the ETLs for the dataset_list
     for dataset in dataset_list:
-        etl_module = importlib.import_module(
-            f"etl.sources.{dataset['module_dir']}.etl"
-        )
+        etl_module = importlib.import_module(f"etl.sources.{dataset['module_dir']}.etl")
         etl_class = getattr(etl_module, dataset["class_name"])
         etl_instance = etl_class()

View file

@@ -4,7 +4,6 @@ import pandas as pd
 from etl.base import ExtractTransformLoad
 from utils import get_module_logger
-from etl.sources.census.etl_utils import get_state_fips_codes
 logger = get_module_logger(__name__)
@@ -28,10 +27,10 @@ class ScoreETL(ExtractTransformLoad):
         self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)"
         self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
         self.HOUSING_BURDEN_FIELD_NAME = "Housing burden (percent)"
-        self.POVERTY_FIELD_NAME = (
-            "Poverty (Less than 200% of federal poverty line)"
-        )
-        self.HIGH_SCHOOL_FIELD_NAME = "Percent individuals age 25 or over with less than high school degree"
+        self.POVERTY_FIELD_NAME = "Poverty (Less than 200% of federal poverty line)"
+        self.HIGH_SCHOOL_FIELD_NAME = (
+            "Percent individuals age 25 or over with less than high school degree"
+        )
         # There's another aggregation level (a second level of "buckets").
         self.AGGREGATION_POLLUTION = "Pollution Burden"
@@ -55,9 +54,7 @@ class ScoreETL(ExtractTransformLoad):
         self.ejscreen_df = pd.read_csv(
             ejscreen_csv, dtype={"ID": "string"}, low_memory=False
         )
-        self.ejscreen_df.rename(
-            columns={"ID": self.GEOID_FIELD_NAME}, inplace=True
-        )
+        self.ejscreen_df.rename(columns={"ID": self.GEOID_FIELD_NAME}, inplace=True)
         # Load census data
         census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
@@ -69,10 +66,7 @@ class ScoreETL(ExtractTransformLoad):
         # Load housing and transportation data
         housing_and_transportation_index_csv = (
-            self.DATA_PATH
-            / "dataset"
-            / "housing_and_transportation_index"
-            / "usa.csv"
+            self.DATA_PATH / "dataset" / "housing_and_transportation_index" / "usa.csv"
         )
         self.housing_and_transportation_df = pd.read_csv(
             housing_and_transportation_index_csv,
@@ -89,7 +83,7 @@ class ScoreETL(ExtractTransformLoad):
         )
     def transform(self) -> None:
-        logger.info(f"Transforming Score Data")
+        logger.info("Transforming Score Data")
         # Join all the data sources that use census block groups
         census_block_group_dfs = [
@@ -106,10 +100,7 @@ class ScoreETL(ExtractTransformLoad):
         )
         # Sanity check the join.
-        if (
-            len(census_block_group_df[self.GEOID_FIELD_NAME].str.len().unique())
-            != 1
-        ):
+        if len(census_block_group_df[self.GEOID_FIELD_NAME].str.len().unique()) != 1:
             raise ValueError(
                 f"One of the input CSVs uses {self.GEOID_FIELD_NAME} with a different length."
             )
@@ -119,9 +110,9 @@ class ScoreETL(ExtractTransformLoad):
         census_tract_df = self.hud_housing_df
         # Calculate the tract for the CBG data.
-        census_block_group_df[
-            self.GEOID_TRACT_FIELD_NAME
-        ] = census_block_group_df[self.GEOID_FIELD_NAME].str[0:11]
+        census_block_group_df[self.GEOID_TRACT_FIELD_NAME] = census_block_group_df[
+            self.GEOID_FIELD_NAME
+        ].str[0:11]
         self.df = census_block_group_df.merge(
             census_tract_df, on=self.GEOID_TRACT_FIELD_NAME
@@ -254,8 +245,7 @@ class ScoreETL(ExtractTransformLoad):
         # Rename columns:
         renaming_dict = {
-            data_set.input_field: data_set.renamed_field
-            for data_set in data_sets
+            data_set.input_field: data_set.renamed_field for data_set in data_sets
         }
         self.df.rename(
@@ -310,7 +300,7 @@ class ScoreETL(ExtractTransformLoad):
         ) / (max_value - min_value)
         # Graph distributions and correlations.
-        min_max_fields = [
+        min_max_fields = [  # noqa: F841
             f"{data_set.renamed_field}{self.MIN_MAX_FIELD_SUFFIX}"
             for data_set in data_sets
             if data_set.renamed_field != self.GEOID_FIELD_NAME
@@ -324,9 +314,7 @@ class ScoreETL(ExtractTransformLoad):
            ]
         ].mean(axis=1)
         self.df["Score B"] = (
-            self.df[
-                "Poverty (Less than 200% of federal poverty line) (percentile)"
-            ]
+            self.df["Poverty (Less than 200% of federal poverty line) (percentile)"]
             * self.df[
                 "Percent individuals age 25 or over with less than high school degree (percentile)"
             ]
@@ -342,21 +330,26 @@ class ScoreETL(ExtractTransformLoad):
             ]
             self.df[f"{bucket}"] = self.df[fields_in_bucket].mean(axis=1)
-        # Combine the score from the two Exposures and Environmental Effects buckets into a single score called "Pollution Burden". The math for this score is: (1.0 * Exposures Score + 0.5 * Environment Effects score) / 1.5.
+        # Combine the score from the two Exposures and Environmental Effects buckets
+        # into a single score called "Pollution Burden".
+        # The math for this score is:
+        # (1.0 * Exposures Score + 0.5 * Environment Effects score) / 1.5.
         self.df[self.AGGREGATION_POLLUTION] = (
             1.0 * self.df[f"{self.BUCKET_EXPOSURES}"]
             + 0.5 * self.df[f"{self.BUCKET_ENVIRONMENTAL}"]
         ) / 1.5
-        # Average the score from the two Sensitive populations and Socioeconomic factors buckets into a single score called "Population Characteristics".
+        # Average the score from the two Sensitive populations and
+        # Socioeconomic factors buckets into a single score called
+        # "Population Characteristics".
         self.df[self.AGGREGATION_POPULATION] = self.df[
             [f"{self.BUCKET_SENSITIVE}", f"{self.BUCKET_SOCIOECONOMIC}"]
         ].mean(axis=1)
-        # Multiply the "Pollution Burden" score and the "Population Characteristics" together to produce the cumulative impact score.
+        # Multiply the "Pollution Burden" score and the "Population Characteristics"
+        # together to produce the cumulative impact score.
         self.df["Score C"] = (
-            self.df[self.AGGREGATION_POLLUTION]
-            * self.df[self.AGGREGATION_POPULATION]
+            self.df[self.AGGREGATION_POLLUTION] * self.df[self.AGGREGATION_POPULATION]
         )
         if len(census_block_group_df) > 220333:
@@ -371,12 +364,10 @@ class ScoreETL(ExtractTransformLoad):
         ]
         fields_min_max = [
-            f"{field}{self.MIN_MAX_FIELD_SUFFIX}"
-            for field in fields_to_use_in_score
+            f"{field}{self.MIN_MAX_FIELD_SUFFIX}" for field in fields_to_use_in_score
         ]
         fields_percentile = [
-            f"{field}{self.PERCENTILE_FIELD_SUFFIX}"
-            for field in fields_to_use_in_score
+            f"{field}{self.PERCENTILE_FIELD_SUFFIX}" for field in fields_to_use_in_score
         ]
         # Calculate "Score D", which uses min-max normalization
@@ -396,17 +387,22 @@ class ScoreETL(ExtractTransformLoad):
             "Score E",
             "Poverty (Less than 200% of federal poverty line)",
         ]:
-            self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] = self.df[score_field].rank(pct=True)
+            self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] = self.df[
+                score_field
+            ].rank(pct=True)
             for threshold in [0.25, 0.3, 0.35, 0.4]:
                 fraction_converted_to_percent = int(100 * threshold)
-                self.df[f"{score_field} (top {fraction_converted_to_percent}th percentile)"] = (
-                    self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"] >= 1 - threshold
-                )
+                self.df[
+                    f"{score_field} (top {fraction_converted_to_percent}th percentile)"
+                ] = (
+                    self.df[f"{score_field}{self.PERCENTILE_FIELD_SUFFIX}"]
+                    >= 1 - threshold
+                )
     def load(self) -> None:
-        logger.info(f"Saving Score CSV")
+        logger.info("Saving Score CSV")
         # write nationwide csv
         self.SCORE_CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.SCORE_CSV_PATH / f"usa.csv", index=False)
+        self.df.to_csv(self.SCORE_CSV_PATH / "usa.csv", index=False)
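For readers skimming the hunk above: pandas' Series.rank(pct=True) converts a score column into percentile ranks in (0, 1], and each threshold then yields a boolean "top Nth percentile" column. A minimal, self-contained sketch with made-up scores (the column name and values are illustrative, not the ETL's real inputs):

import pandas as pd

# Hypothetical scores; the real ETL reads these from the joined census CSVs.
df = pd.DataFrame({"Score E": [0.12, 0.47, 0.89, 0.33, 0.71]})

# rank(pct=True) maps each value to its percentile rank within the column.
df["Score E (percentile)"] = df["Score E"].rank(pct=True)

for threshold in [0.25, 0.3, 0.35, 0.4]:
    pct = int(100 * threshold)
    # True when a row sits within the top `pct` percent of the distribution.
    df[f"Score E (top {pct}th percentile)"] = (
        df["Score E (percentile)"] >= 1 - threshold
    )

print(df)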

View file

@@ -1,6 +1,7 @@
+import math
 import pandas as pd
 import geopandas as gpd
-import math
 from etl.base import ExtractTransformLoad
 from utils import get_module_logger
@@ -21,9 +22,7 @@ class GeoScoreETL(ExtractTransformLoad):
         self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
         self.TILE_SCORE_CSV = self.SCORE_CSV_PATH / "tiles" / "usa.csv"
-        self.CENSUS_USA_GEOJSON = (
-            self.DATA_PATH / "census" / "geojson" / "us.json"
-        )
+        self.CENSUS_USA_GEOJSON = self.DATA_PATH / "census" / "geojson" / "us.json"
         self.TARGET_SCORE_NAME = "Score E (percentile)"
         self.TARGET_SCORE_RENAME_TO = "E_SCORE"
@@ -36,7 +35,7 @@ class GeoScoreETL(ExtractTransformLoad):
         self.geojson_score_usa_low: gpd.GeoDataFrame
     def extract(self) -> None:
-        logger.info(f"Reading US GeoJSON (~6 minutes)")
+        logger.info("Reading US GeoJSON (~6 minutes)")
         self.geojson_usa_df = gpd.read_file(
             self.CENSUS_USA_GEOJSON,
             dtype={"GEOID10": "string"},
@@ -45,7 +44,7 @@ class GeoScoreETL(ExtractTransformLoad):
         )
         self.geojson_usa_df.head()
-        logger.info(f"Reading score CSV")
+        logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
             self.TILE_SCORE_CSV,
             dtype={"GEOID10": "string"},
@@ -53,11 +52,11 @@ class GeoScoreETL(ExtractTransformLoad):
         )
     def transform(self) -> None:
-        logger.info(f"Pruning Census GeoJSON")
+        logger.info("Pruning Census GeoJSON")
         fields = ["GEOID10", "geometry"]
         self.geojson_usa_df = self.geojson_usa_df[fields]
-        logger.info(f"Merging and compressing score CSV with USA GeoJSON")
+        logger.info("Merging and compressing score CSV with USA GeoJSON")
         self.geojson_score_usa_high = self.score_usa_df.merge(
             self.geojson_usa_df, on="GEOID10", how="left"
         )
@@ -75,7 +74,7 @@ class GeoScoreETL(ExtractTransformLoad):
             inplace=True,
         )
-        logger.info(f"Aggregating into tracts (~5 minutes)")
+        logger.info("Aggregating into tracts (~5 minutes)")
         usa_tracts = self._aggregate_to_tracts(usa_simplified)
         usa_tracts = gpd.GeoDataFrame(
@@ -84,17 +83,15 @@ class GeoScoreETL(ExtractTransformLoad):
             crs="EPSG:4326",
         )
-        logger.info(f"Creating buckets from tracts")
+        logger.info("Creating buckets from tracts")
         usa_bucketed = self._create_buckets_from_tracts(
             usa_tracts, self.NUMBER_OF_BUCKETS
         )
-        logger.info(f"Aggregating buckets")
+        logger.info("Aggregating buckets")
         usa_aggregated = self._aggregate_buckets(usa_bucketed, agg_func="mean")
-        compressed = self._breakup_multipolygons(
-            usa_aggregated, self.NUMBER_OF_BUCKETS
-        )
+        compressed = self._breakup_multipolygons(usa_aggregated, self.NUMBER_OF_BUCKETS)
         self.geojson_score_usa_low = gpd.GeoDataFrame(
             compressed,
@@ -118,9 +115,7 @@ class GeoScoreETL(ExtractTransformLoad):
         # assign tracts to buckets by D_SCORE
         state_tracts.sort_values(self.TARGET_SCORE_RENAME_TO, inplace=True)
         SCORE_bucket = []
-        bucket_size = math.ceil(
-            len(state_tracts.index) / self.NUMBER_OF_BUCKETS
-        )
+        bucket_size = math.ceil(len(state_tracts.index) / self.NUMBER_OF_BUCKETS)
         for i in range(len(state_tracts.index)):
             SCORE_bucket.extend([math.floor(i / bucket_size)])
         state_tracts[f"{self.TARGET_SCORE_RENAME_TO}_bucket"] = SCORE_bucket
@@ -155,14 +150,10 @@ class GeoScoreETL(ExtractTransformLoad):
         return compressed
     def load(self) -> None:
-        logger.info(f"Writing usa-high (~9 minutes)")
-        self.geojson_score_usa_high.to_file(
-            self.SCORE_HIGH_GEOJSON, driver="GeoJSON"
-        )
-        logger.info(f"Completed writing usa-high")
-        logger.info(f"Writing usa-low (~9 minutes)")
-        self.geojson_score_usa_low.to_file(
-            self.SCORE_LOW_GEOJSON, driver="GeoJSON"
-        )
-        logger.info(f"Completed writing usa-low")
+        logger.info("Writing usa-high (~9 minutes)")
+        self.geojson_score_usa_high.to_file(self.SCORE_HIGH_GEOJSON, driver="GeoJSON")
+        logger.info("Completed writing usa-high")
+        logger.info("Writing usa-low (~9 minutes)")
+        self.geojson_score_usa_low.to_file(self.SCORE_LOW_GEOJSON, driver="GeoJSON")
+        logger.info("Completed writing usa-low")

View file

@@ -19,9 +19,7 @@ class PostScoreETL(ExtractTransformLoad):
         self.CENSUS_USA_CSV = self.DATA_PATH / "census" / "csv" / "us.csv"
         self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
-        self.STATE_CSV = (
-            self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
-        )
+        self.STATE_CSV = self.DATA_PATH / "census" / "csv" / "fips_states_2010.csv"
         self.FULL_SCORE_CSV = self.SCORE_CSV_PATH / "full" / "usa.csv"
         self.TILR_SCORE_CSV = self.SCORE_CSV_PATH / "tile" / "usa.csv"
@@ -49,7 +47,7 @@ class PostScoreETL(ExtractTransformLoad):
             self.TMP_PATH,
         )
-        logger.info(f"Reading Counties CSV")
+        logger.info("Reading Counties CSV")
         self.counties_df = pd.read_csv(
             self.CENSUS_COUNTIES_TXT,
             sep="\t",
@@ -58,16 +56,14 @@ class PostScoreETL(ExtractTransformLoad):
             encoding="latin-1",
         )
-        logger.info(f"Reading States CSV")
+        logger.info("Reading States CSV")
         self.states_df = pd.read_csv(
             self.STATE_CSV, dtype={"fips": "string", "state_code": "string"}
         )
-        self.score_df = pd.read_csv(
-            self.FULL_SCORE_CSV, dtype={"GEOID10": "string"}
-        )
+        self.score_df = pd.read_csv(self.FULL_SCORE_CSV, dtype={"GEOID10": "string"})
     def transform(self) -> None:
-        logger.info(f"Transforming data sources for Score + County CSV")
+        logger.info("Transforming data sources for Score + County CSV")
         # rename some of the columns to prepare for merge
         self.counties_df = self.counties_df[["USPS", "GEOID", "NAME"]]
@@ -101,7 +97,7 @@ class PostScoreETL(ExtractTransformLoad):
         )
         # check if there are census cbgs without score
-        logger.info(f"Removing CBG rows without score")
+        logger.info("Removing CBG rows without score")
         ## load cbgs
         cbg_usa_df = pd.read_csv(
@@ -121,19 +117,19 @@ class PostScoreETL(ExtractTransformLoad):
         null_cbg_df = merged_df[merged_df["Score E (percentile)"].isnull()]
         # subtract data sets
-        removed_df = pd.concat(
-            [merged_df, null_cbg_df, null_cbg_df]
-        ).drop_duplicates(keep=False)
+        removed_df = pd.concat([merged_df, null_cbg_df, null_cbg_df]).drop_duplicates(
+            keep=False
+        )
         # set the score to the new df
         self.score_county_state_merged = removed_df
     def load(self) -> None:
-        logger.info(f"Saving Full Score CSV with County Information")
+        logger.info("Saving Full Score CSV with County Information")
         self.SCORE_CSV_PATH.mkdir(parents=True, exist_ok=True)
         self.score_county_state_merged.to_csv(self.FULL_SCORE_CSV, index=False)
-        logger.info(f"Saving Tile Score CSV")
+        logger.info("Saving Tile Score CSV")
         # TODO: check which are the columns we'll use
         # Related to: https://github.com/usds/justice40-tool/issues/302
         score_tiles = self.score_county_state_merged[self.TILES_SCORE_COLUMNS]

View file

@@ -9,16 +9,12 @@ logger = get_module_logger(__name__)
 class CalEnviroScreenETL(ExtractTransformLoad):
     def __init__(self):
         self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
-        self.CALENVIROSCREEN_CSV = (
-            self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
-        )
+        self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
         self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
         # Defining some variable names
         self.CALENVIROSCREEN_SCORE_FIELD_NAME = "calenviroscreen_score"
-        self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = (
-            "calenviroscreen_percentile"
-        )
+        self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = "calenviroscreen_percentile"
         self.CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = (
             "calenviroscreen_priority_community"
         )
@@ -30,14 +26,14 @@ class CalEnviroScreenETL(ExtractTransformLoad):
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Downloading CalEnviroScreen Data")
+        logger.info("Downloading CalEnviroScreen Data")
         super().extract(
             self.CALENVIROSCREEN_FTP_URL,
             self.TMP_PATH,
         )
     def transform(self) -> None:
-        logger.info(f"Transforming CalEnviroScreen Data")
+        logger.info("Transforming CalEnviroScreen Data")
         # Data from https://calenviroscreen-oehha.hub.arcgis.com/#Data, specifically:
         # https://oehha.ca.gov/media/downloads/calenviroscreen/document/calenviroscreen40resultsdatadictionaryd12021.zip
@@ -67,7 +63,7 @@ class CalEnviroScreenETL(ExtractTransformLoad):
         )
     def load(self) -> None:
-        logger.info(f"Saving CalEnviroScreen CSV")
+        logger.info("Saving CalEnviroScreen CSV")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.CSV_PATH / f"data06.csv", index=False)
+        self.df.to_csv(self.CSV_PATH / "data06.csv", index=False)

View file

@@ -1,11 +1,12 @@
-import csv
 import os
+import csv
 import json
 from pathlib import Path
 import geopandas as gpd
-from .etl_utils import get_state_fips_codes
 from utils import unzip_file_from_url, get_module_logger
+from .etl_utils import get_state_fips_codes
 logger = get_module_logger(__name__)
@@ -29,9 +30,7 @@ def download_census_csvs(data_path: Path) -> None:
     for fips in state_fips_codes:
         # check if file exists
-        shp_file_path = (
-            data_path / "census" / "shp" / fips / f"tl_2010_{fips}_bg10.shp"
-        )
+        shp_file_path = data_path / "census" / "shp" / fips / f"tl_2010_{fips}_bg10.shp"
         logger.info(f"Checking if {fips} file exists")
         if not os.path.isfile(shp_file_path):
@@ -110,7 +109,7 @@ def download_census_csvs(data_path: Path) -> None:
     )
     ## create national geojson
-    logger.info(f"Generating national geojson file")
+    logger.info("Generating national geojson file")
     usa_df = gpd.GeoDataFrame()
     for file_name in geojson_dir_path.rglob("*.json"):
@@ -119,7 +118,7 @@ def download_census_csvs(data_path: Path) -> None:
         usa_df = usa_df.append(state_gdf)
     usa_df = usa_df.to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
-    logger.info(f"Writing national geojson file")
+    logger.info("Writing national geojson file")
     usa_df.to_file(geojson_dir_path / "us.json", driver="GeoJSON")
     logger.info("Census block groups downloading complete")

View file

@@ -1,7 +1,8 @@
-from pathlib import Path
-import csv
-import pandas as pd
 import os
+import csv
+from pathlib import Path
+import pandas as pd
 from config import settings
 from utils import (
@@ -35,7 +36,7 @@ def get_state_fips_codes(data_path: Path) -> list:
     # check if file exists
     if not os.path.isfile(fips_csv_path):
-        logger.info(f"Downloading fips from S3 repository")
+        logger.info("Downloading fips from S3 repository")
         unzip_file_from_url(
             settings.AWS_JUSTICE40_DATA_URL + "/Census/fips_states_2010.zip",
             data_path / "tmp",

View file

@@ -11,14 +11,10 @@ logger = get_module_logger(__name__)
 class CensusACSETL(ExtractTransformLoad):
     def __init__(self):
         self.ACS_YEAR = 2019
-        self.OUTPUT_PATH = (
-            self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}"
-        )
+        self.OUTPUT_PATH = self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}"
         self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)"
         self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
-        self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = (
-            "Linguistic isolation (total)"
-        )
+        self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = "Linguistic isolation (total)"
         self.LINGUISTIC_ISOLATION_FIELDS = [
             "C16002_001E",
             "C16002_004E",
@@ -28,9 +24,7 @@ class CensusACSETL(ExtractTransformLoad):
         ]
         self.df: pd.DataFrame
-    def _fips_from_censusdata_censusgeo(
-        self, censusgeo: censusdata.censusgeo
-    ) -> str:
+    def _fips_from_censusdata_censusgeo(self, censusgeo: censusdata.censusgeo) -> str:
         """Create a FIPS code from the proprietary censusgeo index."""
         fips = "".join([value for (key, value) in censusgeo.params()])
         return fips
@@ -38,9 +32,7 @@ class CensusACSETL(ExtractTransformLoad):
     def extract(self) -> None:
         dfs = []
         for fips in get_state_fips_codes(self.DATA_PATH):
-            logger.info(
-                f"Downloading data for state/territory with FIPS code {fips}"
-            )
+            logger.info(f"Downloading data for state/territory with FIPS code {fips}")
             dfs.append(
                 censusdata.download(
@@ -65,13 +57,11 @@ class CensusACSETL(ExtractTransformLoad):
             )
     def transform(self) -> None:
-        logger.info(f"Starting Census ACS Transform")
+        logger.info("Starting Census ACS Transform")
         # Calculate percent unemployment.
         # TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
-        self.df[self.UNEMPLOYED_FIELD_NAME] = (
-            self.df.B23025_005E / self.df.B23025_003E
-        )
+        self.df[self.UNEMPLOYED_FIELD_NAME] = self.df.B23025_005E / self.df.B23025_003E
         # Calculate linguistic isolation.
         individual_limited_english_fields = [
@@ -92,7 +82,7 @@ class CensusACSETL(ExtractTransformLoad):
         self.df[self.LINGUISTIC_ISOLATION_FIELD_NAME].describe()
     def load(self) -> None:
-        logger.info(f"Saving Census ACS Data")
+        logger.info("Saving Census ACS Data")
         # mkdir census
         self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
@@ -108,6 +98,6 @@ class CensusACSETL(ExtractTransformLoad):
         )
     def validate(self) -> None:
-        logger.info(f"Validating Census ACS Data")
+        logger.info("Validating Census ACS Data")
         pass

View file

@@ -8,20 +8,22 @@ logger = get_module_logger(__name__)
 class EJScreenETL(ExtractTransformLoad):
     def __init__(self):
-        self.EJSCREEN_FTP_URL = "https://gaftp.epa.gov/EJSCREEN/2019/EJSCREEN_2019_StatePctile.csv.zip"
+        self.EJSCREEN_FTP_URL = (
+            "https://gaftp.epa.gov/EJSCREEN/2019/EJSCREEN_2019_StatePctile.csv.zip"
+        )
         self.EJSCREEN_CSV = self.TMP_PATH / "EJSCREEN_2019_StatePctiles.csv"
         self.CSV_PATH = self.DATA_PATH / "dataset" / "ejscreen_2019"
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Downloading EJScreen Data")
+        logger.info("Downloading EJScreen Data")
         super().extract(
             self.EJSCREEN_FTP_URL,
             self.TMP_PATH,
         )
     def transform(self) -> None:
-        logger.info(f"Transforming EJScreen Data")
+        logger.info("Transforming EJScreen Data")
         self.df = pd.read_csv(
             self.EJSCREEN_CSV,
             dtype={"ID": "string"},
@@ -31,7 +33,7 @@ class EJScreenETL(ExtractTransformLoad):
         )
     def load(self) -> None:
-        logger.info(f"Saving EJScreen CSV")
+        logger.info("Saving EJScreen CSV")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.CSV_PATH / f"usa.csv", index=False)
+        self.df.to_csv(self.CSV_PATH / "usa.csv", index=False)

View file

@@ -35,9 +35,7 @@ class HousingTransportationETL(ExtractTransformLoad):
             )
             # New file name:
-            tmp_csv_file_path = (
-                zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
-            )
+            tmp_csv_file_path = zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
             tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
             dfs.append(tmp_df)
@@ -45,16 +43,16 @@ class HousingTransportationETL(ExtractTransformLoad):
         self.df = pd.concat(dfs)
     def transform(self) -> None:
-        logger.info(f"Transforming Housing and Transportation Data")
+        logger.info("Transforming Housing and Transportation Data")
         # Rename and reformat block group ID
         self.df.rename(columns={"blkgrp": self.GEOID_FIELD_NAME}, inplace=True)
-        self.df[self.GEOID_FIELD_NAME] = self.df[
-            self.GEOID_FIELD_NAME
-        ].str.replace('"', "")
+        self.df[self.GEOID_FIELD_NAME] = self.df[self.GEOID_FIELD_NAME].str.replace(
+            '"', ""
+        )
     def load(self) -> None:
-        logger.info(f"Saving Housing and Transportation Data")
+        logger.info("Saving Housing and Transportation Data")
         self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
         self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)

View file

@@ -1,8 +1,7 @@
 import pandas as pd
 from etl.base import ExtractTransformLoad
-from etl.sources.census.etl_utils import get_state_fips_codes
-from utils import get_module_logger, unzip_file_from_url, remove_all_from_dir
+from utils import get_module_logger
 logger = get_module_logger(__name__)
@@ -11,33 +10,37 @@ class HudHousingETL(ExtractTransformLoad):
     def __init__(self):
         self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "hud_housing"
         self.GEOID_TRACT_FIELD_NAME = "GEOID10_TRACT"
-        self.HOUSING_FTP_URL = "https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip"
+        self.HOUSING_FTP_URL = (
+            "https://www.huduser.gov/portal/datasets/cp/2012thru2016-140-csv.zip"
+        )
         self.HOUSING_ZIP_FILE_DIR = self.TMP_PATH / "hud_housing"
         # We measure households earning less than 80% of HUD Area Median Family Income by county
         # and paying greater than 30% of their income to housing costs.
         self.HOUSING_BURDEN_FIELD_NAME = "Housing burden (percent)"
         self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME = "HOUSING_BURDEN_NUMERATOR"
-        self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = (
-            "HOUSING_BURDEN_DENOMINATOR"
-        )
+        self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = "HOUSING_BURDEN_DENOMINATOR"
         # Note: some variable definitions.
         # HUD-adjusted median family income (HAMFI).
-        # The four housing problems are: incomplete kitchen facilities, incomplete plumbing facilities, more than 1 person per room, and cost burden greater than 30%.
+        # The four housing problems are:
+        # - incomplete kitchen facilities,
+        # - incomplete plumbing facilities,
+        # - more than 1 person per room,
+        # - cost burden greater than 30%.
         # Table 8 is the desired table.
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Extracting HUD Housing Data")
+        logger.info("Extracting HUD Housing Data")
         super().extract(
             self.HOUSING_FTP_URL,
             self.HOUSING_ZIP_FILE_DIR,
         )
     def transform(self) -> None:
-        logger.info(f"Transforming HUD Housing Data")
+        logger.info("Transforming HUD Housing Data")
         # New file name:
         tmp_csv_file_path = (
@@ -53,9 +56,7 @@ class HudHousingETL(ExtractTransformLoad):
         )
         # Rename and reformat block group ID
-        self.df.rename(
-            columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True
-        )
+        self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
         # The CHAS data has census tract ids such as `14000US01001020100`
         # Whereas the rest of our data uses, for the same tract, `01001020100`.
@@ -70,69 +71,177 @@ class HudHousingETL(ExtractTransformLoad):
         # Owner occupied numerator fields
         OWNER_OCCUPIED_NUMERATOR_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est7 Subtotal Owner occupied less than or equal to 30% of HAMFI greater than 30% but less than or equal to 50% All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
             "T8_est7",
-            # T8_est10 Subtotal Owner occupied less than or equal to 30% of HAMFI greater than 50% All
+            # Subtotal
+            # Owner occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
             "T8_est10",
-            # T8_est20 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Owner occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 50%
+            # All
            "T8_est20",
-            # T8_est23 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI greater than 50% All
+            # Subtotal
+            # Owner occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est23",
-            # T8_est33 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Owner occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 50%
+            # All
            "T8_est33",
-            # T8_est36 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI greater than 50% All
+            # Subtotal
+            # Owner occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est36",
+            # Subtotal
+            # Owner occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 50%
+            # All
        ]
        # These rows have the values where HAMFI was not computed, b/c of no or negative income.
        OWNER_OCCUPIED_NOT_COMPUTED_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est13 Subtotal Owner occupied less than or equal to 30% of HAMFI not computed (no/negative income) All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
            "T8_est13",
-            # T8_est26 Subtotal Owner occupied greater than 30% but less than or equal to 50% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # less than or equal to 30% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est26",
-            # T8_est39 Subtotal Owner occupied greater than 50% but less than or equal to 80% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est39",
-            # T8_est52 Subtotal Owner occupied greater than 80% but less than or equal to 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est52",
-            # T8_est65 Subtotal Owner occupied greater than 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Owner occupied
+            # greater than 80% but less than or equal to 100% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est65",
+            # Subtotal
+            # Owner occupied
+            # greater than 100% of HAMFI
+            # not computed (no/negative income)
+            # All
        ]
-        # T8_est2 Subtotal Owner occupied All All All
        OWNER_OCCUPIED_POPULATION_FIELD = "T8_est2"
+        # Subtotal
+        # Owner occupied
+        # All
+        # All
+        # All
        # Renter occupied numerator fields
        RENTER_OCCUPIED_NUMERATOR_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est73 Subtotal Renter occupied less than or equal to 30% of HAMFI greater than 30% but less than or equal to 50% All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
            "T8_est73",
-            # T8_est76 Subtotal Renter occupied less than or equal to 30% of HAMFI greater than 50% All
+            # Subtotal
+            # Renter occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est76",
-            # T8_est86 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Renter occupied
+            # less than or equal to 30% of HAMFI
+            # greater than 50%
+            # All
            "T8_est86",
-            # T8_est89 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI greater than 50% All
+            # Subtotal
+            # Renter occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est89",
-            # T8_est99 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI greater than 30% but less than or equal to 50% All
+            # Subtotal
+            # Renter occupied
+            # greater than 30% but less than or equal to 50% of HAMFI
+            # greater than 50%
+            # All
            "T8_est99",
-            # T8_est102 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI greater than 50% All
+            # Subtotal
+            # Renter occupied greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 30% but less than or equal to 50%
+            # All
            "T8_est102",
+            # Subtotal
+            # Renter occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # greater than 50%
+            # All
        ]
        # These rows have the values where HAMFI was not computed, b/c of no or negative income.
        RENTER_OCCUPIED_NOT_COMPUTED_FIELDS = [
-            # Key: Column Name Line_Type Tenure Household income Cost burden Facilities
-            # T8_est79 Subtotal Renter occupied less than or equal to 30% of HAMFI not computed (no/negative income) All
+            # Column Name
+            # Line_Type
+            # Tenure
+            # Household income
+            # Cost burden
+            # Facilities
            "T8_est79",
-            # T8_est92 Subtotal Renter occupied greater than 30% but less than or equal to 50% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied less than or equal to 30% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est92",
-            # T8_est105 Subtotal Renter occupied greater than 50% but less than or equal to 80% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied greater than 30% but less than or equal to 50% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est105",
-            # T8_est118 Subtotal Renter occupied greater than 80% but less than or equal to 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied
+            # greater than 50% but less than or equal to 80% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est118",
-            # T8_est131 Subtotal Renter occupied greater than 100% of HAMFI not computed (no/negative income) All
+            # Subtotal
+            # Renter occupied greater than 80% but less than or equal to 100% of HAMFI
+            # not computed (no/negative income)
+            # All
            "T8_est131",
+            # Subtotal
+            # Renter occupied
+            # greater than 100% of HAMFI
+            # not computed (no/negative income)
+            # All
        ]
        # T8_est68 Subtotal Renter occupied All All All
@@ -165,14 +274,12 @@ class HudHousingETL(ExtractTransformLoad):
        # TODO: add small sample size checks
        self.df[self.HOUSING_BURDEN_FIELD_NAME] = self.df[
            self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME
-        ].astype(float) / self.df[
-            self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME
-        ].astype(
+        ].astype(float) / self.df[self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME].astype(
            float
        )
    def load(self) -> None:
-        logger.info(f"Saving HUD Housing Data")
+        logger.info("Saving HUD Housing Data")
        self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

View file

@@ -9,7 +9,8 @@ logger = get_module_logger(__name__)
 class HudRecapETL(ExtractTransformLoad):
     def __init__(self):
-        self.HUD_RECAP_CSV_URL = "https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326"
+        # pylint: disable=line-too-long
+        self.HUD_RECAP_CSV_URL = "https://opendata.arcgis.com/api/v3/datasets/56de4edea8264fe5a344da9811ef5d6e_0/downloads/data?format=csv&spatialRefId=4326"  # noqa: E501
         self.HUD_RECAP_CSV = (
             self.TMP_PATH
             / "Racially_or_Ethnically_Concentrated_Areas_of_Poverty__R_ECAPs_.csv"
@@ -22,7 +23,7 @@ class HudRecapETL(ExtractTransformLoad):
         self.df: pd.DataFrame
     def extract(self) -> None:
-        logger.info(f"Downloading HUD Recap Data")
+        logger.info("Downloading HUD Recap Data")
         download = requests.get(self.HUD_RECAP_CSV_URL, verify=None)
         file_contents = download.content
         csv_file = open(self.HUD_RECAP_CSV, "wb")
@@ -30,7 +31,7 @@ class HudRecapETL(ExtractTransformLoad):
         csv_file.close()
     def transform(self) -> None:
-        logger.info(f"Transforming HUD Recap Data")
+        logger.info("Transforming HUD Recap Data")
         # Load comparison index (CalEnviroScreen 4)
         self.df = pd.read_csv(self.HUD_RECAP_CSV, dtype={"GEOID": "string"})
@@ -57,7 +58,7 @@ class HudRecapETL(ExtractTransformLoad):
         self.df.sort_values(by=self.GEOID_TRACT_FIELD_NAME, inplace=True)
     def load(self) -> None:
-        logger.info(f"Saving HUD Recap CSV")
+        logger.info("Saving HUD Recap CSV")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_csv(self.CSV_PATH / f"usa.csv", index=False)
+        self.df.to_csv(self.CSV_PATH / "usa.csv", index=False)

View file

@@ -3,25 +3,72 @@ import geopandas as gpd
 from etl.base import ExtractTransformLoad
 from utils import get_module_logger
-import os
 logger = get_module_logger(__name__)
 class TreeEquityScoreETL(ExtractTransformLoad):
     def __init__(self):
-        self.TES_URL = "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/"
+        self.TES_URL = (
+            "https://national-tes-data-share.s3.amazonaws.com/national_tes_share/"
+        )
         self.TES_CSV = self.TMP_PATH / "tes_2021_data.csv"
         self.CSV_PATH = self.DATA_PATH / "dataset" / "tree_equity_score"
         self.df: gpd.GeoDataFrame
-        self.states = ["al", "az", "ar", "ca", "co", "ct", "de", "dc", "fl",
-            "ga", "id", "il", "in", "ia", "ks", "ky", "la", "me",
-            "md", "ma", "mi", "mn", "ms", "mo", "mt", "ne", "nv", "nh",
-            "nj", "nm", "ny", "nc", "nd", "oh", "ok", "or", "pa",
-            "ri", "sc", "sd", "tn", "tx", "ut", "vt", "va", "wa", "wv", "wi", "wy"]
+        self.states = [
+            "al",
+            "az",
+            "ar",
+            "ca",
+            "co",
+            "ct",
+            "de",
+            "dc",
+            "fl",
+            "ga",
+            "id",
+            "il",
+            "in",
+            "ia",
+            "ks",
+            "ky",
+            "la",
+            "me",
+            "md",
+            "ma",
+            "mi",
+            "mn",
+            "ms",
+            "mo",
+            "mt",
+            "ne",
+            "nv",
+            "nh",
+            "nj",
+            "nm",
+            "ny",
+            "nc",
+            "nd",
+            "oh",
+            "ok",
+            "or",
+            "pa",
+            "ri",
+            "sc",
+            "sd",
+            "tn",
+            "tx",
+            "ut",
+            "vt",
+            "va",
+            "wa",
+            "wv",
+            "wi",
+            "wy",
+        ]
     def extract(self) -> None:
-        logger.info(f"Downloading Tree Equity Score Data")
+        logger.info("Downloading Tree Equity Score Data")
         for state in self.states:
             super().extract(
                 f"{self.TES_URL}{state}.zip.zip",
@@ -29,14 +76,14 @@ class TreeEquityScoreETL(ExtractTransformLoad):
             )
     def transform(self) -> None:
-        logger.info(f"Transforming Tree Equity Score Data")
+        logger.info("Transforming Tree Equity Score Data")
         tes_state_dfs = []
         for state in self.states:
             tes_state_dfs.append(gpd.read_file(f"{self.TMP_PATH}/{state}/{state}.shp"))
         self.df = gpd.GeoDataFrame(pd.concat(tes_state_dfs), crs=tes_state_dfs[0].crs)
     def load(self) -> None:
-        logger.info(f"Saving Tree Equity Score GeoJSON")
+        logger.info("Saving Tree Equity Score GeoJSON")
         # write nationwide csv
         self.CSV_PATH.mkdir(parents=True, exist_ok=True)
-        self.df.to_file(self.CSV_PATH / "tes_conus.geojson", driver='GeoJSON')
+        self.df.to_file(self.CSV_PATH / "tes_conus.geojson", driver="GeoJSON")

View file

@@ -31,6 +31,20 @@ dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest", "sphinx", "wheel", "p
docs = ["sphinx"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pytest"]
[[package]]
name = "astroid"
version = "2.6.5"
description = "An abstract syntax tree for Python with inference support."
category = "dev"
optional = false
python-versions = "~=3.6"
[package.dependencies]
lazy-object-proxy = ">=1.4.0"
typed-ast = {version = ">=1.4.0,<1.5", markers = "implementation_name == \"cpython\" and python_version < \"3.8\""}
typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""}
wrapt = ">=1.11,<1.13"
[[package]]
name = "async-generator"
version = "1.10"
@@ -203,6 +217,18 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "configparser"
version = "5.0.2"
description = "Updated configparser from Python 3.8 for Python 2.6+."
category = "dev"
optional = false
python-versions = ">=3.6"
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-cov", "pytest-enabler", "pytest-black (>=0.3.7)", "pytest-mypy"]
[[package]]
name = "debugpy"
version = "1.4.0"
@@ -235,6 +261,22 @@ category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "dparse"
version = "0.5.1"
description = "A parser for Python dependency files"
category = "dev"
optional = false
python-versions = ">=3.5"
[package.dependencies]
packaging = "*"
pyyaml = "*"
toml = "*"
[package.extras]
pipenv = ["pipenv"]
[[package]]
name = "dynaconf"
version = "3.1.4"
@@ -291,6 +333,20 @@ calc = ["shapely"]
s3 = ["boto3 (>=1.2.4)"]
test = ["pytest (>=3)", "pytest-cov", "boto3 (>=1.2.4)", "mock"]
[[package]]
name = "flake8"
version = "3.9.2"
description = "the modular source code checker: pep8 pyflakes and co"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
[package.dependencies]
importlib-metadata = {version = "*", markers = "python_version < \"3.8\""}
mccabe = ">=0.6.0,<0.7.0"
pycodestyle = ">=2.7.0,<2.8.0"
pyflakes = ">=2.3.0,<2.4.0"
[[package]]
name = "geopandas"
version = "0.9.0"
@@ -409,6 +465,20 @@ widgetsnbextension = ">=3.5.0,<3.6.0"
[package.extras]
test = ["pytest (>=3.6.0)", "pytest-cov", "mock"]
[[package]]
name = "isort"
version = "5.9.3"
description = "A Python utility / library to sort Python imports."
category = "dev"
optional = false
python-versions = ">=3.6.1,<4.0"
[package.extras]
pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
requirements_deprecated_finder = ["pipreqs", "pip-api"]
colors = ["colorama (>=0.4.3,<0.5.0)"]
plugins = ["setuptools"]
[[package]]
name = "jedi"
version = "0.18.0"
@@ -625,6 +695,27 @@ category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "lazy-object-proxy"
version = "1.6.0"
description = "A fast and thorough lazy object proxy."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[[package]]
name = "liccheck"
version = "0.6.2"
description = "Check python packages from requirement.txt and report issues"
category = "dev"
optional = false
python-versions = ">=2.7"
[package.dependencies]
configparser = {version = "*", markers = "python_version >= \"3.4\""}
semantic-version = ">=2.7.0"
toml = "*"
[[package]]
name = "lxml"
version = "4.6.3"
@@ -658,6 +749,14 @@ python-versions = ">=3.5"
[package.dependencies]
traitlets = "*"
[[package]]
name = "mccabe"
version = "0.6.1"
description = "McCabe checker, plugin for flake8"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "mistune"
version = "0.8.4"
@@ -954,6 +1053,14 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pycodestyle"
version = "2.7.0"
description = "Python style guide checker"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pycparser"
version = "2.20"
@@ -962,6 +1069,14 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pyflakes"
version = "2.3.1"
description = "passive checker of Python programs"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]]
name = "pygments"
version = "2.9.0"
@@ -970,6 +1085,21 @@ category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "pylint"
version = "2.9.6"
description = "python code static checker"
category = "dev"
optional = false
python-versions = "~=3.6"
[package.dependencies]
astroid = ">=2.6.5,<2.7"
colorama = {version = "*", markers = "sys_platform == \"win32\""}
isort = ">=4.2.5,<6"
mccabe = ">=0.6,<0.7"
toml = ">=0.7.1"
[[package]] [[package]]
name = "pyparsing" name = "pyparsing"
version = "2.4.7" version = "2.4.7"
@ -1108,6 +1238,28 @@ urllib3 = ">=1.21.1,<1.27"
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"] use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "safety"
version = "1.10.3"
description = "Checks installed dependencies for known vulnerabilities."
category = "dev"
optional = false
python-versions = ">=3.5"
[package.dependencies]
Click = ">=6.0"
dparse = ">=0.5.1"
packaging = "*"
requests = "*"
[[package]]
name = "semantic-version"
version = "2.8.5"
description = "A library implementing the 'SemVer' scheme."
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[[package]] [[package]]
name = "send2trash" name = "send2trash"
version = "1.7.1" version = "1.7.1"
@ -1312,6 +1464,14 @@ python-versions = "*"
[package.dependencies] [package.dependencies]
notebook = ">=4.4.1" notebook = ">=4.4.1"
[[package]]
name = "wrapt"
version = "1.12.1"
description = "Module for decorators, wrappers and monkey patching."
category = "dev"
optional = false
python-versions = "*"
[[package]] [[package]]
name = "zipp" name = "zipp"
version = "3.5.0" version = "3.5.0"
@ -1327,7 +1487,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.7.1" python-versions = "^3.7.1"
content-hash = "e6692af9b40f2508a858739de08cb9c1a2e86b54a219b8196ca736981a61ce4d" content-hash = "705b0cf25d9ecd3028ba5b71581b5139608cb3b0b4d13c4817b4f3a49643308c"
[metadata.files] [metadata.files]
appdirs = [ appdirs = [
@ -1362,6 +1522,10 @@ argon2-cffi = [
{file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"}, {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:3aa804c0e52f208973845e8b10c70d8957c9e5a666f702793256242e9167c4e0"},
{file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"}, {file = "argon2_cffi-20.1.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:36320372133a003374ef4275fbfce78b7ab581440dfca9f9471be3dd9a522428"},
] ]
astroid = [
{file = "astroid-2.6.5-py3-none-any.whl", hash = "sha256:7b963d1c590d490f60d2973e57437115978d3a2529843f160b5003b721e1e925"},
{file = "astroid-2.6.5.tar.gz", hash = "sha256:83e494b02d75d07d4e347b27c066fd791c0c74fc96c613d1ea3de0c82c48168f"},
]
async-generator = [ async-generator = [
{file = "async_generator-1.10-py3-none-any.whl", hash = "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b"}, {file = "async_generator-1.10-py3-none-any.whl", hash = "sha256:01c7bf666359b4967d2cda0000cc2e4af16a0ae098cbffcb8472fb9e8ad6585b"},
{file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"}, {file = "async_generator-1.10.tar.gz", hash = "sha256:6ebb3d106c12920aaae42ccb6f787ef5eefdcdd166ea3d628fa8476abe712144"},
@ -1460,6 +1624,10 @@ colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
] ]
configparser = [
{file = "configparser-5.0.2-py3-none-any.whl", hash = "sha256:af59f2cdd7efbdd5d111c1976ecd0b82db9066653362f0962d7bf1d3ab89a1fa"},
{file = "configparser-5.0.2.tar.gz", hash = "sha256:85d5de102cfe6d14a5172676f09d19c465ce63d6019cf0a4ef13385fc535e828"},
]
debugpy = [ debugpy = [
{file = "debugpy-1.4.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:55d12ee03b3b705af5250b8344a87fbd9bb720d00bd9d281d2998dbf9f60c8d3"}, {file = "debugpy-1.4.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:55d12ee03b3b705af5250b8344a87fbd9bb720d00bd9d281d2998dbf9f60c8d3"},
{file = "debugpy-1.4.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:712ef6a4be1ee4b9a954c6f36788ac12686dc1d5eeef501e0b81e1c89c16484d"}, {file = "debugpy-1.4.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:712ef6a4be1ee4b9a954c6f36788ac12686dc1d5eeef501e0b81e1c89c16484d"},
@ -1530,6 +1698,10 @@ distlib = [
{file = "distlib-0.3.2-py2.py3-none-any.whl", hash = "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"}, {file = "distlib-0.3.2-py2.py3-none-any.whl", hash = "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"},
{file = "distlib-0.3.2.zip", hash = "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736"}, {file = "distlib-0.3.2.zip", hash = "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736"},
] ]
dparse = [
{file = "dparse-0.5.1-py3-none-any.whl", hash = "sha256:e953a25e44ebb60a5c6efc2add4420c177f1d8404509da88da9729202f306994"},
{file = "dparse-0.5.1.tar.gz", hash = "sha256:a1b5f169102e1c894f9a7d5ccf6f9402a836a5d24be80a986c7ce9eaed78f367"},
]
dynaconf = [ dynaconf = [
{file = "dynaconf-3.1.4-py2.py3-none-any.whl", hash = "sha256:e6f383b84150b70fc439c8b2757581a38a58d07962aa14517292dcce1a77e160"}, {file = "dynaconf-3.1.4-py2.py3-none-any.whl", hash = "sha256:e6f383b84150b70fc439c8b2757581a38a58d07962aa14517292dcce1a77e160"},
{file = "dynaconf-3.1.4.tar.gz", hash = "sha256:b2f472d83052f809c5925565b8a2ba76a103d5dc1dbb9748b693ed67212781b9"}, {file = "dynaconf-3.1.4.tar.gz", hash = "sha256:b2f472d83052f809c5925565b8a2ba76a103d5dc1dbb9748b693ed67212781b9"},
@ -1553,6 +1725,10 @@ fiona = [
{file = "Fiona-1.8.20-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e72e4a5b84ec410be531d4fe4c1a5c87c6c0e92d01116c145c0f1b33f81c8080"}, {file = "Fiona-1.8.20-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e72e4a5b84ec410be531d4fe4c1a5c87c6c0e92d01116c145c0f1b33f81c8080"},
{file = "Fiona-1.8.20.tar.gz", hash = "sha256:a70502d2857b82f749c09cb0dea3726787747933a2a1599b5ab787d74e3c143b"}, {file = "Fiona-1.8.20.tar.gz", hash = "sha256:a70502d2857b82f749c09cb0dea3726787747933a2a1599b5ab787d74e3c143b"},
] ]
flake8 = [
{file = "flake8-3.9.2-py2.py3-none-any.whl", hash = "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"},
{file = "flake8-3.9.2.tar.gz", hash = "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b"},
]
geopandas = [ geopandas = [
{file = "geopandas-0.9.0-py2.py3-none-any.whl", hash = "sha256:79f6e557ba0dba76eec44f8351b1c6b42a17c38f5f08fef347e98fe4dae563c7"}, {file = "geopandas-0.9.0-py2.py3-none-any.whl", hash = "sha256:79f6e557ba0dba76eec44f8351b1c6b42a17c38f5f08fef347e98fe4dae563c7"},
{file = "geopandas-0.9.0.tar.gz", hash = "sha256:63972ab4dc44c4029f340600dcb83264eb8132dd22b104da0b654bef7f42630a"}, {file = "geopandas-0.9.0.tar.gz", hash = "sha256:63972ab4dc44c4029f340600dcb83264eb8132dd22b104da0b654bef7f42630a"},
@ -1581,6 +1757,10 @@ ipywidgets = [
{file = "ipywidgets-7.6.3-py2.py3-none-any.whl", hash = "sha256:e6513cfdaf5878de30f32d57f6dc2474da395a2a2991b94d487406c0ab7f55ca"}, {file = "ipywidgets-7.6.3-py2.py3-none-any.whl", hash = "sha256:e6513cfdaf5878de30f32d57f6dc2474da395a2a2991b94d487406c0ab7f55ca"},
{file = "ipywidgets-7.6.3.tar.gz", hash = "sha256:9f1a43e620530f9e570e4a493677d25f08310118d315b00e25a18f12913c41f0"}, {file = "ipywidgets-7.6.3.tar.gz", hash = "sha256:9f1a43e620530f9e570e4a493677d25f08310118d315b00e25a18f12913c41f0"},
] ]
isort = [
{file = "isort-5.9.3-py3-none-any.whl", hash = "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2"},
{file = "isort-5.9.3.tar.gz", hash = "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899"},
]
jedi = [ jedi = [
{file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"}, {file = "jedi-0.18.0-py2.py3-none-any.whl", hash = "sha256:18456d83f65f400ab0c2d3319e48520420ef43b23a086fdc05dff34132f0fb93"},
{file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"}, {file = "jedi-0.18.0.tar.gz", hash = "sha256:92550a404bad8afed881a137ec9a461fed49eca661414be45059329614ed0707"},
@ -1636,6 +1816,34 @@ jupyterlab-widgets = [
{file = "jupyterlab_widgets-1.0.0-py3-none-any.whl", hash = "sha256:caeaf3e6103180e654e7d8d2b81b7d645e59e432487c1d35a41d6d3ee56b3fef"}, {file = "jupyterlab_widgets-1.0.0-py3-none-any.whl", hash = "sha256:caeaf3e6103180e654e7d8d2b81b7d645e59e432487c1d35a41d6d3ee56b3fef"},
{file = "jupyterlab_widgets-1.0.0.tar.gz", hash = "sha256:5c1a29a84d3069208cb506b10609175b249b6486d6b1cbae8fcde2a11584fb78"}, {file = "jupyterlab_widgets-1.0.0.tar.gz", hash = "sha256:5c1a29a84d3069208cb506b10609175b249b6486d6b1cbae8fcde2a11584fb78"},
] ]
lazy-object-proxy = [
{file = "lazy-object-proxy-1.6.0.tar.gz", hash = "sha256:489000d368377571c6f982fba6497f2aa13c6d1facc40660963da62f5c379726"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:c6938967f8528b3668622a9ed3b31d145fab161a32f5891ea7b84f6b790be05b"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27m-win32.whl", hash = "sha256:ebfd274dcd5133e0afae738e6d9da4323c3eb021b3e13052d8cbd0e457b1256e"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27m-win_amd64.whl", hash = "sha256:ed361bb83436f117f9917d282a456f9e5009ea12fd6de8742d1a4752c3017e93"},
{file = "lazy_object_proxy-1.6.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:d900d949b707778696fdf01036f58c9876a0d8bfe116e8d220cfd4b15f14e741"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:5743a5ab42ae40caa8421b320ebf3a998f89c85cdc8376d6b2e00bd12bd1b587"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:bf34e368e8dd976423396555078def5cfc3039ebc6fc06d1ae2c5a65eebbcde4"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-win32.whl", hash = "sha256:b579f8acbf2bdd9ea200b1d5dea36abd93cabf56cf626ab9c744a432e15c815f"},
{file = "lazy_object_proxy-1.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:4f60460e9f1eb632584c9685bccea152f4ac2130e299784dbaf9fae9f49891b3"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:d7124f52f3bd259f510651450e18e0fd081ed82f3c08541dffc7b94b883aa981"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:22ddd618cefe54305df49e4c069fa65715be4ad0e78e8d252a33debf00f6ede2"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-win32.whl", hash = "sha256:9d397bf41caad3f489e10774667310d73cb9c4258e9aed94b9ec734b34b495fd"},
{file = "lazy_object_proxy-1.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:24a5045889cc2729033b3e604d496c2b6f588c754f7a62027ad4437a7ecc4837"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:17e0967ba374fc24141738c69736da90e94419338fd4c7c7bef01ee26b339653"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:410283732af311b51b837894fa2f24f2c0039aa7f220135192b38fcc42bd43d3"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-win32.whl", hash = "sha256:85fb7608121fd5621cc4377a8961d0b32ccf84a7285b4f1d21988b2eae2868e8"},
{file = "lazy_object_proxy-1.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:d1c2676e3d840852a2de7c7d5d76407c772927addff8d742b9808fe0afccebdf"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:b865b01a2e7f96db0c5d12cfea590f98d8c5ba64ad222300d93ce6ff9138bcad"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:4732c765372bd78a2d6b2150a6e99d00a78ec963375f236979c0626b97ed8e43"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:9698110e36e2df951c7c36b6729e96429c9c32b3331989ef19976592c5f3c77a"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-win32.whl", hash = "sha256:1fee665d2638491f4d6e55bd483e15ef21f6c8c2095f235fef72601021e64f61"},
{file = "lazy_object_proxy-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:f5144c75445ae3ca2057faac03fda5a902eff196702b0a24daf1d6ce0650514b"},
]
liccheck = [
{file = "liccheck-0.6.2-py2.py3-none-any.whl", hash = "sha256:e6583fc327126695a31a7ed8941e784ecd5c84bb2aecbe2782d925cac5c3fe47"},
{file = "liccheck-0.6.2.tar.gz", hash = "sha256:5667be7c9ef6496bd381e709e938e9fe51c31d601afc44965615cdfbce375eab"},
]
lxml = [ lxml = [
{file = "lxml-4.6.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2"}, {file = "lxml-4.6.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:df7c53783a46febb0e70f6b05df2ba104610f2fb0d27023409734a3ecbb78fb2"},
{file = "lxml-4.6.3-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f"}, {file = "lxml-4.6.3-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:1b7584d421d254ab86d4f0b13ec662a9014397678a7c4265a02a6d7c2b18a75f"},
@ -1724,6 +1932,10 @@ matplotlib-inline = [
{file = "matplotlib-inline-0.1.2.tar.gz", hash = "sha256:f41d5ff73c9f5385775d5c0bc13b424535c8402fe70ea8210f93e11f3683993e"}, {file = "matplotlib-inline-0.1.2.tar.gz", hash = "sha256:f41d5ff73c9f5385775d5c0bc13b424535c8402fe70ea8210f93e11f3683993e"},
{file = "matplotlib_inline-0.1.2-py3-none-any.whl", hash = "sha256:5cf1176f554abb4fa98cb362aa2b55c500147e4bdbb07e3fda359143e1da0811"}, {file = "matplotlib_inline-0.1.2-py3-none-any.whl", hash = "sha256:5cf1176f554abb4fa98cb362aa2b55c500147e4bdbb07e3fda359143e1da0811"},
] ]
mccabe = [
{file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"},
{file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"},
]
mistune = [ mistune = [
{file = "mistune-0.8.4-py2.py3-none-any.whl", hash = "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"}, {file = "mistune-0.8.4-py2.py3-none-any.whl", hash = "sha256:88a1051873018da288eee8538d476dffe1262495144b33ecb586c4ab266bb8d4"},
{file = "mistune-0.8.4.tar.gz", hash = "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e"}, {file = "mistune-0.8.4.tar.gz", hash = "sha256:59a3429db53c50b5c6bcc8a07f8848cb00d7dc8bdb431a4ab41920d201d4756e"},
@ -1879,14 +2091,26 @@ py = [
{file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"}, {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"},
{file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"}, {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"},
] ]
pycodestyle = [
{file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"},
{file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"},
]
pycparser = [ pycparser = [
{file = "pycparser-2.20-py2.py3-none-any.whl", hash = "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"}, {file = "pycparser-2.20-py2.py3-none-any.whl", hash = "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"},
{file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"}, {file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"},
] ]
pyflakes = [
{file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"},
{file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"},
]
pygments = [ pygments = [
{file = "Pygments-2.9.0-py3-none-any.whl", hash = "sha256:d66e804411278594d764fc69ec36ec13d9ae9147193a1740cd34d272ca383b8e"}, {file = "Pygments-2.9.0-py3-none-any.whl", hash = "sha256:d66e804411278594d764fc69ec36ec13d9ae9147193a1740cd34d272ca383b8e"},
{file = "Pygments-2.9.0.tar.gz", hash = "sha256:a18f47b506a429f6f4b9df81bb02beab9ca21d0a5fee38ed15aef65f0545519f"}, {file = "Pygments-2.9.0.tar.gz", hash = "sha256:a18f47b506a429f6f4b9df81bb02beab9ca21d0a5fee38ed15aef65f0545519f"},
] ]
pylint = [
{file = "pylint-2.9.6-py3-none-any.whl", hash = "sha256:2e1a0eb2e8ab41d6b5dbada87f066492bb1557b12b76c47c2ee8aa8a11186594"},
{file = "pylint-2.9.6.tar.gz", hash = "sha256:8b838c8983ee1904b2de66cce9d0b96649a91901350e956d78f289c3bc87b48e"},
]
pyparsing = [ pyparsing = [
{file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"},
{file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"},
@ -2082,6 +2306,14 @@ requests = [
{file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"}, {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"}, {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
] ]
safety = [
{file = "safety-1.10.3-py2.py3-none-any.whl", hash = "sha256:5f802ad5df5614f9622d8d71fedec2757099705c2356f862847c58c6dfe13e84"},
{file = "safety-1.10.3.tar.gz", hash = "sha256:30e394d02a20ac49b7f65292d19d38fa927a8f9582cdfd3ad1adbbc66c641ad5"},
]
semantic-version = [
{file = "semantic_version-2.8.5-py2.py3-none-any.whl", hash = "sha256:45e4b32ee9d6d70ba5f440ec8cc5221074c7f4b0e8918bdab748cc37912440a9"},
{file = "semantic_version-2.8.5.tar.gz", hash = "sha256:d2cb2de0558762934679b9a104e82eca7af448c9f4974d1f3eeccff651df8a54"},
]
send2trash = [ send2trash = [
{file = "Send2Trash-1.7.1-py3-none-any.whl", hash = "sha256:c20fee8c09378231b3907df9c215ec9766a84ee20053d99fbad854fe8bd42159"}, {file = "Send2Trash-1.7.1-py3-none-any.whl", hash = "sha256:c20fee8c09378231b3907df9c215ec9766a84ee20053d99fbad854fe8bd42159"},
{file = "Send2Trash-1.7.1.tar.gz", hash = "sha256:17730aa0a33ab82ed6ca76be3bb25f0433d0014f1ccf63c979bab13a5b9db2b2"}, {file = "Send2Trash-1.7.1.tar.gz", hash = "sha256:17730aa0a33ab82ed6ca76be3bb25f0433d0014f1ccf63c979bab13a5b9db2b2"},
@ -2243,6 +2475,9 @@ widgetsnbextension = [
{file = "widgetsnbextension-3.5.1-py2.py3-none-any.whl", hash = "sha256:bd314f8ceb488571a5ffea6cc5b9fc6cba0adaf88a9d2386b93a489751938bcd"}, {file = "widgetsnbextension-3.5.1-py2.py3-none-any.whl", hash = "sha256:bd314f8ceb488571a5ffea6cc5b9fc6cba0adaf88a9d2386b93a489751938bcd"},
{file = "widgetsnbextension-3.5.1.tar.gz", hash = "sha256:079f87d87270bce047512400efd70238820751a11d2d8cb137a5a5bdbaf255c7"}, {file = "widgetsnbextension-3.5.1.tar.gz", hash = "sha256:079f87d87270bce047512400efd70238820751a11d2d8cb137a5a5bdbaf255c7"},
] ]
wrapt = [
{file = "wrapt-1.12.1.tar.gz", hash = "sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"},
]
zipp = [ zipp = [
{file = "zipp-3.5.0-py3-none-any.whl", hash = "sha256:957cfda87797e389580cb8b9e3870841ca991e2125350677b2ca83a0e99390a3"}, {file = "zipp-3.5.0-py3-none-any.whl", hash = "sha256:957cfda87797e389580cb8b9e3870841ca991e2125350677b2ca83a0e99390a3"},
{file = "zipp-3.5.0.tar.gz", hash = "sha256:f5812b1e007e48cff63449a5e9f4e7ebea716b4111f9c4f9a645f91d579bf0c4"}, {file = "zipp-3.5.0.tar.gz", hash = "sha256:f5812b1e007e48cff63449a5e9f4e7ebea716b4111f9c4f9a645f91d579bf0c4"},

View file

@ -25,7 +25,79 @@ types-requests = "^2.25.0"
black = {version = "^21.6b0", allow-prereleases = true} black = {version = "^21.6b0", allow-prereleases = true}
mypy = "^0.910" mypy = "^0.910"
tox = "^3.24.0" tox = "^3.24.0"
flake8 = "^3.9.2"
pylint = "^2.9.6"
liccheck = "^0.6.2"
safety = "^1.10.3"
[build-system] [build-system]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
requires = ["poetry-core>=1.0.0"] requires = ["poetry-core>=1.0.0"]
[tool.pylint]
[tool.pylint."MESSAGE CONTROL"]
disable = [
"C0114", # Disables module docstrings
"R0201", # Disables method could have been a function
"R0903", # Disables too few public methods
"C0103", # Disables name case styling
"W0511", # Disables FIXME warning
"W1203", # Disables f-string interpolation for logging warning
# Errors temporarily ignored for further discussion
"W0107", # Disables unnecessary pass
"W0221", # Disables arguments differ
"R0902", # Disables too many instance attributes
"R0914", # Disables too many local variables
"W0621", # Disables redefined outer name
"C0302", # Disables too many lines in module
"R1732", # Disables consider using "with"
"R1720", # Disables unnecessary "else" after "raise"
"C0206", # Disables consider iteratig with ".items()"
"C0200", # Disables consider using "enumerate" instead of "range" + "len"
"W0612", # Disables unused variable
"W0613", # Disables unused argument
"C0116", # Disables missing function or method docstring
"C0115", # Disables missing class docstring
]
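W1203, disabled in the list above, is pylint's warning about f-string interpolation inside logging calls; with it suppressed, calls such as logger.info(f"...") are allowed even though pylint would normally suggest lazy %-style formatting, which defers building the string until the record is actually emitted. A minimal standalone illustration (not taken from this codebase):

    import logging

    logger = logging.getLogger(__name__)
    count = 42

    # W1203 would flag this: the f-string is evaluated even if INFO logging is off
    logger.info(f"Processed {count} records")

    # the form pylint prefers lets the logging framework do the formatting lazily
    logger.info("Processed %s records", count)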
[tool.pylint.FORMAT]
max-line-length = 150
[tool.pylint.SIMILARITIES]
# Configures how pylint detects repetitive code
min-similarity-lines = 4
ignore-comments = "yes"
ignore-docstrings = "yes"
ignore-imports = "yes"
[tool.liccheck]
# Authorized and unauthorized licenses in LOWER CASE
authorized_licenses = [
"bsd",
"new bsd",
"bsd license",
"bsd 3-clause",
"new bsd license",
"simplified bsd",
"apache",
"apache 2.0",
"apache license 2.0",
"apache software license",
"apache software",
"gnu lgpl",
"gnu lesser general public license v2 (lgplv2)",
"gnu general public license v2 (gplv2)",
"gnu library or lesser general public license (lgpl)",
"lgpl with exceptions or zpl",
"isc license",
"isc license (iscl)",
"mit",
"mit license",
"mozilla public license 2.0 (mpl 2.0)",
"public domain",
"python software foundation license",
"python software foundation",
"zpl 2.1",
"gpl v3"
]

View file

@ -1,30 +1,40 @@
appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7" appdirs==1.4.4; python_full_version >= "3.6.2"
appnope==0.1.2; sys_platform == "darwin" and python_version >= "3.7" and platform_system == "Darwin"
argon2-cffi==20.1.0; python_version >= "3.6" argon2-cffi==20.1.0; python_version >= "3.6"
astroid==2.6.5; python_version >= "3.6" and python_version < "4.0"
async-generator==1.10; python_full_version >= "3.6.1" and python_version >= "3.7" async-generator==1.10; python_full_version >= "3.6.1" and python_version >= "3.7"
attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6" attrs==21.2.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
backcall==0.2.0; python_version >= "3.7" backcall==0.2.0; python_version >= "3.7"
bleach==3.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" backports.entry-points-selectable==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7"
black==21.7b0; python_full_version >= "3.6.2"
bleach==3.3.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
censusdata==1.13; python_version >= "2.7" censusdata==1.13; python_version >= "2.7"
certifi==2021.5.30; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" certifi==2021.5.30; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.7"
cffi==1.14.6; implementation_name == "pypy" and python_version >= "3.6" cffi==1.14.6; implementation_name == "pypy" and python_version >= "3.6"
chardet==4.0.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" charset-normalizer==2.0.3; python_full_version >= "3.6.0" and python_version >= "3"
click-plugins==1.1.1; python_version >= "3.6" click-plugins==1.1.1; python_version >= "3.6"
click==8.0.1; python_version >= "3.6" click==8.0.1; python_version >= "3.6"
cligj==0.7.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.6" cligj==0.7.2; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version < "4" and python_version >= "3.6"
colorama==0.4.4; python_version >= "3.7" and python_full_version < "3.0.0" and platform_system == "Windows" and sys_platform == "win32" or platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.5.0" and sys_platform == "win32" colorama==0.4.4; platform_system == "Windows" and python_version >= "3.7" and python_full_version >= "3.6.2" and sys_platform == "win32" and python_version < "4.0" and (python_version >= "3.7" and python_full_version < "3.0.0" and sys_platform == "win32" or sys_platform == "win32" and python_version >= "3.7" and python_full_version >= "3.5.0")
debugpy==1.3.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" configparser==5.0.2; python_version >= "3.6"
debugpy==1.4.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
decorator==5.0.9; python_version >= "3.7" decorator==5.0.9; python_version >= "3.7"
defusedxml==0.7.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" defusedxml==0.7.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
distlib==0.3.2; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
dparse==0.5.1; python_version >= "3.5"
dynaconf==3.1.4 dynaconf==3.1.4
entrypoints==0.3; python_version >= "3.7" entrypoints==0.3; python_version >= "3.7"
filelock==3.0.12; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
fiona==1.8.20; python_version >= "3.6" fiona==1.8.20; python_version >= "3.6"
flake8==3.9.2; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
geopandas==0.9.0; python_version >= "3.6" geopandas==0.9.0; python_version >= "3.6"
idna==2.10; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "2.7" idna==3.2; python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.5"
importlib-metadata==3.10.1; python_version < "3.8" and python_version >= "3.7" importlib-metadata==3.10.1; python_version < "3.8" and python_version >= "3.7" and (python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "3.8" or python_full_version >= "3.5.0" and python_version < "3.8" and python_version >= "3.6") and python_full_version >= "3.6.2"
ipykernel==6.0.1; python_version >= "3.7" ipykernel==6.0.3; python_version >= "3.7"
ipython-genutils==0.2.0; python_version >= "3.7" ipython-genutils==0.2.0; python_version >= "3.7"
ipython==7.25.0; python_version >= "3.7" ipython==7.25.0; python_version >= "3.7"
ipywidgets==7.6.3 ipywidgets==7.6.3
isort==5.9.3; python_full_version >= "3.6.1" and python_version < "4.0" and python_version >= "3.6"
jedi==0.18.0; python_version >= "3.7" jedi==0.18.0; python_version >= "3.7"
jinja2==3.0.1; python_version >= "3.7" jinja2==3.0.1; python_version >= "3.7"
jsonschema==3.2.0; python_version >= "3.5" jsonschema==3.2.0; python_version >= "3.5"
@ -39,52 +49,72 @@ jupyter-nbextensions-configurator==0.4.1
jupyter==1.0.0 jupyter==1.0.0
jupyterlab-pygments==0.1.2; python_version >= "3.7" jupyterlab-pygments==0.1.2; python_version >= "3.7"
jupyterlab-widgets==1.0.0; python_version >= "3.6" jupyterlab-widgets==1.0.0; python_version >= "3.6"
lazy-object-proxy==1.6.0; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.6" and python_version < "4.0" and python_full_version >= "3.6.0"
liccheck==0.6.2; python_version >= "2.7"
lxml==4.6.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" lxml==4.6.3; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
markupsafe==2.0.1; python_version >= "3.7" markupsafe==2.0.1; python_version >= "3.7"
matplotlib-inline==0.1.2; platform_system == "Darwin" and python_version >= "3.7" matplotlib-inline==0.1.2; python_version >= "3.7"
mccabe==0.6.1; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_version >= "3.6" and python_version < "4.0" and python_full_version >= "3.5.0"
mistune==0.8.4; python_version >= "3.7" mistune==0.8.4; python_version >= "3.7"
munch==2.5.0; python_version >= "3.6" munch==2.5.0; python_version >= "3.6"
mypy-extensions==0.4.3; python_full_version >= "3.6.2" and python_version >= "3.5"
mypy==0.910; python_version >= "3.5"
nbclient==0.5.3; python_full_version >= "3.6.1" and python_version >= "3.7" nbclient==0.5.3; python_full_version >= "3.6.1" and python_version >= "3.7"
nbconvert==6.1.0; python_version >= "3.7" nbconvert==6.1.0; python_version >= "3.7"
nbformat==5.1.3; python_full_version >= "3.6.1" and python_version >= "3.7" nbformat==5.1.3; python_full_version >= "3.6.1" and python_version >= "3.7"
nest-asyncio==1.5.1; python_full_version >= "3.6.1" and python_version >= "3.7" nest-asyncio==1.5.1; python_full_version >= "3.6.1" and python_version >= "3.7"
notebook==6.4.0; python_version >= "3.6" notebook==6.4.0; python_version >= "3.6"
numpy==1.21.0; python_version >= "3.7" numpy==1.21.1; python_version >= "3.7"
packaging==21.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" packaging==21.0; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
pandas==1.3.0; python_full_version >= "3.7.1" pandas==1.3.0; python_full_version >= "3.7.1"
pandocfilters==1.4.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.7" pandocfilters==1.4.3; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.7"
parso==0.8.2; python_version >= "3.7" parso==0.8.2; python_version >= "3.7"
pathspec==0.9.0; python_full_version >= "3.6.2"
pexpect==4.8.0; sys_platform != "win32" and python_version >= "3.7" pexpect==4.8.0; sys_platform != "win32" and python_version >= "3.7"
pickleshare==0.7.5; python_version >= "3.7" pickleshare==0.7.5; python_version >= "3.7"
platformdirs==2.0.2; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
pluggy==0.13.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
prometheus-client==0.11.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" prometheus-client==0.11.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
prompt-toolkit==3.0.19; python_full_version >= "3.6.1" and python_version >= "3.7" prompt-toolkit==3.0.19; python_full_version >= "3.6.1" and python_version >= "3.7"
ptyprocess==0.7.0; sys_platform != "win32" and python_version >= "3.7" and os_name != "nt" ptyprocess==0.7.0; sys_platform != "win32" and python_version >= "3.7" and os_name != "nt"
py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.6" and python_full_version >= "3.4.0" py==1.10.0; python_version >= "3.6" and python_full_version < "3.0.0" and implementation_name == "pypy" or implementation_name == "pypy" and python_version >= "3.6" and python_full_version >= "3.5.0"
pycodestyle==2.7.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6" pycparser==2.20; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
pyflakes==2.3.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
pygments==2.9.0; python_version >= "3.7" pygments==2.9.0; python_version >= "3.7"
pyparsing==2.4.7; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" pylint==2.9.6; python_version >= "3.6" and python_version < "4.0"
pyparsing==2.4.7; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
pyproj==3.1.0; python_version >= "3.7" pyproj==3.1.0; python_version >= "3.7"
pyrsistent==0.18.0; python_version >= "3.6" pyrsistent==0.18.0; python_version >= "3.6"
python-dateutil==2.8.1; python_full_version >= "3.7.1" and python_version >= "3.7" python-dateutil==2.8.2; python_full_version >= "3.7.1" and python_version >= "3.7"
pytz==2021.1; python_full_version >= "3.7.1" and python_version >= "2.7" pytz==2021.1; python_full_version >= "3.7.1" and python_version >= "2.7"
pywin32==301; sys_platform == "win32" and python_version >= "3.6" pywin32==301; sys_platform == "win32" and python_version >= "3.6"
pywinpty==1.1.3; os_name == "nt" and python_version >= "3.6" pywinpty==1.1.3; os_name == "nt" and python_version >= "3.6"
pyyaml==5.4.1; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" pyyaml==5.4.1; python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version >= "3.5"
pyzmq==22.1.0; python_full_version >= "3.6.1" and python_version >= "3.7" pyzmq==22.1.0; python_full_version >= "3.6.1" and python_version >= "3.7"
qtconsole==5.1.1; python_version >= "3.6" qtconsole==5.1.1; python_version >= "3.6"
qtpy==1.9.0; python_version >= "3.6" qtpy==1.9.0; python_version >= "3.6"
requests==2.25.1; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0") regex==2021.7.6; python_full_version >= "3.6.2"
requests==2.26.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.6.0")
safety==1.10.3; python_version >= "3.5"
semantic-version==2.8.5; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "2.7"
send2trash==1.7.1; python_version >= "3.6" send2trash==1.7.1; python_version >= "3.6"
shapely==1.7.1; python_version >= "3.6" shapely==1.7.1; python_version >= "3.6"
six==1.16.0; python_full_version >= "3.7.1" and python_version >= "2.7" and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6") and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7") and (python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.5") six==1.16.0; python_full_version >= "3.7.1" and python_version >= "3.6" and (python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0") and (python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.6") and (python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7") and (python_version >= "3.5" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.5")
terminado==0.10.1; python_version >= "3.6" terminado==0.10.1; python_version >= "3.6"
testpath==0.5.0; python_version >= "3.7" testpath==0.5.0; python_version >= "3.7"
toml==0.10.2; python_version >= "3.6" and python_full_version < "3.0.0" and python_version < "4.0" or python_full_version >= "3.5.0" and python_version >= "3.6" and python_version < "4.0"
tomli==1.1.0; python_version >= "3.6" and python_full_version >= "3.6.2"
tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.7" tornado==6.1; python_full_version >= "3.6.1" and python_version >= "3.7"
tox==3.24.0; (python_version >= "2.7" and python_full_version < "3.0.0") or (python_full_version >= "3.5.0")
traitlets==5.0.5; python_full_version >= "3.6.1" and python_version >= "3.7" traitlets==5.0.5; python_full_version >= "3.6.1" and python_version >= "3.7"
typed-ast==1.4.3; python_version < "3.8" and python_full_version >= "3.6.2" and python_version >= "3.6" and implementation_name == "cpython"
types-requests==2.25.0 types-requests==2.25.0
typing-extensions==3.10.0.0; python_version < "3.8" and python_version >= "3.6" typing-extensions==3.10.0.0; python_version < "3.8" and python_full_version >= "3.6.2" and python_version >= "3.6"
urllib3==1.26.6; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version < "4" and python_version >= "2.7" urllib3==1.26.6; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.6.0" and python_version < "4" and python_version >= "2.7"
virtualenv==20.6.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0"
wcwidth==0.2.5; python_full_version >= "3.6.1" and python_version >= "3.7" wcwidth==0.2.5; python_full_version >= "3.6.1" and python_version >= "3.7"
webencodings==0.5.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7" webencodings==0.5.1; python_version >= "3.7" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.7"
widgetsnbextension==3.5.1 widgetsnbextension==3.5.1
wrapt==1.12.1; python_version >= "3.6" and python_version < "4.0"
zipp==3.5.0; python_version < "3.8" and python_version >= "3.6" zipp==3.5.0; python_version < "3.8" and python_version >= "3.6"

View file

@ -1,6 +1,20 @@
[tox] [tox]
# required because we use pyproject.toml # required because we use pyproject.toml
isolated_build = true isolated_build = true
envlist = py37, py38, py39 envlist = py37, py38, py39, lint, checkdeps
# only checks python versions installed locally # only checks python versions installed locally
skip_missing_interpreters = true skip_missing_interpreters = true
[testenv:lint]
# lints python code in src and tests
basepython = python3.9
deps = -rrequirements.txt
commands = black etl application.py config.py utils.py
flake8 etl application.py config.py utils.py
# pylint etl application.py config.py utils.py
[testenv:checkdeps]
# checks the dependencies for security vulnerabilities and open source licenses
deps = -rrequirements.txt
commands = safety check
liccheck
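With lint and checkdeps added to the envlist, a plain tox run now exercises them alongside the py37-py39 test environments. They can also be run on their own: tox -e lint applies black and flake8 to etl, application.py, config.py and utils.py (the pylint command is still commented out), while tox -e checkdeps runs safety, which checks the installed dependencies for known vulnerabilities, and liccheck, which validates package licenses against the authorized list added to pyproject.toml above.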

View file

@ -2,10 +2,11 @@ from pathlib import Path
import os import os
import logging import logging
import shutil import shutil
import requests
import zipfile import zipfile
import urllib3 import urllib3
import requests
from config import settings from config import settings
@ -133,12 +134,13 @@ def unzip_file_from_url(
# cleanup temporary file # cleanup temporary file
os.remove(zip_file_path) os.remove(zip_file_path)
def data_folder_cleanup() -> None: def data_folder_cleanup() -> None:
"""Remove all files and directories from the local data/dataset path""" """Remove all files and directories from the local data/dataset path"""
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
logger.info(f"Initializing all dataset directoriees") logger.info("Initializing all dataset directoriees")
remove_all_from_dir(data_path / "dataset") remove_all_from_dir(data_path / "dataset")
@ -147,7 +149,7 @@ def score_folder_cleanup() -> None:
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
logger.info(f"Initializing all score data") logger.info("Initializing all score data")
remove_all_from_dir(data_path / "score" / "csv") remove_all_from_dir(data_path / "score" / "csv")
remove_all_from_dir(data_path / "score" / "geojson") remove_all_from_dir(data_path / "score" / "geojson")
@ -157,9 +159,10 @@ def temp_folder_cleanup() -> None:
data_path = settings.APP_ROOT / "data" data_path = settings.APP_ROOT / "data"
logger.info(f"Initializing all temp directoriees") logger.info("Initializing all temp directoriees")
remove_all_from_dir(data_path / "tmp") remove_all_from_dir(data_path / "tmp")
def get_excel_column_name(index: int) -> str: def get_excel_column_name(index: int) -> str:
"""Map a numeric index to the appropriate column in Excel. E.g., column #95 is "CR". """Map a numeric index to the appropriate column in Excel. E.g., column #95 is "CR".
Only works for the first 1000 columns. Only works for the first 1000 columns.
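The body of get_excel_column_name is not shown above; as a rough sketch of the underlying conversion, assuming a 0-based index (which is what makes index 95 map to "CR"), the name is the base-26 expansion over the letters A-Z. The names below are illustrative, not the project's own:

    import string

    def excel_column_name_sketch(index: int) -> str:
        """Hypothetical helper: 0-based column index -> Excel column name (95 -> "CR")."""
        n = index + 1  # shift to 1-based for the base-26 arithmetic
        name = ""
        while n > 0:
            n, remainder = divmod(n - 1, 26)
            name = string.ascii_uppercase[remainder] + name
        return name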