Fix linter and dependency issues

This commit is contained in:
Carlos Felix 2024-11-19 15:01:27 -05:00 committed by Carlos Felix
parent 0da80c90d8
commit 3e087a37a4
7 changed files with 619 additions and 730 deletions

View file

@ -58,6 +58,7 @@ data_source_option = click.option(
help=dataset_cli_help, help=dataset_cli_help,
) )
@click.group() @click.group()
def cli(): def cli():
"""Defines a click group for the commands below""" """Defines a click group for the commands below"""
@ -415,6 +416,7 @@ def clear_data_source_cache(dataset: str):
log_goodbye() log_goodbye()
@cli.command( @cli.command(
help="Generate scoring and tiles", help="Generate scoring and tiles",
) )
@ -441,6 +443,7 @@ def full_run(ctx, use_cache):
ctx.invoke(etl_run, dataset=None, use_cache=use_cache) ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
ctx.invoke(full_post_etl) ctx.invoke(full_post_etl)
def log_title(title: str, subtitle: str = None): def log_title(title: str, subtitle: str = None):
"""Logs a title in our fancy title format""" """Logs a title in our fancy title format"""
logger.info("-" * LOG_LINE_WIDTH) logger.info("-" * LOG_LINE_WIDTH)

View file

@ -10,6 +10,7 @@ from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__) logger = get_module_logger(__name__)
class Downloader: class Downloader:
"""A simple class to encapsulate the download capabilities of the application""" """A simple class to encapsulate the download capabilities of the application"""
@ -44,6 +45,7 @@ class Downloader:
file_contents = response.content file_contents = response.content
logger.debug("Downloaded.") logger.debug("Downloaded.")
else: else:
# pylint: disable-next=broad-exception-raised
raise Exception( raise Exception(
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}" f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
) )

View file

@ -39,6 +39,7 @@ def _choose_best_mask(
for mask in masks_in_priority_order: for mask in masks_in_priority_order:
if any(geo_df[mask][column_to_impute].notna()): if any(geo_df[mask][column_to_impute].notna()):
return mask return mask
# pylint: disable-next=broad-exception-raised
raise Exception("No mask found") raise Exception("No mask found")

View file

@ -1014,12 +1014,12 @@ class ScoreNarwhal(Score):
) )
def _mark_territory_dacs(self) -> None: def _mark_territory_dacs(self) -> None:
"""Territory tracts that are flagged as low income are Score N communities. """Territory tracts that are flagged as low income are Score N communities."""
"""
self.df[field_names.SCORE_N_COMMUNITIES] = np.where( self.df[field_names.SCORE_N_COMMUNITIES] = np.where(
self.df[field_names.GEOID_TRACT_FIELD] self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
.str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)) & tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED], )
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED],
True, True,
self.df[field_names.SCORE_N_COMMUNITIES], self.df[field_names.SCORE_N_COMMUNITIES],
) )
@ -1049,9 +1049,14 @@ class ScoreNarwhal(Score):
combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_200_FPL_FIELD_2010, combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_200_FPL_FIELD_2010,
threshold_cutoff_for_island_areas=self.LOW_INCOME_THRESHOLD, threshold_cutoff_for_island_areas=self.LOW_INCOME_THRESHOLD,
) )
self.df.loc[self.df[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)), self.df.loc[
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = ( self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
self.df[island_areas_poverty_200_criteria_field_name] >= self.LOW_INCOME_THRESHOLD tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
),
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED,
] = (
self.df[island_areas_poverty_200_criteria_field_name]
>= self.LOW_INCOME_THRESHOLD
) )
def _get_percent_of_tract_that_is_dac(self) -> float: def _get_percent_of_tract_that_is_dac(self) -> float:

View file

@ -2,7 +2,6 @@
import pandas as pd import pandas as pd
import pytest import pytest
from data_pipeline.config import settings from data_pipeline.config import settings
from data_pipeline.etl.score import constants
from data_pipeline.etl.score.etl_score import ScoreETL from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.score import field_names from data_pipeline.score import field_names
from data_pipeline.score.score_narwhal import ScoreNarwhal from data_pipeline.score.score_narwhal import ScoreNarwhal
@ -13,9 +12,11 @@ logger = get_module_logger(__name__)
TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data" TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"
@pytest.fixture @pytest.fixture
def toy_score_df(scope="module"): def toy_score_df(scope="module"):
return pd.read_csv(TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv", return pd.read_csv(
TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
dtype={field_names.GEOID_TRACT_FIELD: str}, dtype={field_names.GEOID_TRACT_FIELD: str},
) )
@ -84,7 +85,8 @@ def test_drop_all_tracts(toy_score_df):
def test_mark_territory_dacs(): def test_mark_territory_dacs():
test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_territory_dacs.csv", test_data = pd.read_csv(
TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
dtype={field_names.GEOID_TRACT_FIELD: str}, dtype={field_names.GEOID_TRACT_FIELD: str},
) )
# Sanity check on the input data # Sanity check on the input data
@ -92,18 +94,22 @@ def test_mark_territory_dacs():
scorer = ScoreNarwhal(test_data) scorer = ScoreNarwhal(test_data)
scorer._mark_territory_dacs() scorer._mark_territory_dacs()
territory_filter = test_data[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES))
# Check territories are set to true # Check territories are set to true
expected_new_dacs_filter = ( expected_new_dacs_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
test_data[field_names.GEOID_TRACT_FIELD].isin(['60050951100', '66010951100', '69110001101', '78010990000']) ["60050951100", "66010951100", "69110001101", "78010990000"]
) )
assert test_data.loc[expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all() assert test_data.loc[
expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
].all()
# Non-territories are still false # Non-territories are still false
assert not test_data.loc[~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all() assert not test_data.loc[
~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
].all()
def test_mark_poverty_flag(): def test_mark_poverty_flag():
test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_poverty_flag.csv", test_data = pd.read_csv(
TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
dtype={field_names.GEOID_TRACT_FIELD: str}, dtype={field_names.GEOID_TRACT_FIELD: str},
) )
# Sanity check on the input data # Sanity check on the input data
@ -111,14 +117,14 @@ def test_mark_poverty_flag():
scorer = ScoreNarwhal(test_data) scorer = ScoreNarwhal(test_data)
scorer._mark_poverty_flag() scorer._mark_poverty_flag()
expected_low_income_filter = ( expected_low_income_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
test_data[field_names.GEOID_TRACT_FIELD].isin(['36087011302', '66010951100', '78010990000']) ["36087011302", "66010951100", "78010990000"]
) )
# Three tracts are set to true # Three tracts are set to true
assert ( assert test_data[expected_low_income_filter][
test_data[expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all() field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
) ].all()
# Everything else is false # Everything else is false
assert ( assert not test_data[~expected_low_income_filter][
not test_data[~expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all() field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
) ].all()

File diff suppressed because it is too large. Load diff

View file

@ -27,11 +27,9 @@ jupyter = "^1.0.0"
jupyter-contrib-nbextensions = "^0.5.1" jupyter-contrib-nbextensions = "^0.5.1"
marshmallow-dataclass = "^8.5.3" marshmallow-dataclass = "^8.5.3"
marshmallow-enum = "^1.5.1" marshmallow-enum = "^1.5.1"
matplotlib = "^3.4.2"
numpy = "^1.22.1" numpy = "^1.22.1"
pandas = "~1.4.3" pandas = "~1.4.3"
pylint = "^2.11.1" pylint = "^2.11.1"
pillow = "9.3.0" # Newer versions break tile generation
python = "^3.10" python = "^3.10"
pypandoc = "^1.6.3" pypandoc = "^1.6.3"
PyYAML = "^6.0" PyYAML = "^6.0"
@ -44,8 +42,8 @@ pydantic = "^1.9.0"
Rtree = "^1.0.0" Rtree = "^1.0.0"
fiona = "~1.8.21" fiona = "~1.8.21"
[tool.poetry.dev-dependencies] [tool.poetry.group.dev.dependencies]
black = {version = "^21.6b0", allow-prereleases = true} black = "^21"
flake8 = "^3.9.2" flake8 = "^3.9.2"
liccheck = "^0.6.2" liccheck = "^0.6.2"
mypy = "^0.910" mypy = "^0.910"
@ -126,6 +124,7 @@ authorized_licenses = [
"apache", "apache",
"apache 2.0", "apache 2.0",
"apache license 2.0", "apache license 2.0",
"apache license, version 2.0",
"apache software license", "apache software license",
"apache software", "apache software",
"gnu lgpl", "gnu lgpl",
@ -145,3 +144,5 @@ authorized_licenses = [
"gpl v3", "gpl v3",
"historical permission notice and disclaimer (hpnd)", "historical permission notice and disclaimer (hpnd)",
] ]
[tool.liccheck.authorized_packages]
ypy-websocket="0.8.4"