mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00
Fix linter and dependency issues
This commit is contained in:
parent
0da80c90d8
commit
3e087a37a4
7 changed files with 619 additions and 730 deletions
|
@ -58,6 +58,7 @@ data_source_option = click.option(
|
||||||
help=dataset_cli_help,
|
help=dataset_cli_help,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
def cli():
|
def cli():
|
||||||
"""Defines a click group for the commands below"""
|
"""Defines a click group for the commands below"""
|
||||||
|
@ -415,6 +416,7 @@ def clear_data_source_cache(dataset: str):
|
||||||
|
|
||||||
log_goodbye()
|
log_goodbye()
|
||||||
|
|
||||||
|
|
||||||
@cli.command(
|
@cli.command(
|
||||||
help="Generate scoring and tiles",
|
help="Generate scoring and tiles",
|
||||||
)
|
)
|
||||||
|
@ -441,6 +443,7 @@ def full_run(ctx, use_cache):
|
||||||
ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
|
ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
|
||||||
ctx.invoke(full_post_etl)
|
ctx.invoke(full_post_etl)
|
||||||
|
|
||||||
|
|
||||||
def log_title(title: str, subtitle: str = None):
|
def log_title(title: str, subtitle: str = None):
|
||||||
"""Logs a title in our fancy title format"""
|
"""Logs a title in our fancy title format"""
|
||||||
logger.info("-" * LOG_LINE_WIDTH)
|
logger.info("-" * LOG_LINE_WIDTH)
|
||||||
|
|
|
@ -10,6 +10,7 @@ from data_pipeline.utils import get_module_logger
|
||||||
|
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class Downloader:
|
class Downloader:
|
||||||
"""A simple class to encapsulate the download capabilities of the application"""
|
"""A simple class to encapsulate the download capabilities of the application"""
|
||||||
|
|
||||||
|
@ -44,6 +45,7 @@ class Downloader:
|
||||||
file_contents = response.content
|
file_contents = response.content
|
||||||
logger.debug("Downloaded.")
|
logger.debug("Downloaded.")
|
||||||
else:
|
else:
|
||||||
|
# pylint: disable-next=broad-exception-raised
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
|
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
|
||||||
)
|
)
|
||||||
|
|
|
@ -39,6 +39,7 @@ def _choose_best_mask(
|
||||||
for mask in masks_in_priority_order:
|
for mask in masks_in_priority_order:
|
||||||
if any(geo_df[mask][column_to_impute].notna()):
|
if any(geo_df[mask][column_to_impute].notna()):
|
||||||
return mask
|
return mask
|
||||||
|
# pylint: disable-next=broad-exception-raised
|
||||||
raise Exception("No mask found")
|
raise Exception("No mask found")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1014,12 +1014,12 @@ class ScoreNarwhal(Score):
|
||||||
)
|
)
|
||||||
|
|
||||||
def _mark_territory_dacs(self) -> None:
|
def _mark_territory_dacs(self) -> None:
|
||||||
"""Territory tracts that are flagged as low income are Score N communities.
|
"""Territory tracts that are flagged as low income are Score N communities."""
|
||||||
"""
|
|
||||||
self.df[field_names.SCORE_N_COMMUNITIES] = np.where(
|
self.df[field_names.SCORE_N_COMMUNITIES] = np.where(
|
||||||
self.df[field_names.GEOID_TRACT_FIELD]
|
self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
|
||||||
.str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)) &
|
tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
|
||||||
self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED],
|
)
|
||||||
|
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED],
|
||||||
True,
|
True,
|
||||||
self.df[field_names.SCORE_N_COMMUNITIES],
|
self.df[field_names.SCORE_N_COMMUNITIES],
|
||||||
)
|
)
|
||||||
|
@ -1049,9 +1049,14 @@ class ScoreNarwhal(Score):
|
||||||
combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_200_FPL_FIELD_2010,
|
combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_200_FPL_FIELD_2010,
|
||||||
threshold_cutoff_for_island_areas=self.LOW_INCOME_THRESHOLD,
|
threshold_cutoff_for_island_areas=self.LOW_INCOME_THRESHOLD,
|
||||||
)
|
)
|
||||||
self.df.loc[self.df[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)),
|
self.df.loc[
|
||||||
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (
|
self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
|
||||||
self.df[island_areas_poverty_200_criteria_field_name] >= self.LOW_INCOME_THRESHOLD
|
tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
|
||||||
|
),
|
||||||
|
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED,
|
||||||
|
] = (
|
||||||
|
self.df[island_areas_poverty_200_criteria_field_name]
|
||||||
|
>= self.LOW_INCOME_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_percent_of_tract_that_is_dac(self) -> float:
|
def _get_percent_of_tract_that_is_dac(self) -> float:
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
from data_pipeline.config import settings
|
from data_pipeline.config import settings
|
||||||
from data_pipeline.etl.score import constants
|
|
||||||
from data_pipeline.etl.score.etl_score import ScoreETL
|
from data_pipeline.etl.score.etl_score import ScoreETL
|
||||||
from data_pipeline.score import field_names
|
from data_pipeline.score import field_names
|
||||||
from data_pipeline.score.score_narwhal import ScoreNarwhal
|
from data_pipeline.score.score_narwhal import ScoreNarwhal
|
||||||
|
@ -13,9 +12,11 @@ logger = get_module_logger(__name__)
|
||||||
|
|
||||||
TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"
|
TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def toy_score_df(scope="module"):
|
def toy_score_df(scope="module"):
|
||||||
return pd.read_csv(TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
|
return pd.read_csv(
|
||||||
|
TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
|
||||||
dtype={field_names.GEOID_TRACT_FIELD: str},
|
dtype={field_names.GEOID_TRACT_FIELD: str},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -84,7 +85,8 @@ def test_drop_all_tracts(toy_score_df):
|
||||||
|
|
||||||
|
|
||||||
def test_mark_territory_dacs():
|
def test_mark_territory_dacs():
|
||||||
test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
|
test_data = pd.read_csv(
|
||||||
|
TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
|
||||||
dtype={field_names.GEOID_TRACT_FIELD: str},
|
dtype={field_names.GEOID_TRACT_FIELD: str},
|
||||||
)
|
)
|
||||||
# Sanity check on the input data
|
# Sanity check on the input data
|
||||||
|
@ -92,18 +94,22 @@ def test_mark_territory_dacs():
|
||||||
|
|
||||||
scorer = ScoreNarwhal(test_data)
|
scorer = ScoreNarwhal(test_data)
|
||||||
scorer._mark_territory_dacs()
|
scorer._mark_territory_dacs()
|
||||||
territory_filter = test_data[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES))
|
|
||||||
# Check territories are set to true
|
# Check territories are set to true
|
||||||
expected_new_dacs_filter = (
|
expected_new_dacs_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
|
||||||
test_data[field_names.GEOID_TRACT_FIELD].isin(['60050951100', '66010951100', '69110001101', '78010990000'])
|
["60050951100", "66010951100", "69110001101", "78010990000"]
|
||||||
)
|
)
|
||||||
assert test_data.loc[expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
|
assert test_data.loc[
|
||||||
|
expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
|
||||||
|
].all()
|
||||||
# Non-territories are still false
|
# Non-territories are still false
|
||||||
assert not test_data.loc[~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
|
assert not test_data.loc[
|
||||||
|
~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
|
||||||
|
].all()
|
||||||
|
|
||||||
|
|
||||||
def test_mark_poverty_flag():
|
def test_mark_poverty_flag():
|
||||||
test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
|
test_data = pd.read_csv(
|
||||||
|
TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
|
||||||
dtype={field_names.GEOID_TRACT_FIELD: str},
|
dtype={field_names.GEOID_TRACT_FIELD: str},
|
||||||
)
|
)
|
||||||
# Sanity check on the input data
|
# Sanity check on the input data
|
||||||
|
@ -111,14 +117,14 @@ def test_mark_poverty_flag():
|
||||||
|
|
||||||
scorer = ScoreNarwhal(test_data)
|
scorer = ScoreNarwhal(test_data)
|
||||||
scorer._mark_poverty_flag()
|
scorer._mark_poverty_flag()
|
||||||
expected_low_income_filter = (
|
expected_low_income_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
|
||||||
test_data[field_names.GEOID_TRACT_FIELD].isin(['36087011302', '66010951100', '78010990000'])
|
["36087011302", "66010951100", "78010990000"]
|
||||||
)
|
)
|
||||||
# Three tracts are set to true
|
# Three tracts are set to true
|
||||||
assert (
|
assert test_data[expected_low_income_filter][
|
||||||
test_data[expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
|
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
|
||||||
)
|
].all()
|
||||||
# Everything else is false
|
# Everything else is false
|
||||||
assert (
|
assert not test_data[~expected_low_income_filter][
|
||||||
not test_data[~expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
|
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
|
||||||
)
|
].all()
|
||||||
|
|
1273
data/data-pipeline/poetry.lock
generated
1273
data/data-pipeline/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -27,11 +27,9 @@ jupyter = "^1.0.0"
|
||||||
jupyter-contrib-nbextensions = "^0.5.1"
|
jupyter-contrib-nbextensions = "^0.5.1"
|
||||||
marshmallow-dataclass = "^8.5.3"
|
marshmallow-dataclass = "^8.5.3"
|
||||||
marshmallow-enum = "^1.5.1"
|
marshmallow-enum = "^1.5.1"
|
||||||
matplotlib = "^3.4.2"
|
|
||||||
numpy = "^1.22.1"
|
numpy = "^1.22.1"
|
||||||
pandas = "~1.4.3"
|
pandas = "~1.4.3"
|
||||||
pylint = "^2.11.1"
|
pylint = "^2.11.1"
|
||||||
pillow = "9.3.0" # Newer versions break tile generation
|
|
||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
pypandoc = "^1.6.3"
|
pypandoc = "^1.6.3"
|
||||||
PyYAML = "^6.0"
|
PyYAML = "^6.0"
|
||||||
|
@ -44,8 +42,8 @@ pydantic = "^1.9.0"
|
||||||
Rtree = "^1.0.0"
|
Rtree = "^1.0.0"
|
||||||
fiona = "~1.8.21"
|
fiona = "~1.8.21"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
black = {version = "^21.6b0", allow-prereleases = true}
|
black = "^21"
|
||||||
flake8 = "^3.9.2"
|
flake8 = "^3.9.2"
|
||||||
liccheck = "^0.6.2"
|
liccheck = "^0.6.2"
|
||||||
mypy = "^0.910"
|
mypy = "^0.910"
|
||||||
|
@ -126,6 +124,7 @@ authorized_licenses = [
|
||||||
"apache",
|
"apache",
|
||||||
"apache 2.0",
|
"apache 2.0",
|
||||||
"apache license 2.0",
|
"apache license 2.0",
|
||||||
|
"apache license, version 2.0",
|
||||||
"apache software license",
|
"apache software license",
|
||||||
"apache software",
|
"apache software",
|
||||||
"gnu lgpl",
|
"gnu lgpl",
|
||||||
|
@ -145,3 +144,5 @@ authorized_licenses = [
|
||||||
"gpl v3",
|
"gpl v3",
|
||||||
"historical permission notice and disclaimer (hpnd)",
|
"historical permission notice and disclaimer (hpnd)",
|
||||||
]
|
]
|
||||||
|
[tool.liccheck.authorized_packages]
|
||||||
|
ypy-websocket="0.8.4"
|
||||||
|
|
Loading…
Add table
Reference in a new issue