mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 02:11:16 -07:00
Changes to allow local runs
This commit is contained in:
parent
d298f7dedb
commit
ff9e7b9aa2
11 changed files with 3231 additions and 1867 deletions
1
.github/CODEOWNERS
vendored
1
.github/CODEOWNERS
vendored
|
@ -1 +0,0 @@
|
||||||
* @vim-usds @travis-newby @sampowers-usds @mattbowen-usds
|
|
6
.github/workflows/codeql-analysis.yml
vendored
6
.github/workflows/codeql-analysis.yml
vendored
|
@ -45,7 +45,7 @@ jobs:
|
||||||
|
|
||||||
# Initializes the CodeQL tools for scanning.
|
# Initializes the CodeQL tools for scanning.
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
uses: github/codeql-action/init@v1
|
uses: github/codeql-action/init@v2
|
||||||
with:
|
with:
|
||||||
languages: ${{ matrix.language }}
|
languages: ${{ matrix.language }}
|
||||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||||
|
@ -56,7 +56,7 @@ jobs:
|
||||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||||
# If this step fails, then you should remove it and run the build manually (see below)
|
# If this step fails, then you should remove it and run the build manually (see below)
|
||||||
- name: Autobuild
|
- name: Autobuild
|
||||||
uses: github/codeql-action/autobuild@v1
|
uses: github/codeql-action/autobuild@v2
|
||||||
|
|
||||||
# ℹ️ Command-line programs to run using the OS shell.
|
# ℹ️ Command-line programs to run using the OS shell.
|
||||||
# 📚 https://git.io/JvXDl
|
# 📚 https://git.io/JvXDl
|
||||||
|
@ -70,4 +70,4 @@ jobs:
|
||||||
# make release
|
# make release
|
||||||
|
|
||||||
- name: Perform CodeQL Analysis
|
- name: Perform CodeQL Analysis
|
||||||
uses: github/codeql-action/analyze@v1
|
uses: github/codeql-action/analyze@v2
|
||||||
|
|
2
.github/workflows/data-checks.yml
vendored
2
.github/workflows/data-checks.yml
vendored
|
@ -16,7 +16,7 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
# checks all of the versions allowed in pyproject.toml
|
# checks all of the versions allowed in pyproject.toml
|
||||||
python-version: [3.8, 3.9]
|
python-version: [3.10.15]
|
||||||
steps:
|
steps:
|
||||||
# installs Python
|
# installs Python
|
||||||
# one execution of the tests per version listed above
|
# one execution of the tests per version listed above
|
||||||
|
|
7
data/data-pipeline/data_pipeline/constants.py
Normal file
7
data/data-pipeline/data_pipeline/constants.py
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
import logging
|
||||||
|
|
||||||
|
LOG_LEVEL = logging.DEBUG
|
||||||
|
"""Log level for all loggers."""
|
||||||
|
|
||||||
|
NO_SSL_VERIFY = True
|
||||||
|
"""Set to True to skip SSL verification when downloading files. Useful for local development."""
|
|
@ -22,6 +22,7 @@ from pathlib import Path
|
||||||
from typing import List
|
from typing import List
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from data_pipeline.constants import NO_SSL_VERIFY
|
||||||
|
|
||||||
from data_pipeline.etl.downloader import Downloader
|
from data_pipeline.etl.downloader import Downloader
|
||||||
from data_pipeline.etl.sources.census_acs.etl_utils import (
|
from data_pipeline.etl.sources.census_acs.etl_utils import (
|
||||||
|
@ -65,7 +66,7 @@ class FileDataSource(DataSource):
|
||||||
Downloader.download_file_from_url(
|
Downloader.download_file_from_url(
|
||||||
file_url=self.source,
|
file_url=self.source,
|
||||||
download_file_name=self.destination,
|
download_file_name=self.destination,
|
||||||
verify=True,
|
verify=not NO_SSL_VERIFY,
|
||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
@ -85,7 +86,7 @@ class ZIPDataSource(DataSource):
|
||||||
Downloader.download_zip_file_from_url(
|
Downloader.download_zip_file_from_url(
|
||||||
file_url=self.source,
|
file_url=self.source,
|
||||||
unzipped_file_path=self.destination,
|
unzipped_file_path=self.destination,
|
||||||
verify=True,
|
verify=not NO_SSL_VERIFY,
|
||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
|
|
@ -6,7 +6,9 @@ import shutil
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from data_pipeline.config import settings
|
from data_pipeline.config import settings
|
||||||
|
from data_pipeline.utils import get_module_logger
|
||||||
|
|
||||||
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
class Downloader:
|
class Downloader:
|
||||||
"""A simple class to encapsulate the download capabilities of the application"""
|
"""A simple class to encapsulate the download capabilities of the application"""
|
||||||
|
@ -34,12 +36,13 @@ class Downloader:
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
download_file_name.parent.mkdir(parents=True, exist_ok=True)
|
download_file_name.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
logger.debug(f"Downloading {file_url}")
|
||||||
response = requests.get(
|
response = requests.get(
|
||||||
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
||||||
)
|
)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
file_contents = response.content
|
file_contents = response.content
|
||||||
|
logger.debug("Downloaded.")
|
||||||
else:
|
else:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
|
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
|
||||||
|
|
|
@ -215,6 +215,7 @@ class CensusETL(ExtractTransformLoad):
|
||||||
state_gdf = gpd.read_file(file_name)
|
state_gdf = gpd.read_file(file_name)
|
||||||
usa_df = usa_df.append(state_gdf)
|
usa_df = usa_df.append(state_gdf)
|
||||||
|
|
||||||
|
logger.debug("Converting to CRS")
|
||||||
usa_df = usa_df.to_crs(
|
usa_df = usa_df.to_crs(
|
||||||
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
|
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
|
||||||
)
|
)
|
||||||
|
|
|
@ -13,6 +13,7 @@ import requests
|
||||||
import urllib3
|
import urllib3
|
||||||
import yaml
|
import yaml
|
||||||
from data_pipeline.config import settings
|
from data_pipeline.config import settings
|
||||||
|
from data_pipeline.constants import LOG_LEVEL
|
||||||
from data_pipeline.content.schemas.download_schemas import CodebookConfig
|
from data_pipeline.content.schemas.download_schemas import CodebookConfig
|
||||||
from data_pipeline.content.schemas.download_schemas import CSVConfig
|
from data_pipeline.content.schemas.download_schemas import CSVConfig
|
||||||
from data_pipeline.content.schemas.download_schemas import ExcelConfig
|
from data_pipeline.content.schemas.download_schemas import ExcelConfig
|
||||||
|
@ -48,7 +49,7 @@ def get_module_logger(module_name: str) -> logging.Logger:
|
||||||
)
|
)
|
||||||
handler.setFormatter(formatter)
|
handler.setFormatter(formatter)
|
||||||
logger.addHandler(handler)
|
logger.addHandler(handler)
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(LOG_LEVEL)
|
||||||
logger.propagate = False # don't send log messages to the parent logger (to avoid duplicate log messages)
|
logger.propagate = False # don't send log messages to the parent logger (to avoid duplicate log messages)
|
||||||
return logger
|
return logger
|
||||||
|
|
||||||
|
|
5049
data/data-pipeline/poetry.lock
generated
5049
data/data-pipeline/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -17,7 +17,7 @@ packages = [
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
CensusData = "^1.13"
|
CensusData = "^1.13"
|
||||||
certifi = "^2022.12.07" # explicit callout due to https://pyup.io/v/52365/f17/
|
certifi = ">= 2024.07.04" # Due to https://data.safetycli.com/v/72083/f17
|
||||||
click = "8.0.4" # pinning for now per https://github.com/psf/black/issues/2964
|
click = "8.0.4" # pinning for now per https://github.com/psf/black/issues/2964
|
||||||
dynaconf = "^3.1.4"
|
dynaconf = "^3.1.4"
|
||||||
geopandas = "^0.11.0"
|
geopandas = "^0.11.0"
|
||||||
|
@ -29,19 +29,20 @@ marshmallow-dataclass = "^8.5.3"
|
||||||
marshmallow-enum = "^1.5.1"
|
marshmallow-enum = "^1.5.1"
|
||||||
matplotlib = "^3.4.2"
|
matplotlib = "^3.4.2"
|
||||||
numpy = "^1.22.1"
|
numpy = "^1.22.1"
|
||||||
pandas = "^1.2.5"
|
pandas = "~1.4.3"
|
||||||
pylint = "^2.11.1"
|
pylint = "^2.11.1"
|
||||||
pillow = "9.3.0"
|
pillow = "9.3.0" # Newer versions break tile generation
|
||||||
python = "^3.8"
|
python = "^3.10"
|
||||||
pypandoc = "^1.6.3"
|
pypandoc = "^1.6.3"
|
||||||
PyYAML = "^6.0"
|
PyYAML = "^6.0"
|
||||||
requests = "^2.25.1"
|
requests = "^2.25.1"
|
||||||
tqdm = "4.62.0"
|
tqdm = "^4.66.3"
|
||||||
types-requests = "^2.25.0"
|
types-requests = "^2.25.0"
|
||||||
us = "^2.0.2"
|
us = "^2.0.2"
|
||||||
xlsxwriter = "^2.0.0"
|
xlsxwriter = "^2.0.0"
|
||||||
pydantic = "^1.9.0"
|
pydantic = "^1.9.0"
|
||||||
Rtree = "^1.0.0"
|
Rtree = "^1.0.0"
|
||||||
|
fiona = "~1.8.21"
|
||||||
|
|
||||||
[tool.poetry.dev-dependencies]
|
[tool.poetry.dev-dependencies]
|
||||||
black = {version = "^21.6b0", allow-prereleases = true}
|
black = {version = "^21.6b0", allow-prereleases = true}
|
||||||
|
@ -59,7 +60,7 @@ pandas-vet = "^0.2.2"
|
||||||
pytest-snapshot = "^0.8.1"
|
pytest-snapshot = "^0.8.1"
|
||||||
seaborn = "^0.11.2"
|
seaborn = "^0.11.2"
|
||||||
papermill = "^2.3.4"
|
papermill = "^2.3.4"
|
||||||
jupyterlab = "3.4.4"
|
jupyterlab = "^3.6.7"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[tox]
|
[tox]
|
||||||
# required because we use pyproject.toml
|
# required because we use pyproject.toml
|
||||||
isolated_build = true
|
isolated_build = true
|
||||||
envlist = py38, py39, lint, checkdeps, pytest
|
envlist = py310, lint, checkdeps, pytest
|
||||||
# only checks python versions installed locally
|
# only checks python versions installed locally
|
||||||
skip_missing_interpreters = true
|
skip_missing_interpreters = true
|
||||||
|
|
||||||
|
@ -16,7 +16,9 @@ commands = black data_pipeline
|
||||||
# checks the dependencies for security vulnerabilities and open source licenses
|
# checks the dependencies for security vulnerabilities and open source licenses
|
||||||
allowlist_externals = bash
|
allowlist_externals = bash
|
||||||
commands = pip install -U wheel
|
commands = pip install -U wheel
|
||||||
safety check --ignore 51457 --ignore 44715 # known issue: https://github.com/pyupio/safety/issues/364
|
# known issue: https://github.com/pyupio/safety/issues/364
|
||||||
|
# jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
|
||||||
|
safety check --ignore 51457 --ignore 44715 --ignore 70612
|
||||||
bash scripts/run-liccheck.sh
|
bash scripts/run-liccheck.sh
|
||||||
|
|
||||||
[testenv:pytest]
|
[testenv:pytest]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue