mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-21 09:11:26 -08:00
Changes to allow local runs
This commit is contained in:
parent
d298f7dedb
commit
ff9e7b9aa2
11 changed files with 3231 additions and 1867 deletions
1
.github/CODEOWNERS
vendored
1
.github/CODEOWNERS
vendored
|
@ -1 +0,0 @@
|
|||
* @vim-usds @travis-newby @sampowers-usds @mattbowen-usds
|
6
.github/workflows/codeql-analysis.yml
vendored
6
.github/workflows/codeql-analysis.yml
vendored
|
@ -45,7 +45,7 @@ jobs:
|
|||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v1
|
||||
uses: github/codeql-action/init@v2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
|
@ -56,7 +56,7 @@ jobs:
|
|||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v1
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 https://git.io/JvXDl
|
||||
|
@ -70,4 +70,4 @@ jobs:
|
|||
# make release
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v1
|
||||
uses: github/codeql-action/analyze@v2
|
||||
|
|
2
.github/workflows/data-checks.yml
vendored
2
.github/workflows/data-checks.yml
vendored
|
@ -16,7 +16,7 @@ jobs:
|
|||
strategy:
|
||||
matrix:
|
||||
# checks the Python version(s) listed below; keep this list in sync with the range allowed in pyproject.toml
|
||||
python-version: [3.8, 3.9]
|
||||
python-version: [3.10.15]
|
||||
steps:
|
||||
# installs Python
|
||||
# one execution of the tests per version listed above
|
||||
|
|
7
data/data-pipeline/data_pipeline/constants.py
Normal file
7
data/data-pipeline/data_pipeline/constants.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
import logging
|
||||
|
||||
LOG_LEVEL = logging.DEBUG
|
||||
"""Log level for all loggers."""
|
||||
|
||||
NO_SSL_VERIFY = True
|
||||
"""Set to True to skip SSL certificate verification when downloading files. Intended for local development only."""
|
|
@ -22,6 +22,7 @@ from pathlib import Path
|
|||
from typing import List
|
||||
from dataclasses import dataclass
|
||||
from abc import ABC, abstractmethod
|
||||
from data_pipeline.constants import NO_SSL_VERIFY
|
||||
|
||||
from data_pipeline.etl.downloader import Downloader
|
||||
from data_pipeline.etl.sources.census_acs.etl_utils import (
|
||||
|
@ -65,7 +66,7 @@ class FileDataSource(DataSource):
|
|||
Downloader.download_file_from_url(
|
||||
file_url=self.source,
|
||||
download_file_name=self.destination,
|
||||
verify=True,
|
||||
verify=not NO_SSL_VERIFY,
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
|
@ -85,7 +86,7 @@ class ZIPDataSource(DataSource):
|
|||
Downloader.download_zip_file_from_url(
|
||||
file_url=self.source,
|
||||
unzipped_file_path=self.destination,
|
||||
verify=True,
|
||||
verify=not NO_SSL_VERIFY,
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
|
|
|
@ -6,7 +6,9 @@ import shutil
|
|||
|
||||
from pathlib import Path
|
||||
from data_pipeline.config import settings
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
class Downloader:
|
||||
"""A simple class to encapsulate the download capabilities of the application"""
|
||||
|
@ -34,12 +36,13 @@ class Downloader:
|
|||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
download_file_name.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logger.debug(f"Downloading {file_url}")
|
||||
response = requests.get(
|
||||
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
||||
)
|
||||
if response.status_code == 200:
|
||||
file_contents = response.content
|
||||
logger.debug("Downloaded.")
|
||||
else:
|
||||
raise Exception(
|
||||
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
|
||||
|
|
|
@ -215,6 +215,7 @@ class CensusETL(ExtractTransformLoad):
|
|||
state_gdf = gpd.read_file(file_name)
|
||||
usa_df = usa_df.append(state_gdf)
|
||||
|
||||
logger.debug("Converting to CRS")
|
||||
usa_df = usa_df.to_crs(
|
||||
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
|
||||
)
|
||||
|
|
|
@ -13,6 +13,7 @@ import requests
|
|||
import urllib3
|
||||
import yaml
|
||||
from data_pipeline.config import settings
|
||||
from data_pipeline.constants import LOG_LEVEL
|
||||
from data_pipeline.content.schemas.download_schemas import CodebookConfig
|
||||
from data_pipeline.content.schemas.download_schemas import CSVConfig
|
||||
from data_pipeline.content.schemas.download_schemas import ExcelConfig
|
||||
|
@ -48,7 +49,7 @@ def get_module_logger(module_name: str) -> logging.Logger:
|
|||
)
|
||||
handler.setFormatter(formatter)
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(logging.INFO)
|
||||
logger.setLevel(LOG_LEVEL)
|
||||
logger.propagate = False # don't send log messages to the parent logger (to avoid duplicate log messages)
|
||||
return logger
|
||||
|
||||
|
|
5049
data/data-pipeline/poetry.lock
generated
5049
data/data-pipeline/poetry.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -17,7 +17,7 @@ packages = [
|
|||
|
||||
[tool.poetry.dependencies]
|
||||
CensusData = "^1.13"
|
||||
certifi = "^2022.12.07" # explicit callout due to https://pyup.io/v/52365/f17/
|
||||
certifi = ">= 2024.07.04" # Due to https://data.safetycli.com/v/72083/f17
|
||||
click = "8.0.4" # pinning for now per https://github.com/psf/black/issues/2964
|
||||
dynaconf = "^3.1.4"
|
||||
geopandas = "^0.11.0"
|
||||
|
@ -29,19 +29,20 @@ marshmallow-dataclass = "^8.5.3"
|
|||
marshmallow-enum = "^1.5.1"
|
||||
matplotlib = "^3.4.2"
|
||||
numpy = "^1.22.1"
|
||||
pandas = "^1.2.5"
|
||||
pandas = "~1.4.3"
|
||||
pylint = "^2.11.1"
|
||||
pillow = "9.3.0"
|
||||
python = "^3.8"
|
||||
pillow = "9.3.0" # Newer versions break tile generation
|
||||
python = "^3.10"
|
||||
pypandoc = "^1.6.3"
|
||||
PyYAML = "^6.0"
|
||||
requests = "^2.25.1"
|
||||
tqdm = "4.62.0"
|
||||
tqdm = "^4.66.3"
|
||||
types-requests = "^2.25.0"
|
||||
us = "^2.0.2"
|
||||
xlsxwriter = "^2.0.0"
|
||||
pydantic = "^1.9.0"
|
||||
Rtree = "^1.0.0"
|
||||
fiona = "~1.8.21"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
black = {version = "^21.6b0", allow-prereleases = true}
|
||||
|
@ -59,7 +60,7 @@ pandas-vet = "^0.2.2"
|
|||
pytest-snapshot = "^0.8.1"
|
||||
seaborn = "^0.11.2"
|
||||
papermill = "^2.3.4"
|
||||
jupyterlab = "3.4.4"
|
||||
jupyterlab = "^3.6.7"
|
||||
|
||||
[build-system]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
[tox]
|
||||
# required because we use pyproject.toml
|
||||
isolated_build = true
|
||||
envlist = py38, py39, lint, checkdeps, pytest
|
||||
envlist = py310, lint, checkdeps, pytest
|
||||
# only checks python versions installed locally
|
||||
skip_missing_interpreters = true
|
||||
|
||||
|
@ -16,7 +16,9 @@ commands = black data_pipeline
|
|||
# checks the dependencies for security vulnerabilities and open source licenses
|
||||
allowlist_externals = bash
|
||||
commands = pip install -U wheel
|
||||
safety check --ignore 51457 --ignore 44715 # known issue: https://github.com/pyupio/safety/issues/364
|
||||
# known issue: https://github.com/pyupio/safety/issues/364
|
||||
# jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
|
||||
safety check --ignore 51457 --ignore 44715 --ignore 70612
|
||||
bash scripts/run-liccheck.sh
|
||||
|
||||
[testenv:pytest]
|
||||
|
|
Loading…
Add table
Reference in a new issue