Changes to allow local runs

This commit is contained in:
Carlos Felix 2024-11-05 11:31:54 -05:00 committed by Carlos Felix
parent d298f7dedb
commit ff9e7b9aa2
11 changed files with 3231 additions and 1867 deletions

1
.github/CODEOWNERS vendored
View file

@ -1 +0,0 @@
* @vim-usds @travis-newby @sampowers-usds @mattbowen-usds

View file

@ -45,7 +45,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@ -56,7 +56,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v1
uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@ -70,4 +70,4 @@ jobs:
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1
uses: github/codeql-action/analyze@v2

View file

@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
# runs the tests on the pinned Python version below (pyproject.toml allows ^3.10)
python-version: [3.8, 3.9]
python-version: [3.10.15]
steps:
# installs Python
# one execution of the tests per version listed above

View file

@ -0,0 +1,7 @@
import logging
import os

# Log level for all loggers.
LOG_LEVEL = logging.DEBUG

# Set to true to skip SSL verification when downloading files. Useful for
# local development (e.g. behind a TLS-intercepting proxy).
#
# Verification stays ON unless the NO_SSL_VERIFY environment variable is set
# to a truthy value ("1", "true", "yes" or "on", case-insensitive), so that an
# insecure setting cannot silently ship as the default for every environment.
NO_SSL_VERIFY = os.environ.get("NO_SSL_VERIFY", "").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}

View file

@ -22,6 +22,7 @@ from pathlib import Path
from typing import List
from dataclasses import dataclass
from abc import ABC, abstractmethod
from data_pipeline.constants import NO_SSL_VERIFY
from data_pipeline.etl.downloader import Downloader
from data_pipeline.etl.sources.census_acs.etl_utils import (
@ -65,7 +66,7 @@ class FileDataSource(DataSource):
Downloader.download_file_from_url(
file_url=self.source,
download_file_name=self.destination,
verify=True,
verify=not NO_SSL_VERIFY,
)
def __str__(self):
@ -85,7 +86,7 @@ class ZIPDataSource(DataSource):
Downloader.download_zip_file_from_url(
file_url=self.source,
unzipped_file_path=self.destination,
verify=True,
verify=not NO_SSL_VERIFY,
)
def __str__(self):

View file

@ -6,7 +6,9 @@ import shutil
from pathlib import Path
from data_pipeline.config import settings
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)
class Downloader:
"""A simple class to encapsulate the download capabilities of the application"""
@ -34,12 +36,13 @@ class Downloader:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
download_file_name.parent.mkdir(parents=True, exist_ok=True)
logger.debug(f"Downloading {file_url}")
response = requests.get(
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
)
if response.status_code == 200:
file_contents = response.content
logger.debug("Downloaded.")
else:
raise Exception(
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"

View file

@ -215,6 +215,7 @@ class CensusETL(ExtractTransformLoad):
state_gdf = gpd.read_file(file_name)
usa_df = usa_df.append(state_gdf)
logger.debug("Converting to CRS")
usa_df = usa_df.to_crs(
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
)

View file

@ -13,6 +13,7 @@ import requests
import urllib3
import yaml
from data_pipeline.config import settings
from data_pipeline.constants import LOG_LEVEL
from data_pipeline.content.schemas.download_schemas import CodebookConfig
from data_pipeline.content.schemas.download_schemas import CSVConfig
from data_pipeline.content.schemas.download_schemas import ExcelConfig
@ -48,7 +49,7 @@ def get_module_logger(module_name: str) -> logging.Logger:
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
logger.setLevel(LOG_LEVEL)
logger.propagate = False # don't send log messages to the parent logger (to avoid duplicate log messages)
return logger

File diff suppressed because it is too large Load diff

View file

@ -17,7 +17,7 @@ packages = [
[tool.poetry.dependencies]
CensusData = "^1.13"
certifi = "^2022.12.07" # explicit callout due to https://pyup.io/v/52365/f17/
certifi = ">= 2024.07.04" # Due to https://data.safetycli.com/v/72083/f17
click = "8.0.4" # pinning for now per https://github.com/psf/black/issues/2964
dynaconf = "^3.1.4"
geopandas = "^0.11.0"
@ -29,19 +29,20 @@ marshmallow-dataclass = "^8.5.3"
marshmallow-enum = "^1.5.1"
matplotlib = "^3.4.2"
numpy = "^1.22.1"
pandas = "^1.2.5"
pandas = "~1.4.3"
pylint = "^2.11.1"
pillow = "9.3.0"
python = "^3.8"
pillow = "9.3.0" # Newer versions break tile generation
python = "^3.10"
pypandoc = "^1.6.3"
PyYAML = "^6.0"
requests = "^2.25.1"
tqdm = "4.62.0"
tqdm = "^4.66.3"
types-requests = "^2.25.0"
us = "^2.0.2"
xlsxwriter = "^2.0.0"
pydantic = "^1.9.0"
Rtree = "^1.0.0"
fiona = "~1.8.21"
[tool.poetry.dev-dependencies]
black = {version = "^21.6b0", allow-prereleases = true}
@ -59,7 +60,7 @@ pandas-vet = "^0.2.2"
pytest-snapshot = "^0.8.1"
seaborn = "^0.11.2"
papermill = "^2.3.4"
jupyterlab = "3.4.4"
jupyterlab = "^3.6.7"
[build-system]
build-backend = "poetry.core.masonry.api"

View file

@ -1,7 +1,7 @@
[tox]
# required because we use pyproject.toml
isolated_build = true
envlist = py38, py39, lint, checkdeps, pytest
envlist = py310, lint, checkdeps, pytest
# only checks python versions installed locally
skip_missing_interpreters = true
@ -16,7 +16,9 @@ commands = black data_pipeline
# checks the dependencies for security vulnerabilities and open source licenses
allowlist_externals = bash
commands = pip install -U wheel
safety check --ignore 51457 --ignore 44715 # known issue: https://github.com/pyupio/safety/issues/364
# known issue: https://github.com/pyupio/safety/issues/364
# jinja2 false positive for our use: https://data.safetycli.com/v/70612/f17
safety check --ignore 51457 --ignore 44715 --ignore 70612
bash scripts/run-liccheck.sh
[testenv:pytest]