mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-27 22:01:16 -07:00
Changes to allow local runs
This commit is contained in:
parent
d298f7dedb
commit
ff9e7b9aa2
11 changed files with 3231 additions and 1867 deletions
|
@ -22,6 +22,7 @@ from pathlib import Path
|
|||
from typing import List
|
||||
from dataclasses import dataclass
|
||||
from abc import ABC, abstractmethod
|
||||
+from data_pipeline.constants import NO_SSL_VERIFY
|
||||
|
||||
from data_pipeline.etl.downloader import Downloader
|
||||
from data_pipeline.etl.sources.census_acs.etl_utils import (
|
||||
|
@ -65,7 +66,7 @@ class FileDataSource(DataSource):
|
|||
Downloader.download_file_from_url(
|
||||
file_url=self.source,
|
||||
download_file_name=self.destination,
|
||||
-verify=True,
|
||||
+verify=not NO_SSL_VERIFY,
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
|
@ -85,7 +86,7 @@ class ZIPDataSource(DataSource):
|
|||
Downloader.download_zip_file_from_url(
|
||||
file_url=self.source,
|
||||
unzipped_file_path=self.destination,
|
||||
-verify=True,
|
||||
+verify=not NO_SSL_VERIFY,
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
|
|
|
@ -6,7 +6,9 @@ import shutil
|
|||
|
||||
from pathlib import Path
|
||||
from data_pipeline.config import settings
|
||||
from data_pipeline.utils import get_module_logger
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
class Downloader:
|
||||
"""A simple class to encapsulate the download capabilities of the application"""
|
||||
|
@ -34,12 +36,13 @@ class Downloader:
|
|||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
download_file_name.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
logger.debug(f"Downloading {file_url}")
|
||||
response = requests.get(
|
||||
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
|
||||
)
|
||||
if response.status_code == 200:
|
||||
file_contents = response.content
|
||||
logger.debug("Downloaded.")
|
||||
else:
|
||||
raise Exception(
|
||||
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
|
||||
|
|
|
@ -215,6 +215,7 @@ class CensusETL(ExtractTransformLoad):
|
|||
state_gdf = gpd.read_file(file_name)
|
||||
usa_df = usa_df.append(state_gdf)
|
||||
|
||||
logger.debug("Converting to CRS")
|
||||
usa_df = usa_df.to_crs(
|
||||
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue