Changes to allow local runs

This commit is contained in:
Carlos Felix 2024-11-05 11:31:54 -05:00 committed by Carlos Felix
commit ff9e7b9aa2
11 changed files with 3231 additions and 1867 deletions

View file

@ -22,6 +22,7 @@ from pathlib import Path
from typing import List
from dataclasses import dataclass
from abc import ABC, abstractmethod
from data_pipeline.constants import NO_SSL_VERIFY
from data_pipeline.etl.downloader import Downloader
from data_pipeline.etl.sources.census_acs.etl_utils import (
@ -65,7 +66,7 @@ class FileDataSource(DataSource):
Downloader.download_file_from_url(
file_url=self.source,
download_file_name=self.destination,
verify=True,
verify=not NO_SSL_VERIFY,
)
def __str__(self):
@ -85,7 +86,7 @@ class ZIPDataSource(DataSource):
Downloader.download_zip_file_from_url(
file_url=self.source,
unzipped_file_path=self.destination,
verify=True,
verify=not NO_SSL_VERIFY,
)
def __str__(self):

View file

@ -6,7 +6,9 @@ import shutil
from pathlib import Path
from data_pipeline.config import settings
from data_pipeline.utils import get_module_logger
logger = get_module_logger(__name__)
class Downloader:
"""A simple class to encapsulate the download capabilities of the application"""
@ -34,12 +36,13 @@ class Downloader:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
download_file_name.parent.mkdir(parents=True, exist_ok=True)
logger.debug(f"Downloading {file_url}")
response = requests.get(
file_url, verify=verify, timeout=settings.REQUESTS_DEFAULT_TIMOUT
)
if response.status_code == 200:
file_contents = response.content
logger.debug("Downloaded.")
else:
raise Exception(
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"

View file

@ -215,6 +215,7 @@ class CensusETL(ExtractTransformLoad):
state_gdf = gpd.read_file(file_name)
usa_df = usa_df.append(state_gdf)
logger.debug("Converting to CRS")
usa_df = usa_df.to_crs(
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
)