mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-25 04:00:17 -07:00
Score F, testing methodology (#510)
* fixing dependency issue
* fixing more dependencies
* including fraction of state AMI
* wip
* nitpick whitespace
* etl working now
* wip on scoring
* fix rename error
* reducing metrics
* fixing score f
* fixing readme
* adding dependency
* passing tests;
* linting/black
* removing unnecessary sample
* fixing error
* adding verify flag on etl/base

Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
This commit is contained in:
parent
043ed983ea
commit
65ceb7900f
23 changed files with 557 additions and 153 deletions
|
@@ -1,4 +1,5 @@
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from data_pipeline.config import settings
|
||||
from data_pipeline.utils import unzip_file_from_url, remove_all_from_dir
|
||||
|
@@ -33,14 +34,21 @@ class ExtractTransformLoad:
|
|||
|
||||
pass
|
||||
|
||||
def extract(self, source_url: str = None, extract_path: Path = None) -> None:
|
||||
def extract(
|
||||
self,
|
||||
source_url: str = None,
|
||||
extract_path: Path = None,
|
||||
verify: Optional[bool] = True,
|
||||
) -> None:
|
||||
"""Extract the data from
|
||||
a remote source. By default it provides code to get the file from a source url,
|
||||
unzips it and stores it on an extract_path."""
|
||||
|
||||
# this can be accessed via super().extract()
|
||||
if source_url and extract_path:
|
||||
unzip_file_from_url(source_url, self.TMP_PATH, extract_path)
|
||||
unzip_file_from_url(
|
||||
source_url, self.TMP_PATH, extract_path, verify=verify
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
"""Transform the data extracted into a format that can be consumed by the
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue