mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
renaming geocorr to geocorr_urban
This commit is contained in:
parent
f4adf172e3
commit
f284d75098
9 changed files with 18 additions and 18 deletions
|
@ -90,9 +90,9 @@ DATASET_LIST = [
|
|||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
"name": "geocorr",
|
||||
"module_dir": "geocorr",
|
||||
"class_name": "GeoCorrETL",
|
||||
"name": "geocorr_urban",
|
||||
"module_dir": "geocorr_urban",
|
||||
"class_name": "GeoCorrUrbanETL",
|
||||
"is_memory_intensive": False,
|
||||
},
|
||||
{
|
||||
|
|
|
@ -153,7 +153,7 @@ class ScoreETL(ExtractTransformLoad):
|
|||
|
||||
# Load GeoCorr Urban Rural Map
|
||||
geocorr_urban_rural_csv = (
|
||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||
constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
|
||||
)
|
||||
self.geocorr_urban_rural_df = pd.read_csv(
|
||||
geocorr_urban_rural_csv,
|
||||
|
|
|
@ -76,7 +76,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
|||
self.pr_tracts: pd.DataFrame
|
||||
|
||||
def _transform_geocorr(self) -> pd.DataFrame:
|
||||
# Transform the geocorr data
|
||||
# Transform the geocorr_urban data
|
||||
geocorr_df = self.raw_geocorr_df
|
||||
|
||||
# Strip the unnecessary period from the tract ID:
|
||||
|
@ -244,12 +244,12 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
|||
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
|
||||
+ "/geocorr2014_all_states_tracts_only.csv.zip",
|
||||
download_path=self.get_tmp_path(),
|
||||
unzipped_file_path=self.get_tmp_path() / "geocorr",
|
||||
unzipped_file_path=self.get_tmp_path() / "geocorr_urban",
|
||||
)
|
||||
|
||||
self.raw_geocorr_df = pd.read_csv(
|
||||
filepath_or_buffer=self.get_tmp_path()
|
||||
/ "geocorr"
|
||||
/ "geocorr_urban"
|
||||
/ "geocorr2014_all_states_tracts_only.csv",
|
||||
# Skip second row, which has descriptions.
|
||||
skiprows=[1],
|
||||
|
@ -265,7 +265,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
|||
)
|
||||
|
||||
logger.info("Pulling PR tract list down.")
|
||||
# This step is necessary because PR is not in geocorr at the level that gets joined
|
||||
# This step is necessary because PR is not in geocorr_urban at the level that gets joined
|
||||
pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv"
|
||||
download_file_from_url(
|
||||
file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file
|
||||
|
@ -307,7 +307,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
|||
msa_median_incomes_df = self._transform_msa_median_incomes()
|
||||
state_median_incomes_df = self._transform_state_median_incomes()
|
||||
|
||||
# Adds 945 PR tracts to the geocorr dataframe
|
||||
# Adds 945 PR tracts to the geocorr_urban dataframe
|
||||
geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer")
|
||||
|
||||
# Join tracts on MSA incomes
|
||||
|
|
|
@ -10,13 +10,13 @@ from data_pipeline.utils import (
|
|||
logger = get_module_logger(__name__)
|
||||
|
||||
|
||||
class GeoCorrETL(ExtractTransformLoad):
|
||||
NAME = "geocorr"
|
||||
class GeoCorrUrbanETL(ExtractTransformLoad):
|
||||
NAME = "geocorr_urban"
|
||||
GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT
|
||||
PUERTO_RICO_EXPECTED_IN_DATA = False
|
||||
|
||||
def __init__(self):
|
||||
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr"
|
||||
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr_urban"
|
||||
|
||||
# Need to change hyperlink to S3
|
||||
|
||||
|
@ -37,7 +37,7 @@ class GeoCorrETL(ExtractTransformLoad):
|
|||
|
||||
def extract(self) -> None:
|
||||
logger.info(
|
||||
"Starting to download 2MB GeoCorr Urban Rural Census Tract Map file."
|
||||
"Starting to download 2MB geocorr_urban Urban Rural Census Tract Map file."
|
||||
)
|
||||
unzip_file_from_url(
|
||||
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
|
|
@ -334,7 +334,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.12"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
@ -161,7 +161,7 @@ def fuds_df():
|
|||
@pytest.fixture()
|
||||
def geocorr_urban_rural_df():
|
||||
geocorr_urban_rural_csv = (
|
||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
||||
constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
|
||||
)
|
||||
return pd.read_csv(
|
||||
geocorr_urban_rural_csv,
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
import pathlib
|
||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||
from data_pipeline.etl.sources.geocorr.etl import GeoCorrETL
|
||||
from data_pipeline.etl.sources.geocorr_urban.etl import GeoCorrUrbanETL
|
||||
|
||||
|
||||
class TestGeoCorrETL(TestETL):
|
||||
_ETL_CLASS = GeoCorrETL
|
||||
_ETL_CLASS = GeoCorrUrbanETL
|
||||
|
||||
_SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
|
||||
_SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv"
|
||||
_SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip"
|
||||
_EXTRACT_TMP_FOLDER_NAME = "GeoCorrETL"
|
||||
_EXTRACT_TMP_FOLDER_NAME = "GeoCorrUrbanETL"
|
||||
|
||||
def setup_method(self, _method, filename=__file__):
|
||||
"""Invoke `setup_method` from Parent, but using the current file name.
|
||||
|
|
Loading…
Add table
Reference in a new issue