mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
renaming geocorr to geocorr_urban
This commit is contained in:
parent
f4adf172e3
commit
f284d75098
9 changed files with 18 additions and 18 deletions
|
@ -90,9 +90,9 @@ DATASET_LIST = [
|
||||||
"is_memory_intensive": False,
|
"is_memory_intensive": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "geocorr",
|
"name": "geocorr_urban",
|
||||||
"module_dir": "geocorr",
|
"module_dir": "geocorr_urban",
|
||||||
"class_name": "GeoCorrETL",
|
"class_name": "GeoCorrUrbanETL",
|
||||||
"is_memory_intensive": False,
|
"is_memory_intensive": False,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -153,7 +153,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
# Load GeoCorr Urban Rural Map
|
# Load GeoCorr Urban Rural Map
|
||||||
geocorr_urban_rural_csv = (
|
geocorr_urban_rural_csv = (
|
||||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
|
||||||
)
|
)
|
||||||
self.geocorr_urban_rural_df = pd.read_csv(
|
self.geocorr_urban_rural_df = pd.read_csv(
|
||||||
geocorr_urban_rural_csv,
|
geocorr_urban_rural_csv,
|
||||||
|
|
|
@ -76,7 +76,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
||||||
self.pr_tracts: pd.DataFrame
|
self.pr_tracts: pd.DataFrame
|
||||||
|
|
||||||
def _transform_geocorr(self) -> pd.DataFrame:
|
def _transform_geocorr(self) -> pd.DataFrame:
|
||||||
# Transform the geocorr data
|
# Transform the geocorr_urban data
|
||||||
geocorr_df = self.raw_geocorr_df
|
geocorr_df = self.raw_geocorr_df
|
||||||
|
|
||||||
# Strip the unnecessary period from the tract ID:
|
# Strip the unnecessary period from the tract ID:
|
||||||
|
@ -244,12 +244,12 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
||||||
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
|
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
|
||||||
+ "/geocorr2014_all_states_tracts_only.csv.zip",
|
+ "/geocorr2014_all_states_tracts_only.csv.zip",
|
||||||
download_path=self.get_tmp_path(),
|
download_path=self.get_tmp_path(),
|
||||||
unzipped_file_path=self.get_tmp_path() / "geocorr",
|
unzipped_file_path=self.get_tmp_path() / "geocorr_urban",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.raw_geocorr_df = pd.read_csv(
|
self.raw_geocorr_df = pd.read_csv(
|
||||||
filepath_or_buffer=self.get_tmp_path()
|
filepath_or_buffer=self.get_tmp_path()
|
||||||
/ "geocorr"
|
/ "geocorr_urban"
|
||||||
/ "geocorr2014_all_states_tracts_only.csv",
|
/ "geocorr2014_all_states_tracts_only.csv",
|
||||||
# Skip second row, which has descriptions.
|
# Skip second row, which has descriptions.
|
||||||
skiprows=[1],
|
skiprows=[1],
|
||||||
|
@ -265,7 +265,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Pulling PR tract list down.")
|
logger.info("Pulling PR tract list down.")
|
||||||
# This step is necessary because PR is not in geocorr at the level that gets joined
|
# This step is necessary because PR is not in geocorr_urban at the level that gets joined
|
||||||
pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv"
|
pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv"
|
||||||
download_file_from_url(
|
download_file_from_url(
|
||||||
file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file
|
file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file
|
||||||
|
@ -307,7 +307,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
|
||||||
msa_median_incomes_df = self._transform_msa_median_incomes()
|
msa_median_incomes_df = self._transform_msa_median_incomes()
|
||||||
state_median_incomes_df = self._transform_state_median_incomes()
|
state_median_incomes_df = self._transform_state_median_incomes()
|
||||||
|
|
||||||
# Adds 945 PR tracts to the geocorr dataframe
|
# Adds 945 PR tracts to the geocorr_urban dataframe
|
||||||
geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer")
|
geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer")
|
||||||
|
|
||||||
# Join tracts on MSA incomes
|
# Join tracts on MSA incomes
|
||||||
|
|
|
@ -10,13 +10,13 @@ from data_pipeline.utils import (
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class GeoCorrETL(ExtractTransformLoad):
|
class GeoCorrUrbanETL(ExtractTransformLoad):
|
||||||
NAME = "geocorr"
|
NAME = "geocorr_urban"
|
||||||
GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT
|
GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT
|
||||||
PUERTO_RICO_EXPECTED_IN_DATA = False
|
PUERTO_RICO_EXPECTED_IN_DATA = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr"
|
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr_urban"
|
||||||
|
|
||||||
# Need to change hyperlink to S3
|
# Need to change hyperlink to S3
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ class GeoCorrETL(ExtractTransformLoad):
|
||||||
|
|
||||||
def extract(self) -> None:
|
def extract(self) -> None:
|
||||||
logger.info(
|
logger.info(
|
||||||
"Starting to download 2MB GeoCorr Urban Rural Census Tract Map file."
|
"Starting to download 2MB geocorr_urban Urban Rural Census Tract Map file."
|
||||||
)
|
)
|
||||||
unzip_file_from_url(
|
unzip_file_from_url(
|
||||||
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
|
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
|
|
@ -334,7 +334,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.12"
|
"version": "3.9.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
|
@ -161,7 +161,7 @@ def fuds_df():
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def geocorr_urban_rural_df():
|
def geocorr_urban_rural_df():
|
||||||
geocorr_urban_rural_csv = (
|
geocorr_urban_rural_csv = (
|
||||||
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
|
constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
|
||||||
)
|
)
|
||||||
return pd.read_csv(
|
return pd.read_csv(
|
||||||
geocorr_urban_rural_csv,
|
geocorr_urban_rural_csv,
|
||||||
|
|
|
@ -1,15 +1,15 @@
|
||||||
import pathlib
|
import pathlib
|
||||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||||
from data_pipeline.etl.sources.geocorr.etl import GeoCorrETL
|
from data_pipeline.etl.sources.geocorr_urban.etl import GeoCorrUrbanETL
|
||||||
|
|
||||||
|
|
||||||
class TestGeoCorrETL(TestETL):
|
class TestGeoCorrETL(TestETL):
|
||||||
_ETL_CLASS = GeoCorrETL
|
_ETL_CLASS = GeoCorrUrbanETL
|
||||||
|
|
||||||
_SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
|
_SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
|
||||||
_SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv"
|
_SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv"
|
||||||
_SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip"
|
_SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip"
|
||||||
_EXTRACT_TMP_FOLDER_NAME = "GeoCorrETL"
|
_EXTRACT_TMP_FOLDER_NAME = "GeoCorrUrbanETL"
|
||||||
|
|
||||||
def setup_method(self, _method, filename=__file__):
|
def setup_method(self, _method, filename=__file__):
|
||||||
"""Invoke `setup_method` from Parent, but using the current file name.
|
"""Invoke `setup_method` from Parent, but using the current file name.
|
||||||
|
|
Loading…
Add table
Reference in a new issue