renaming geocorr to geocorr_urban

This commit is contained in:
lucasmbrown-usds 2022-09-21 14:54:27 -04:00
parent f4adf172e3
commit f284d75098
9 changed files with 18 additions and 18 deletions

View file

@ -90,9 +90,9 @@ DATASET_LIST = [
"is_memory_intensive": False, "is_memory_intensive": False,
}, },
{ {
"name": "geocorr", "name": "geocorr_urban",
"module_dir": "geocorr", "module_dir": "geocorr_urban",
"class_name": "GeoCorrETL", "class_name": "GeoCorrUrbanETL",
"is_memory_intensive": False, "is_memory_intensive": False,
}, },
{ {

View file

@ -153,7 +153,7 @@ class ScoreETL(ExtractTransformLoad):
# Load GeoCorr Urban Rural Map # Load GeoCorr Urban Rural Map
geocorr_urban_rural_csv = ( geocorr_urban_rural_csv = (
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv" constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
) )
self.geocorr_urban_rural_df = pd.read_csv( self.geocorr_urban_rural_df = pd.read_csv(
geocorr_urban_rural_csv, geocorr_urban_rural_csv,

View file

@ -76,7 +76,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
self.pr_tracts: pd.DataFrame self.pr_tracts: pd.DataFrame
def _transform_geocorr(self) -> pd.DataFrame: def _transform_geocorr(self) -> pd.DataFrame:
# Transform the geocorr data # Transform the geocorr_urban data
geocorr_df = self.raw_geocorr_df geocorr_df = self.raw_geocorr_df
# Strip the unnecessary period from the tract ID: # Strip the unnecessary period from the tract ID:
@ -244,12 +244,12 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
+ "/geocorr2014_all_states_tracts_only.csv.zip", + "/geocorr2014_all_states_tracts_only.csv.zip",
download_path=self.get_tmp_path(), download_path=self.get_tmp_path(),
unzipped_file_path=self.get_tmp_path() / "geocorr", unzipped_file_path=self.get_tmp_path() / "geocorr_urban",
) )
self.raw_geocorr_df = pd.read_csv( self.raw_geocorr_df = pd.read_csv(
filepath_or_buffer=self.get_tmp_path() filepath_or_buffer=self.get_tmp_path()
/ "geocorr" / "geocorr_urban"
/ "geocorr2014_all_states_tracts_only.csv", / "geocorr2014_all_states_tracts_only.csv",
# Skip second row, which has descriptions. # Skip second row, which has descriptions.
skiprows=[1], skiprows=[1],
@ -265,7 +265,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
) )
logger.info("Pulling PR tract list down.") logger.info("Pulling PR tract list down.")
# This step is necessary because PR is not in geocorr at the level that gets joined # This step is necessary because PR is not in geocorr_urban at the level that gets joined
pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv" pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv"
download_file_from_url( download_file_from_url(
file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file
@ -307,7 +307,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
msa_median_incomes_df = self._transform_msa_median_incomes() msa_median_incomes_df = self._transform_msa_median_incomes()
state_median_incomes_df = self._transform_state_median_incomes() state_median_incomes_df = self._transform_state_median_incomes()
# Adds 945 PR tracts to the geocorr dataframe # Adds 945 PR tracts to the geocorr_urban dataframe
geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer") geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer")
# Join tracts on MSA incomes # Join tracts on MSA incomes

View file

@ -10,13 +10,13 @@ from data_pipeline.utils import (
logger = get_module_logger(__name__) logger = get_module_logger(__name__)
class GeoCorrETL(ExtractTransformLoad): class GeoCorrUrbanETL(ExtractTransformLoad):
NAME = "geocorr" NAME = "geocorr_urban"
GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT
PUERTO_RICO_EXPECTED_IN_DATA = False PUERTO_RICO_EXPECTED_IN_DATA = False
def __init__(self): def __init__(self):
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr" self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr_urban"
# Need to change hyperlink to S3 # Need to change hyperlink to S3
@ -37,7 +37,7 @@ class GeoCorrETL(ExtractTransformLoad):
def extract(self) -> None: def extract(self) -> None:
logger.info( logger.info(
"Starting to download 2MB GeoCorr Urban Rural Census Tract Map file." "Starting to download 2MB geocorr_urban Urban Rural Census Tract Map file."
) )
unzip_file_from_url( unzip_file_from_url(
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL file_url=settings.AWS_JUSTICE40_DATASOURCES_URL

View file

@ -334,7 +334,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.12" "version": "3.9.6"
} }
}, },
"nbformat": 4, "nbformat": 4,

View file

@ -161,7 +161,7 @@ def fuds_df():
@pytest.fixture() @pytest.fixture()
def geocorr_urban_rural_df(): def geocorr_urban_rural_df():
geocorr_urban_rural_csv = ( geocorr_urban_rural_csv = (
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv" constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
) )
return pd.read_csv( return pd.read_csv(
geocorr_urban_rural_csv, geocorr_urban_rural_csv,

View file

@ -1,15 +1,15 @@
import pathlib import pathlib
from data_pipeline.tests.sources.example.test_etl import TestETL from data_pipeline.tests.sources.example.test_etl import TestETL
from data_pipeline.etl.sources.geocorr.etl import GeoCorrETL from data_pipeline.etl.sources.geocorr_urban.etl import GeoCorrUrbanETL
class TestGeoCorrETL(TestETL): class TestGeoCorrETL(TestETL):
_ETL_CLASS = GeoCorrETL _ETL_CLASS = GeoCorrUrbanETL
_SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data" _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
_SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv" _SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv"
_SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip" _SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip"
_EXTRACT_TMP_FOLDER_NAME = "GeoCorrETL" _EXTRACT_TMP_FOLDER_NAME = "GeoCorrUrbanETL"
def setup_method(self, _method, filename=__file__): def setup_method(self, _method, filename=__file__):
"""Invoke `setup_method` from Parent, but using the current file name. """Invoke `setup_method` from Parent, but using the current file name.