renaming geocorr to geocorr_urban

This commit is contained in:
lucasmbrown-usds 2022-09-21 14:54:27 -04:00
parent f4adf172e3
commit f284d75098
9 changed files with 18 additions and 18 deletions

View file

@ -90,9 +90,9 @@ DATASET_LIST = [
"is_memory_intensive": False,
},
{
"name": "geocorr",
"module_dir": "geocorr",
"class_name": "GeoCorrETL",
"name": "geocorr_urban",
"module_dir": "geocorr_urban",
"class_name": "GeoCorrUrbanETL",
"is_memory_intensive": False,
},
{

View file

@ -153,7 +153,7 @@ class ScoreETL(ExtractTransformLoad):
# Load GeoCorr Urban Rural Map
geocorr_urban_rural_csv = (
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
)
self.geocorr_urban_rural_df = pd.read_csv(
geocorr_urban_rural_csv,

View file

@ -76,7 +76,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
self.pr_tracts: pd.DataFrame
def _transform_geocorr(self) -> pd.DataFrame:
# Transform the geocorr data
# Transform the geocorr_urban data
geocorr_df = self.raw_geocorr_df
# Strip the unnecessary period from the tract ID:
@ -244,12 +244,12 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
+ "/geocorr2014_all_states_tracts_only.csv.zip",
download_path=self.get_tmp_path(),
unzipped_file_path=self.get_tmp_path() / "geocorr",
unzipped_file_path=self.get_tmp_path() / "geocorr_urban",
)
self.raw_geocorr_df = pd.read_csv(
filepath_or_buffer=self.get_tmp_path()
/ "geocorr"
/ "geocorr_urban"
/ "geocorr2014_all_states_tracts_only.csv",
# Skip second row, which has descriptions.
skiprows=[1],
@ -265,7 +265,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
)
logger.info("Pulling PR tract list down.")
# This step is necessary because PR is not in geocorr at the level that gets joined
# This step is necessary because PR is not in geocorr_urban at the level that gets joined
pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv"
download_file_from_url(
file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file
@ -307,7 +307,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
msa_median_incomes_df = self._transform_msa_median_incomes()
state_median_incomes_df = self._transform_state_median_incomes()
# Adds 945 PR tracts to the geocorr dataframe
# Adds 945 PR tracts to the geocorr_urban dataframe
geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer")
# Join tracts on MSA incomes

View file

@ -10,13 +10,13 @@ from data_pipeline.utils import (
logger = get_module_logger(__name__)
class GeoCorrETL(ExtractTransformLoad):
NAME = "geocorr"
class GeoCorrUrbanETL(ExtractTransformLoad):
NAME = "geocorr_urban"
GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT
PUERTO_RICO_EXPECTED_IN_DATA = False
def __init__(self):
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr"
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr_urban"
# Need to change hyperlink to S3
@ -37,7 +37,7 @@ class GeoCorrETL(ExtractTransformLoad):
def extract(self) -> None:
logger.info(
"Starting to download 2MB GeoCorr Urban Rural Census Tract Map file."
"Starting to download 2MB geocorr_urban Urban Rural Census Tract Map file."
)
unzip_file_from_url(
file_url=settings.AWS_JUSTICE40_DATASOURCES_URL

View file

@ -334,7 +334,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.9.6"
}
},
"nbformat": 4,

View file

@ -161,7 +161,7 @@ def fuds_df():
@pytest.fixture()
def geocorr_urban_rural_df():
geocorr_urban_rural_csv = (
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
)
return pd.read_csv(
geocorr_urban_rural_csv,

View file

@ -1,15 +1,15 @@
import pathlib
from data_pipeline.tests.sources.example.test_etl import TestETL
from data_pipeline.etl.sources.geocorr.etl import GeoCorrETL
from data_pipeline.etl.sources.geocorr_urban.etl import GeoCorrUrbanETL
class TestGeoCorrETL(TestETL):
_ETL_CLASS = GeoCorrETL
_ETL_CLASS = GeoCorrUrbanETL
_SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
_SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv"
_SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip"
_EXTRACT_TMP_FOLDER_NAME = "GeoCorrETL"
_EXTRACT_TMP_FOLDER_NAME = "GeoCorrUrbanETL"
def setup_method(self, _method, filename=__file__):
"""Invoke `setup_method` from Parent, but using the current file name.