renaming geocorr to geocorr_urban

2025-02-23 01:54:18 -08:00 · 2022-09-21 14:54:27 -04:00 · 2022-09-21 14:54:27 -04:00 · f284d75098
commit f284d75098
parent f4adf172e3
9 changed files with 18 additions and 18 deletions
--- a/data/data-pipeline/data_pipeline/etl/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/constants.py
@@ -90,9 +90,9 @@ DATASET_LIST = [
        "is_memory_intensive": False,
    },
    {
-        "name": "geocorr",
-        "module_dir": "geocorr",
-        "class_name": "GeoCorrETL",
+        "name": "geocorr_urban",
+        "module_dir": "geocorr_urban",
+        "class_name": "GeoCorrUrbanETL",
        "is_memory_intensive": False,
    },
    {
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -153,7 +153,7 @@ class ScoreETL(ExtractTransformLoad):

        # Load GeoCorr Urban Rural Map
        geocorr_urban_rural_csv = (
-            constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
+            constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
        )
        self.geocorr_urban_rural_df = pd.read_csv(
            geocorr_urban_rural_csv,
--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py
@@ -76,7 +76,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
        self.pr_tracts: pd.DataFrame

    def _transform_geocorr(self) -> pd.DataFrame:
-        # Transform the geocorr data
+        # Transform the geocorr_urban data
        geocorr_df = self.raw_geocorr_df

        # Strip the unnecessary period from the tract ID:
@@ -244,12 +244,12 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
            file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
            + "/geocorr2014_all_states_tracts_only.csv.zip",
            download_path=self.get_tmp_path(),
-            unzipped_file_path=self.get_tmp_path() / "geocorr",
+            unzipped_file_path=self.get_tmp_path() / "geocorr_urban",
        )

        self.raw_geocorr_df = pd.read_csv(
            filepath_or_buffer=self.get_tmp_path()
-            / "geocorr"
+            / "geocorr_urban"
            / "geocorr2014_all_states_tracts_only.csv",
            # Skip second row, which has descriptions.
            skiprows=[1],
@@ -265,7 +265,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
        )

        logger.info("Pulling PR tract list down.")
-        # This step is necessary because PR is not in geocorr at the level that gets joined
+        # This step is necessary because PR is not in geocorr_urban at the level that gets joined
        pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv"
        download_file_from_url(
            file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file
@@ -307,7 +307,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
        msa_median_incomes_df = self._transform_msa_median_incomes()
        state_median_incomes_df = self._transform_state_median_incomes()

-        # Adds 945 PR tracts to the geocorr dataframe
+        # Adds 945 PR tracts to the geocorr_urban dataframe
        geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer")

        # Join tracts on MSA incomes
--- a/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/README.md
+++ b/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/README.md
--- a/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/__init__.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/__init__.py
--- a/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/etl.py
@@ -10,13 +10,13 @@ from data_pipeline.utils import (
 logger = get_module_logger(__name__)


-class GeoCorrETL(ExtractTransformLoad):
-    NAME = "geocorr"
+class GeoCorrUrbanETL(ExtractTransformLoad):
+    NAME = "geocorr_urban"
    GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT
    PUERTO_RICO_EXPECTED_IN_DATA = False

    def __init__(self):
-        self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr"
+        self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr_urban"

        # Need to change hyperlink to S3

@@ -37,7 +37,7 @@ class GeoCorrETL(ExtractTransformLoad):

    def extract(self) -> None:
        logger.info(
-            "Starting to download 2MB GeoCorr Urban Rural Census Tract Map file."
+            "Starting to download 2MB geocorr_urban Urban Rural Census Tract Map file."
        )
        unzip_file_from_url(
            file_url=settings.AWS_JUSTICE40_DATASOURCES_URL
--- a/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb
@@ -334,7 +334,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.12"
+   "version": "3.9.6"
  }
 },
 "nbformat": 4,
--- a/data/data-pipeline/data_pipeline/tests/score/fixtures.py
+++ b/data/data-pipeline/data_pipeline/tests/score/fixtures.py
@@ -161,7 +161,7 @@ def fuds_df():
@pytest.fixture()
 def geocorr_urban_rural_df():
    geocorr_urban_rural_csv = (
-        constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
+        constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv"
    )
    return pd.read_csv(
        geocorr_urban_rural_csv,
--- a/data/data-pipeline/data_pipeline/tests/sources/geocorr/test_etl.py
+++ b/data/data-pipeline/data_pipeline/tests/sources/geocorr/test_etl.py
@@ -1,15 +1,15 @@
 import pathlib
 from data_pipeline.tests.sources.example.test_etl import TestETL
-from data_pipeline.etl.sources.geocorr.etl import GeoCorrETL
+from data_pipeline.etl.sources.geocorr_urban.etl import GeoCorrUrbanETL


 class TestGeoCorrETL(TestETL):
-    _ETL_CLASS = GeoCorrETL
+    _ETL_CLASS = GeoCorrUrbanETL

    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv"
    _SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip"
-    _EXTRACT_TMP_FOLDER_NAME = "GeoCorrETL"
+    _EXTRACT_TMP_FOLDER_NAME = "GeoCorrUrbanETL"

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.