From f284d75098b69a41d01912e611da7487e012b26f Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Wed, 21 Sep 2022 14:54:27 -0400 Subject: [PATCH] renaming geocorr to geocorr_urban --- data/data-pipeline/data_pipeline/etl/constants.py | 6 +++--- .../data-pipeline/data_pipeline/etl/score/etl_score.py | 2 +- .../etl/sources/census_acs_median_income/etl.py | 10 +++++----- .../etl/sources/{geocorr => geocorr_urban}/README.md | 0 .../etl/sources/{geocorr => geocorr_urban}/__init__.py | 0 .../etl/sources/{geocorr => geocorr_urban}/etl.py | 8 ++++---- .../data_pipeline/ipython/urban_vs_rural.ipynb | 2 +- .../data_pipeline/tests/score/fixtures.py | 2 +- .../data_pipeline/tests/sources/geocorr/test_etl.py | 6 +++--- 9 files changed, 18 insertions(+), 18 deletions(-) rename data/data-pipeline/data_pipeline/etl/sources/{geocorr => geocorr_urban}/README.md (100%) rename data/data-pipeline/data_pipeline/etl/sources/{geocorr => geocorr_urban}/__init__.py (100%) rename data/data-pipeline/data_pipeline/etl/sources/{geocorr => geocorr_urban}/etl.py (92%) diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py index f0d5b171..b246dcc1 100644 --- a/data/data-pipeline/data_pipeline/etl/constants.py +++ b/data/data-pipeline/data_pipeline/etl/constants.py @@ -90,9 +90,9 @@ DATASET_LIST = [ "is_memory_intensive": False, }, { - "name": "geocorr", - "module_dir": "geocorr", - "class_name": "GeoCorrETL", + "name": "geocorr_urban", + "module_dir": "geocorr_urban", + "class_name": "GeoCorrUrbanETL", "is_memory_intensive": False, }, { diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 5a865d5f..e708920d 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -153,7 +153,7 @@ class ScoreETL(ExtractTransformLoad): # Load GeoCorr Urban Rural Map geocorr_urban_rural_csv = ( - 
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv" + constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv" ) self.geocorr_urban_rural_df = pd.read_csv( geocorr_urban_rural_csv, diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py index a39f8891..fa07f830 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs_median_income/etl.py @@ -76,7 +76,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad): self.pr_tracts: pd.DataFrame def _transform_geocorr(self) -> pd.DataFrame: - # Transform the geocorr data + # Transform the geocorr_urban data geocorr_df = self.raw_geocorr_df # Strip the unnecessary period from the tract ID: @@ -244,12 +244,12 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad): file_url=settings.AWS_JUSTICE40_DATASOURCES_URL + "/geocorr2014_all_states_tracts_only.csv.zip", download_path=self.get_tmp_path(), - unzipped_file_path=self.get_tmp_path() / "geocorr", + unzipped_file_path=self.get_tmp_path() / "geocorr_urban", ) self.raw_geocorr_df = pd.read_csv( filepath_or_buffer=self.get_tmp_path() - / "geocorr" + / "geocorr_urban" / "geocorr2014_all_states_tracts_only.csv", # Skip second row, which has descriptions. 
skiprows=[1], @@ -265,7 +265,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad): ) logger.info("Pulling PR tract list down.") - # This step is necessary because PR is not in geocorr at the level that gets joined + # This step is necessary because PR is not in geocorr_urban at the level that gets joined pr_file = self.get_tmp_path() / "pr_tracts" / "pr_tracts.csv" download_file_from_url( file_url=self.PUERTO_RICO_S3_LINK, download_file_name=pr_file @@ -307,7 +307,7 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad): msa_median_incomes_df = self._transform_msa_median_incomes() state_median_incomes_df = self._transform_state_median_incomes() - # Adds 945 PR tracts to the geocorr dataframe + # Adds 945 PR tracts to the geocorr_urban dataframe geocorr_df_plus_pr = geocorr_df.merge(self.pr_tracts, how="outer") # Join tracts on MSA incomes diff --git a/data/data-pipeline/data_pipeline/etl/sources/geocorr/README.md b/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/README.md similarity index 100% rename from data/data-pipeline/data_pipeline/etl/sources/geocorr/README.md rename to data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/README.md diff --git a/data/data-pipeline/data_pipeline/etl/sources/geocorr/__init__.py b/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/__init__.py similarity index 100% rename from data/data-pipeline/data_pipeline/etl/sources/geocorr/__init__.py rename to data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/__init__.py diff --git a/data/data-pipeline/data_pipeline/etl/sources/geocorr/etl.py b/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/etl.py similarity index 92% rename from data/data-pipeline/data_pipeline/etl/sources/geocorr/etl.py rename to data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/etl.py index c8e30d10..4d775ed4 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/geocorr/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/geocorr_urban/etl.py @@ 
-10,13 +10,13 @@ from data_pipeline.utils import ( logger = get_module_logger(__name__) -class GeoCorrETL(ExtractTransformLoad): - NAME = "geocorr" +class GeoCorrUrbanETL(ExtractTransformLoad): + NAME = "geocorr_urban" GEO_LEVEL: ValidGeoLevel = ValidGeoLevel.CENSUS_TRACT PUERTO_RICO_EXPECTED_IN_DATA = False def __init__(self): - self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr" + self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr_urban" # Need to change hyperlink to S3 @@ -37,7 +37,7 @@ class GeoCorrETL(ExtractTransformLoad): def extract(self) -> None: logger.info( - "Starting to download 2MB GeoCorr Urban Rural Census Tract Map file." + "Starting to download 2MB GeoCorr Urban Rural Census Tract Map file." ) unzip_file_from_url( file_url=settings.AWS_JUSTICE40_DATASOURCES_URL diff --git a/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb b/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb index 1059229a..bd509350 100644 --- a/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb @@ -334,7 +334,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.9.6" } }, "nbformat": 4, diff --git a/data/data-pipeline/data_pipeline/tests/score/fixtures.py b/data/data-pipeline/data_pipeline/tests/score/fixtures.py index 744ebfa6..b3a5f4d8 100644 --- a/data/data-pipeline/data_pipeline/tests/score/fixtures.py +++ b/data/data-pipeline/data_pipeline/tests/score/fixtures.py @@ -161,7 +161,7 @@ def fuds_df(): @pytest.fixture() def geocorr_urban_rural_df(): geocorr_urban_rural_csv = ( - constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv" + constants.DATA_PATH / "dataset" / "geocorr_urban" / "usa.csv" ) return pd.read_csv( geocorr_urban_rural_csv, diff --git a/data/data-pipeline/data_pipeline/tests/sources/geocorr/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/geocorr/test_etl.py index 
bb065aac..57564972 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/geocorr/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/geocorr/test_etl.py @@ -1,15 +1,15 @@ import pathlib from data_pipeline.tests.sources.example.test_etl import TestETL -from data_pipeline.etl.sources.geocorr.etl import GeoCorrETL +from data_pipeline.etl.sources.geocorr_urban.etl import GeoCorrUrbanETL class TestGeoCorrETL(TestETL): - _ETL_CLASS = GeoCorrETL + _ETL_CLASS = GeoCorrUrbanETL _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data" _SAMPLE_DATA_FILE_NAME = "geocorr_urban_rural.csv" _SAMPLE_DATA_ZIP_FILE_NAME = "geocorr_urban_rural.csv.zip" - _EXTRACT_TMP_FOLDER_NAME = "GeoCorrETL" + _EXTRACT_TMP_FOLDER_NAME = "GeoCorrUrbanETL" def setup_method(self, _method, filename=__file__): """Invoke `setup_method` from Parent, but using the current file name.