From c2081937b3f4845aaec7d02a4afe420aa1401169 Mon Sep 17 00:00:00 2001
From: VincentLaUSDS <vincent.la@omb.eop.gov>
Date: Wed, 22 Sep 2021 17:30:37 -0400
Subject: [PATCH] Replace GEOID_CBG_FIELD_NAME with GEOID_FIELD_NAME in EJSCREEN areas of concern ETL and notebook

---
 .../sources/ejscreen_areas_of_concern/etl.py  |  3 +-
 .../data_pipeline/ipython/ejscreen_load.ipynb | 51 +++++++++----------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/sources/ejscreen_areas_of_concern/etl.py b/data/data-pipeline/data_pipeline/etl/sources/ejscreen_areas_of_concern/etl.py
index dfb987f3..56477d6f 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/ejscreen_areas_of_concern/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/ejscreen_areas_of_concern/etl.py
@@ -12,7 +12,6 @@ class EJScreenAreasOfConcernETL(ExtractTransformLoad):
 
         # TO DO: Load from actual source; the issue is that this dataset is not public for now
         self.LOCAL_CSV_PATH = self.DATA_PATH / "local"
-        self.GEOID_CBG_FIELD_NAME = "GEOID10_CBG"
         self.df: pd.DataFrame
 
     def extract(self) -> None:
@@ -22,7 +21,7 @@ class EJScreenAreasOfConcernETL(ExtractTransformLoad):
             filepath_or_buffer=self.LOCAL_CSV_PATH
             / "ejscreen_areas_of_concerns_indicators.csv",
             dtype={
-                self.GEOID_CBG_FIELD_NAME: "string",
+                self.GEOID_FIELD_NAME: "string",
             },
             low_memory=False,
         )
diff --git a/data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb b/data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb
index c9a31f77..574c3d61 100644
--- a/data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/ejscreen_load.ipynb
@@ -74,7 +74,6 @@
     "GEOID_FIELD_NAME = \"GEOID10\"\n",
     "GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n",
     "GEOID_STATE_FIELD_NAME = \"GEOID10_STATE\"\n",
-    "GEOID_CBG_FIELD_NAME = \"GEOID10_CBG\"\n",
     "COUNTRY_FIELD_NAME = \"Country\"\n",
     "CENSUS_BLOCK_GROUP_POPULATION_FIELD = \"Total population\"\n",
     "\n",
@@ -96,7 +95,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "67cf1993",
+   "id": "186c15bf",
    "metadata": {},
    "source": [
     "### National"
@@ -105,7 +104,7 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "id": "c6d89263",
+   "id": "4843efbd",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -123,7 +122,7 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "3110606b",
+   "id": "0a146972",
    "metadata": {},
    "outputs": [
     {
@@ -152,9 +151,9 @@
     "    )\n",
     "    df['EXCEED_COUNT'] = pd.to_numeric(df['EXCEED_COUNT'])\n",
     "\n",
-    "    df.rename(columns={'ID': GEOID_CBG_FIELD_NAME}, inplace=True)\n",
+    "    df.rename(columns={'ID': GEOID_FIELD_NAME}, inplace=True)\n",
     "    df['percentile'] = percentile\n",
-    "    df = df[[GEOID_CBG_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
+    "    df = df[[GEOID_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
     "    dfs.append(df)\n",
     "    \n",
     "df = pd.concat(dfs)"
@@ -163,7 +162,7 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "id": "7f3bd094",
+   "id": "65622cbd",
    "metadata": {},
    "outputs": [
     {
@@ -248,11 +247,11 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "id": "f85dc722",
+   "id": "75e2d572",
    "metadata": {},
    "outputs": [],
    "source": [
-    "df_reshaped_nat = df.pivot(index=GEOID_CBG_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
+    "df_reshaped_nat = df.pivot(index=GEOID_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
     "df_reshaped_nat.columns = \\\n",
     "    ['EJSCREEN Areas of Concern, National, {}th percentile'.format(p) for p in df_reshaped_nat.columns]\n",
     "df_reshaped_nat.fillna(0, inplace=True)\n",
@@ -487,7 +486,7 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "id": "1a1c0ef9",
+   "id": "7eedff74",
    "metadata": {},
    "outputs": [
     {
@@ -784,7 +783,7 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "id": "ecb5a9da",
+   "id": "428b94f3",
    "metadata": {},
    "outputs": [
     {
@@ -977,7 +976,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "eca4ebf1",
+   "id": "7bc0f71c",
    "metadata": {},
    "source": [
     "### State"
@@ -986,7 +985,7 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "id": "d8d21a3d",
+   "id": "2de68aa5",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1004,7 +1003,7 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "id": "7c4f0a6d",
+   "id": "fccb416e",
    "metadata": {},
    "outputs": [
     {
@@ -1033,9 +1032,9 @@
     "    )\n",
     "    df['EXCEED_COUNT'] = pd.to_numeric(df['EXCEED_COUNT'])\n",
     "\n",
-    "    df.rename(columns={'ID': GEOID_CBG_FIELD_NAME}, inplace=True)\n",
+    "    df.rename(columns={'ID': GEOID_FIELD_NAME}, inplace=True)\n",
     "    df['percentile'] = percentile\n",
-    "    df = df[[GEOID_CBG_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
+    "    df = df[[GEOID_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
     "    dfs.append(df)\n",
     "    \n",
     "df = pd.concat(dfs)"
@@ -1044,7 +1043,7 @@
   {
    "cell_type": "code",
    "execution_count": 12,
-   "id": "0c40712a",
+   "id": "8300e454",
    "metadata": {},
    "outputs": [
     {
@@ -1129,11 +1128,11 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "id": "acce11ae",
+   "id": "5be30b4f",
    "metadata": {},
    "outputs": [],
    "source": [
-    "df_reshaped_sta = df.pivot(index=GEOID_CBG_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
+    "df_reshaped_sta = df.pivot(index=GEOID_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
     "df_reshaped_sta.columns = ['EJSCREEN Areas of Concern, State, {}th percentile'.format(p) for p in df_reshaped_sta.columns]\n",
     "df_reshaped_sta.fillna(0, inplace=True)\n",
     "\n",
@@ -1145,7 +1144,7 @@
   {
    "cell_type": "code",
    "execution_count": 14,
-   "id": "5160b8cd",
+   "id": "9206132b",
    "metadata": {},
    "outputs": [
     {
@@ -1367,7 +1366,7 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "id": "533b81a9",
+   "id": "b551a4df",
    "metadata": {},
    "outputs": [
     {
@@ -1664,7 +1663,7 @@
   {
    "cell_type": "code",
    "execution_count": 16,
-   "id": "e1cc26bc",
+   "id": "c3cb5696",
    "metadata": {},
    "outputs": [
     {
@@ -1858,19 +1857,19 @@
   {
    "cell_type": "code",
    "execution_count": 17,
-   "id": "ee632477",
+   "id": "099cca8c",
    "metadata": {},
    "outputs": [],
    "source": [
     "df_reshaped = df_reshaped_nat.merge(\n",
     "    df_reshaped_sta,\n",
-    "    on=GEOID_CBG_FIELD_NAME)"
+    "    on=GEOID_FIELD_NAME)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 18,
-   "id": "36905865",
+   "id": "23097787",
    "metadata": {},
    "outputs": [
     {
@@ -2259,7 +2258,7 @@
   },
   {
    "cell_type": "markdown",
-   "id": "4260b002",
+   "id": "403dfbc6",
    "metadata": {},
    "source": [
     "# Next Steps / Questions\n",