adding median income field and running black

2025-09-19 16:48:07 -07:00 · 2021-08-09 20:47:51 -05:00 · 2021-08-09 20:47:51 -05:00 · 4ae7eff4c4
commit 4ae7eff4c4
parent 6c986adfe4
6 changed files with 33 additions and 10 deletions
--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
@@ -22,6 +22,8 @@ class CensusACSETL(ExtractTransformLoad):
            "C16002_010E",
            "C16002_013E",
        ]
+        self.MEDIAN_INCOME_FIELD = "B19013_001E"
+        self.MEDIAN_INCOME_FIELD_NAME = "Median household income in the past 12 months"
        self.df: pd.DataFrame

    def _fips_from_censusdata_censusgeo(self, censusgeo: censusdata.censusgeo) -> str:
@@ -45,6 +47,7 @@ class CensusACSETL(ExtractTransformLoad):
                        # Emploment fields
                        "B23025_005E",
                        "B23025_003E",
+                        self.MEDIAN_INCOME_FIELD,
                    ]
                    + self.LINGUISTIC_ISOLATION_FIELDS,
                )
@@ -59,6 +62,9 @@ class CensusACSETL(ExtractTransformLoad):
    def transform(self) -> None:
        logger.info("Starting Census ACS Transform")

+        # Rename median income
+        self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[self.MEDIAN_INCOME_FIELD]
+
        # Calculate percent unemployment.
        # TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
        self.df[self.UNEMPLOYED_FIELD_NAME] = self.df.B23025_005E / self.df.B23025_003E
@@ -91,6 +97,7 @@ class CensusACSETL(ExtractTransformLoad):
            self.GEOID_FIELD_NAME,
            self.UNEMPLOYED_FIELD_NAME,
            self.LINGUISTIC_ISOLATION_FIELD_NAME,
+            self.MEDIAN_INCOME_FIELD_NAME,
        ]

        self.df[columns_to_include].to_csv(
--- a/data/data-pipeline/data_pipeline/etl/sources/ejscreen/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/ejscreen/etl.py
@@ -18,7 +18,8 @@ class EJScreenETL(ExtractTransformLoad):
    def extract(self) -> None:
        logger.info("Downloading EJScreen Data")
        super().extract(
-            self.EJSCREEN_FTP_URL, self.TMP_PATH,
+            self.EJSCREEN_FTP_URL,
+            self.TMP_PATH,
        )

    def transform(self) -> None:
--- a/data/data-pipeline/data_pipeline/etl/sources/hud_housing/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/hud_housing/etl.py
@@ -34,7 +34,8 @@ class HudHousingETL(ExtractTransformLoad):
    def extract(self) -> None:
        logger.info("Extracting HUD Housing Data")
        super().extract(
-            self.HOUSING_FTP_URL, self.HOUSING_ZIP_FILE_DIR,
+            self.HOUSING_FTP_URL,
+            self.HOUSING_ZIP_FILE_DIR,
        )

    def transform(self) -> None:
@@ -48,7 +49,10 @@ class HudHousingETL(ExtractTransformLoad):
            / "140"
            / "Table8.csv"
        )
-        self.df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path, encoding="latin-1",)
+        self.df = pd.read_csv(
+            filepath_or_buffer=tmp_csv_file_path,
+            encoding="latin-1",
+        )

        # Rename and reformat block group ID
        self.df.rename(columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True)
--- a/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/tree_equity_score/etl.py
@@ -70,7 +70,8 @@ class TreeEquityScoreETL(ExtractTransformLoad):
        logger.info("Downloading Tree Equity Score Data")
        for state in self.states:
            super().extract(
-                f"{self.TES_URL}{state}.zip.zip", f"{self.TMP_PATH}/{state}",
+                f"{self.TES_URL}{state}.zip.zip",
+                f"{self.TMP_PATH}/{state}",
            )

    def transform(self) -> None: