Issue 1910: Do not impute income for 0 population tracts (#1918)

* should be working, has unnecessary loggers
* removing loggers and cleaning up
* updating ejscreen tests
* adding tests and responding to PR feedback
* fixing broken smoke test
* delete smoketest docs
2025-09-30 08:33:18 -07:00 · 2022-09-26 11:00:21 -04:00 · 2022-09-26 11:00:21 -04:00 · 9fb9874a15
commit 9fb9874a15
parent 9e85375d9b
13 changed files with 150 additions and 75 deletions
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -5,6 +5,7 @@ import numpy as np
 import pandas as pd

 from data_pipeline.etl.base import ExtractTransformLoad
+from data_pipeline.etl.sources.census_acs.etl import CensusACSETL
 from data_pipeline.etl.sources.national_risk_index.etl import (
    NationalRiskIndexETL,
 )
@@ -35,7 +36,7 @@ class ScoreETL(ExtractTransformLoad):
        # dataframes
        self.df: pd.DataFrame
        self.ejscreen_df: pd.DataFrame
-        self.census_df: pd.DataFrame
+        self.census_acs_df: pd.DataFrame
        self.hud_housing_df: pd.DataFrame
        self.cdc_places_df: pd.DataFrame
        self.census_acs_median_incomes_df: pd.DataFrame
@@ -67,14 +68,7 @@ class ScoreETL(ExtractTransformLoad):
        )

        # Load census data
-        census_csv = (
-            constants.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
-        )
-        self.census_df = pd.read_csv(
-            census_csv,
-            dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
-            low_memory=False,
-        )
+        self.census_acs_df = CensusACSETL.get_data_frame()

        # Load HUD housing data
        hud_housing_csv = (
@@ -346,7 +340,7 @@ class ScoreETL(ExtractTransformLoad):

        # Join all the data sources that use census tracts
        census_tract_dfs = [
-            self.census_df,
+            self.census_acs_df,
            self.hud_housing_df,
            self.cdc_places_df,
            self.cdc_life_expectancy_df,
@@ -364,7 +358,7 @@ class ScoreETL(ExtractTransformLoad):
            self.nature_deprived_df,
            self.eamlis_df,
            self.fuds_df,
-            self.tribal_overlap_df
+            self.tribal_overlap_df,
        ]

        # Sanity check each data frame before merging.