Initial refactor for Score ETL (#618)

* WIP refactor * Extract score calculations into their own methods * Do all initial df prep in single method * Fix error in docs for running etl for single dataset * WIP understanding HUD and linguistic isolation data * Add comments from initial group review on PR Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
2025-07-27 19:31:16 -07:00 · 2021-09-10 10:34:34 -04:00 · 2021-09-10 10:34:34 -04:00 · ac62933d16
commit ac62933d16
parent 470c474367
4 changed files with 200 additions and 141 deletions
--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
@@ -21,11 +21,11 @@ class CensusACSETL(ExtractTransformLoad):
            "Linguistic isolation (total)"
        )
        self.LINGUISTIC_ISOLATION_FIELDS = [
-            "C16002_001E",
-            "C16002_004E",
-            "C16002_007E",
-            "C16002_010E",
-            "C16002_013E",
+            "C16002_001E", # Estimate!!Total
+            "C16002_004E", # Estimate!!Total!!Spanish!!Limited English speaking household
+            "C16002_007E", # Estimate!!Total!!Other Indo-European languages!!Limited English speaking household
+            "C16002_010E", # Estimate!!Total!!Asian and Pacific Island languages!!Limited English speaking household
+            "C16002_013E", # Estimate!!Total!!Other languages!!Limited English speaking household
        ]
        self.MEDIAN_INCOME_FIELD = "B19013_001E"
        self.MEDIAN_INCOME_FIELD_NAME = (