Download column order completed (#1077)

* Download column order completed
* Kameron changes
* Lucas and Beth column order changes
* cdc_places update
* passing score
* pandas error
* checkpoint
* score passing
* rounding complete - percentages still showing one decimal
* fixing tests
* fixing percentages
* updating comment
* int percentages! 🎉🎉
* forgot to pass back to df
* passing tests

Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
2025-07-28 15:41:38 -07:00 · 2022-01-13 15:04:16 -05:00 · 2022-01-13 15:04:16 -05:00 · d686bb856e
commit d686bb856e
parent 98ff4bd9d8
13 changed files with 232 additions and 133 deletions
--- a/data/data-pipeline/data_pipeline/etl/sources/cdc_places/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/cdc_places/etl.py
@@ -2,6 +2,7 @@ import pandas as pd

 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.utils import get_module_logger, download_file_from_url
+from data_pipeline.score import field_names

 logger = get_module_logger(__name__)

@@ -49,6 +50,20 @@ class CDCPlacesETL(ExtractTransformLoad):
            values=self.CDC_VALUE_FIELD_NAME,
        )

+        # rename columns to be used in score
+        rename_fields = {
+            "Current asthma among adults aged >=18 years": field_names.ASTHMA_FIELD,
+            "Coronary heart disease among adults aged >=18 years": field_names.HEART_DISEASE_FIELD,
+            "Cancer (excluding skin cancer) among adults aged >=18 years": field_names.CANCER_FIELD,
+            "Diagnosed diabetes among adults aged >=18 years": field_names.DIABETES_FIELD,
+            "Physical health not good for >=14 days among adults aged >=18 years": field_names.PHYS_HEALTH_NOT_GOOD_FIELD,
+        }
+        self.df.rename(
+            columns=rename_fields,
+            inplace=True,
+            errors="raise",
+        )
+
        # Make the index (the census tract ID) a column, not the index.
        self.df.reset_index(inplace=True)