Update etl constants to use score field_names and put strings around tract IDs in downloadable CSV (#985)

* Update etl constants to use score field_names. Put strings around tract IDs in downloadable CSV. No need to modify the xls file creation because the string type is preserved and interpreted correctly in Excel already. One note is that this does cause the ID in the CSV to have quotes around it, which might be annoying. Maybe we don't want this behavior? * Update based on PR feedback and lint needs. * Change field we're using in downloadable CSV. This reverts the downloadable CSV field list to use MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD instead of MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD in order to get the test to pass. The point of this PR is a refactor (and a small change to the CSV quotations), not to change the output. That will be a different PR later. Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
2025-09-15 02:08:18 -07:00 · 2021-12-06 13:17:17 -05:00 · 2021-12-06 13:17:17 -05:00 · 819f3ff478
commit 819f3ff478
parent bbc4a4dec0
4 changed files with 101 additions and 85 deletions
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@ -1,6 +1,12 @@
 # Suffixes
 PERCENTILE_FIELD_SUFFIX = " (percentile)"
 MIN_MAX_FIELD_SUFFIX = " (min-max normalized)"
+TOP_25_PERCENTILE_SUFFIX = " (top 25th percentile)"
+
+# Geographic field names
+GEOID_TRACT_FIELD = "GEOID10_TRACT"
+STATE_FIELD = "State Name"
+COUNTY_FIELD = "County Name"

 # Score file field names
 SCORE_A = "Score A"
@ -21,6 +27,7 @@ SCORE_I = "Score I"
 SCORE_I_COMMUNITIES = "Score I (communities)"
 SCORE_K = "NMTC (communities)"
 SCORE_K_COMMUNITIES = "Score K (communities)"
+SCORE_L = "Definition L"
 SCORE_L_COMMUNITIES = "Definition L (communities)"
 L_CLIMATE = "Climate Factor (Definition L)"
 L_ENERGY = "Energy Factor (Definition L)"
@ -45,7 +52,6 @@ POVERTY_LESS_THAN_150_FPL_FIELD = (
 POVERTY_LESS_THAN_100_FPL_FIELD = (
    "Percent of individuals < 100% Federal Poverty Line"
 )
-MEDIAN_INCOME_PERCENT_AMI_FIELD = "Median household income (% of AMI)"
 STATE_MEDIAN_INCOME_FIELD = (
    "Median household income (State; 2019 inflation-adjusted dollars)"
 )
--- a/data/data-pipeline/data_pipeline/score/score_l.py
+++ b/data/data-pipeline/data_pipeline/score/score_l.py
@ -528,7 +528,7 @@ class ScoreL(Score):

        median_income_threshold = (
            self.df[
-                field_names.MEDIAN_INCOME_PERCENT_AMI_FIELD
+                field_names.MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD
                + field_names.PERCENTILE_FIELD_SUFFIX
            ]
            # Note: a high median income as a % of AMI is good, so take 1 minus the threshold to invert it.