Updating higher education to be reversed (#1387)

Summary: In this PR, we create a new variable that expresses the percentage of college students as its complement, the percentage of residents who are not college students. This allows the front end to display the percentage of residents who are not college students.

The old variables are still included so that this change will not break the front end.
This commit is contained in:
Emma Nechamkin 2022-03-15 16:43:32 -04:00 committed by GitHub
parent 2279a04c94
commit e7c7c0abeb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 30 additions and 5 deletions

View file

@ -248,3 +248,6 @@ fields:
- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
format: bool
- score_name: Percent of population not currently enrolled in college or graduate school
label: Percent of residents who are not currently enrolled in higher ed
format: percentage

View file

@ -238,6 +238,9 @@ sheets:
- score_name: Percent individuals age 25 or over with less than high school degree
label: Percent individuals age 25 or over with less than high school degree
format: percentage
- score_name: Percent of population not currently enrolled in college or graduate school
label: Percent of residents who are not currently enrolled in higher ed
format: percentage
- score_name: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
label: Unemployment (percent) in 2009 (island areas) and 2010 (states and PR)
format: percentage

View file

@ -1,6 +1,5 @@
from pathlib import Path
import datetime
from data_pipeline.config import settings
from data_pipeline.score import field_names
@ -205,6 +204,8 @@ TILES_SCORE_COLUMNS = {
# Percentage of HS Degree completion for Islands
field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009: "IAHSEF",
field_names.COLLEGE_ATTENDANCE_FIELD: "CA",
field_names.COLLEGE_NON_ATTENDANCE_FIELD: "NCA",
# This is logically equivalent to "non-college greater than 80%"
field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD: "CA_LT20",
field_names.LOW_INCOME_THRESHOLD: "FPL200S",
# Booleans for the front end about the types of thresholds exceeded
@ -270,5 +271,6 @@ TILES_SCORE_FLOAT_COLUMNS = [
field_names.ISLAND_AREAS_LOW_HS_EDUCATION_FIELD,
field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.SCORE_M + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
field_names.COLLEGE_ATTENDANCE_FIELD,
]

View file

@ -443,6 +443,7 @@ class ScoreETL(ExtractTransformLoad):
field_names.UNEMPLOYMENT_FIELD,
field_names.MEDIAN_HOUSE_VALUE_FIELD,
field_names.COLLEGE_ATTENDANCE_FIELD,
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD,
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD,
field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD,

File diff suppressed because one or more lines are too long

View file

@ -136,6 +136,8 @@ class CensusACSETL(ExtractTransformLoad):
"Percent enrollment in college or graduate school"
)
self.COLLEGE_NON_ATTENDANCE_FIELD = "Percent of population not currently enrolled in college or graduate school"
self.RE_FIELDS = [
"B02001_001E",
"B02001_002E",
@ -190,6 +192,7 @@ class CensusACSETL(ExtractTransformLoad):
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
self.HIGH_SCHOOL_ED_FIELD,
self.COLLEGE_ATTENDANCE_FIELD,
self.COLLEGE_NON_ATTENDANCE_FIELD,
]
+ self.RE_OUTPUT_FIELDS
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
@ -354,6 +357,10 @@ class CensusACSETL(ExtractTransformLoad):
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
df[self.COLLEGE_NON_ATTENDANCE_FIELD] = (
1 - df[self.COLLEGE_ATTENDANCE_FIELD]
)
# strip columns
df = df[self.COLUMNS_TO_KEEP]

View file

@ -112,6 +112,9 @@ MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD = (
PERSISTENT_POVERTY_FIELD = "Persistent Poverty Census Tract"
AMI_FIELD = "Area Median Income (State or metropolitan)"
COLLEGE_ATTENDANCE_FIELD = "Percent enrollment in college or graduate school"
COLLEGE_NON_ATTENDANCE_FIELD = (
"Percent of population not currently enrolled in college or graduate school"
)
MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD = (
"Median household income as a percent of area median income"
)

View file

@ -797,9 +797,15 @@ class ScoreM(Score):
>= self.LOW_INCOME_THRESHOLD
)
# Because we are moving this variable to be in the same direction as all
# other variables, we change this to be < rather than <=. This translates
# to "Strictly greater than 80% of residents are not college students",
# rather than "80% or more of residents are not college students."
# There are two tracts that are impacted by this (that is, they have exactly
# 20% college students) -- neither of these has been a DAC under any score.
self.df[field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD] = (
self.df[field_names.COLLEGE_ATTENDANCE_FIELD]
<= self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
< self.MAX_COLLEGE_ATTENDANCE_THRESHOLD
)
self.df[