Fast flag update (#1844)

Added additional flags for the front end, based on our conversation in stand-up this morning.
This commit is contained in:
Emma Nechamkin 2022-08-19 13:14:44 -04:00 committed by GitHub
parent 1ee26bf30d
commit d892bce6cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 63 additions and 31 deletions

View file

@ -21,17 +21,14 @@ fields:
label: Total categories exceeded label: Total categories exceeded
format: int64 format: int64
- score_name: Definition N (communities) - score_name: Definition N (communities)
label: Identified as disadvantaged without considering neighbors
format: bool
- score_name: Definition N (communities) (based on adjacency index and low income alone)
label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
format: bool
- score_name: Definition M community, including adjacency index tracts
label: Identified as disadvantaged label: Identified as disadvantaged
format: bool format: bool
- score_name: Definition N (communities) (including adjacency index)
label: Identified as disadvantaged (including adjacency index)
format: bool
- score_name: Is the tract surrounded by disadvantaged communities?
label: Is the tract surrounded by disadvantaged communities?
format: bool
- score_name: Meets the less stringent low income criterion for the adjacency index?
label: Meets the less stringent low income criterion for the adjacency index?
format: bool
- score_name: Definition N (communities) (average of neighbors) - score_name: Definition N (communities) (average of neighbors)
label: Share of neighbors that are identified as disadvantaged label: Share of neighbors that are identified as disadvantaged
format: percentage format: percentage
@ -341,3 +338,6 @@ fields:
- score_name: Tract-level redlining score meets or exceeds 3.25 - score_name: Tract-level redlining score meets or exceeds 3.25
label: Tract experienced historic underinvestment label: Tract experienced historic underinvestment
format: bool format: bool
- score_name: Income data has been estimated based on neighbor income
label: Income data has been estimated based on geographic neighbor income
format: bool

View file

@ -25,17 +25,14 @@ sheets:
label: Total categories exceeded label: Total categories exceeded
format: int64 format: int64
- score_name: Definition N (communities) - score_name: Definition N (communities)
label: Identified as disadvantaged without considering neighbors
format: bool
- score_name: Definition N (communities) (based on adjacency index and low income alone)
label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
format: bool
- score_name: Definition M community, including adjacency index tracts
label: Identified as disadvantaged label: Identified as disadvantaged
format: bool format: bool
- score_name: Definition N (communities) (including adjacency index)
label: Identified as disadvantaged (including adjacency index)
format: bool
- score_name: Is the tract surrounded by disadvantaged communities?
label: Is the tract surrounded by disadvantaged communities?
format: bool
- score_name: Meets the less stringent low income criterion for the adjacency index?
label: Meets the less stringent low income criterion for the adjacency index?
format: bool
- score_name: Definition N (communities) (average of neighbors) - score_name: Definition N (communities) (average of neighbors)
label: Share of neighbors that are identified as disadvantaged label: Share of neighbors that are identified as disadvantaged
format: percentage format: percentage
@ -345,3 +342,6 @@ sheets:
- score_name: Tract-level redlining score meets or exceeds 3.25 - score_name: Tract-level redlining score meets or exceeds 3.25
label: Tract experienced historic underinvestment label: Tract experienced historic underinvestment
format: bool format: bool
- score_name: Income data has been estimated based on neighbor income
label: Income data has been estimated based on geographic neighbor income
format: bool

View file

@ -208,9 +208,10 @@ TILES_SCORE_COLUMNS = {
field_names.M_HEALTH: "M_HLTH", field_names.M_HEALTH: "M_HLTH",
# temporarily update this so that it's the Narwhal score that gets visualized on the map # temporarily update this so that it's the Narwhal score that gets visualized on the map
# The NEW final score value INCLUDES the adjacency index. # The NEW final score value INCLUDES the adjacency index.
field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX: "SM_C", field_names.FINAL_SCORE_N_BOOLEAN: "SM_C",
field_names.SCORE_N_COMMUNITIES field_names.SCORE_N_COMMUNITIES
+ field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS", + field_names.ADJACENT_MEAN_SUFFIX: "SM_DON",
field_names.SCORE_N_COMMUNITIES: "SM_NO_DON",
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI", field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI", field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI", field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
@ -313,7 +314,8 @@ TILES_SCORE_COLUMNS = {
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS", + field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET", field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
field_names.AML_BOOLEAN: "AML_ET", field_names.AML_BOOLEAN: "AML_ET",
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET" field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET",
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG"
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
## FPL_200 (there is no higher ed in narwhal) ## FPL_200 (there is no higher ed in narwhal)
} }

View file

@ -471,6 +471,7 @@ class ScoreETL(ExtractTransformLoad):
field_names.AGRICULTURAL_VALUE_BOOL_FIELD, field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.AML_BOOLEAN, field_names.AML_BOOLEAN,
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
] ]
# For some columns, high values are "good", so we want to reverse the percentile # For some columns, high values are "good", so we want to reverse the percentile

View file

@ -521,8 +521,6 @@ class PostScoreETL(ExtractTransformLoad):
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8") score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
def _load_downloadable_zip(self, downloadable_info_path: Path) -> None: def _load_downloadable_zip(self, downloadable_info_path: Path) -> None:
logger.info("Saving Downloadable CSV")
downloadable_info_path.mkdir(parents=True, exist_ok=True) downloadable_info_path.mkdir(parents=True, exist_ok=True)
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH

File diff suppressed because one or more lines are too long

View file

@ -227,6 +227,7 @@ class CensusACSETL(ExtractTransformLoad):
self.COLLEGE_ATTENDANCE_FIELD, self.COLLEGE_ATTENDANCE_FIELD,
self.COLLEGE_NON_ATTENDANCE_FIELD, self.COLLEGE_NON_ATTENDANCE_FIELD,
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD, self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
] ]
+ self.RE_OUTPUT_FIELDS + self.RE_OUTPUT_FIELDS
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS] + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
@ -503,6 +504,13 @@ class CensusACSETL(ExtractTransformLoad):
} }
) )
# We generate a boolean that is TRUE when there is an imputed income but not a baseline income, and FALSE otherwise.
# This allows us to see which tracts have an imputed income.
df[field_names.IMPUTED_INCOME_FLAG_FIELD_NAME] = (
df[field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD].notna()
& df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna()
)
# Strip columns and save results to self. # Strip columns and save results to self.
self.df = df[self.COLUMNS_TO_KEEP] self.df = df[self.COLUMNS_TO_KEEP]

View file

@ -92,12 +92,17 @@ def calculate_income_measures(
) )
# Iterate through the dataframe to impute in place # Iterate through the dataframe to impute in place
## TODO: We should probably convert this to a spatial join now that we are doing >1 imputation and it's taking a lot
## of time, but thinking through how to do this while maintaining the masking will take some time. I think the best
## way would be to (1) spatial join to all neighbors, and then (2) iterate to take the "smallest" set of neighbors...
## but haven't implemented it yet.
for index, row in geo_df.iterrows(): for index, row in geo_df.iterrows():
if row[geoid_field] in tract_list: if row[geoid_field] in tract_list:
neighbor_mask = _get_neighbor_mask(geo_df, row) neighbor_mask = _get_neighbor_mask(geo_df, row)
county_mask = _get_fips_mask( county_mask = _get_fips_mask(
geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field
) )
## TODO: Did CEQ decide to cut this?
state_mask = _get_fips_mask( state_mask = _get_fips_mask(
geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field
) )

View file

@ -1,7 +1,7 @@
# Suffixes # Suffixes
PERCENTILE_FIELD_SUFFIX = " (percentile)" PERCENTILE_FIELD_SUFFIX = " (percentile)"
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas" ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
ADJACENT_MEAN_SUFFIX = " (including adjacency index)" ADJACENT_MEAN_SUFFIX = " (based on adjacency index and low income alone)"
ADJACENCY_INDEX_SUFFIX = " (average of neighbors)" ADJACENCY_INDEX_SUFFIX = " (average of neighbors)"
# Geographic field names # Geographic field names
@ -12,6 +12,9 @@ COUNTY_FIELD = "County Name"
# Score file field names # Score file field names
# Definition M fields # Definition M fields
SCORE_M = "Definition M" SCORE_M = "Definition M"
FINAL_SCORE_N_BOOLEAN = (
"Definition M community, including adjacency index tracts"
)
SCORE_M_COMMUNITIES = "Definition M (communities)" SCORE_M_COMMUNITIES = "Definition M (communities)"
M_CLIMATE = "Climate Factor (Definition M)" M_CLIMATE = "Climate Factor (Definition M)"
M_ENERGY = "Energy Factor (Definition M)" M_ENERGY = "Energy Factor (Definition M)"
@ -67,6 +70,9 @@ ADJUSTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
# this is what gets used in the score # this is what gets used in the score
POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD = "Percent of individuals below 200% Federal Poverty Line, imputed and adjusted" POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD = "Percent of individuals below 200% Federal Poverty Line, imputed and adjusted"
IMPUTED_INCOME_FLAG_FIELD_NAME = (
"Income data has been estimated based on neighbor income"
)
POVERTY_LESS_THAN_150_FPL_FIELD = ( POVERTY_LESS_THAN_150_FPL_FIELD = (
"Percent of individuals < 150% Federal Poverty Line" "Percent of individuals < 150% Federal Poverty Line"
) )

View file

@ -385,8 +385,10 @@ class ScoreNarwhal(Score):
# Kitchen / plumbing # Kitchen / plumbing
self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD] = ( self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD] = (
self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD self.df[
+ field_names.PERCENTILE_FIELD_SUFFIX] field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD >= self.ENVIRONMENTAL_BURDEN_THRESHOLD
) )
@ -971,8 +973,8 @@ class ScoreNarwhal(Score):
>= self.SCORE_THRESHOLD_DONUT >= self.SCORE_THRESHOLD_DONUT
) )
# This should be the "final list" of Score Narwhal communities, meaning that we would # This constructs the boolean for whether it's a donut hole community
# expect this to be True if either the tract is a donut hole community OR the tract is a DAC # This can also be true when the tract itself is a DAC on its own
self.df[ self.df[
field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX
] = ( ] = (
@ -980,6 +982,16 @@ class ScoreNarwhal(Score):
& self.df[field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD] & self.df[field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD]
) )
# This should be the "final list" of Score Narwhal communities, meaning that we would
# expect this to be True if either the tract is a donut hole community OR the tract is a DAC
self.df[field_names.FINAL_SCORE_N_BOOLEAN] = (
self.df[field_names.SCORE_N_COMMUNITIES]
| self.df[
field_names.SCORE_N_COMMUNITIES
+ field_names.ADJACENT_MEAN_SUFFIX
]
)
def add_columns(self) -> pd.DataFrame: def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score Narhwal") logger.info("Adding Score Narhwal")