mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00
parent
d892bce6cf
commit
5c41c95764
14 changed files with 31 additions and 63 deletions
|
@ -21,14 +21,17 @@ fields:
|
||||||
label: Total categories exceeded
|
label: Total categories exceeded
|
||||||
format: int64
|
format: int64
|
||||||
- score_name: Definition N (communities)
|
- score_name: Definition N (communities)
|
||||||
label: Identified as disadvantaged without considering neighbors
|
|
||||||
format: bool
|
|
||||||
- score_name: Definition N (communities) (based on adjacency index and low income alone)
|
|
||||||
label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
|
|
||||||
format: bool
|
|
||||||
- score_name: Definition M community, including adjacency index tracts
|
|
||||||
label: Identified as disadvantaged
|
label: Identified as disadvantaged
|
||||||
format: bool
|
format: bool
|
||||||
|
- score_name: Definition N (communities) (including adjacency index)
|
||||||
|
label: Identified as disadvantaged (including adjacency index)
|
||||||
|
format: bool
|
||||||
|
- score_name: Is the tract surrounded by disadvantaged communities?
|
||||||
|
label: Is the tract surrounded by disadvantaged communities?
|
||||||
|
format: bool
|
||||||
|
- score_name: Meets the less stringent low income criterion for the adjacency index?
|
||||||
|
label: Meets the less stringent low income criterion for the adjacency index?
|
||||||
|
format: bool
|
||||||
- score_name: Definition N (communities) (average of neighbors)
|
- score_name: Definition N (communities) (average of neighbors)
|
||||||
label: Share of neighbors that are identified as disadvantaged
|
label: Share of neighbors that are identified as disadvantaged
|
||||||
format: percentage
|
format: percentage
|
||||||
|
@ -338,6 +341,3 @@ fields:
|
||||||
- score_name: Tract-level redlining score meets or exceeds 3.25
|
- score_name: Tract-level redlining score meets or exceeds 3.25
|
||||||
label: Tract experienced historic underinvestment
|
label: Tract experienced historic underinvestment
|
||||||
format: bool
|
format: bool
|
||||||
- score_name: Income data has been estimated based on neighbor income
|
|
||||||
label: Income data has been estimated based on geographic neighbor income
|
|
||||||
format: bool
|
|
||||||
|
|
|
@ -25,14 +25,17 @@ sheets:
|
||||||
label: Total categories exceeded
|
label: Total categories exceeded
|
||||||
format: int64
|
format: int64
|
||||||
- score_name: Definition N (communities)
|
- score_name: Definition N (communities)
|
||||||
label: Identified as disadvantaged without considering neighbors
|
|
||||||
format: bool
|
|
||||||
- score_name: Definition N (communities) (based on adjacency index and low income alone)
|
|
||||||
label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
|
|
||||||
format: bool
|
|
||||||
- score_name: Definition M community, including adjacency index tracts
|
|
||||||
label: Identified as disadvantaged
|
label: Identified as disadvantaged
|
||||||
format: bool
|
format: bool
|
||||||
|
- score_name: Definition N (communities) (including adjacency index)
|
||||||
|
label: Identified as disadvantaged (including adjacency index)
|
||||||
|
format: bool
|
||||||
|
- score_name: Is the tract surrounded by disadvantaged communities?
|
||||||
|
label: Is the tract surrounded by disadvantaged communities?
|
||||||
|
format: bool
|
||||||
|
- score_name: Meets the less stringent low income criterion for the adjacency index?
|
||||||
|
label: Meets the less stringent low income criterion for the adjacency index?
|
||||||
|
format: bool
|
||||||
- score_name: Definition N (communities) (average of neighbors)
|
- score_name: Definition N (communities) (average of neighbors)
|
||||||
label: Share of neighbors that are identified as disadvantaged
|
label: Share of neighbors that are identified as disadvantaged
|
||||||
format: percentage
|
format: percentage
|
||||||
|
@ -342,6 +345,3 @@ sheets:
|
||||||
- score_name: Tract-level redlining score meets or exceeds 3.25
|
- score_name: Tract-level redlining score meets or exceeds 3.25
|
||||||
label: Tract experienced historic underinvestment
|
label: Tract experienced historic underinvestment
|
||||||
format: bool
|
format: bool
|
||||||
- score_name: Income data has been estimated based on neighbor income
|
|
||||||
label: Income data has been estimated based on geographic neighbor income
|
|
||||||
format: bool
|
|
||||||
|
|
|
@ -208,10 +208,9 @@ TILES_SCORE_COLUMNS = {
|
||||||
field_names.M_HEALTH: "M_HLTH",
|
field_names.M_HEALTH: "M_HLTH",
|
||||||
# temporarily update this so that it's the Narwhal score that gets visualized on the map
|
# temporarily update this so that it's the Narwhal score that gets visualized on the map
|
||||||
# The NEW final score value INCLUDES the adjacency index.
|
# The NEW final score value INCLUDES the adjacency index.
|
||||||
field_names.FINAL_SCORE_N_BOOLEAN: "SM_C",
|
field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX: "SM_C",
|
||||||
field_names.SCORE_N_COMMUNITIES
|
field_names.SCORE_N_COMMUNITIES
|
||||||
+ field_names.ADJACENT_MEAN_SUFFIX: "SM_DON",
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "SM_PFS",
|
||||||
field_names.SCORE_N_COMMUNITIES: "SM_NO_DON",
|
|
||||||
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
|
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EPLRLI",
|
||||||
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
|
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EALRLI",
|
||||||
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
|
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_LOW_HIGHER_ED_FIELD: "EBLRLI",
|
||||||
|
@ -314,8 +313,7 @@ TILES_SCORE_COLUMNS = {
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
|
+ field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS",
|
||||||
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
|
field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
|
||||||
field_names.AML_BOOLEAN: "AML_ET",
|
field_names.AML_BOOLEAN: "AML_ET",
|
||||||
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET",
|
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET"
|
||||||
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG"
|
|
||||||
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
|
||||||
## FPL_200 (there is no higher ed in narwhal)
|
## FPL_200 (there is no higher ed in narwhal)
|
||||||
}
|
}
|
||||||
|
|
|
@ -471,7 +471,6 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
|
||||||
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
|
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
|
||||||
field_names.AML_BOOLEAN,
|
field_names.AML_BOOLEAN,
|
||||||
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# For some columns, high values are "good", so we want to reverse the percentile
|
# For some columns, high values are "good", so we want to reverse the percentile
|
||||||
|
|
|
@ -521,6 +521,8 @@ class PostScoreETL(ExtractTransformLoad):
|
||||||
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
|
score_tiles_df.to_csv(tile_score_path, index=False, encoding="utf-8")
|
||||||
|
|
||||||
def _load_downloadable_zip(self, downloadable_info_path: Path) -> None:
|
def _load_downloadable_zip(self, downloadable_info_path: Path) -> None:
|
||||||
|
logger.info("Saving Downloadable CSV")
|
||||||
|
|
||||||
downloadable_info_path.mkdir(parents=True, exist_ok=True)
|
downloadable_info_path.mkdir(parents=True, exist_ok=True)
|
||||||
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
|
csv_path = constants.SCORE_DOWNLOADABLE_CSV_FILE_PATH
|
||||||
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
|
excel_path = constants.SCORE_DOWNLOADABLE_EXCEL_FILE_PATH
|
||||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -227,7 +227,6 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
self.COLLEGE_ATTENDANCE_FIELD,
|
self.COLLEGE_ATTENDANCE_FIELD,
|
||||||
self.COLLEGE_NON_ATTENDANCE_FIELD,
|
self.COLLEGE_NON_ATTENDANCE_FIELD,
|
||||||
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
|
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
|
||||||
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
|
|
||||||
]
|
]
|
||||||
+ self.RE_OUTPUT_FIELDS
|
+ self.RE_OUTPUT_FIELDS
|
||||||
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
|
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
|
||||||
|
@ -504,13 +503,6 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# We generate a boolean that is TRUE when there is an imputed income but not a baseline income, and FALSE otherwise.
|
|
||||||
# This allows us to see which tracts have an imputed income.
|
|
||||||
df[field_names.IMPUTED_INCOME_FLAG_FIELD_NAME] = (
|
|
||||||
df[field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD].notna()
|
|
||||||
& df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna()
|
|
||||||
)
|
|
||||||
|
|
||||||
# Strip columns and save results to self.
|
# Strip columns and save results to self.
|
||||||
self.df = df[self.COLUMNS_TO_KEEP]
|
self.df = df[self.COLUMNS_TO_KEEP]
|
||||||
|
|
||||||
|
|
|
@ -92,17 +92,12 @@ def calculate_income_measures(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Iterate through the dataframe to impute in place
|
# Iterate through the dataframe to impute in place
|
||||||
## TODO: We should probably convert this to a spatial join now that we are doing >1 imputation and it's taking a lot
|
|
||||||
## of time, but thinking through how to do this while maintaining the masking will take some time. I think the best
|
|
||||||
## way would be to (1) spatial join to all neighbors, and then (2) iterate to take the "smallest" set of neighbors...
|
|
||||||
## but haven't implemented it yet.
|
|
||||||
for index, row in geo_df.iterrows():
|
for index, row in geo_df.iterrows():
|
||||||
if row[geoid_field] in tract_list:
|
if row[geoid_field] in tract_list:
|
||||||
neighbor_mask = _get_neighbor_mask(geo_df, row)
|
neighbor_mask = _get_neighbor_mask(geo_df, row)
|
||||||
county_mask = _get_fips_mask(
|
county_mask = _get_fips_mask(
|
||||||
geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field
|
geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field
|
||||||
)
|
)
|
||||||
## TODO: Did CEQ decide to cut this?
|
|
||||||
state_mask = _get_fips_mask(
|
state_mask = _get_fips_mask(
|
||||||
geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field
|
geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# Suffixes
|
# Suffixes
|
||||||
PERCENTILE_FIELD_SUFFIX = " (percentile)"
|
PERCENTILE_FIELD_SUFFIX = " (percentile)"
|
||||||
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
|
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
|
||||||
ADJACENT_MEAN_SUFFIX = " (based on adjacency index and low income alone)"
|
ADJACENT_MEAN_SUFFIX = " (including adjacency index)"
|
||||||
ADJACENCY_INDEX_SUFFIX = " (average of neighbors)"
|
ADJACENCY_INDEX_SUFFIX = " (average of neighbors)"
|
||||||
|
|
||||||
# Geographic field names
|
# Geographic field names
|
||||||
|
@ -12,9 +12,6 @@ COUNTY_FIELD = "County Name"
|
||||||
# Score file field names
|
# Score file field names
|
||||||
# Definition M fields
|
# Definition M fields
|
||||||
SCORE_M = "Definition M"
|
SCORE_M = "Definition M"
|
||||||
FINAL_SCORE_N_BOOLEAN = (
|
|
||||||
"Definition M community, including adjacency index tracts"
|
|
||||||
)
|
|
||||||
SCORE_M_COMMUNITIES = "Definition M (communities)"
|
SCORE_M_COMMUNITIES = "Definition M (communities)"
|
||||||
M_CLIMATE = "Climate Factor (Definition M)"
|
M_CLIMATE = "Climate Factor (Definition M)"
|
||||||
M_ENERGY = "Energy Factor (Definition M)"
|
M_ENERGY = "Energy Factor (Definition M)"
|
||||||
|
@ -70,9 +67,6 @@ ADJUSTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
|
||||||
|
|
||||||
# this is what gets used in the score
|
# this is what gets used in the score
|
||||||
POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD = "Percent of individuals below 200% Federal Poverty Line, imputed and adjusted"
|
POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD = "Percent of individuals below 200% Federal Poverty Line, imputed and adjusted"
|
||||||
IMPUTED_INCOME_FLAG_FIELD_NAME = (
|
|
||||||
"Income data has been estimated based on neighbor income"
|
|
||||||
)
|
|
||||||
POVERTY_LESS_THAN_150_FPL_FIELD = (
|
POVERTY_LESS_THAN_150_FPL_FIELD = (
|
||||||
"Percent of individuals < 150% Federal Poverty Line"
|
"Percent of individuals < 150% Federal Poverty Line"
|
||||||
)
|
)
|
||||||
|
|
|
@ -385,10 +385,8 @@ class ScoreNarwhal(Score):
|
||||||
|
|
||||||
# Kitchen / plumbing
|
# Kitchen / plumbing
|
||||||
self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD] = (
|
self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_PCTILE_THRESHOLD] = (
|
||||||
self.df[
|
self.df[field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD
|
||||||
field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD
|
+ field_names.PERCENTILE_FIELD_SUFFIX]
|
||||||
+ field_names.PERCENTILE_FIELD_SUFFIX
|
|
||||||
]
|
|
||||||
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
>= self.ENVIRONMENTAL_BURDEN_THRESHOLD
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -973,8 +971,8 @@ class ScoreNarwhal(Score):
|
||||||
>= self.SCORE_THRESHOLD_DONUT
|
>= self.SCORE_THRESHOLD_DONUT
|
||||||
)
|
)
|
||||||
|
|
||||||
# This constructs the boolean for whether it's a donut hole community
|
# This should be the "final list" of Score Narwhal communities, meaning that we would
|
||||||
# This can also be true when the tract itself is a DAC on its own
|
# expect this to be True if either the tract is a donut hole community OR the tract is a DAC
|
||||||
self.df[
|
self.df[
|
||||||
field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX
|
field_names.SCORE_N_COMMUNITIES + field_names.ADJACENT_MEAN_SUFFIX
|
||||||
] = (
|
] = (
|
||||||
|
@ -982,16 +980,6 @@ class ScoreNarwhal(Score):
|
||||||
& self.df[field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD]
|
& self.df[field_names.ADJACENT_TRACT_SCORE_ABOVE_DONUT_THRESHOLD]
|
||||||
)
|
)
|
||||||
|
|
||||||
# This should be the "final list" of Score Narwhal communities, meaning that we would
|
|
||||||
# expect this to be True if either the tract is a donut hole community OR the tract is a DAC
|
|
||||||
self.df[field_names.FINAL_SCORE_N_BOOLEAN] = (
|
|
||||||
self.df[field_names.SCORE_N_COMMUNITIES]
|
|
||||||
| self.df[
|
|
||||||
field_names.SCORE_N_COMMUNITIES
|
|
||||||
+ field_names.ADJACENT_MEAN_SUFFIX
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
def add_columns(self) -> pd.DataFrame:
|
def add_columns(self) -> pd.DataFrame:
|
||||||
logger.info("Adding Score Narhwal")
|
logger.info("Adding Score Narhwal")
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue