Fast flag update (#1844)

Added additional flags for the front end based on our conversation in stand-up this morning.
This commit is contained in:
Emma Nechamkin 2022-08-19 13:14:44 -04:00 committed by GitHub
commit d892bce6cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 63 additions and 31 deletions

View file

@ -227,6 +227,7 @@ class CensusACSETL(ExtractTransformLoad):
self.COLLEGE_ATTENDANCE_FIELD,
self.COLLEGE_NON_ATTENDANCE_FIELD,
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
]
+ self.RE_OUTPUT_FIELDS
+ [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS]
@ -503,6 +504,13 @@ class CensusACSETL(ExtractTransformLoad):
}
)
# We generate a boolean that is TRUE when there is an imputed income but not a baseline income, and FALSE otherwise.
# This allows us to see which tracts have an imputed income.
df[field_names.IMPUTED_INCOME_FLAG_FIELD_NAME] = (
df[field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD].notna()
& df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna()
)
# Strip columns and save results to self.
self.df = df[self.COLUMNS_TO_KEEP]

View file

@ -92,12 +92,17 @@ def calculate_income_measures(
)
# Iterate through the dataframe to impute in place
## TODO: We should probably convert this to a spatial join, now that we are doing more than one imputation and this
## iteration is taking a lot of time. Working out how to do that while maintaining the masking will take some thought.
## I think the best way would be to (1) spatial join to all neighbors, and then (2) iterate to take the "smallest" set
## of neighbors, but this has not been implemented yet.
for index, row in geo_df.iterrows():
if row[geoid_field] in tract_list:
neighbor_mask = _get_neighbor_mask(geo_df, row)
county_mask = _get_fips_mask(
geo_df=geo_df, row=row, fips_digits=5, geoid_field=geoid_field
)
## TODO: Did CEQ decide to cut this?
state_mask = _get_fips_mask(
geo_df=geo_df, row=row, fips_digits=2, geoid_field=geoid_field
)