Merge branch 'emma-nechamkin/release/score-narwhal' of github.com:usds/justice40-tool into emma-nechamkin/release/score-narwhal

This commit is contained in:
Emma Nechamkin 2022-08-31 14:29:45 -04:00
commit b7af13b2a6
5 changed files with 15 additions and 4802 deletions

View file

@ -394,4 +394,10 @@ TILES_SCORE_FLOAT_COLUMNS = [
field_names.PERCENT_AGE_UNDER_10, field_names.PERCENT_AGE_UNDER_10,
field_names.PERCENT_AGE_10_TO_64, field_names.PERCENT_AGE_10_TO_64,
field_names.PERCENT_AGE_OVER_64, field_names.PERCENT_AGE_OVER_64,
# GeoJSON cannot support nulls in a boolean column when we create tiles;
# to preserve the null values, we coerce to float every field
# that uses null to signify missing information in a boolean field.
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.AML_BOOLEAN,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED
] ]

View file

@ -494,6 +494,7 @@ class ScoreETL(ExtractTransformLoad):
# For some columns, high values are "good", so we want to reverse the percentile # For some columns, high values are "good", so we want to reverse the percentile
# so that high values are "bad" and any scoring logic can still check if it's # so that high values are "bad" and any scoring logic can still check if it's
# >= some threshold. # >= some threshold.
# Note that we must use a dataclass here instead of a namedtuple on account of pylint.
# TODO: Add more fields here. # TODO: Add more fields here.
# https://github.com/usds/justice40-tool/issues/970 # https://github.com/usds/justice40-tool/issues/970
@dataclass @dataclass

View file

@ -96,7 +96,9 @@ class GeoScoreETL(ExtractTransformLoad):
logger.info("Reading score CSV") logger.info("Reading score CSV")
self.score_usa_df = pd.read_csv( self.score_usa_df = pd.read_csv(
self.TILE_SCORE_CSV, self.TILE_SCORE_CSV,
dtype={self.TRACT_SHORT_FIELD: "string"}, dtype={
self.TRACT_SHORT_FIELD: str,
},
low_memory=False, low_memory=False,
) )
@ -136,7 +138,7 @@ class GeoScoreETL(ExtractTransformLoad):
columns={self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO} columns={self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO}
) )
logger.info("Converting to geojson into tracts") logger.info("Converting geojson into geodf with tracts")
usa_tracts = gpd.GeoDataFrame( usa_tracts = gpd.GeoDataFrame(
usa_tracts, usa_tracts,
columns=[ columns=[
@ -272,8 +274,10 @@ class GeoScoreETL(ExtractTransformLoad):
# Create separate threads to run each write to disk. # Create separate threads to run each write to disk.
def write_high_to_file(): def write_high_to_file():
logger.info("Writing usa-high (~9 minutes)") logger.info("Writing usa-high (~9 minutes)")
self.geojson_score_usa_high.to_file( self.geojson_score_usa_high.to_file(
filename=self.SCORE_HIGH_GEOJSON, driver="GeoJSON" filename=self.SCORE_HIGH_GEOJSON,
driver="GeoJSON",
) )
logger.info("Completed writing usa-high") logger.info("Completed writing usa-high")
@ -375,7 +379,7 @@ class GeoScoreETL(ExtractTransformLoad):
for task in [ for task in [
write_high_to_file, write_high_to_file,
write_low_to_file, write_low_to_file,
# write_esri_shapefile, write_esri_shapefile,
] ]
} }

File diff suppressed because one or more lines are too long