Mirror of https://github.com/DOI-DO/j40-cejst-2.git (synced 2025-02-23 10:04:18 -08:00)
Merge branch 'emma-nechamkin/release/score-narwhal' of github.com:usds/justice40-tool into emma-nechamkin/release/score-narwhal

Commit b7af13b2a6
5 changed files with 15 additions and 4802 deletions
@@ -394,4 +394,10 @@ TILES_SCORE_FLOAT_COLUMNS = [
     field_names.PERCENT_AGE_UNDER_10,
     field_names.PERCENT_AGE_10_TO_64,
     field_names.PERCENT_AGE_OVER_64,
+    # Geojson cannot support nulls in a boolean column when we create tiles;
+    # to preserve null character, we coerce to floats for all fields
+    # that use null to signify missing information in a boolean field.
+    field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
+    field_names.AML_BOOLEAN,
+    field_names.HISTORIC_REDLINING_SCORE_EXCEEDED
 ]
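The comment added in this hunk explains why nullable boolean flags are shipped as float tile columns. A minimal sketch of that coercion, using a hypothetical column name rather than the real `field_names` constants:

```python
import pandas as pd

# Hypothetical nullable boolean column, standing in for flags such as
# ELIGIBLE_FUDS_BINARY_FIELD_NAME, AML_BOOLEAN, or the historic
# redlining flag added above.
df = pd.DataFrame({"eligible_fuds": [True, False, None, True]})

# GeoJSON tiles cannot represent null in a boolean column, so the column
# is coerced to float: True -> 1.0, False -> 0.0, missing -> NaN, which
# serializes as null and preserves the "unknown" state.
df["eligible_fuds"] = df["eligible_fuds"].astype(float)
print(df["eligible_fuds"].tolist())  # [1.0, 0.0, nan, 1.0]
```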
@@ -494,6 +494,7 @@ class ScoreETL(ExtractTransformLoad):
     # For some columns, high values are "good", so we want to reverse the percentile
     # so that high values are "bad" and any scoring logic can still check if it's
     # >= some threshold.
+    # Note that we must use dataclass here instead of namedtuples on account of pylint
     # TODO: Add more fields here.
     # https://github.com/usds/justice40-tool/issues/970
     @dataclass
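The context lines here describe reversing percentiles so that a single `>= threshold` check works for every column. A small illustration of the idea, with made-up values and without the ETL's actual ranking options:

```python
import pandas as pd

# Hypothetical column where a high raw value is "good" (e.g. income).
values = pd.Series([20_000, 45_000, 80_000, 120_000])

percentile = values.rank(pct=True)    # high raw value -> high percentile
reversed_percentile = 1 - percentile  # high raw value -> low reversed percentile

# After the reversal, the most burdened observations sit at the top, so a
# uniform check such as `reversed_percentile >= 0.75` treats this column the
# same way as columns where a high value already meant "bad".
print(reversed_percentile.tolist())  # [0.75, 0.5, 0.25, 0.0]
```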
@@ -96,7 +96,9 @@ class GeoScoreETL(ExtractTransformLoad):
         logger.info("Reading score CSV")
         self.score_usa_df = pd.read_csv(
             self.TILE_SCORE_CSV,
-            dtype={self.TRACT_SHORT_FIELD: "string"},
+            dtype={
+                self.TRACT_SHORT_FIELD: str,
+            },
             low_memory=False,
         )

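Both the old pandas "string" dtype and the new builtin `str` keep the tract column as text; the point of pinning the dtype at all is to stop pandas from parsing census tract GEOIDs as integers and dropping their leading zeros. A short sketch with an illustrative column name, not the ETL's `TRACT_SHORT_FIELD` constant:

```python
import io
import pandas as pd

# Census tract GEOIDs are fixed-width strings with leading zeros; parsed as
# integers, "01073000100" would silently become 1073000100.
csv = io.StringIO("GEOID10,score\n01073000100,0.42\n48201310100,0.17\n")

df = pd.read_csv(
    csv,
    dtype={"GEOID10": str},  # mirrors dtype={self.TRACT_SHORT_FIELD: str}
    low_memory=False,
)
print(df["GEOID10"].tolist())  # ['01073000100', '48201310100']
```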
@@ -136,7 +138,7 @@ class GeoScoreETL(ExtractTransformLoad):
             columns={self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO}
         )

-        logger.info("Converting to geojson into tracts")
+        logger.info("Converting geojson into geodf with tracts")
         usa_tracts = gpd.GeoDataFrame(
             usa_tracts,
             columns=[
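The `gpd.GeoDataFrame(usa_tracts, columns=[...])` call re-wraps the frame with an explicit column list so the tile layer carries only the fields it needs. A self-contained sketch with placeholder column names and a single made-up tract:

```python
import geopandas as gpd
from shapely.geometry import Point

# Hypothetical tract records; the real ETL loads these from a national GeoJSON.
usa_tracts = gpd.GeoDataFrame(
    {
        "GEOID10": ["01073000100"],
        "Score (percentile)": [0.42],
        "extra_field": ["dropped below"],
        "geometry": [Point(-86.8, 33.5)],
    },
    geometry="geometry",
    crs="EPSG:4326",
)

# Re-wrapping with columns=[...] keeps only the tract ID, the score, and the
# geometry, dropping everything else before tiles are generated.
usa_tracts = gpd.GeoDataFrame(
    usa_tracts,
    columns=["GEOID10", "Score (percentile)", "geometry"],
)
print(list(usa_tracts.columns))  # ['GEOID10', 'Score (percentile)', 'geometry']
```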
@@ -272,8 +274,10 @@ class GeoScoreETL(ExtractTransformLoad):
         # Create separate threads to run each write to disk.
         def write_high_to_file():
             logger.info("Writing usa-high (~9 minutes)")

             self.geojson_score_usa_high.to_file(
-                filename=self.SCORE_HIGH_GEOJSON, driver="GeoJSON"
+                filename=self.SCORE_HIGH_GEOJSON,
+                driver="GeoJSON",
             )
             logger.info("Completed writing usa-high")
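The reformatted call writes the high-detail GeoDataFrame out as GeoJSON. A minimal stand-in for that write, with a placeholder path instead of `self.SCORE_HIGH_GEOJSON`:

```python
import geopandas as gpd
from shapely.geometry import Point

# Tiny stand-in for self.geojson_score_usa_high.
gdf = gpd.GeoDataFrame(
    {"GEOID10": ["01073000100"], "geometry": [Point(-86.8, 33.5)]},
    geometry="geometry",
    crs="EPSG:4326",
)

# Same call shape as the diff: keyword filename plus an explicit GeoJSON driver.
gdf.to_file(
    filename="usa-high.json",  # placeholder for self.SCORE_HIGH_GEOJSON
    driver="GeoJSON",
)
```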
@@ -375,7 +379,7 @@ class GeoScoreETL(ExtractTransformLoad):
                 for task in [
                     write_high_to_file,
                     write_low_to_file,
-                    # write_esri_shapefile,
+                    write_esri_shapefile,
                 ]
             }

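The `for task in [...]` fragment sits inside a comprehension that schedules each write function on its own thread, per the "Create separate threads" comment in the earlier hunk; this commit re-enables `write_esri_shapefile` in that task list. A hedged sketch of one such pattern using `concurrent.futures`; the ETL's actual executor setup may differ, and the task bodies here are placeholders:

```python
import concurrent.futures

# Placeholder tasks standing in for write_high_to_file, write_low_to_file,
# and the re-enabled write_esri_shapefile; each writes one output to disk.
def write_high_to_file():
    print("writing usa-high")

def write_low_to_file():
    print("writing usa-low")

def write_esri_shapefile():
    print("writing esri shapefile")

# Submit every write task to a thread pool and wait for all of them,
# re-raising any exception that a worker thread hit.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = {
        executor.submit(task): task
        for task in [
            write_high_to_file,
            write_low_to_file,
            write_esri_shapefile,
        ]
    }
    for future in concurrent.futures.as_completed(futures):
        future.result()
```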
The remaining changed files are not rendered in this view: a binary file is not shown, and another file's diff is suppressed because one or more lines are too long.