just testing that the boolean is preserved on gha (#1867)

* Updated with what is hopefully a fix: coercing the AML, FUDS, and HRS fields to booleans for the raw value so that null values are preserved.
This commit is contained in:
Emma Nechamkin 2022-08-31 12:55:03 -04:00 committed by GitHub
parent 1c4d3e4142
commit b0b7ff0eec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 4802 deletions

View file

@ -394,4 +394,10 @@ TILES_SCORE_FLOAT_COLUMNS = [
field_names.PERCENT_AGE_UNDER_10,
field_names.PERCENT_AGE_10_TO_64,
field_names.PERCENT_AGE_OVER_64,
# GeoJSON cannot represent nulls in a boolean column when we create tiles.
# To preserve null values, we coerce to floats every field that uses
# null to signify missing information in a boolean field.
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.AML_BOOLEAN,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED
]

View file

@ -96,7 +96,9 @@ class GeoScoreETL(ExtractTransformLoad):
logger.info("Reading score CSV")
self.score_usa_df = pd.read_csv(
self.TILE_SCORE_CSV,
dtype={self.TRACT_SHORT_FIELD: "string"},
dtype={
self.TRACT_SHORT_FIELD: str,
},
low_memory=False,
)
@ -136,7 +138,7 @@ class GeoScoreETL(ExtractTransformLoad):
columns={self.TARGET_SCORE_SHORT_FIELD: self.TARGET_SCORE_RENAME_TO}
)
logger.info("Converting to geojson into tracts")
logger.info("Converting geojson into geodf with tracts")
usa_tracts = gpd.GeoDataFrame(
usa_tracts,
columns=[
@ -272,8 +274,10 @@ class GeoScoreETL(ExtractTransformLoad):
# Create separate threads to run each write to disk.
def write_high_to_file():
logger.info("Writing usa-high (~9 minutes)")
self.geojson_score_usa_high.to_file(
filename=self.SCORE_HIGH_GEOJSON, driver="GeoJSON"
filename=self.SCORE_HIGH_GEOJSON,
driver="GeoJSON",
)
logger.info("Completed writing usa-high")
@ -375,7 +379,7 @@ class GeoScoreETL(ExtractTransformLoad):
for task in [
write_high_to_file,
write_low_to_file,
# write_esri_shapefile,
write_esri_shapefile,
]
}

File diff suppressed because one or more lines are too long