Creating shapefiles for ArcGIS users (#1275)

Added shapefiles to the files generated when the pipeline is run. Produces both shapefile and a key for column names.
This commit is contained in:
Emma Nechamkin 2022-02-24 10:32:49 -05:00 committed by GitHub
parent 521c61dff3
commit f0a4e40a79
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 5 deletions

View file

@ -27,6 +27,10 @@ class GeoScoreETL(ExtractTransformLoad):
self.SCORE_LOW_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-low.json"
self.SCORE_HIGH_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-high.json"
self.SCORE_SHP_PATH = self.DATA_PATH / "score" / "shapefile"
self.SCORE_SHP_FILE = self.SCORE_SHP_PATH / "usa.shp"
self.SCORE_SHP_CODE_CSV = self.SCORE_SHP_PATH / "columns.csv"
self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
self.TILE_SCORE_CSV = self.SCORE_CSV_PATH / "tiles" / "usa.csv"
@ -94,6 +98,7 @@ class GeoScoreETL(ExtractTransformLoad):
fields = [self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME]
self.geojson_usa_df = self.geojson_usa_df[fields]
# TODO update this join
logger.info("Merging and compressing score CSV with USA GeoJSON")
self.geojson_score_usa_high = self.score_usa_df.merge(
self.geojson_usa_df, on=self.GEOID_FIELD_NAME, how="left"
@ -103,8 +108,6 @@ class GeoScoreETL(ExtractTransformLoad):
self.geojson_score_usa_high, crs="EPSG:4326"
)
logger.info(f"Columns: {self.geojson_score_usa_high.columns}")
usa_simplified = self.geojson_score_usa_high[
[
self.GEOID_FIELD_NAME,
@ -148,8 +151,9 @@ class GeoScoreETL(ExtractTransformLoad):
)
# round to 2 decimals
decimals = pd.Series([2], index=[self.TARGET_SCORE_RENAME_TO])
self.geojson_score_usa_low = self.geojson_score_usa_low.round(decimals)
self.geojson_score_usa_low = self.geojson_score_usa_low.round(
{self.TARGET_SCORE_RENAME_TO: 2}
)
def _aggregate_to_tracts(
self, block_group_df: gpd.GeoDataFrame
@ -221,10 +225,41 @@ class GeoScoreETL(ExtractTransformLoad):
)
logger.info("Completed writing usa-low")
def write_esri_shapefile():
logger.info("Producing ESRI shapefiles")
# Note that esri shapefiles can't have long column names, so we borrow from the
# shorten some tile names (renaming map) and print out a codebook for the user
codebook = {}
renaming_map = {}
# allows us to quickly rename / describe columns
reversed_tiles = {
short: long
for long, short in constants.TILES_SCORE_COLUMNS.items()
}
for column in self.geojson_score_usa_high.columns:
# take first 10 characters, max due to ESRI constraints
new_col = column[:10]
codebook[new_col] = reversed_tiles.get(column, column)
if new_col != column:
renaming_map[column] = new_col
pd.Series(codebook).reset_index().rename(
# kept as strings because no downstream impacts
columns={0: "column", "index": "meaning"}
).to_csv(self.SCORE_SHP_CODE_CSV, index=False)
self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
self.SCORE_SHP_FILE
)
logger.info("Completed writing shapefile")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(task)
for task in [write_high_to_file, write_low_to_file]
for task in [
write_high_to_file,
write_low_to_file,
write_esri_shapefile,
]
}
for fut in concurrent.futures.as_completed(futures):