Mirror of https://github.com/DOI-DO/j40-cejst-2.git, synced 2025-02-23 01:54:18 -08:00.
Creating shapefiles for ArcGIS users (#1275)
Added shapefiles to the files generated when the pipeline is run. Produces both a shapefile and a key for column names.
This commit is contained in:
parent
521c61dff3
commit
f0a4e40a79
2 changed files with 40 additions and 5 deletions
|
@ -27,6 +27,10 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
self.SCORE_LOW_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-low.json"
|
||||
self.SCORE_HIGH_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-high.json"
|
||||
|
||||
self.SCORE_SHP_PATH = self.DATA_PATH / "score" / "shapefile"
|
||||
self.SCORE_SHP_FILE = self.SCORE_SHP_PATH / "usa.shp"
|
||||
self.SCORE_SHP_CODE_CSV = self.SCORE_SHP_PATH / "columns.csv"
|
||||
|
||||
self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
|
||||
self.TILE_SCORE_CSV = self.SCORE_CSV_PATH / "tiles" / "usa.csv"
|
||||
|
||||
|
@ -94,6 +98,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
fields = [self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME]
|
||||
self.geojson_usa_df = self.geojson_usa_df[fields]
|
||||
|
||||
# TODO update this join
|
||||
logger.info("Merging and compressing score CSV with USA GeoJSON")
|
||||
self.geojson_score_usa_high = self.score_usa_df.merge(
|
||||
self.geojson_usa_df, on=self.GEOID_FIELD_NAME, how="left"
|
||||
|
@ -103,8 +108,6 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
self.geojson_score_usa_high, crs="EPSG:4326"
|
||||
)
|
||||
|
||||
logger.info(f"Columns: {self.geojson_score_usa_high.columns}")
|
||||
|
||||
usa_simplified = self.geojson_score_usa_high[
|
||||
[
|
||||
self.GEOID_FIELD_NAME,
|
||||
|
@ -148,8 +151,9 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
)
|
||||
|
||||
# round to 2 decimals
|
||||
decimals = pd.Series([2], index=[self.TARGET_SCORE_RENAME_TO])
|
||||
self.geojson_score_usa_low = self.geojson_score_usa_low.round(decimals)
|
||||
self.geojson_score_usa_low = self.geojson_score_usa_low.round(
|
||||
{self.TARGET_SCORE_RENAME_TO: 2}
|
||||
)
|
||||
|
||||
def _aggregate_to_tracts(
|
||||
self, block_group_df: gpd.GeoDataFrame
|
||||
|
@ -221,10 +225,41 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
)
|
||||
logger.info("Completed writing usa-low")
|
||||
|
||||
def write_esri_shapefile():
    """Write the high-detail score GeoDataFrame as an ESRI shapefile.

    ESRI shapefiles limit field names to 10 characters, so long column
    names are truncated via a renaming map, and a codebook CSV
    (short name -> original meaning) is written alongside the shapefile
    so users can recover the full column names.

    Reads ``self.geojson_score_usa_high`` from the enclosing scope and
    writes ``self.SCORE_SHP_CODE_CSV`` and ``self.SCORE_SHP_FILE``.
    """
    logger.info("Producing ESRI shapefiles")
    # Map from shortened (<=10 char) name to the human-readable meaning.
    codebook = {}
    # Map from original column name to its shortened shapefile field name.
    renaming_map = {}

    # Reverse the tiles mapping so we can recover the long, descriptive
    # name for any column that was previously shortened for tile output.
    reversed_tiles = {
        short: long_name
        for long_name, short in constants.TILES_SCORE_COLUMNS.items()
    }

    # Track short names already assigned: plain truncation to 10 chars
    # can collide (two long columns sharing a prefix), which would
    # silently clobber a codebook entry and merge two shapefile fields.
    used_short_names = set()
    for column in self.geojson_score_usa_high.columns:
        # Take at most 10 characters, per the ESRI field-name limit.
        new_col = column[:10]
        # Disambiguate collisions with a numeric suffix while keeping
        # the result within the 10-character limit.
        suffix = 0
        while new_col in used_short_names:
            suffix += 1
            tail = str(suffix)
            new_col = column[: 10 - len(tail)] + tail
        used_short_names.add(new_col)

        codebook[new_col] = reversed_tiles.get(column, column)
        if new_col != column:
            renaming_map[column] = new_col

    pd.Series(codebook).reset_index().rename(
        # kept as strings because no downstream impacts
        columns={0: "column", "index": "meaning"}
    ).to_csv(self.SCORE_SHP_CODE_CSV, index=False)
    self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
        self.SCORE_SHP_FILE
    )
    logger.info("Completed writing shapefile")
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
||||
futures = {
|
||||
executor.submit(task)
|
||||
for task in [write_high_to_file, write_low_to_file]
|
||||
for task in [
|
||||
write_high_to_file,
|
||||
write_low_to_file,
|
||||
write_esri_shapefile,
|
||||
]
|
||||
}
|
||||
|
||||
for fut in concurrent.futures.as_completed(futures):
|
||||
|
|
Loading…
Add table
Reference in a new issue