Mirror of https://github.com/DOI-DO/j40-cejst-2.git, synced 2025-02-23 01:54:18 -08:00
Creating shapefiles for ArcGIS users (#1275)
Added shapefiles to the files generated when the pipeline is run. Produces both a shapefile and a key (codebook) for column names.
commit f0a4e40a79
parent 521c61dff3
2 changed files with 40 additions and 5 deletions
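For ArcGIS or geopandas users of the new output, the sketch below shows one way to load the generated shapefile and restore the long column names from the columns.csv codebook. It is a minimal example and not part of the commit: the on-disk locations are assumptions based on the paths defined in the diff (SCORE_SHP_FILE and SCORE_SHP_CODE_CSV), and per the rename call in the diff the "meaning" CSV column holds the truncated shapefile names while "column" holds the long names.

import geopandas as gpd
import pandas as pd

# Assumed output locations, mirroring SCORE_SHP_FILE and SCORE_SHP_CODE_CSV in the diff.
SHAPEFILE = "data/score/shapefile/usa.shp"
CODEBOOK_CSV = "data/score/shapefile/columns.csv"

# Read the codebook and build a short -> long renaming map.
codebook = pd.read_csv(CODEBOOK_CSV)
short_to_long = dict(zip(codebook["meaning"], codebook["column"]))

# Load the shapefile and rename the truncated ESRI column names back to their long names.
gdf = gpd.read_file(SHAPEFILE).rename(columns=short_to_long)
print(gdf.columns)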
@@ -27,6 +27,10 @@ class GeoScoreETL(ExtractTransformLoad):
         self.SCORE_LOW_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-low.json"
         self.SCORE_HIGH_GEOJSON = self.SCORE_GEOJSON_PATH / "usa-high.json"

+        self.SCORE_SHP_PATH = self.DATA_PATH / "score" / "shapefile"
+        self.SCORE_SHP_FILE = self.SCORE_SHP_PATH / "usa.shp"
+        self.SCORE_SHP_CODE_CSV = self.SCORE_SHP_PATH / "columns.csv"
+
         self.SCORE_CSV_PATH = self.DATA_PATH / "score" / "csv"
         self.TILE_SCORE_CSV = self.SCORE_CSV_PATH / "tiles" / "usa.csv"

@@ -94,6 +98,7 @@ class GeoScoreETL(ExtractTransformLoad):
         fields = [self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME]
         self.geojson_usa_df = self.geojson_usa_df[fields]

+        # TODO update this join
         logger.info("Merging and compressing score CSV with USA GeoJSON")
         self.geojson_score_usa_high = self.score_usa_df.merge(
             self.geojson_usa_df, on=self.GEOID_FIELD_NAME, how="left"
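The merge above is a left join keyed on the GEOID field, so every row of the score CSV is kept and any GEOID with no matching geometry ends up with a null geometry; that appears to be what the new TODO comment is flagging. A toy sketch of the behavior, using made-up data and hypothetical column names:

import pandas as pd

# Hypothetical miniature versions of the two frames being merged.
score_df = pd.DataFrame({"GEOID": ["A", "B"], "Score": [0.1, 0.9]})
geo_df = pd.DataFrame({"GEOID": ["A"], "geometry": ["POINT (0 0)"]})

# how="left" keeps both score rows; "B" gets NaN geometry because it has no match.
merged = score_df.merge(geo_df, on="GEOID", how="left")
print(merged)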
@@ -103,8 +108,6 @@ class GeoScoreETL(ExtractTransformLoad):
             self.geojson_score_usa_high, crs="EPSG:4326"
         )

-        logger.info(f"Columns: {self.geojson_score_usa_high.columns}")
-
         usa_simplified = self.geojson_score_usa_high[
             [
                 self.GEOID_FIELD_NAME,
@@ -148,8 +151,9 @@ class GeoScoreETL(ExtractTransformLoad):
         )

         # round to 2 decimals
-        decimals = pd.Series([2], index=[self.TARGET_SCORE_RENAME_TO])
-        self.geojson_score_usa_low = self.geojson_score_usa_low.round(decimals)
+        self.geojson_score_usa_low = self.geojson_score_usa_low.round(
+            {self.TARGET_SCORE_RENAME_TO: 2}
+        )

     def _aggregate_to_tracts(
         self, block_group_df: gpd.GeoDataFrame
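DataFrame.round accepts either a Series or a dict mapping column names to decimal places, so the old and new forms round the same single column; the rewrite simply drops the intermediate pd.Series. A small illustration with a hypothetical column name, not taken from the pipeline:

import pandas as pd

df = pd.DataFrame({"SCORE": [0.12345, 0.6789], "OTHER": [1.23456, 7.89012]})

# Both calls round only the "SCORE" column to 2 decimals and leave "OTHER" untouched.
rounded_via_series = df.round(pd.Series([2], index=["SCORE"]))
rounded_via_dict = df.round({"SCORE": 2})
assert rounded_via_series.equals(rounded_via_dict)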
@@ -221,10 +225,41 @@ class GeoScoreETL(ExtractTransformLoad):
             )
             logger.info("Completed writing usa-low")

+        def write_esri_shapefile():
+            logger.info("Producing ESRI shapefiles")
+            # Note that esri shapefiles can't have long column names, so we borrow from the
+            # shorten some tile names (renaming map) and print out a codebook for the user
+            codebook = {}
+            renaming_map = {}
+
+            # allows us to quickly rename / describe columns
+            reversed_tiles = {
+                short: long
+                for long, short in constants.TILES_SCORE_COLUMNS.items()
+            }
+            for column in self.geojson_score_usa_high.columns:
+                # take first 10 characters, max due to ESRI constraints
+                new_col = column[:10]
+                codebook[new_col] = reversed_tiles.get(column, column)
+                if new_col != column:
+                    renaming_map[column] = new_col
+            pd.Series(codebook).reset_index().rename(
+                # kept as strings because no downstream impacts
+                columns={0: "column", "index": "meaning"}
+            ).to_csv(self.SCORE_SHP_CODE_CSV, index=False)
+            self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
+                self.SCORE_SHP_FILE
+            )
+            logger.info("Completed writing shapefile")
+
         with concurrent.futures.ThreadPoolExecutor() as executor:
             futures = {
                 executor.submit(task)
-                for task in [write_high_to_file, write_low_to_file]
+                for task in [
+                    write_high_to_file,
+                    write_low_to_file,
+                    write_esri_shapefile,
+                ]
             }

             for fut in concurrent.futures.as_completed(futures):
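One property of the new write_esri_shapefile step worth noting: truncating every column to its first 10 characters (the ESRI shapefile limit) can collide if two source columns share a 10-character prefix, in which case both map to the same shapefile name and the codebook keeps only the last one. A standalone sketch, not part of the commit, that checks a list of candidate column names for such collisions:

from collections import Counter

def truncation_collisions(columns, width=10):
    """Return truncated names that more than one source column maps to."""
    counts = Counter(col[:width] for col in columns)
    return {short: n for short, n in counts.items() if n > 1}

# Hypothetical column names; the second and third collide after truncation.
example_columns = ["GEOID10", "Total population estimate", "Total population margin"]
print(truncation_collisions(example_columns))  # {'Total popu': 2}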