mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 10:04:18 -08:00
ArcGIS column name fix (#1581)
Eliminates the duplicate "FPL200S" column mapping and ensures all shapefile column names are unique.
This commit is contained in:
parent
fbd56e3bd5
commit
ae725f0a3e
3 changed files with 36 additions and 11 deletions
|
@ -246,7 +246,6 @@ TILES_SCORE_COLUMNS = {
|
||||||
field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD: "IA_LMI_ET",
|
field_names.ISLAND_LOW_MEDIAN_INCOME_PCTILE_THRESHOLD: "IA_LMI_ET",
|
||||||
field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD: "IA_UN_ET",
|
field_names.ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD: "IA_UN_ET",
|
||||||
field_names.ISLAND_POVERTY_PCTILE_THRESHOLD: "IA_POV_ET",
|
field_names.ISLAND_POVERTY_PCTILE_THRESHOLD: "IA_POV_ET",
|
||||||
field_names.FPL_200_SERIES: "FPL200S",
|
|
||||||
field_names.THRESHOLD_COUNT: "TC",
|
field_names.THRESHOLD_COUNT: "TC",
|
||||||
field_names.CATEGORY_COUNT: "CC",
|
field_names.CATEGORY_COUNT: "CC",
|
||||||
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
field_names.ISLAND_AREAS_UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD: "IAULHSE",
|
||||||
|
@ -269,7 +268,6 @@ TILES_SCORE_COLUMNS = {
|
||||||
field_names.COLLEGE_NON_ATTENDANCE_FIELD: "NCA",
|
field_names.COLLEGE_NON_ATTENDANCE_FIELD: "NCA",
|
||||||
# This is logically equivalent to "non-college greater than 80%"
|
# This is logically equivalent to "non-college greater than 80%"
|
||||||
field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD: "CA_LT20",
|
field_names.COLLEGE_ATTENDANCE_LESS_THAN_20_FIELD: "CA_LT20",
|
||||||
field_names.LOW_INCOME_THRESHOLD: "FPL200S",
|
|
||||||
# Booleans for the front end about the types of thresholds exceeded
|
# Booleans for the front end about the types of thresholds exceeded
|
||||||
field_names.CLIMATE_THRESHOLD_EXCEEDED: "M_CLT_EOMI",
|
field_names.CLIMATE_THRESHOLD_EXCEEDED: "M_CLT_EOMI",
|
||||||
field_names.ENERGY_THRESHOLD_EXCEEDED: "M_ENY_EOMI",
|
field_names.ENERGY_THRESHOLD_EXCEEDED: "M_ENY_EOMI",
|
||||||
|
@ -280,6 +278,8 @@ TILES_SCORE_COLUMNS = {
|
||||||
field_names.HEALTH_THRESHOLD_EXCEEDED: "M_HLTH_EOMI",
|
field_names.HEALTH_THRESHOLD_EXCEEDED: "M_HLTH_EOMI",
|
||||||
field_names.WORKFORCE_THRESHOLD_EXCEEDED: "M_WKFC_EOMI",
|
field_names.WORKFORCE_THRESHOLD_EXCEEDED: "M_WKFC_EOMI",
|
||||||
# These are the booleans for socioeconomic indicators
|
# These are the booleans for socioeconomic indicators
|
||||||
|
## this measures low income boolean
|
||||||
|
field_names.FPL_200_SERIES: "FPL200S",
|
||||||
## Low high school and low higher ed for t&wd
|
## Low high school and low higher ed for t&wd
|
||||||
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
|
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
|
||||||
## FPL 200 and low higher ed for all others
|
## FPL 200 and low higher ed for all others
|
||||||
|
|
|
@ -284,13 +284,21 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
def create_esri_codebook(codebook):
|
def create_esri_codebook(codebook):
|
||||||
"""temporary: helper to make a codebook for esri shapefile only"""
|
"""temporary: helper to make a codebook for esri shapefile only"""
|
||||||
|
|
||||||
|
shapefile_column_field = "shapefile_column"
|
||||||
|
internal_column_name_field = "column_name"
|
||||||
|
column_description_field = "column_description"
|
||||||
|
|
||||||
logger.info("Creating a codebook that uses the csv names")
|
logger.info("Creating a codebook that uses the csv names")
|
||||||
codebook = (
|
codebook = (
|
||||||
pd.Series(codebook)
|
pd.Series(codebook)
|
||||||
.reset_index()
|
.reset_index()
|
||||||
.rename(
|
.rename(
|
||||||
# kept as strings because no downstream impacts
|
# kept as strings because no downstream impacts
|
||||||
columns={0: "column_name", "index": "shapefile_column"}
|
columns={
|
||||||
|
0: internal_column_name_field,
|
||||||
|
"index": shapefile_column_field,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -304,10 +312,21 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
object_value="label",
|
object_value="label",
|
||||||
)
|
)
|
||||||
|
|
||||||
codebook["column_description"] = codebook["column_name"].map(
|
codebook[column_description_field] = codebook[
|
||||||
column_rename_dict
|
internal_column_name_field
|
||||||
|
].map(column_rename_dict)
|
||||||
|
|
||||||
|
codebook[
|
||||||
|
[
|
||||||
|
shapefile_column_field,
|
||||||
|
internal_column_name_field,
|
||||||
|
column_description_field,
|
||||||
|
]
|
||||||
|
].to_csv(
|
||||||
|
self.SCORE_SHP_CODE_CSV,
|
||||||
|
index=False,
|
||||||
)
|
)
|
||||||
codebook.to_csv(self.SCORE_SHP_CODE_CSV, index=False)
|
logger.info("Completed writing codebook")
|
||||||
|
|
||||||
def write_esri_shapefile():
|
def write_esri_shapefile():
|
||||||
logger.info("Producing ESRI shapefiles")
|
logger.info("Producing ESRI shapefiles")
|
||||||
|
@ -321,19 +340,25 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
short: long
|
short: long
|
||||||
for long, short in constants.TILES_SCORE_COLUMNS.items()
|
for long, short in constants.TILES_SCORE_COLUMNS.items()
|
||||||
}
|
}
|
||||||
for column in self.geojson_score_usa_high.columns:
|
|
||||||
# take first 10 characters, max due to ESRI constraints
|
for i, column in enumerate(self.geojson_score_usa_high.columns):
|
||||||
new_col = column[:10]
|
# take first 6 characters and add a number to ensure uniqueness
|
||||||
|
# this is the max due to esri (index can be 3-digits)
|
||||||
|
if len(column) > 10:
|
||||||
|
new_col = column[:6] + f"_{i}"
|
||||||
|
else:
|
||||||
|
new_col = column
|
||||||
codebook[new_col] = reversed_tiles.get(column, column)
|
codebook[new_col] = reversed_tiles.get(column, column)
|
||||||
if new_col != column:
|
if new_col != column:
|
||||||
renaming_map[column] = new_col
|
renaming_map[column] = new_col
|
||||||
|
|
||||||
create_esri_codebook(codebook)
|
|
||||||
|
|
||||||
self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
|
self.geojson_score_usa_high.rename(columns=renaming_map).to_file(
|
||||||
self.SCORE_SHP_FILE
|
self.SCORE_SHP_FILE
|
||||||
)
|
)
|
||||||
logger.info("Completed writing shapefile")
|
logger.info("Completed writing shapefile")
|
||||||
|
|
||||||
|
create_esri_codebook(codebook)
|
||||||
|
|
||||||
arcgis_zip_file_path = self.SCORE_SHP_PATH / "usa.zip"
|
arcgis_zip_file_path = self.SCORE_SHP_PATH / "usa.zip"
|
||||||
arcgis_files = []
|
arcgis_files = []
|
||||||
for file in os.listdir(self.SCORE_SHP_PATH):
|
for file in os.listdir(self.SCORE_SHP_PATH):
|
||||||
|
|
Binary file not shown.
Loading…
Add table
Reference in a new issue