mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-08 18:44:18 -07:00
Generate Geo-aware scores for all zoom levels (#391)
* generate Geo-aware scores for all zoom levels * usa high progress * testing dissolve * checkpoint * changing type * removing breakpoint * validation notebooks * quick update * score validation * fixes for county merge * code completed
This commit is contained in:
parent
446c8d1f68
commit
b404fdcc43
14 changed files with 3023 additions and 270 deletions
|
@ -9,12 +9,16 @@ logger = get_module_logger(__name__)
|
|||
class CalEnviroScreenETL(ExtractTransformLoad):
|
||||
def __init__(self):
|
||||
self.CALENVIROSCREEN_FTP_URL = "https://justice40-data.s3.amazonaws.com/data-sources/CalEnviroScreen_4.0_2021.zip"
|
||||
self.CALENVIROSCREEN_CSV = self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
|
||||
self.CALENVIROSCREEN_CSV = (
|
||||
self.TMP_PATH / "CalEnviroScreen_4.0_2021.csv"
|
||||
)
|
||||
self.CSV_PATH = self.DATA_PATH / "dataset" / "calenviroscreen4"
|
||||
|
||||
# Defining some variable names
|
||||
self.CALENVIROSCREEN_SCORE_FIELD_NAME = "calenviroscreen_score"
|
||||
self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = "calenviroscreen_percentile"
|
||||
self.CALENVIROSCREEN_PERCENTILE_FIELD_NAME = (
|
||||
"calenviroscreen_percentile"
|
||||
)
|
||||
self.CALENVIROSCREEN_PRIORITY_COMMUNITY_FIELD_NAME = (
|
||||
"calenviroscreen_priority_community"
|
||||
)
|
||||
|
|
|
@ -2,6 +2,7 @@ import csv
|
|||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
import geopandas as gpd
|
||||
|
||||
from .etl_utils import get_state_fips_codes
|
||||
from utils import unzip_file_from_url, get_module_logger
|
||||
|
@ -11,7 +12,7 @@ logger = get_module_logger(__name__)
|
|||
|
||||
def download_census_csvs(data_path: Path) -> None:
|
||||
"""Download all census shape files from the Census FTP and extract the geojson
|
||||
to generate national and by state Census Block Group CSVs
|
||||
to generate national and by state Census Block Group CSVs and GeoJSONs
|
||||
|
||||
Args:
|
||||
data_path (pathlib.Path): Name of the directory where the files and directories will
|
||||
|
@ -108,4 +109,17 @@ def download_census_csvs(data_path: Path) -> None:
|
|||
]
|
||||
)
|
||||
|
||||
## create national geojson
|
||||
logger.info(f"Generating national geojson file")
|
||||
usa_df = gpd.GeoDataFrame()
|
||||
|
||||
for file_name in geojson_dir_path.rglob("*.json"):
|
||||
logger.info(f"Ingesting {file_name}")
|
||||
state_gdf = gpd.read_file(file_name)
|
||||
usa_df = usa_df.append(state_gdf)
|
||||
|
||||
usa_df = usa_df.to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")
|
||||
logger.info(f"Writing national geojson file")
|
||||
usa_df.to_file(geojson_dir_path / "us.json", driver="GeoJSON")
|
||||
|
||||
logger.info("Census block groups downloading complete")
|
||||
|
|
|
@ -106,3 +106,8 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.df[columns_to_include].to_csv(
|
||||
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
|
||||
)
|
||||
|
||||
def validate(self) -> None:
|
||||
logger.info(f"Validating Census ACS Data")
|
||||
|
||||
pass
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue