diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index e55fa2c8..ca8333f5 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -128,6 +128,8 @@ Here's a list of commands: - Generate census data: `docker exec j40_data_pipeline_1 python3 application.py census-data-download"` - Run all ETL processes: `docker exec j40_data_pipeline_1 python3 application.py etl-run"` - Generate Score: `docker exec j40_data_pipeline_1 python3 application.py score-run"` +- Generate Score with GeoJSON (high and low zoom versions): `docker exec j40_data_pipeline_1 python3 application.py geo-score` +- Generate Map Tiles: `docker exec j40_data_pipeline_1 python3 application.py generate-map-tiles` ## Local development @@ -154,15 +156,18 @@ You can run the Python code locally without Docker to develop, using Poetry. How - Then run `poetry run python application.py census-data-download` Note: Census files are not kept in the repository and the download directories are ignored by Git -### Generating mbtiles +### Generating Map Tiles -- TBD +- Make sure you have Docker running on your machine +- Start a terminal +- Change to this directory (i.e. `cd data/data-pipeline`) +- Then run `poetry run python application.py generate-map-tiles` ### Serve the map locally - Start a terminal - Change to this directory (i.e. 
`cd data/data-pipeline`) -- Run: `docker run --rm -it -v ${PWD}/data/tiles:/data -p 8080:80 maptiler/tileserver-gl` +- For USA high zoom: `docker run --rm -it -v ${PWD}/data/score/tiles/high:/data -p 8080:80 maptiler/tileserver-gl` ### Running Jupyter notebooks diff --git a/data/data-pipeline/application.py b/data/data-pipeline/application.py index 3d185c57..426fd9db 100644 --- a/data/data-pipeline/application.py +++ b/data/data-pipeline/application.py @@ -10,6 +10,7 @@ from utils import ( from etl.sources.census.etl import download_census_csvs from etl.sources.census.etl_utils import reset_data_directories as census_reset from etl.runner import etl_runner, score_generate, score_geo +from tile.generate import generate_tiles logger = get_module_logger(__name__) @@ -85,6 +86,7 @@ def etl_run(dataset: str): ) def score_run(): """CLI command to generate the score""" + score_generate() @@ -93,8 +95,19 @@ def score_run(): ) def geo_score(): """CLI command to generate the score""" + score_geo() +@cli.command( + help="Generate map tiles", +) +def generate_map_tiles(): + """CLI command to generate the map tiles""" + + data_path = settings.APP_ROOT / "data" + generate_tiles(data_path) + + if __name__ == "__main__": cli() diff --git a/data/data-pipeline/data/score/tiles/__init__.py b/data/data-pipeline/data/score/tiles/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data/data-pipeline/tile/generate.py b/data/data-pipeline/tile/generate.py index 8c6964fc..788bb1c0 100644 --- a/data/data-pipeline/tile/generate.py +++ b/data/data-pipeline/tile/generate.py @@ -1,63 +1,60 @@ -import os from pathlib import Path -import shutil +import os +from subprocess import call -from etl.sources.census.etl_utils import get_state_fips_codes +from utils import remove_all_from_dir +from utils import get_module_logger + +logger = get_module_logger(__name__) def generate_tiles(data_path: Path) -> None: - # remove existing mbtiles file - mb_tiles_path = data_path / "tiles" / 
"block2010.mbtiles" - if os.path.exists(mb_tiles_path): - os.remove(mb_tiles_path) - - # remove existing mvt directory - mvt_tiles_path = data_path / "tiles" / "mvt" - if os.path.exists(mvt_tiles_path): - shutil.rmtree(mvt_tiles_path) - - # remove existing score json files + score_tiles_path = data_path / "score" / "tiles" + high_tile_path = score_tiles_path / "high" + low_tile_path = score_tiles_path / "low" score_geojson_dir = data_path / "score" / "geojson" - files_in_directory = os.listdir(score_geojson_dir) - filtered_files = [file for file in files_in_directory if file.endswith(".json")] - for file in filtered_files: - path_to_file = os.path.join(score_geojson_dir, file) - os.remove(path_to_file) - # join the state shape sqllite with the score csv - state_fips_codes = get_state_fips_codes() - for fips in state_fips_codes: - cmd = ( - "ogr2ogr -f GeoJSON " - + f"-sql \"SELECT * FROM tl_2010_{fips}_bg10 LEFT JOIN 'data/score/csv/data{fips}.csv'.data{fips} ON tl_2010_{fips}_bg10.GEOID10 = data{fips}.ID\" " - + f"data/score/geojson/{fips}.json data/census/shp/{fips}/tl_2010_{fips}_bg10.dbf" - ) - os.system(cmd) + USA_HIGH_MIN_ZOOM = 5 + USA_HIGH_MAX_ZOOM = 11 + USA_LOW_MIN_ZOOM = 0 + USA_LOW_MAX_ZOOM = 7 - # get a list of all json files to plug in the docker commands below - # (workaround since *.json doesn't seem to work) - geojson_list = "" - geojson_path = data_path / "score" / "geojson" - for file in os.listdir(geojson_path): - if file.endswith(".json"): - geojson_list += f"data/score/geojson/{file} " + # remove existing mbtiles file + remove_all_from_dir(score_tiles_path) - if geojson_list == "": - logging.error( - "No GeoJson files found. 
Please run scripts/download_cbg.py first" - ) + # create dirs + os.mkdir(high_tile_path) + os.mkdir(low_tile_path) - # generate mbtiles file - cmd = ( - "tippecanoe --drop-densest-as-needed -zg -o /home/data/tiles/block2010.mbtiles --extend-zooms-if-still-dropping -l cbg2010 -s_srs EPSG:4269 -t_srs EPSG:4326 " - + geojson_list - ) - os.system(cmd) + # generate high mbtiles file + logger.info(f"Generating USA High mbtiles file") + cmd = "tippecanoe " + cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --layer=blocks " + cmd += f"--output={high_tile_path}/usa_high.mbtiles " + cmd += str(score_geojson_dir / "usa-high.json") + call(cmd, shell=True) - # generate mvts - cmd = ( - "tippecanoe --drop-densest-as-needed --no-tile-compression -zg -e /home/data/tiles/mvt " - + geojson_list - ) - os.system(cmd) + # generate high mvts + logger.info(f"Generating USA High mvt folders and files") + cmd = "tippecanoe " + cmd += f"--minimum-zoom={USA_HIGH_MIN_ZOOM} --maximum-zoom={USA_HIGH_MAX_ZOOM} --no-tile-compression " + cmd += f"--output-to-directory={high_tile_path} " + cmd += str(score_geojson_dir / "usa-high.json") + call(cmd, shell=True) + + # generate low mbtiles file + logger.info(f"Generating USA Low mbtiles file") + cmd = "tippecanoe " + cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --layer=blocks " + cmd += f"--output={low_tile_path}/usa_low.mbtiles " + cmd += str(score_geojson_dir / "usa-low.json") + call(cmd, shell=True) + + # generate low mvts + logger.info(f"Generating USA Low mvt folders and files") + cmd = "tippecanoe " + cmd += f"--minimum-zoom={USA_LOW_MIN_ZOOM} --maximum-zoom={USA_LOW_MAX_ZOOM} --no-tile-compression " + cmd += f"--output-to-directory={low_tile_path} " + cmd += str(score_geojson_dir / "usa-low.json") + call(cmd, shell=True)