From e8d64df510f06d01dd14d290fd00a73dea6ff837 Mon Sep 17 00:00:00 2001 From: Lucas Merrill Brown Date: Mon, 15 Nov 2021 11:06:44 -0500 Subject: [PATCH 1/7] Fixing missing FEMA fields (#892) --- .../data_pipeline/etl/score/etl_score.py | 1 - .../ipython/scoring_comparison.ipynb | 22 ------------------- .../data_pipeline/score/field_names.py | 3 --- 3 files changed, 26 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index d26c8995..505cba1e 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -291,7 +291,6 @@ class ScoreETL(ExtractTransformLoad): field_names.LIFE_EXPECTANCY_FIELD, field_names.ENERGY_BURDEN_FIELD, field_names.FEMA_RISK_FIELD, - field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD, field_names.URBAN_HERUISTIC_FIELD, field_names.AIR_TOXICS_CANCER_RISK_FIELD, field_names.RESPITORY_HAZARD_FIELD, diff --git a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb index b33ae10d..477d2820 100644 --- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb @@ -320,28 +320,6 @@ "# )" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b74b0bf", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Create a FEMA risk index score\n", - "# Note: this can be deleted at a later date.\n", - "FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (\n", - " \"FEMA Risk Index Expected Annual Loss Rate\"\n", - ")\n", - "FEMA_COMMUNITIES = \"FEMA Risk Index (top 30th percentile)\"\n", - "merged_df[FEMA_COMMUNITIES] = (\n", - " merged_df[f\"{FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD} (percentile)\"] > 0.70\n", - ")\n", - "\n", - "merged_df[FEMA_COMMUNITIES].describe()" - ] - }, { "cell_type": "code", "execution_count": null, 
diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index a3a043ae..14220883 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -57,9 +57,6 @@ AMI_FIELD = "Area Median Income (State or metropolitan)" # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" -FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = ( - "FEMA Risk Index Expected Annual Loss Rate" -) EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = ( "Expected building loss rate (Natural Hazards Risk Index)" ) From f7e9d96e638caa9ed5f6babaabbd50bd6dc71641 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Nov 2021 10:01:55 -0500 Subject: [PATCH 2/7] Bump nth-check from 2.0.0 to 2.0.1 in /client (#881) Bumps [nth-check](https://github.com/fb55/nth-check) from 2.0.0 to 2.0.1. - [Release notes](https://github.com/fb55/nth-check/releases) - [Commits](https://github.com/fb55/nth-check/compare/v2.0.0...v2.0.1) --- updated-dependencies: - dependency-name: nth-check dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- client/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/package-lock.json b/client/package-lock.json index 5f9fd621..c54fb482 100644 --- a/client/package-lock.json +++ b/client/package-lock.json @@ -16733,9 +16733,9 @@ } }, "nth-check": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz", - "integrity": "sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.1.tgz", + "integrity": "sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==", "dev": true, "requires": { "boolbase": "^1.0.0" From f330b1f0621b4e016b5c8218cc5af3eede396fe2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Nov 2021 10:02:34 -0500 Subject: [PATCH 3/7] Bump tmpl from 1.0.4 to 1.0.5 in /client (#880) Bumps [tmpl](https://github.com/daaku/nodejs-tmpl) from 1.0.4 to 1.0.5. - [Release notes](https://github.com/daaku/nodejs-tmpl/releases) - [Commits](https://github.com/daaku/nodejs-tmpl/commits/v1.0.5) --- updated-dependencies: - dependency-name: tmpl dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- client/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/package-lock.json b/client/package-lock.json index c54fb482..36990b33 100644 --- a/client/package-lock.json +++ b/client/package-lock.json @@ -21780,9 +21780,9 @@ } }, "tmpl": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.4.tgz", - "integrity": "sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=", + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", + "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==", "dev": true }, "to-arraybuffer": { From 4c2b7e101dc856ddafab1e15e8988126600b94ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Nov 2021 10:03:18 -0500 Subject: [PATCH 4/7] Bump url-parse from 1.5.1 to 1.5.3 in /client (#878) Bumps [url-parse](https://github.com/unshiftio/url-parse) from 1.5.1 to 1.5.3. - [Release notes](https://github.com/unshiftio/url-parse/releases) - [Commits](https://github.com/unshiftio/url-parse/compare/1.5.1...1.5.3) --- updated-dependencies: - dependency-name: url-parse dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- client/package-lock.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/package-lock.json b/client/package-lock.json index 36990b33..844f4968 100644 --- a/client/package-lock.json +++ b/client/package-lock.json @@ -22526,9 +22526,9 @@ } }, "url-parse": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.1.tgz", - "integrity": "sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==", + "version": "1.5.3", + "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.3.tgz", + "integrity": "sha512-IIORyIQD9rvj0A4CLWsHkBBJuNqWpFQe224b6j9t/ABmquIS0qDU2pY6kl6AuOrL5OkCXHMCFNe1jBcuAggjvQ==", "dev": true, "requires": { "querystringify": "^2.1.1", From 0b75be8ec09146089cfd2bcc258082af309df2ef Mon Sep 17 00:00:00 2001 From: Shelby Switzer Date: Tue, 16 Nov 2021 10:03:32 -0500 Subject: [PATCH 5/7] Update README.md (#868) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b2c8808..f60ad927 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ We also recognize capacity building as a key part of involving a diverse open so Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below. ### Community Chats -We host open source community chats every two weeks on Monday at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source). +We host open source community chats every third Monday of the month at 5-6pm ET. 
You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source). Community members are welcome to share updates or propose topics for discussion in community chats. Please do so in the Google Group. From f00cc5f7b2f83eae09176adc28741a92b7d98f7d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Nov 2021 10:04:02 -0500 Subject: [PATCH 6/7] Bump axios from 0.21.1 to 0.21.4 in /client (#876) Bumps [axios](https://github.com/axios/axios) from 0.21.1 to 0.21.4. - [Release notes](https://github.com/axios/axios/releases) - [Changelog](https://github.com/axios/axios/blob/master/CHANGELOG.md) - [Commits](https://github.com/axios/axios/compare/v0.21.1...v0.21.4) --- updated-dependencies: - dependency-name: axios dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- client/package-lock.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/client/package-lock.json b/client/package-lock.json index 844f4968..80b606e3 100644 --- a/client/package-lock.json +++ b/client/package-lock.json @@ -4814,12 +4814,12 @@ "dev": true }, "axios": { - "version": "0.21.1", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz", - "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==", + "version": "0.21.4", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz", + "integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==", "dev": true, "requires": { - "follow-redirects": "^1.10.0" + "follow-redirects": "^1.14.0" } }, "axobject-query": { From 0a21fc6b12b007f137b9bb63fe275e70d1001c42 Mon Sep 17 00:00:00 2001 From: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com> Date: Tue, 16 Nov 2021 10:05:09 
-0500 Subject: [PATCH 7/7] Add territory boundary data (#885) * Add territory boundary data * housing and transp * lint * lint * lint --- data/data-pipeline/data_pipeline/etl/base.py | 2 +- .../etl/sources/census/etl_utils.py | 8 ++++++-- .../etl/sources/census_acs/etl.py | 11 ++++++++--- .../sources/housing_and_transportation/etl.py | 13 ++++++++----- data/data-pipeline/data_pipeline/utils.py | 18 ++++++++++++------ 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/base.py b/data/data-pipeline/data_pipeline/etl/base.py index 4936ab1f..ca636fa9 100644 --- a/data/data-pipeline/data_pipeline/etl/base.py +++ b/data/data-pipeline/data_pipeline/etl/base.py @@ -33,7 +33,7 @@ class ExtractTransformLoad: GEOID_FIELD_NAME: str = "GEOID10" GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT" # TODO: investigate. Census says there are only 217,740 CBGs in the US. This might be from CBGs at different time periods. - EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 220405 + EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000 EXPECTED_MAX_CENSUS_TRACTS: int = 73076 def __init__(self, config_path: Path) -> None: diff --git a/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py b/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py index 9ef74d71..d77d1874 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py @@ -16,13 +16,17 @@ from data_pipeline.utils import ( logger = get_module_logger(__name__) -def reset_data_directories(data_path: Path) -> None: +def reset_data_directories( + data_path: Path, +) -> None: """Empties all census folders""" census_data_path = data_path / "census" # csv csv_path = census_data_path / "csv" - remove_files_from_dir(csv_path, ".csv") + remove_files_from_dir( + csv_path, ".csv", exception_list=["fips_states_2010.csv"] + ) # geojson geojson_path = census_data_path / "geojson" diff --git 
a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index dda15015..79ba1258 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -72,8 +72,8 @@ class CensusACSETL(ExtractTransformLoad): f"Downloading data for state/territory with FIPS code {fips}" ) - dfs.append( - censusdata.download( + try: + response = censusdata.download( src="acs5", year=self.ACS_YEAR, geo=censusdata.censusgeo( @@ -91,7 +91,12 @@ class CensusACSETL(ExtractTransformLoad): + self.LINGUISTIC_ISOLATION_FIELDS + self.POVERTY_FIELDS, ) - ) + except ValueError: + logger.error( + f"Could not download data for state/territory with FIPS code {fips}" + ) + + dfs.append(response) self.df = pd.concat(dfs) diff --git a/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py b/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py index 6e054df6..9e8986a8 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py @@ -1,4 +1,5 @@ import pandas as pd +from pandas.errors import EmptyDataError from data_pipeline.etl.base import ExtractTransformLoad from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes @@ -26,10 +27,6 @@ class HousingTransportationETL(ExtractTransformLoad): f"Downloading housing data for state/territory with FIPS code {fips}" ) - # Puerto Rico has no data, so skip - if fips == "72": - continue - unzip_file_from_url( f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir ) @@ -38,7 +35,13 @@ class HousingTransportationETL(ExtractTransformLoad): tmp_csv_file_path = ( zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv" ) - tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path) + + try: + tmp_df = 
pd.read_csv(filepath_or_buffer=tmp_csv_file_path) + except EmptyDataError: + logger.error( + f"Could not read Housing and Transportation data for state/territory with FIPS code {fips}" + ) dfs.append(tmp_df) diff --git a/data/data-pipeline/data_pipeline/utils.py b/data/data-pipeline/data_pipeline/utils.py index 5edce8fa..58761a9f 100644 --- a/data/data-pipeline/data_pipeline/utils.py +++ b/data/data-pipeline/data_pipeline/utils.py @@ -46,25 +46,31 @@ def get_module_logger(module_name: str) -> logging.Logger: logger = get_module_logger(__name__) -def remove_files_from_dir(files_path: Path, extension: str = None) -> None: +def remove_files_from_dir( + files_path: Path, extension: str = None, exception_list: list = None +) -> None: """Removes all files from a specific directory with the exception of __init__.py files or files with a specific extension Args: files_path (pathlib.Path): Name of the directory where the files will be deleted extension (str): Extension of the file pattern to delete, example "json" (optional) + exception_list (list): List of files to not remove (optional) Returns: None """ for file in os.listdir(files_path): - if extension: - if not file.endswith(extension): + # don't remove __init__ files as they conserve dir structure + if file == "__init__.py": + continue + + if exception_list: + if file in exception_list: continue - else: - # don't rempove __init__ files as they conserve dir structure - if file == "__init__.py": + elif extension: + if not file.endswith(extension): continue os.remove(files_path / file) logger.info(f"Removing {file}")