Merge pull request #1 from usds/main

Update 1 based on #885 changes
This commit is contained in:
Saran Ahluwalia 2021-11-16 12:08:38 -05:00 committed by GitHub
commit 1e03e75ac4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 49 additions and 57 deletions

View file

@ -27,7 +27,7 @@ We also recognize capacity building as a key part of involving a diverse open so
Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below.
### Community Chats
We host open source community chats every two weeks on Monday at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
We host open source community chats every third Monday of the month at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
Community members are welcome to share updates or propose topics for discussion in community chats. Please do so in the Google Group.

View file

@ -4814,12 +4814,12 @@
"dev": true
},
"axios": {
"version": "0.21.1",
"resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz",
"integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==",
"version": "0.21.4",
"resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
"integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
"dev": true,
"requires": {
"follow-redirects": "^1.10.0"
"follow-redirects": "^1.14.0"
}
},
"axobject-query": {
@ -16733,9 +16733,9 @@
}
},
"nth-check": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz",
"integrity": "sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==",
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.1.tgz",
"integrity": "sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==",
"dev": true,
"requires": {
"boolbase": "^1.0.0"
@ -21780,9 +21780,9 @@
}
},
"tmpl": {
"version": "1.0.4",
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.4.tgz",
"integrity": "sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=",
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
"integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==",
"dev": true
},
"to-arraybuffer": {
@ -22526,9 +22526,9 @@
}
},
"url-parse": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.1.tgz",
"integrity": "sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==",
"version": "1.5.3",
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.3.tgz",
"integrity": "sha512-IIORyIQD9rvj0A4CLWsHkBBJuNqWpFQe224b6j9t/ABmquIS0qDU2pY6kl6AuOrL5OkCXHMCFNe1jBcuAggjvQ==",
"dev": true,
"requires": {
"querystringify": "^2.1.1",

View file

@ -33,7 +33,7 @@ class ExtractTransformLoad:
GEOID_FIELD_NAME: str = "GEOID10"
GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
# TODO: investigate. Census says there are only 217,740 CBGs in the US. This might be from CBGs at different time periods.
EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 220405
EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000
EXPECTED_MAX_CENSUS_TRACTS: int = 73076
def __init__(self, config_path: Path) -> None:

View file

@ -291,7 +291,6 @@ class ScoreETL(ExtractTransformLoad):
field_names.LIFE_EXPECTANCY_FIELD,
field_names.ENERGY_BURDEN_FIELD,
field_names.FEMA_RISK_FIELD,
field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD,
field_names.URBAN_HERUISTIC_FIELD,
field_names.AIR_TOXICS_CANCER_RISK_FIELD,
field_names.RESPITORY_HAZARD_FIELD,

View file

@ -16,13 +16,17 @@ from data_pipeline.utils import (
logger = get_module_logger(__name__)
def reset_data_directories(data_path: Path) -> None:
def reset_data_directories(
data_path: Path,
) -> None:
"""Empties all census folders"""
census_data_path = data_path / "census"
# csv
csv_path = census_data_path / "csv"
remove_files_from_dir(csv_path, ".csv")
remove_files_from_dir(
csv_path, ".csv", exception_list=["fips_states_2010.csv"]
)
# geojson
geojson_path = census_data_path / "geojson"

View file

@ -72,8 +72,8 @@ class CensusACSETL(ExtractTransformLoad):
f"Downloading data for state/territory with FIPS code {fips}"
)
dfs.append(
censusdata.download(
try:
response = censusdata.download(
src="acs5",
year=self.ACS_YEAR,
geo=censusdata.censusgeo(
@ -91,8 +91,13 @@ class CensusACSETL(ExtractTransformLoad):
+ self.LINGUISTIC_ISOLATION_FIELDS
+ self.POVERTY_FIELDS,
)
except ValueError:
logger.error(
f"Could not download data for state/territory with FIPS code {fips}"
)
dfs.append(response)
self.df = pd.concat(dfs)
self.df[self.GEOID_FIELD_NAME] = self.df.index.to_series().apply(

View file

@ -1,4 +1,5 @@
import pandas as pd
from pandas.errors import EmptyDataError
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
@ -26,10 +27,6 @@ class HousingTransportationETL(ExtractTransformLoad):
f"Downloading housing data for state/territory with FIPS code {fips}"
)
# Puerto Rico has no data, so skip
if fips == "72":
continue
unzip_file_from_url(
f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir
)
@ -38,7 +35,13 @@ class HousingTransportationETL(ExtractTransformLoad):
tmp_csv_file_path = (
zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
)
try:
tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
except EmptyDataError:
logger.error(
f"Could not read Housing and Transportation data for state/territory with FIPS code {fips}"
)
dfs.append(tmp_df)

View file

@ -320,28 +320,6 @@
"# )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b74b0bf",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Create a FEMA risk index score\n",
"# Note: this can be deleted at a later date.\n",
"FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (\n",
" \"FEMA Risk Index Expected Annual Loss Rate\"\n",
")\n",
"FEMA_COMMUNITIES = \"FEMA Risk Index (top 30th percentile)\"\n",
"merged_df[FEMA_COMMUNITIES] = (\n",
" merged_df[f\"{FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD} (percentile)\"] > 0.70\n",
")\n",
"\n",
"merged_df[FEMA_COMMUNITIES].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,

View file

@ -57,9 +57,6 @@ AMI_FIELD = "Area Median Income (State or metropolitan)"
# Climate
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (
"FEMA Risk Index Expected Annual Loss Rate"
)
EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
"Expected building loss rate (Natural Hazards Risk Index)"
)

View file

@ -46,26 +46,32 @@ def get_module_logger(module_name: str) -> logging.Logger:
logger = get_module_logger(__name__)
def remove_files_from_dir(files_path: Path, extension: str = None) -> None:
def remove_files_from_dir(
files_path: Path, extension: str = None, exception_list: list = None
) -> None:
"""Removes all files from a specific directory, always keeping __init__.py
files; when an extension is given, only files matching that extension are deleted
Args:
files_path (pathlib.Path): Name of the directory where the files will be deleted
extension (str): Extension of the file pattern to delete, example "json" (optional)
exception_list (list): List of files to not remove (optional)
Returns:
None
"""
for file in os.listdir(files_path):
if extension:
if not file.endswith(extension):
continue
else:
# don't remove __init__ files as they preserve dir structure
if file == "__init__.py":
continue
if exception_list:
if file in exception_list:
continue
elif extension:
if not file.endswith(extension):
continue
os.remove(files_path / file)
logger.info(f"Removing {file}")