Merge pull request #1 from usds/main

Update 1 based on #885 changes
2025-08-23 02:41:40 -07:00 · 2021-11-16 12:08:38 -05:00 · 2021-11-16 12:08:38 -05:00 · 1e03e75ac4
commit 1e03e75ac4
parent 21834b4a91 0a21fc6b12
10 changed files with 49 additions and 57 deletions
--- a/README.md
+++ b/README.md
@ -27,7 +27,7 @@ We also recognize capacity building as a key part of involving a diverse open so
 Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below. 
 ### Community Chats
-We host open source community chats every two weeks on Monday at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
+We host open source community chats every third Monday of the month at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
 Community members are welcome to share updates or propose topics for discussion in community chats. Please do so in the Google Group.
--- a/client/package-lock.json
+++ b/client/package-lock.json
@ -4814,12 +4814,12 @@
      "dev": true
    },
    "axios": {
-      "version": "0.21.1",
+      "version": "0.21.4",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
-      "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==",
+      "integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
      "dev": true,
      "requires": {
-        "follow-redirects": "^1.10.0"
+        "follow-redirects": "^1.14.0"
      }
    },
    "axobject-query": {
@ -16733,9 +16733,9 @@
      }
    },
    "nth-check": {
-      "version": "2.0.0",
+      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz",
+      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.1.tgz",
-      "integrity": "sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==",
+      "integrity": "sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==",
      "dev": true,
      "requires": {
        "boolbase": "^1.0.0"
@ -21780,9 +21780,9 @@
      }
    },
    "tmpl": {
-      "version": "1.0.4",
+      "version": "1.0.5",
-      "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.4.tgz",
+      "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
-      "integrity": "sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=",
+      "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==",
      "dev": true
    },
    "to-arraybuffer": {
@ -22526,9 +22526,9 @@
      }
    },
    "url-parse": {
-      "version": "1.5.1",
+      "version": "1.5.3",
-      "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.1.tgz",
+      "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.3.tgz",
-      "integrity": "sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==",
+      "integrity": "sha512-IIORyIQD9rvj0A4CLWsHkBBJuNqWpFQe224b6j9t/ABmquIS0qDU2pY6kl6AuOrL5OkCXHMCFNe1jBcuAggjvQ==",
      "dev": true,
      "requires": {
        "querystringify": "^2.1.1",
--- a/data/data-pipeline/data_pipeline/etl/base.py
+++ b/data/data-pipeline/data_pipeline/etl/base.py
@ -33,7 +33,7 @@ class ExtractTransformLoad:
    GEOID_FIELD_NAME: str = "GEOID10"
    GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
    # TODO: investigate. Census says there are only 217,740 CBGs in the US. This might be from CBGs at different time periods.
-    EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 220405
+    EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000
    EXPECTED_MAX_CENSUS_TRACTS: int = 73076
    def __init__(self, config_path: Path) -> None:
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@ -291,7 +291,6 @@ class ScoreETL(ExtractTransformLoad):
            field_names.LIFE_EXPECTANCY_FIELD,
            field_names.ENERGY_BURDEN_FIELD,
            field_names.FEMA_RISK_FIELD,
            field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD,
            field_names.URBAN_HERUISTIC_FIELD,
            field_names.AIR_TOXICS_CANCER_RISK_FIELD,
            field_names.RESPITORY_HAZARD_FIELD,
--- a/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census/etl_utils.py
@ -16,13 +16,17 @@ from data_pipeline.utils import (
 logger = get_module_logger(__name__)
-def reset_data_directories(data_path: Path) -> None:
+def reset_data_directories(
    data_path: Path,
 ) -> None:
    """Empties all census folders"""
    census_data_path = data_path / "census"
    # csv
    csv_path = census_data_path / "csv"
-    remove_files_from_dir(csv_path, ".csv")
+    remove_files_from_dir(
        csv_path, ".csv", exception_list=["fips_states_2010.csv"]
    )
    # geojson
    geojson_path = census_data_path / "geojson"
--- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py
@ -72,8 +72,8 @@ class CensusACSETL(ExtractTransformLoad):
                f"Downloading data for state/territory with FIPS code {fips}"
            )
-            dfs.append(
+            try:
-                censusdata.download(
+                response = censusdata.download(
                    src="acs5",
                    year=self.ACS_YEAR,
                    geo=censusdata.censusgeo(
@ -91,7 +91,12 @@ class CensusACSETL(ExtractTransformLoad):
                    + self.LINGUISTIC_ISOLATION_FIELDS
                    + self.POVERTY_FIELDS,
                )
-            )
+            except ValueError:
                logger.error(
                    f"Could not download data for state/territory with FIPS code {fips}"
                )
            dfs.append(response)
        self.df = pd.concat(dfs)
--- a/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/housing_and_transportation/etl.py
@ -1,4 +1,5 @@
 import pandas as pd
 from pandas.errors import EmptyDataError
 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
@ -26,10 +27,6 @@ class HousingTransportationETL(ExtractTransformLoad):
                f"Downloading housing data for state/territory with FIPS code {fips}"
            )
            # Puerto Rico has no data, so skip
            if fips == "72":
                continue
            unzip_file_from_url(
                f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir
            )
@ -38,7 +35,13 @@ class HousingTransportationETL(ExtractTransformLoad):
            tmp_csv_file_path = (
                zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
            )
-            tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
+
            try:
                tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
            except EmptyDataError:
                logger.error(
                    f"Could not read Housing and Transportation data for state/territory with FIPS code {fips}"
                )
            dfs.append(tmp_df)
--- a/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
+++ b/data/data-pipeline/data_pipeline/ipython/scoring_comparison.ipynb
@ -320,28 +320,6 @@
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b74b0bf",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Create a FEMA risk index score\n",
    "# Note: this can be deleted at a later date.\n",
    "FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (\n",
    "    \"FEMA Risk Index Expected Annual Loss Rate\"\n",
    ")\n",
    "FEMA_COMMUNITIES = \"FEMA Risk Index (top 30th percentile)\"\n",
    "merged_df[FEMA_COMMUNITIES] = (\n",
    "    merged_df[f\"{FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD} (percentile)\"] > 0.70\n",
    ")\n",
    "\n",
    "merged_df[FEMA_COMMUNITIES].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@ -57,9 +57,6 @@ AMI_FIELD = "Area Median Income (State or metropolitan)"
 # Climate
 FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
 FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (
    "FEMA Risk Index Expected Annual Loss Rate"
 )
 EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
    "Expected building loss rate (Natural Hazards Risk Index)"
 )
--- a/data/data-pipeline/data_pipeline/utils.py
+++ b/data/data-pipeline/data_pipeline/utils.py
@ -46,25 +46,31 @@ def get_module_logger(module_name: str) -> logging.Logger:
 logger = get_module_logger(__name__)
-def remove_files_from_dir(files_path: Path, extension: str = None) -> None:
+def remove_files_from_dir(
    files_path: Path, extension: str = None, exception_list: list = None
 ) -> None:
    """Removes all files (or only files with a specific extension) from a specific
    directory, with the exception of __init__.py files
    Args:
        files_path (pathlib.Path): Name of the directory where the files will be deleted
        extension (str): Extension of the file pattern to delete, example "json" (optional)
        exception_list (list): List of files to not remove (optional)
    Returns:
        None
    """
    for file in os.listdir(files_path):
-        if extension:
+        # don't remove __init__ files as they conserve dir structure
-            if not file.endswith(extension):
+        if file == "__init__.py":
            continue
        if exception_list:
            if file in exception_list:
                continue
-        else:
+        elif extension:
-            # don't rempove __init__ files as they conserve dir structure
+            if not file.endswith(extension):
            if file == "__init__.py":
                continue
        os.remove(files_path / file)
        logger.info(f"Removing {file}")