Merge pull request #1 from usds/main

Update 1 based on #885 changes
Saran Ahluwalia 2021-11-16 12:08:38 -05:00 committed by GitHub
commit 1e03e75ac4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 49 additions and 57 deletions

View file

@@ -27,7 +27,7 @@ We also recognize capacity building as a key part of involving a diverse open so
 Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below.
 
 ### Community Chats
 
-We host open source community chats every two weeks on Monday at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
+We host open source community chats every third Monday of the month at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
 
 Community members are welcome to share updates or propose topics for discussion in community chats. Please do so in the Google Group.

View file

@@ -4814,12 +4814,12 @@
       "dev": true
     },
     "axios": {
-      "version": "0.21.1",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz",
-      "integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==",
+      "version": "0.21.4",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
+      "integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
       "dev": true,
       "requires": {
-        "follow-redirects": "^1.10.0"
+        "follow-redirects": "^1.14.0"
       }
     },
     "axobject-query": {
@@ -16733,9 +16733,9 @@
       }
     },
     "nth-check": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz",
-      "integrity": "sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==",
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.1.tgz",
+      "integrity": "sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==",
       "dev": true,
       "requires": {
         "boolbase": "^1.0.0"
@@ -21780,9 +21780,9 @@
       }
     },
     "tmpl": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.4.tgz",
-      "integrity": "sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=",
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
+      "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==",
       "dev": true
     },
     "to-arraybuffer": {
@@ -22526,9 +22526,9 @@
       }
     },
     "url-parse": {
-      "version": "1.5.1",
-      "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.1.tgz",
-      "integrity": "sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==",
+      "version": "1.5.3",
+      "resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.3.tgz",
+      "integrity": "sha512-IIORyIQD9rvj0A4CLWsHkBBJuNqWpFQe224b6j9t/ABmquIS0qDU2pY6kl6AuOrL5OkCXHMCFNe1jBcuAggjvQ==",
       "dev": true,
       "requires": {
         "querystringify": "^2.1.1",

View file

@@ -33,7 +33,7 @@ class ExtractTransformLoad:
     GEOID_FIELD_NAME: str = "GEOID10"
     GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
     # TODO: investigate. Census says there are only 217,740 CBGs in the US. This might be from CBGs at different time periods.
-    EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 220405
+    EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000
     EXPECTED_MAX_CENSUS_TRACTS: int = 73076
 
     def __init__(self, config_path: Path) -> None:
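Raising the ceiling only matters if something checks against it. A minimal sketch of the kind of sanity check such a constant supports; validate_block_group_count is a hypothetical helper, not something in this diff:

import pandas as pd

EXPECTED_MAX_CENSUS_BLOCK_GROUPS = 250000  # mirrors the constant above

def validate_block_group_count(df: pd.DataFrame) -> None:
    """Hypothetical guard: fail fast if an ETL step yields more CBG rows than the US has CBGs."""
    if len(df) > EXPECTED_MAX_CENSUS_BLOCK_GROUPS:
        raise ValueError(
            f"Got {len(df)} census block group rows; "
            f"expected at most {EXPECTED_MAX_CENSUS_BLOCK_GROUPS}"
        )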

View file

@@ -291,7 +291,6 @@ class ScoreETL(ExtractTransformLoad):
             field_names.LIFE_EXPECTANCY_FIELD,
             field_names.ENERGY_BURDEN_FIELD,
             field_names.FEMA_RISK_FIELD,
-            field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD,
             field_names.URBAN_HERUISTIC_FIELD,
             field_names.AIR_TOXICS_CANCER_RISK_FIELD,
             field_names.RESPITORY_HAZARD_FIELD,

View file

@@ -16,13 +16,17 @@ from data_pipeline.utils import (
 
 logger = get_module_logger(__name__)
 
-def reset_data_directories(data_path: Path) -> None:
+def reset_data_directories(
+    data_path: Path,
+) -> None:
     """Empties all census folders"""
     census_data_path = data_path / "census"
 
     # csv
     csv_path = census_data_path / "csv"
-    remove_files_from_dir(csv_path, ".csv")
+    remove_files_from_dir(
+        csv_path, ".csv", exception_list=["fips_states_2010.csv"]
+    )
 
     # geojson
     geojson_path = census_data_path / "geojson"

View file

@@ -72,8 +72,8 @@ class CensusACSETL(ExtractTransformLoad):
                 f"Downloading data for state/territory with FIPS code {fips}"
             )
 
-            dfs.append(
-                censusdata.download(
+            try:
+                response = censusdata.download(
                     src="acs5",
                     year=self.ACS_YEAR,
                     geo=censusdata.censusgeo(
@@ -91,8 +91,13 @@ class CensusACSETL(ExtractTransformLoad):
                     + self.LINGUISTIC_ISOLATION_FIELDS
                     + self.POVERTY_FIELDS,
                 )
-            )
+            except ValueError:
+                logger.error(
+                    f"Could not download data for state/territory with FIPS code {fips}"
+                )
+
+            dfs.append(response)
 
         self.df = pd.concat(dfs)
 
         self.df[self.GEOID_FIELD_NAME] = self.df.index.to_series().apply(
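Note that as committed, the except branch only logs: control then falls through to dfs.append(response), where response is unbound if the very first download fails and stale on later failures. A minimal sketch of the same loop with the failed FIPS code skipped instead; the continue is my addition, not part of this commit:

import censusdata
import pandas as pd

def download_acs_states(fips_codes: list, year: int, variables: list) -> pd.DataFrame:
    """Sketch of the download loop above, guarded against appending stale data."""
    dfs = []
    for fips in fips_codes:
        try:
            response = censusdata.download(
                src="acs5",
                year=year,
                geo=censusdata.censusgeo(
                    [("state", fips), ("county", "*"), ("block group", "*")]
                ),
                var=variables,
            )
        except ValueError:
            # Skip this state/territory entirely instead of falling through.
            continue
        dfs.append(response)
    return pd.concat(dfs)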

View file

@@ -1,4 +1,5 @@
 import pandas as pd
+from pandas.errors import EmptyDataError
 
 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
@@ -26,10 +27,6 @@ class HousingTransportationETL(ExtractTransformLoad):
                 f"Downloading housing data for state/territory with FIPS code {fips}"
             )
 
-            # Puerto Rico has no data, so skip
-            if fips == "72":
-                continue
-
             unzip_file_from_url(
                 f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir
             )
@@ -38,7 +35,13 @@
             tmp_csv_file_path = (
                 zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
             )
-            tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
+            try:
+                tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
+            except EmptyDataError:
+                logger.error(
+                    f"Could not read Housing and Transportation data for state/territory with FIPS code {fips}"
+                )
+
             dfs.append(tmp_df)
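The same fall-through pattern appears here: an EmptyDataError leaves tmp_df unbound or pointing at the previous state's frame when dfs.append(tmp_df) runs. A sketch of the guarded variant of this loop body; zip_file_dir and logger are assumed from the surrounding code, and the continue is my addition:

from pandas.errors import EmptyDataError
import pandas as pd

dfs = []
for fips in get_state_fips_codes():  # helper imported in the diff above
    tmp_csv_file_path = zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
    try:
        tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
    except EmptyDataError:
        logger.error(
            f"Could not read Housing and Transportation data for "
            f"state/territory with FIPS code {fips}"
        )
        continue  # skip this FIPS code rather than appending a stale frame
    dfs.append(tmp_df)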

View file

@@ -320,28 +320,6 @@
     "# )"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4b74b0bf",
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "# Create a FEMA risk index score\n",
-    "# Note: this can be deleted at a later date.\n",
-    "FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (\n",
-    "    \"FEMA Risk Index Expected Annual Loss Rate\"\n",
-    ")\n",
-    "FEMA_COMMUNITIES = \"FEMA Risk Index (top 30th percentile)\"\n",
-    "merged_df[FEMA_COMMUNITIES] = (\n",
-    "    merged_df[f\"{FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD} (percentile)\"] > 0.70\n",
-    ")\n",
-    "\n",
-    "merged_df[FEMA_COMMUNITIES].describe()"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,

View file

@@ -57,9 +57,6 @@ AMI_FIELD = "Area Median Income (State or metropolitan)"
 
 # Climate
 FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
-FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (
-    "FEMA Risk Index Expected Annual Loss Rate"
-)
 EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
     "Expected building loss rate (Natural Hazards Risk Index)"
 )

View file

@@ -46,26 +46,32 @@ def get_module_logger(module_name: str) -> logging.Logger:
 
 logger = get_module_logger(__name__)
 
 
-def remove_files_from_dir(files_path: Path, extension: str = None) -> None:
+def remove_files_from_dir(
+    files_path: Path, extension: str = None, exception_list: list = None
+) -> None:
     """Removes all files from a specific directory with the exception of __init__.py
     files or files with a specific extension
 
     Args:
         files_path (pathlib.Path): Name of the directory where the files will be deleted
         extension (str): Extension of the file pattern to delete, example "json" (optional)
+        exception_list (list): List of files to not remove (optional)
 
     Returns:
         None
     """
     for file in os.listdir(files_path):
-        if extension:
-            if not file.endswith(extension):
-                continue
-        else:
-            # don't remove __init__ files as they preserve dir structure
-            if file == "__init__.py":
-                continue
+        # don't remove __init__ files as they preserve dir structure
+        if file == "__init__.py":
+            continue
+        if exception_list:
+            if file in exception_list:
+                continue
+        elif extension:
+            if not file.endswith(extension):
+                continue
         os.remove(files_path / file)
         logger.info(f"Removing {file}")