mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-08-23 02:41:40 -07:00
commit
1e03e75ac4
10 changed files with 49 additions and 57 deletions
|
@ -27,7 +27,7 @@ We also recognize capacity building as a key part of involving a diverse open so
|
||||||
Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below.
|
Principles and guidelines for participating in our open source community are available [here](COMMUNITY_GUIDELINES.md). Please read them before joining or starting a conversation in this repo or one of the channels listed below.
|
||||||
|
|
||||||
### Community Chats
|
### Community Chats
|
||||||
We host open source community chats every two weeks on Monday at 5-6pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
|
We host open source community chats on the third Monday of every month from 5 to 6 pm ET. You can find information about the agenda and how to participate in our [Google Group](https://groups.google.com/u/4/g/justice40-open-source).
|
||||||
|
|
||||||
Community members are welcome to share updates or propose topics for discussion in community chats. Please do so in the Google Group.
|
Community members are welcome to share updates or propose topics for discussion in community chats. Please do so in the Google Group.
|
||||||
|
|
||||||
|
|
26
client/package-lock.json
generated
26
client/package-lock.json
generated
|
@ -4814,12 +4814,12 @@
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"axios": {
|
"axios": {
|
||||||
"version": "0.21.1",
|
"version": "0.21.4",
|
||||||
"resolved": "https://registry.npmjs.org/axios/-/axios-0.21.1.tgz",
|
"resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
|
||||||
"integrity": "sha512-dKQiRHxGD9PPRIUNIWvZhPTPpl1rf/OxTYKsqKUDjBwYylTvV7SjSHJb9ratfyzM6wCdLCOYLzs73qpg5c4iGA==",
|
"integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"follow-redirects": "^1.10.0"
|
"follow-redirects": "^1.14.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"axobject-query": {
|
"axobject-query": {
|
||||||
|
@ -16733,9 +16733,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nth-check": {
|
"nth-check": {
|
||||||
"version": "2.0.0",
|
"version": "2.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.0.1.tgz",
|
||||||
"integrity": "sha512-i4sc/Kj8htBrAiH1viZ0TgU8Y5XqCaV/FziYK6TBczxmeKm3AEFWqqF3195yKudrarqy7Zu80Ra5dobFjn9X/Q==",
|
"integrity": "sha512-it1vE95zF6dTT9lBsYbxvqh0Soy4SPowchj0UBGj/V6cTPnXXtQOPUbhZ6CmGzAD/rW22LQK6E96pcdJXk4A4w==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"boolbase": "^1.0.0"
|
"boolbase": "^1.0.0"
|
||||||
|
@ -21780,9 +21780,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"tmpl": {
|
"tmpl": {
|
||||||
"version": "1.0.4",
|
"version": "1.0.5",
|
||||||
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
|
||||||
"integrity": "sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=",
|
"integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==",
|
||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"to-arraybuffer": {
|
"to-arraybuffer": {
|
||||||
|
@ -22526,9 +22526,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"url-parse": {
|
"url-parse": {
|
||||||
"version": "1.5.1",
|
"version": "1.5.3",
|
||||||
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/url-parse/-/url-parse-1.5.3.tgz",
|
||||||
"integrity": "sha512-HOfCOUJt7iSYzEx/UqgtwKRMC6EU91NFhsCHMv9oM03VJcVo2Qrp8T8kI9D7amFf1cu+/3CEhgb3rF9zL7k85Q==",
|
"integrity": "sha512-IIORyIQD9rvj0A4CLWsHkBBJuNqWpFQe224b6j9t/ABmquIS0qDU2pY6kl6AuOrL5OkCXHMCFNe1jBcuAggjvQ==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"querystringify": "^2.1.1",
|
"querystringify": "^2.1.1",
|
||||||
|
|
|
@ -33,7 +33,7 @@ class ExtractTransformLoad:
|
||||||
GEOID_FIELD_NAME: str = "GEOID10"
|
GEOID_FIELD_NAME: str = "GEOID10"
|
||||||
GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
|
GEOID_TRACT_FIELD_NAME: str = "GEOID10_TRACT"
|
||||||
# TODO: investigate. Census says there are only 217,740 CBGs in the US. This might be from CBGs at different time periods.
|
# TODO: investigate. Census says there are only 217,740 CBGs in the US. This might be from CBGs at different time periods.
|
||||||
EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 220405
|
EXPECTED_MAX_CENSUS_BLOCK_GROUPS: int = 250000
|
||||||
EXPECTED_MAX_CENSUS_TRACTS: int = 73076
|
EXPECTED_MAX_CENSUS_TRACTS: int = 73076
|
||||||
|
|
||||||
def __init__(self, config_path: Path) -> None:
|
def __init__(self, config_path: Path) -> None:
|
||||||
|
|
|
@ -291,7 +291,6 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
field_names.LIFE_EXPECTANCY_FIELD,
|
field_names.LIFE_EXPECTANCY_FIELD,
|
||||||
field_names.ENERGY_BURDEN_FIELD,
|
field_names.ENERGY_BURDEN_FIELD,
|
||||||
field_names.FEMA_RISK_FIELD,
|
field_names.FEMA_RISK_FIELD,
|
||||||
field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD,
|
|
||||||
field_names.URBAN_HERUISTIC_FIELD,
|
field_names.URBAN_HERUISTIC_FIELD,
|
||||||
field_names.AIR_TOXICS_CANCER_RISK_FIELD,
|
field_names.AIR_TOXICS_CANCER_RISK_FIELD,
|
||||||
field_names.RESPITORY_HAZARD_FIELD,
|
field_names.RESPITORY_HAZARD_FIELD,
|
||||||
|
|
|
@ -16,13 +16,17 @@ from data_pipeline.utils import (
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def reset_data_directories(data_path: Path) -> None:
|
def reset_data_directories(
|
||||||
|
data_path: Path,
|
||||||
|
) -> None:
|
||||||
"""Empties all census folders"""
|
"""Empties all census folders"""
|
||||||
census_data_path = data_path / "census"
|
census_data_path = data_path / "census"
|
||||||
|
|
||||||
# csv
|
# csv
|
||||||
csv_path = census_data_path / "csv"
|
csv_path = census_data_path / "csv"
|
||||||
remove_files_from_dir(csv_path, ".csv")
|
remove_files_from_dir(
|
||||||
|
csv_path, ".csv", exception_list=["fips_states_2010.csv"]
|
||||||
|
)
|
||||||
|
|
||||||
# geojson
|
# geojson
|
||||||
geojson_path = census_data_path / "geojson"
|
geojson_path = census_data_path / "geojson"
|
||||||
|
|
|
@ -72,8 +72,8 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
f"Downloading data for state/territory with FIPS code {fips}"
|
f"Downloading data for state/territory with FIPS code {fips}"
|
||||||
)
|
)
|
||||||
|
|
||||||
dfs.append(
|
try:
|
||||||
censusdata.download(
|
response = censusdata.download(
|
||||||
src="acs5",
|
src="acs5",
|
||||||
year=self.ACS_YEAR,
|
year=self.ACS_YEAR,
|
||||||
geo=censusdata.censusgeo(
|
geo=censusdata.censusgeo(
|
||||||
|
@ -91,7 +91,12 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
+ self.LINGUISTIC_ISOLATION_FIELDS
|
+ self.LINGUISTIC_ISOLATION_FIELDS
|
||||||
+ self.POVERTY_FIELDS,
|
+ self.POVERTY_FIELDS,
|
||||||
)
|
)
|
||||||
)
|
except ValueError:
|
||||||
|
logger.error(
|
||||||
|
f"Could not download data for state/territory with FIPS code {fips}"
|
||||||
|
)
|
||||||
|
|
||||||
|
dfs.append(response)
|
||||||
|
|
||||||
self.df = pd.concat(dfs)
|
self.df = pd.concat(dfs)
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from pandas.errors import EmptyDataError
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
|
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
|
||||||
|
@ -26,10 +27,6 @@ class HousingTransportationETL(ExtractTransformLoad):
|
||||||
f"Downloading housing data for state/territory with FIPS code {fips}"
|
f"Downloading housing data for state/territory with FIPS code {fips}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Puerto Rico has no data, so skip
|
|
||||||
if fips == "72":
|
|
||||||
continue
|
|
||||||
|
|
||||||
unzip_file_from_url(
|
unzip_file_from_url(
|
||||||
f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir
|
f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir
|
||||||
)
|
)
|
||||||
|
@ -38,7 +35,13 @@ class HousingTransportationETL(ExtractTransformLoad):
|
||||||
tmp_csv_file_path = (
|
tmp_csv_file_path = (
|
||||||
zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
|
zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
|
||||||
)
|
)
|
||||||
tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
|
|
||||||
|
try:
|
||||||
|
tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
|
||||||
|
except EmptyDataError:
|
||||||
|
logger.error(
|
||||||
|
f"Could not read Housing and Transportation data for state/territory with FIPS code {fips}"
|
||||||
|
)
|
||||||
|
|
||||||
dfs.append(tmp_df)
|
dfs.append(tmp_df)
|
||||||
|
|
||||||
|
|
|
@ -320,28 +320,6 @@
|
||||||
"# )"
|
"# )"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "4b74b0bf",
|
|
||||||
"metadata": {
|
|
||||||
"scrolled": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Create a FEMA risk index score\n",
|
|
||||||
"# Note: this can be deleted at a later date.\n",
|
|
||||||
"FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (\n",
|
|
||||||
" \"FEMA Risk Index Expected Annual Loss Rate\"\n",
|
|
||||||
")\n",
|
|
||||||
"FEMA_COMMUNITIES = \"FEMA Risk Index (top 30th percentile)\"\n",
|
|
||||||
"merged_df[FEMA_COMMUNITIES] = (\n",
|
|
||||||
" merged_df[f\"{FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD} (percentile)\"] > 0.70\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"merged_df[FEMA_COMMUNITIES].describe()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|
|
@ -57,9 +57,6 @@ AMI_FIELD = "Area Median Income (State or metropolitan)"
|
||||||
|
|
||||||
# Climate
|
# Climate
|
||||||
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
|
||||||
FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (
|
|
||||||
"FEMA Risk Index Expected Annual Loss Rate"
|
|
||||||
)
|
|
||||||
EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
|
EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME = (
|
||||||
"Expected building loss rate (Natural Hazards Risk Index)"
|
"Expected building loss rate (Natural Hazards Risk Index)"
|
||||||
)
|
)
|
||||||
|
|
|
@ -46,25 +46,31 @@ def get_module_logger(module_name: str) -> logging.Logger:
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def remove_files_from_dir(files_path: Path, extension: str = None) -> None:
|
def remove_files_from_dir(
|
||||||
|
files_path: Path, extension: str = None, exception_list: list = None
|
||||||
|
) -> None:
|
||||||
"""Removes all files from a specific directory with the exception of __init__.py
|
"""Removes all files from a specific directory with the exception of __init__.py
|
||||||
files or files with a specific extension
|
files or files with a specific extension
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
files_path (pathlib.Path): Name of the directory where the files will be deleted
|
files_path (pathlib.Path): Name of the directory where the files will be deleted
|
||||||
extension (str): Extension of the file pattern to delete, example "json" (optional)
|
extension (str): Extension of the file pattern to delete, example "json" (optional)
|
||||||
|
exception_list (list): List of files to not remove (optional)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
None
|
None
|
||||||
|
|
||||||
"""
|
"""
|
||||||
for file in os.listdir(files_path):
|
for file in os.listdir(files_path):
|
||||||
if extension:
|
# don't remove __init__ files as they conserve dir structure
|
||||||
if not file.endswith(extension):
|
if file == "__init__.py":
|
||||||
|
continue
|
||||||
|
|
||||||
|
if exception_list:
|
||||||
|
if file in exception_list:
|
||||||
continue
|
continue
|
||||||
else:
|
elif extension:
|
||||||
# don't remove __init__ files as they conserve dir structure
|
if not file.endswith(extension):
|
||||||
if file == "__init__.py":
|
|
||||||
continue
|
continue
|
||||||
os.remove(files_path / file)
|
os.remove(files_path / file)
|
||||||
logger.info(f"Removing {file}")
|
logger.info(f"Removing {file}")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue