Add territory boundary data (#885)

* Add territory boundary data

* housing and transportation

* lint

* lint

* lint
This commit is contained in:
Jorge Escobar 2021-11-16 10:05:09 -05:00 committed by GitHub
commit 0a21fc6b12
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 35 additions and 17 deletions

View file

@ -16,13 +16,17 @@ from data_pipeline.utils import (
logger = get_module_logger(__name__)
def reset_data_directories(data_path: Path) -> None:
def reset_data_directories(
data_path: Path,
) -> None:
"""Empties all census folders"""
census_data_path = data_path / "census"
# csv
csv_path = census_data_path / "csv"
remove_files_from_dir(csv_path, ".csv")
remove_files_from_dir(
csv_path, ".csv", exception_list=["fips_states_2010.csv"]
)
# geojson
geojson_path = census_data_path / "geojson"

View file

@ -72,8 +72,8 @@ class CensusACSETL(ExtractTransformLoad):
f"Downloading data for state/territory with FIPS code {fips}"
)
dfs.append(
censusdata.download(
try:
response = censusdata.download(
src="acs5",
year=self.ACS_YEAR,
geo=censusdata.censusgeo(
@ -91,7 +91,12 @@ class CensusACSETL(ExtractTransformLoad):
+ self.LINGUISTIC_ISOLATION_FIELDS
+ self.POVERTY_FIELDS,
)
)
except ValueError:
logger.error(
f"Could not download data for state/territory with FIPS code {fips}"
)
dfs.append(response)
self.df = pd.concat(dfs)

View file

@ -1,4 +1,5 @@
import pandas as pd
from pandas.errors import EmptyDataError
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.etl.sources.census.etl_utils import get_state_fips_codes
@ -26,10 +27,6 @@ class HousingTransportationETL(ExtractTransformLoad):
f"Downloading housing data for state/territory with FIPS code {fips}"
)
# Puerto Rico has no data, so skip
if fips == "72":
continue
unzip_file_from_url(
f"{self.HOUSING_FTP_URL}{fips}", self.TMP_PATH, zip_file_dir
)
@ -38,7 +35,13 @@ class HousingTransportationETL(ExtractTransformLoad):
tmp_csv_file_path = (
zip_file_dir / f"htaindex_data_blkgrps_{fips}.csv"
)
tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
# Read this territory's extracted CSV. Some territories ship an empty
# file, which pandas reports as EmptyDataError; log it and move on.
try:
    tmp_df = pd.read_csv(filepath_or_buffer=tmp_csv_file_path)
except EmptyDataError:
    logger.error(
        f"Could not read Housing and Transportation data for state/territory with FIPS code {fips}"
    )
else:
    # Append only on a successful read. Appending unconditionally would
    # re-add the previous iteration's frame (duplicating its rows) or hit
    # an unbound `tmp_df` when the very first file is the empty one.
    dfs.append(tmp_df)