mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Issue 1910: Do not impute income for 0 population tracts (#1918)
* should be working, has unnecessary loggers * removing loggers and cleaning up * updating ejscreen tests * adding tests and responding to PR feedback * fixing broken smoke test * delete smoketest docs
This commit is contained in:
parent
9e85375d9b
commit
9fb9874a15
13 changed files with 150 additions and 75 deletions
|
@ -322,7 +322,9 @@ see [python-markdown docs](https://github.com/ipython-contrib/jupyter_contrib_nb
|
||||||
|
|
||||||
### Background
|
### Background
|
||||||
|
|
||||||
For this project, we make use of [pytest](https://docs.pytest.org/en/latest/) for testing purposes. To run tests, simply run `poetry run pytest` in this directory (i.e., `justice40-tool/data/data-pipeline`).
|
For this project, we make use of [pytest](https://docs.pytest.org/en/latest/) for testing purposes.
|
||||||
|
|
||||||
|
To run tests, simply run `poetry run pytest` in this directory (i.e., `justice40-tool/data/data-pipeline`).
|
||||||
|
|
||||||
Test data is configured via [fixtures](https://docs.pytest.org/en/latest/explanation/fixtures.html).
|
Test data is configured via [fixtures](https://docs.pytest.org/en/latest/explanation/fixtures.html).
|
||||||
|
|
||||||
|
|
|
@ -365,6 +365,9 @@ class ExtractTransformLoad:
|
||||||
f"No file found at `{output_file_path}`."
|
f"No file found at `{output_file_path}`."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Reading in CSV `{output_file_path}` for ETL of class `{cls}`."
|
||||||
|
)
|
||||||
output_df = pd.read_csv(
|
output_df = pd.read_csv(
|
||||||
output_file_path,
|
output_file_path,
|
||||||
dtype={
|
dtype={
|
||||||
|
|
|
@ -5,6 +5,7 @@ import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from data_pipeline.etl.base import ExtractTransformLoad
|
from data_pipeline.etl.base import ExtractTransformLoad
|
||||||
|
from data_pipeline.etl.sources.census_acs.etl import CensusACSETL
|
||||||
from data_pipeline.etl.sources.national_risk_index.etl import (
|
from data_pipeline.etl.sources.national_risk_index.etl import (
|
||||||
NationalRiskIndexETL,
|
NationalRiskIndexETL,
|
||||||
)
|
)
|
||||||
|
@ -35,7 +36,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
# dataframes
|
# dataframes
|
||||||
self.df: pd.DataFrame
|
self.df: pd.DataFrame
|
||||||
self.ejscreen_df: pd.DataFrame
|
self.ejscreen_df: pd.DataFrame
|
||||||
self.census_df: pd.DataFrame
|
self.census_acs_df: pd.DataFrame
|
||||||
self.hud_housing_df: pd.DataFrame
|
self.hud_housing_df: pd.DataFrame
|
||||||
self.cdc_places_df: pd.DataFrame
|
self.cdc_places_df: pd.DataFrame
|
||||||
self.census_acs_median_incomes_df: pd.DataFrame
|
self.census_acs_median_incomes_df: pd.DataFrame
|
||||||
|
@ -67,14 +68,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Load census data
|
# Load census data
|
||||||
census_csv = (
|
self.census_acs_df = CensusACSETL.get_data_frame()
|
||||||
constants.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
|
|
||||||
)
|
|
||||||
self.census_df = pd.read_csv(
|
|
||||||
census_csv,
|
|
||||||
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
|
|
||||||
low_memory=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Load HUD housing data
|
# Load HUD housing data
|
||||||
hud_housing_csv = (
|
hud_housing_csv = (
|
||||||
|
@ -346,7 +340,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
|
|
||||||
# Join all the data sources that use census tracts
|
# Join all the data sources that use census tracts
|
||||||
census_tract_dfs = [
|
census_tract_dfs = [
|
||||||
self.census_df,
|
self.census_acs_df,
|
||||||
self.hud_housing_df,
|
self.hud_housing_df,
|
||||||
self.cdc_places_df,
|
self.cdc_places_df,
|
||||||
self.cdc_life_expectancy_df,
|
self.cdc_life_expectancy_df,
|
||||||
|
@ -364,7 +358,7 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
self.nature_deprived_df,
|
self.nature_deprived_df,
|
||||||
self.eamlis_df,
|
self.eamlis_df,
|
||||||
self.fuds_df,
|
self.fuds_df,
|
||||||
self.tribal_overlap_df
|
self.tribal_overlap_df,
|
||||||
]
|
]
|
||||||
|
|
||||||
# Sanity check each data frame before merging.
|
# Sanity check each data frame before merging.
|
||||||
|
|
|
@ -73,8 +73,7 @@ class CDCLifeExpectancy(ExtractTransformLoad):
|
||||||
|
|
||||||
all_usa_raw_df = self._download_and_prep_data(
|
all_usa_raw_df = self._download_and_prep_data(
|
||||||
file_url=self.USA_FILE_URL,
|
file_url=self.USA_FILE_URL,
|
||||||
download_file_name=self.get_tmp_path()
|
download_file_name=self.get_tmp_path() / "US_A.CSV",
|
||||||
/ "US_A.CSV",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check which states are missing
|
# Check which states are missing
|
||||||
|
@ -94,15 +93,13 @@ class CDCLifeExpectancy(ExtractTransformLoad):
|
||||||
logger.info("Downloading data for Maine")
|
logger.info("Downloading data for Maine")
|
||||||
maine_raw_df = self._download_and_prep_data(
|
maine_raw_df = self._download_and_prep_data(
|
||||||
file_url=self.MAINE_FILE_URL,
|
file_url=self.MAINE_FILE_URL,
|
||||||
download_file_name=self.get_tmp_path()
|
download_file_name=self.get_tmp_path() / "maine.csv",
|
||||||
/ "maine.csv",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Downloading data for Wisconsin")
|
logger.info("Downloading data for Wisconsin")
|
||||||
wisconsin_raw_df = self._download_and_prep_data(
|
wisconsin_raw_df = self._download_and_prep_data(
|
||||||
file_url=self.WISCONSIN_FILE_URL,
|
file_url=self.WISCONSIN_FILE_URL,
|
||||||
download_file_name=self.get_tmp_path()
|
download_file_name=self.get_tmp_path() / "wisconsin.csv",
|
||||||
/ "wisconsin.csv",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
combined_df = pd.concat(
|
combined_df = pd.concat(
|
||||||
|
|
|
@ -23,12 +23,11 @@ CENSUS_DATA_S3_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/census.zip"
|
||||||
|
|
||||||
|
|
||||||
class CensusACSETL(ExtractTransformLoad):
|
class CensusACSETL(ExtractTransformLoad):
|
||||||
def __init__(self):
|
NAME = "census_acs"
|
||||||
self.ACS_YEAR = 2019
|
ACS_YEAR = 2019
|
||||||
self.OUTPUT_PATH = (
|
MINIMUM_POPULATION_REQUIRED_FOR_IMPUTATION = 1
|
||||||
self.DATA_PATH / "dataset" / f"census_acs_{self.ACS_YEAR}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
self.TOTAL_UNEMPLOYED_FIELD = "B23025_005E"
|
self.TOTAL_UNEMPLOYED_FIELD = "B23025_005E"
|
||||||
self.TOTAL_IN_LABOR_FORCE = "B23025_003E"
|
self.TOTAL_IN_LABOR_FORCE = "B23025_003E"
|
||||||
self.EMPLOYMENT_FIELDS = [
|
self.EMPLOYMENT_FIELDS = [
|
||||||
|
@ -216,8 +215,15 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
self.OTHER_RACE_FIELD_NAME,
|
self.OTHER_RACE_FIELD_NAME,
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Note: this field does double-duty here. It's used as the total population
|
||||||
|
# within the age questions.
|
||||||
|
# It's also what EJScreen used as their variable for total population in the
|
||||||
|
# census tract, so we use it similarly.
|
||||||
|
# See p. 83 of https://www.epa.gov/sites/default/files/2021-04/documents/ejscreen_technical_document.pdf
|
||||||
|
self.TOTAL_POPULATION_FROM_AGE_TABLE = "B01001_001E" # Estimate!!Total:
|
||||||
|
|
||||||
self.AGE_INPUT_FIELDS = [
|
self.AGE_INPUT_FIELDS = [
|
||||||
"B01001_001E", # Estimate!!Total:
|
self.TOTAL_POPULATION_FROM_AGE_TABLE,
|
||||||
"B01001_003E", # Estimate!!Total:!!Male:!!Under 5 years
|
"B01001_003E", # Estimate!!Total:!!Male:!!Under 5 years
|
||||||
"B01001_004E", # Estimate!!Total:!!Male:!!5 to 9 years
|
"B01001_004E", # Estimate!!Total:!!Male:!!5 to 9 years
|
||||||
"B01001_005E", # Estimate!!Total:!!Male:!!10 to 14 years
|
"B01001_005E", # Estimate!!Total:!!Male:!!10 to 14 years
|
||||||
|
@ -277,6 +283,7 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
self.COLUMNS_TO_KEEP = (
|
self.COLUMNS_TO_KEEP = (
|
||||||
[
|
[
|
||||||
self.GEOID_TRACT_FIELD_NAME,
|
self.GEOID_TRACT_FIELD_NAME,
|
||||||
|
field_names.TOTAL_POP_FIELD,
|
||||||
self.UNEMPLOYED_FIELD_NAME,
|
self.UNEMPLOYED_FIELD_NAME,
|
||||||
self.LINGUISTIC_ISOLATION_FIELD_NAME,
|
self.LINGUISTIC_ISOLATION_FIELD_NAME,
|
||||||
self.MEDIAN_INCOME_FIELD_NAME,
|
self.MEDIAN_INCOME_FIELD_NAME,
|
||||||
|
@ -375,18 +382,22 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
|
|
||||||
geo_df = gpd.read_file(
|
geo_df = gpd.read_file(
|
||||||
self.DATA_PATH / "census" / "geojson" / "us.json"
|
self.DATA_PATH / "census" / "geojson" / "us.json",
|
||||||
)
|
)
|
||||||
|
|
||||||
df = self._merge_geojson(
|
df = self._merge_geojson(
|
||||||
df=df,
|
df=df,
|
||||||
usa_geo_df=geo_df,
|
usa_geo_df=geo_df,
|
||||||
)
|
)
|
||||||
# Rename two fields.
|
|
||||||
|
# Rename some fields.
|
||||||
df = df.rename(
|
df = df.rename(
|
||||||
columns={
|
columns={
|
||||||
self.MEDIAN_HOUSE_VALUE_FIELD: self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
self.MEDIAN_HOUSE_VALUE_FIELD: self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
||||||
self.MEDIAN_INCOME_FIELD: self.MEDIAN_INCOME_FIELD_NAME,
|
self.MEDIAN_INCOME_FIELD: self.MEDIAN_INCOME_FIELD_NAME,
|
||||||
}
|
self.TOTAL_POPULATION_FROM_AGE_TABLE: field_names.TOTAL_POP_FIELD,
|
||||||
|
},
|
||||||
|
errors="raise",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Handle null values for various fields, which are `-666666666`.
|
# Handle null values for various fields, which are `-666666666`.
|
||||||
|
@ -472,7 +483,6 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate some demographic information.
|
# Calculate some demographic information.
|
||||||
|
|
||||||
df = df.rename(
|
df = df.rename(
|
||||||
columns={
|
columns={
|
||||||
"B02001_003E": self.BLACK_FIELD_NAME,
|
"B02001_003E": self.BLACK_FIELD_NAME,
|
||||||
|
@ -560,14 +570,11 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Calculate age groups
|
|
||||||
total_population_age_series = df["B01001_001E"]
|
|
||||||
|
|
||||||
# For each age bucket, sum the relevant columns and calculate the total
|
# For each age bucket, sum the relevant columns and calculate the total
|
||||||
# percentage.
|
# percentage.
|
||||||
for age_bucket, sum_columns in age_bucket_and_its_sum_columns:
|
for age_bucket, sum_columns in age_bucket_and_its_sum_columns:
|
||||||
df[age_bucket] = (
|
df[age_bucket] = (
|
||||||
df[sum_columns].sum(axis=1) / total_population_age_series
|
df[sum_columns].sum(axis=1) / df[field_names.TOTAL_POP_FIELD]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate college attendance and adjust low income
|
# Calculate college attendance and adjust low income
|
||||||
|
@ -602,6 +609,7 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
],
|
],
|
||||||
geo_df=df,
|
geo_df=df,
|
||||||
geoid_field=self.GEOID_TRACT_FIELD_NAME,
|
geoid_field=self.GEOID_TRACT_FIELD_NAME,
|
||||||
|
minimum_population_required_for_imputation=self.MINIMUM_POPULATION_REQUIRED_FOR_IMPUTATION,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Calculating with imputed values")
|
logger.info("Calculating with imputed values")
|
||||||
|
@ -615,13 +623,20 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
- df[self.COLLEGE_ATTENDANCE_FIELD].fillna(
|
- df[self.COLLEGE_ATTENDANCE_FIELD].fillna(
|
||||||
df[self.IMPUTED_COLLEGE_ATTENDANCE_FIELD]
|
df[self.IMPUTED_COLLEGE_ATTENDANCE_FIELD]
|
||||||
)
|
)
|
||||||
|
# Use clip to ensure that the values are not negative if college attendance
|
||||||
|
# is very high
|
||||||
).clip(
|
).clip(
|
||||||
lower=0
|
lower=0
|
||||||
)
|
)
|
||||||
|
|
||||||
# All values should have a value at this point
|
# All values should have a value at this point
|
||||||
assert (
|
assert (
|
||||||
|
# For tracts that meet the minimum population required for imputation
|
||||||
df[
|
df[
|
||||||
|
df[field_names.TOTAL_POP_FIELD]
|
||||||
|
>= self.MINIMUM_POPULATION_REQUIRED_FOR_IMPUTATION
|
||||||
|
][
|
||||||
|
# Then the imputed field should have no nulls
|
||||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
|
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
|
||||||
]
|
]
|
||||||
.isna()
|
.isna()
|
||||||
|
@ -644,13 +659,5 @@ class CensusACSETL(ExtractTransformLoad):
|
||||||
& df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna()
|
& df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Strip columns and save results to self.
|
# Save results to self.
|
||||||
self.df = df[self.COLUMNS_TO_KEEP]
|
self.output_df = df
|
||||||
|
|
||||||
def load(self) -> None:
|
|
||||||
logger.info("Saving Census ACS Data")
|
|
||||||
|
|
||||||
# mkdir census
|
|
||||||
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
|
|
||||||
|
|
||||||
self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ from typing import Any, List, NamedTuple, Tuple
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import geopandas as gpd
|
import geopandas as gpd
|
||||||
|
|
||||||
|
from data_pipeline.score import field_names
|
||||||
from data_pipeline.utils import get_module_logger
|
from data_pipeline.utils import get_module_logger
|
||||||
|
|
||||||
# pylint: disable=unsubscriptable-object
|
# pylint: disable=unsubscriptable-object
|
||||||
|
@ -23,6 +24,7 @@ def _get_fips_mask(
|
||||||
def _get_neighbor_mask(
|
def _get_neighbor_mask(
|
||||||
geo_df: gpd.GeoDataFrame, row: gpd.GeoSeries
|
geo_df: gpd.GeoDataFrame, row: gpd.GeoSeries
|
||||||
) -> pd.Series:
|
) -> pd.Series:
|
||||||
|
"""Returns neighboring tracts."""
|
||||||
return geo_df["geometry"].touches(row["geometry"])
|
return geo_df["geometry"].touches(row["geometry"])
|
||||||
|
|
||||||
|
|
||||||
|
@ -40,24 +42,47 @@ def _choose_best_mask(
|
||||||
def _prepare_dataframe_for_imputation(
|
def _prepare_dataframe_for_imputation(
|
||||||
impute_var_named_tup_list: List[NamedTuple],
|
impute_var_named_tup_list: List[NamedTuple],
|
||||||
geo_df: gpd.GeoDataFrame,
|
geo_df: gpd.GeoDataFrame,
|
||||||
|
population_field: str,
|
||||||
|
minimum_population_required_for_imputation: int = 1,
|
||||||
geoid_field: str = "GEOID10_TRACT",
|
geoid_field: str = "GEOID10_TRACT",
|
||||||
) -> Tuple[Any, gpd.GeoDataFrame]:
|
) -> Tuple[Any, gpd.GeoDataFrame]:
|
||||||
|
"""Helper for imputation.
|
||||||
|
|
||||||
|
Given the inputs of `ImputeVariables`, returns list of tracts that need to be
|
||||||
|
imputed, along with a GeoDataFrame that has a column with the imputed field
|
||||||
|
"primed", meaning it is a copy of the raw field.
|
||||||
|
|
||||||
|
Tracts whose population is non-null and less than
|
||||||
|
`minimum_population_required_for_imputation` are left out of the list of tracts to impute.
|
||||||
|
"""
|
||||||
imputing_cols = [
|
imputing_cols = [
|
||||||
impute_var_pair.raw_field_name
|
impute_var_pair.raw_field_name
|
||||||
for impute_var_pair in impute_var_named_tup_list
|
for impute_var_pair in impute_var_named_tup_list
|
||||||
]
|
]
|
||||||
|
|
||||||
# prime column to exist
|
# Prime column to exist
|
||||||
for impute_var_pair in impute_var_named_tup_list:
|
for impute_var_pair in impute_var_named_tup_list:
|
||||||
geo_df[impute_var_pair.imputed_field_name] = geo_df[
|
geo_df[impute_var_pair.imputed_field_name] = geo_df[
|
||||||
impute_var_pair.raw_field_name
|
impute_var_pair.raw_field_name
|
||||||
].copy()
|
].copy()
|
||||||
|
|
||||||
# generate a list of tracts for which at least one of the imputation
|
# Generate a list of tracts for which at least one of the imputation
|
||||||
# columns is null
|
# columns is null and that also meet the population criteria.
|
||||||
tract_list = geo_df[geo_df[imputing_cols].isna().any(axis=1)][
|
tract_list = geo_df[
|
||||||
geoid_field
|
(
|
||||||
].unique()
|
# First, check whether any of the columns we want to impute contain null
|
||||||
|
# values
|
||||||
|
geo_df[imputing_cols].isna().any(axis=1)
|
||||||
|
# Second, ensure population is either null or >= the minimum population
|
||||||
|
& (
|
||||||
|
geo_df[population_field].isnull()
|
||||||
|
| (
|
||||||
|
geo_df[population_field]
|
||||||
|
>= minimum_population_required_for_imputation
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
][geoid_field].unique()
|
||||||
|
|
||||||
# Check that imputation is a valid choice for this set of fields
|
# Check that imputation is a valid choice for this set of fields
|
||||||
logger.info(f"Imputing values for {len(tract_list)} unique tracts.")
|
logger.info(f"Imputing values for {len(tract_list)} unique tracts.")
|
||||||
|
@ -70,6 +95,8 @@ def calculate_income_measures(
|
||||||
impute_var_named_tup_list: list,
|
impute_var_named_tup_list: list,
|
||||||
geo_df: gpd.GeoDataFrame,
|
geo_df: gpd.GeoDataFrame,
|
||||||
geoid_field: str,
|
geoid_field: str,
|
||||||
|
population_field: str = field_names.TOTAL_POP_FIELD,
|
||||||
|
minimum_population_required_for_imputation: int = 1,
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""Impute values based on geographic neighbors
|
"""Impute values based on geographic neighbors
|
||||||
|
|
||||||
|
@ -89,6 +116,8 @@ def calculate_income_measures(
|
||||||
impute_var_named_tup_list=impute_var_named_tup_list,
|
impute_var_named_tup_list=impute_var_named_tup_list,
|
||||||
geo_df=geo_df,
|
geo_df=geo_df,
|
||||||
geoid_field=geoid_field,
|
geoid_field=geoid_field,
|
||||||
|
population_field=population_field,
|
||||||
|
minimum_population_required_for_imputation=minimum_population_required_for_imputation,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Iterate through the dataframe to impute in place
|
# Iterate through the dataframe to impute in place
|
||||||
|
@ -119,6 +148,7 @@ def calculate_income_measures(
|
||||||
],
|
],
|
||||||
column_to_impute=impute_var_pair.raw_field_name,
|
column_to_impute=impute_var_pair.raw_field_name,
|
||||||
)
|
)
|
||||||
|
|
||||||
geo_df.loc[index, impute_var_pair.imputed_field_name] = geo_df[
|
geo_df.loc[index, impute_var_pair.imputed_field_name] = geo_df[
|
||||||
mask_to_use
|
mask_to_use
|
||||||
][impute_var_pair.raw_field_name].mean()
|
][impute_var_pair.raw_field_name].mean()
|
||||||
|
|
|
@ -24,7 +24,6 @@ class EJSCREENETL(ExtractTransformLoad):
|
||||||
|
|
||||||
self.COLUMNS_TO_KEEP = [
|
self.COLUMNS_TO_KEEP = [
|
||||||
self.GEOID_TRACT_FIELD_NAME,
|
self.GEOID_TRACT_FIELD_NAME,
|
||||||
field_names.TOTAL_POP_FIELD,
|
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
field_names.AIR_TOXICS_CANCER_RISK_FIELD,
|
field_names.AIR_TOXICS_CANCER_RISK_FIELD,
|
||||||
field_names.RESPIRATORY_HAZARD_FIELD,
|
field_names.RESPIRATORY_HAZARD_FIELD,
|
||||||
|
@ -66,7 +65,6 @@ class EJSCREENETL(ExtractTransformLoad):
|
||||||
self.output_df = self.df.rename(
|
self.output_df = self.df.rename(
|
||||||
columns={
|
columns={
|
||||||
self.INPUT_GEOID_TRACT_FIELD_NAME: self.GEOID_TRACT_FIELD_NAME,
|
self.INPUT_GEOID_TRACT_FIELD_NAME: self.GEOID_TRACT_FIELD_NAME,
|
||||||
"ACSTOTPOP": field_names.TOTAL_POP_FIELD,
|
|
||||||
"CANCER": field_names.AIR_TOXICS_CANCER_RISK_FIELD,
|
"CANCER": field_names.AIR_TOXICS_CANCER_RISK_FIELD,
|
||||||
"RESP": field_names.RESPIRATORY_HAZARD_FIELD,
|
"RESP": field_names.RESPIRATORY_HAZARD_FIELD,
|
||||||
"DSLPM": field_names.DIESEL_FIELD,
|
"DSLPM": field_names.DIESEL_FIELD,
|
||||||
|
|
|
@ -108,8 +108,12 @@ class TribalOverlapETL(ExtractTransformLoad):
|
||||||
|
|
||||||
# Switch from geographic to projected CRSes
|
# Switch from geographic to projected CRSes
|
||||||
# because logically that's right
|
# because logically that's right
|
||||||
self.census_tract_gdf = self.census_tract_gdf.to_crs(crs=self.CRS_INTEGER)
|
self.census_tract_gdf = self.census_tract_gdf.to_crs(
|
||||||
tribal_gdf_without_points = tribal_gdf_without_points.to_crs(crs=self.CRS_INTEGER)
|
crs=self.CRS_INTEGER
|
||||||
|
)
|
||||||
|
tribal_gdf_without_points = tribal_gdf_without_points.to_crs(
|
||||||
|
crs=self.CRS_INTEGER
|
||||||
|
)
|
||||||
|
|
||||||
# Create a measure for the entire census tract area
|
# Create a measure for the entire census tract area
|
||||||
self.census_tract_gdf["area_tract"] = self.census_tract_gdf.area
|
self.census_tract_gdf["area_tract"] = self.census_tract_gdf.area
|
||||||
|
|
|
@ -15,10 +15,10 @@ def final_score_df():
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def census_df():
|
def census_acs_df():
|
||||||
census_csv = constants.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
|
census_acs_csv = constants.DATA_PATH / "dataset" / "census_acs" / "usa.csv"
|
||||||
return pd.read_csv(
|
return pd.read_csv(
|
||||||
census_csv,
|
census_acs_csv,
|
||||||
dtype={GEOID_TRACT_FIELD: "string"},
|
dtype={GEOID_TRACT_FIELD: "string"},
|
||||||
low_memory=False,
|
low_memory=False,
|
||||||
)
|
)
|
||||||
|
|
|
@ -11,7 +11,7 @@ from .fixtures import (
|
||||||
final_score_df,
|
final_score_df,
|
||||||
ejscreen_df,
|
ejscreen_df,
|
||||||
hud_housing_df,
|
hud_housing_df,
|
||||||
census_df,
|
census_acs_df,
|
||||||
cdc_places_df,
|
cdc_places_df,
|
||||||
census_acs_median_incomes_df,
|
census_acs_median_incomes_df,
|
||||||
cdc_life_expectancy_df,
|
cdc_life_expectancy_df,
|
||||||
|
@ -235,7 +235,7 @@ def test_data_sources(
|
||||||
final_score_df,
|
final_score_df,
|
||||||
hud_housing_df,
|
hud_housing_df,
|
||||||
ejscreen_df,
|
ejscreen_df,
|
||||||
census_df,
|
census_acs_df,
|
||||||
cdc_places_df,
|
cdc_places_df,
|
||||||
census_acs_median_incomes_df,
|
census_acs_median_incomes_df,
|
||||||
cdc_life_expectancy_df,
|
cdc_life_expectancy_df,
|
||||||
|
@ -337,3 +337,41 @@ def test_output_tracts(final_score_df, national_tract_df):
|
||||||
|
|
||||||
def test_all_tracts_have_scores(final_score_df):
|
def test_all_tracts_have_scores(final_score_df):
|
||||||
assert not final_score_df[field_names.SCORE_N_COMMUNITIES].isna().any()
|
assert not final_score_df[field_names.SCORE_N_COMMUNITIES].isna().any()
|
||||||
|
|
||||||
|
|
||||||
|
def test_imputed_tracts(final_score_df):
|
||||||
|
# Make sure that any tracts with zero population have null imputed income
|
||||||
|
tracts_with_zero_population_df = final_score_df[
|
||||||
|
final_score_df[field_names.TOTAL_POP_FIELD] == 0
|
||||||
|
]
|
||||||
|
assert (
|
||||||
|
tracts_with_zero_population_df[
|
||||||
|
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
|
||||||
|
]
|
||||||
|
.isna()
|
||||||
|
.all()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Make sure that any tracts with null population have null imputed income
|
||||||
|
tracts_with_null_population_df = final_score_df[
|
||||||
|
final_score_df[field_names.TOTAL_POP_FIELD].isnull()
|
||||||
|
]
|
||||||
|
assert (
|
||||||
|
tracts_with_null_population_df[
|
||||||
|
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
|
||||||
|
]
|
||||||
|
.isna()
|
||||||
|
.all()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Make sure that no tracts with population have null imputed income
|
||||||
|
tracts_with_some_population_df = final_score_df[
|
||||||
|
final_score_df[field_names.TOTAL_POP_FIELD] > 0
|
||||||
|
]
|
||||||
|
assert (
|
||||||
|
not tracts_with_some_population_df[
|
||||||
|
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD
|
||||||
|
]
|
||||||
|
.isna()
|
||||||
|
.any()
|
||||||
|
)
|
||||||
|
|
|
@ -1,16 +1,16 @@
|
||||||
GEOID10_TRACT,Total population,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Individuals over 64 years old,Individuals under 5 years old,Percent pre-1960s housing (lead paint indicator),Leaky underground storage tanks
|
GEOID10_TRACT,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Individuals over 64 years old,Individuals under 5 years old,Percent pre-1960s housing (lead paint indicator),Leaky underground storage tanks
|
||||||
06027000800,3054,20.0000000000,0.2000000000,0.0162608457,5.9332945205,59.8143830065,134.3731709435,0.0161739005,0.0231458734,0.0088169702,0.0000000476,0.0943661972,0.4021269525,0.2445972495,0.0422396857,0.3691340106,0.0271801764
|
06027000800,20.0000000000,0.2000000000,0.0162608457,5.9332945205,59.8143830065,134.3731709435,0.0161739005,0.0231458734,0.0088169702,0.0000000476,0.0943661972,0.4021269525,0.2445972495,0.0422396857,0.3691340106,0.0271801764
|
||||||
06061021322,20899,30.0000000000,0.5000000000,0.1849562857,12.1102756164,52.7832287582,12.5173455346,0.4515663958,0.2027045525,0.0687928975,0.2667203153,0.0343563903,0.1859250743,0.1406287382,0.0683764773,0.0334588644,0.0258826940
|
06061021322,30.0000000000,0.5000000000,0.1849562857,12.1102756164,52.7832287582,12.5173455346,0.4515663958,0.2027045525,0.0687928975,0.2667203153,0.0343563903,0.1859250743,0.1406287382,0.0683764773,0.0334588644,0.0258826940
|
||||||
06069000802,3049,20.0000000000,0.2000000000,0.0375346206,7.4113546849,47.0434058824,15.7944927934,0.0811927061,0.1674220356,0.0396183204,,0.0324607330,0.2453201970,0.1534929485,0.0787143326,0.3485254692,0.0102735941
|
06069000802,20.0000000000,0.2000000000,0.0375346206,7.4113546849,47.0434058824,15.7944927934,0.0811927061,0.1674220356,0.0396183204,,0.0324607330,0.2453201970,0.1534929485,0.0787143326,0.3485254692,0.0102735941
|
||||||
15001021010,8606,10.0000000000,0.1000000000,0.0067389217,,,0.1074143214,0.0478749209,0.0931096253,0.0027318608,,0.0109090909,0.5159562078,0.1992795724,0.0366023704,0.0112496943,0.0259838494
|
15001021010,10.0000000000,0.1000000000,0.0067389217,,,0.1074143214,0.0478749209,0.0931096253,0.0027318608,,0.0109090909,0.5159562078,0.1992795724,0.0366023704,0.0112496943,0.0259838494
|
||||||
15001021101,3054,10.0000000000,0.1000000000,0.0033713587,,,1.7167679255,0.2484740667,0.2746856427,0.0025910486,,0.0194426442,0.4755657593,0.2976424361,0.0301244270,0.0168539326,0.0375389154
|
15001021101,10.0000000000,0.1000000000,0.0033713587,,,1.7167679255,0.2484740667,0.2746856427,0.0025910486,,0.0194426442,0.4755657593,0.2976424361,0.0301244270,0.0168539326,0.0375389154
|
||||||
15001021402,3778,10.0000000000,0.1000000000,0.0131608945,,,635.9981128640,0.0225482603,0.6278707343,0.0033357209,,0.0407569141,0.1877496671,0.2469560614,0.0751720487,0.1743524953,0.5088713177
|
15001021402,10.0000000000,0.1000000000,0.0131608945,,,635.9981128640,0.0225482603,0.6278707343,0.0033357209,,0.0407569141,0.1877496671,0.2469560614,0.0751720487,0.1743524953,0.5088713177
|
||||||
15001021800,5998,10.0000000000,0.1000000000,0.0049503455,,,0.0743045071,0.0402733327,0.0410968274,0.0038298946,,0.0359848485,0.2698678267,0.2352450817,0.0586862287,0.1676168757,0.1071290552
|
15001021800,10.0000000000,0.1000000000,0.0049503455,,,0.0743045071,0.0402733327,0.0410968274,0.0038298946,,0.0359848485,0.2698678267,0.2352450817,0.0586862287,0.1676168757,0.1071290552
|
||||||
15003010201,4936,10.0000000000,0.1000000000,0.0171119880,,,1493.8870892160,0.0548137804,0.4080845621,0.0694550700,,0.0340041638,0.2999166319,0.1318881686,0.0964343598,0.2131062951,0.0995447326
|
15003010201,10.0000000000,0.1000000000,0.0171119880,,,1493.8870892160,0.0548137804,0.4080845621,0.0694550700,,0.0340041638,0.2999166319,0.1318881686,0.0964343598,0.2131062951,0.0995447326
|
||||||
15007040603,2984,10.0000000000,0.1000000000,0.0225796264,,,255.5966484444,0.1042895043,0.5200441984,0.0065810172,,0.0311909263,0.2676292814,0.2533512064,0.0563002681,0.0935077519,0.1610354485
|
15007040603,10.0000000000,0.1000000000,0.0225796264,,,255.5966484444,0.1042895043,0.5200441984,0.0065810172,,0.0311909263,0.2676292814,0.2533512064,0.0563002681,0.0935077519,0.1610354485
|
||||||
15007040604,3529,10.0000000000,0.1000000000,0.0297040750,,,464.0468169721,0.1282189641,0.3810520320,0.0064334940,,0.0353833193,0.3687102371,0.1790875602,0.0943610088,0.1981538462,0.2277699060
|
15007040604,10.0000000000,0.1000000000,0.0297040750,,,464.0468169721,0.1282189641,0.3810520320,0.0064334940,,0.0353833193,0.3687102371,0.1790875602,0.0943610088,0.1981538462,0.2277699060
|
||||||
15007040700,9552,10.0000000000,0.1000000000,0.0120486502,,,829.6297843840,0.2776903565,0.5315584393,0.0062317499,,0.0328151986,0.2079176730,0.1920016750,0.0808207705,0.1049120679,0.8605507426
|
15007040700,10.0000000000,0.1000000000,0.0120486502,,,829.6297843840,0.2776903565,0.5315584393,0.0062317499,,0.0328151986,0.2079176730,0.1920016750,0.0808207705,0.1049120679,0.8605507426
|
||||||
15009030100,1405,10.0000000000,0.1000000000,0.0026846006,,,,0.0398066625,0.0329594792,0.0046765532,,0.0000000000,0.2911208151,0.2434163701,0.0882562278,0.2135678392,0.0973247551
|
15009030100,10.0000000000,0.1000000000,0.0026846006,,,,0.0398066625,0.0329594792,0.0046765532,,0.0000000000,0.2911208151,0.2434163701,0.0882562278,0.2135678392,0.0973247551
|
||||||
15009030201,2340,10.0000000000,0.1000000000,0.0063521816,,,7.0868595222,0.1292001112,0.0908033666,0.0053511202,,0.0000000000,0.2677266867,0.2367521368,0.0641025641,0.0928229665,0.0098923140
|
15009030201,10.0000000000,0.1000000000,0.0063521816,,,7.0868595222,0.1292001112,0.0908033666,0.0053511202,,0.0000000000,0.2677266867,0.2367521368,0.0641025641,0.0928229665,0.0098923140
|
||||||
15009030402,8562,10.0000000000,0.1000000000,0.0153866969,,,233.6880574427,0.6633705951,0.5914191729,0.0055146115,,0.0122641509,0.1792805419,0.1810324690,0.0463676711,0.0760149726,0.4432670413
|
15009030402,10.0000000000,0.1000000000,0.0153866969,,,233.6880574427,0.6633705951,0.5914191729,0.0055146115,,0.0122641509,0.1792805419,0.1810324690,0.0463676711,0.0760149726,0.4432670413
|
||||||
15009030800,7879,10.0000000000,0.1000000000,0.0169064550,,,575.9991000531,1.0347888110,0.5999348163,0.0061499864,0.0008675195,0.0013422819,0.1386100877,0.1303464907,0.0753902780,0.1220556745,0.0263640121
|
15009030800,10.0000000000,0.1000000000,0.0169064550,,,575.9991000531,1.0347888110,0.5999348163,0.0061499864,0.0008675195,0.0013422819,0.1386100877,0.1303464907,0.0753902780,0.1220556745,0.0263640121
|
||||||
|
|
|
|
@ -1,4 +1,4 @@
|
||||||
OBJECTID,GEOID10_TRACT,Total population,ACSIPOVBAS,ACSEDUCBAS,ACSTOTHH,ACSTOTHU,ACSUNEMPBAS,MINORPOP,MINORPCT,LOWINCOME,Poverty (Less than 200% of federal poverty line),LESSHS,LESSHSPCT,LINGISO,Percent of households in linguistic isolation,UNDER5,Individuals under 5 years old,OVER64,Individuals over 64 years old,UNEMP,UNEMPPCT,PRE1960,Percent pre-1960s housing (lead paint indicator),VULEOPCT,VULSVI6PCT,VULEO,VULSVI6,DISPEO,DISPSVI6,Diesel particulate matter exposure,Air toxics cancer risk,Respiratory hazard index,Traffic proximity and volume,Wastewater discharge,Proximity to NPL sites,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Ozone,PM2.5 in the air,Leaky underground storage tanks,D_LDPNT_2,D_DSLPM_2,D_CANCR_2,D_RESP_2,D_PTRAF_2,D_PWDIS_2,D_PNPL_2,D_PRMP_2,D_PTSDF_2,D_OZONE_2,D_PM25_2,D_UST_2,STATE_NAME,ST_ABBREV,REGION,P_MINORPCT,P_LWINCPCT,P_LESHSPCT,P_LNGISPCT,P_UNDR5PCT,P_OVR64PCT,P_UNEMP,P_UNEMPPCT,P_LDPNT,P_VULEOPCT,P_VULSVI6PCT,P_VULSVI6,P_DISPSVI6,P_DSLPM,P_CANCR,P_RESP,P_PTRAF,P_PWDIS,P_PNPL,P_PRMP,P_PTSDF,P_OZONE,P_PM25,P_UST,P_LDPNT_D2,P_DSLPM_D2,P_CANCR_D2,P_RESP_D2,P_PTRAF_D2,P_PWDIS_D2,P_PNPL_D2,P_PRMP_D2,P_PTSDF_D2,P_OZONE_D2,P_PM25_D2,P_UST_D2,B_MINORPCT,B_LWINCPCT,B_LESHSPCT,B_LNGISPCT,B_UNDR5PCT,B_OVR64PCT,B_UNEMP,B_UNEMPPCT,B_LDPNT,B_VULEOPCT,B_VULSVI6PCT,B_VULSVI6,B_DISPSVI6,B_DSLPM,B_CANCR,B_RESP,B_PTRAF,B_PWDIS,B_PNPL,B_PRMP,B_PTSDF,B_OZONE,B_PM25,B_UST,B_LDPNT_D2,B_DSLPM_D2,B_CANCR_D2,B_RESP_D2,B_PTRAF_D2,B_PWDIS_D2,B_PNPL_D2,B_PRMP_D2,B_PTSDF_D2,B_OZONE_D2,B_PM25_D2,B_UST_D2,T_MINORPCT,T_LWINCPCT,T_LESHSPCT,T_LNGISPCT,T_UNDR5PCT,T_OVR64PCT,T_UNEMPPCT,T_VULEOPCT,T_LDPNT,T_LDPNT_D2,T_DSLPM,T_DSLPM_D2,T_CANCR,T_CANCR_D2,T_RESP,T_RESP_D2,T_PTRAF,T_PTRAF_D2,T_PWDIS,T_PWDIS_D2,T_PNPL,T_PNPL_D2,T_PRMP,T_PRMP_D2,T_PTSDF,T_PTSDF_D2,T_OZONE,T_OZONE_D2,T_PM25,T_PM25_D2,T_UST,T_UST_D2,AREALAND,AREAWATER,NPL_CNT,TSDF_CNT,Shape_Length,Shape_Area
|
OBJECTID,GEOID10_TRACT,ACSTOTPOP,ACSIPOVBAS,ACSEDUCBAS,ACSTOTHH,ACSTOTHU,ACSUNEMPBAS,MINORPOP,MINORPCT,LOWINCOME,Poverty (Less than 200% of federal poverty line),LESSHS,LESSHSPCT,LINGISO,Percent of households in linguistic isolation,UNDER5,Individuals under 5 years old,OVER64,Individuals over 64 years old,UNEMP,UNEMPPCT,PRE1960,Percent pre-1960s housing (lead paint indicator),VULEOPCT,VULSVI6PCT,VULEO,VULSVI6,DISPEO,DISPSVI6,Diesel particulate matter exposure,Air toxics cancer risk,Respiratory hazard index,Traffic proximity and volume,Wastewater discharge,Proximity to NPL sites,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Ozone,PM2.5 in the air,Leaky underground storage tanks,D_LDPNT_2,D_DSLPM_2,D_CANCR_2,D_RESP_2,D_PTRAF_2,D_PWDIS_2,D_PNPL_2,D_PRMP_2,D_PTSDF_2,D_OZONE_2,D_PM25_2,D_UST_2,STATE_NAME,ST_ABBREV,REGION,P_MINORPCT,P_LWINCPCT,P_LESHSPCT,P_LNGISPCT,P_UNDR5PCT,P_OVR64PCT,P_UNEMP,P_UNEMPPCT,P_LDPNT,P_VULEOPCT,P_VULSVI6PCT,P_VULSVI6,P_DISPSVI6,P_DSLPM,P_CANCR,P_RESP,P_PTRAF,P_PWDIS,P_PNPL,P_PRMP,P_PTSDF,P_OZONE,P_PM25,P_UST,P_LDPNT_D2,P_DSLPM_D2,P_CANCR_D2,P_RESP_D2,P_PTRAF_D2,P_PWDIS_D2,P_PNPL_D2,P_PRMP_D2,P_PTSDF_D2,P_OZONE_D2,P_PM25_D2,P_UST_D2,B_MINORPCT,B_LWINCPCT,B_LESHSPCT,B_LNGISPCT,B_UNDR5PCT,B_OVR64PCT,B_UNEMP,B_UNEMPPCT,B_LDPNT,B_VULEOPCT,B_VULSVI6PCT,B_VULSVI6,B_DISPSVI6,B_DSLPM,B_CANCR,B_RESP,B_PTRAF,B_PWDIS,B_PNPL,B_PRMP,B_PTSDF,B_OZONE,B_PM25,B_UST,B_LDPNT_D2,B_DSLPM_D2,B_CANCR_D2,B_RESP_D2,B_PTRAF_D2,B_PWDIS_D2,B_PNPL_D2,B_PRMP_D2,B_PTSDF_D2,B_OZONE_D2,B_PM25_D2,B_UST_D2,T_MINORPCT,T_LWINCPCT,T_LESHSPCT,T_LNGISPCT,T_UNDR5PCT,T_OVR64PCT,T_UNEMPPCT,T_VULEOPCT,T_LDPNT,T_LDPNT_D2,T_DSLPM,T_DSLPM_D2,T_CANCR,T_CANCR_D2,T_RESP,T_RESP_D2,T_PTRAF,T_PTRAF_D2,T_PWDIS,T_PWDIS_D2,T_PNPL,T_PNPL_D2,T_PRMP,T_PRMP_D2,T_PTSDF,T_PTSDF_D2,T_OZONE,T_OZONE_D2,T_PM25,T_PM25_D2,T_UST,T_UST_D2,AREALAND,AREAWATER,NPL_CNT,TSDF_CNT,Shape_Length,Shape_Area
|
||||||
4529,06027000800,3054,3009,2337,1420,2067,1443,1218,0.3988212181,1210,0.4021269525,475,0.2032520325,134,0.0943661972,129,0.0422396857,747,0.2445972495,62,0.0429660430,763,0.3691340106,0.4004740853,0.2309005559,1223.0478564307,705.1702977293,135.9429095904,144.8520486255,0.0162608457,20.0000000000,0.2000000000,134.3731709435,0.0000000476,0.0088169702,0.0161739005,0.0231458734,59.8143830065,5.9332945205,0.0271801764,50.1811514356,2.2105466749,2718.8581918080,27.1885819181,18267.0798289539,0.0000064773,1.1986045786,2.1987270931,3.1465173743,8131.3412612630,806.5893205801,3.6949522625,California,CA,9,58.2565807824,70.8357682483,82.0300855712,83.4211514441,22.4791060804,91.4310072487,20.6342392033,44.8003303446,69.4492207493,64.4805710566,73.9747591523,41.2001973366,69.9936559849,0.4881982980,32.2031638835,14.4688811492,33.6358789383,2.7793036790,3.1380644255,0.3541522801,2.0598614138,97.6642425963,3.6388096802,6.3535808084,71.4956721564,59.1319320934,61.5316181718,60.9745786385,62.4689837463,62.0864910202,59.8317854029,59.0710337447,59.2599060994,64.9284478117,62.2619591744,60.9702180540,6,8,9,9,3,10,3,5,7,7,8,5,7,1,4,2,4,1,1,1,1,11,1,1,8,6,7,7,7,7,6,6,6,7,7,7,40% (58%ile),40% (70%ile),20% (82%ile),9% (83%ile),4% (22%ile),24% (91%ile),4% (44%ile),40% (64%ile),0.37 = fraction pre-1960 (69%ile),71%ile,0.0163 ug/m3 (0%ile),59%ile,20 lifetime risk per million (32%ile),61%ile,0.2 (14%ile),60%ile,130 daily vehicles/meters distance (33%ile),62%ile,0.000000048 toxicity-weighted concentration/meters distance (2%ile),62%ile,0.0088 sites/km distance (3%ile),59%ile,0.016 facilities/km distance (0%ile),59%ile,0.023 facilities/km distance (2%ile),59%ile,59.8 ppb (97%ile),64%ile,5.93 ug/m3 (3%ile),62%ile,0.027 facilities/sq km area (6%ile),60%ile,17743852489.0000000000,41257887.0000000000,0,1,969231.5231135677,27404749177.8422279358
|
4529,06027000800,3054,3009,2337,1420,2067,1443,1218,0.3988212181,1210,0.4021269525,475,0.2032520325,134,0.0943661972,129,0.0422396857,747,0.2445972495,62,0.0429660430,763,0.3691340106,0.4004740853,0.2309005559,1223.0478564307,705.1702977293,135.9429095904,144.8520486255,0.0162608457,20.0000000000,0.2000000000,134.3731709435,0.0000000476,0.0088169702,0.0161739005,0.0231458734,59.8143830065,5.9332945205,0.0271801764,50.1811514356,2.2105466749,2718.8581918080,27.1885819181,18267.0798289539,0.0000064773,1.1986045786,2.1987270931,3.1465173743,8131.3412612630,806.5893205801,3.6949522625,California,CA,9,58.2565807824,70.8357682483,82.0300855712,83.4211514441,22.4791060804,91.4310072487,20.6342392033,44.8003303446,69.4492207493,64.4805710566,73.9747591523,41.2001973366,69.9936559849,0.4881982980,32.2031638835,14.4688811492,33.6358789383,2.7793036790,3.1380644255,0.3541522801,2.0598614138,97.6642425963,3.6388096802,6.3535808084,71.4956721564,59.1319320934,61.5316181718,60.9745786385,62.4689837463,62.0864910202,59.8317854029,59.0710337447,59.2599060994,64.9284478117,62.2619591744,60.9702180540,6,8,9,9,3,10,3,5,7,7,8,5,7,1,4,2,4,1,1,1,1,11,1,1,8,6,7,7,7,7,6,6,6,7,7,7,40% (58%ile),40% (70%ile),20% (82%ile),9% (83%ile),4% (22%ile),24% (91%ile),4% (44%ile),40% (64%ile),0.37 = fraction pre-1960 (69%ile),71%ile,0.0163 ug/m3 (0%ile),59%ile,20 lifetime risk per million (32%ile),61%ile,0.2 (14%ile),60%ile,130 daily vehicles/meters distance (33%ile),62%ile,0.000000048 toxicity-weighted concentration/meters distance (2%ile),62%ile,0.0088 sites/km distance (3%ile),59%ile,0.016 facilities/km distance (0%ile),59%ile,0.023 facilities/km distance (2%ile),59%ile,59.8 ppb (97%ile),64%ile,5.93 ug/m3 (3%ile),62%ile,0.027 facilities/sq km area (6%ile),60%ile,17743852489.0000000000,41257887.0000000000,0,1,969231.5231135677,27404749177.8422279358
|
||||||
8028,06061021322,20899,20874,13290,6549,6904,9172,9199,0.4401646012,3881,0.1859250743,825,0.0620767494,225,0.0343563903,1429,0.0683764773,2939,0.1406287382,312,0.0340165722,231,0.0334588644,0.3130448377,0.1552546718,6542.3240634282,3244.6673856589,-896.9052371663,-589.6780917541,0.1849562857,30.0000000000,0.5000000000,12.5173455346,0.2667203153,0.0687928975,0.4515663958,0.2027045525,52.7832287582,12.1102756164,0.0258826940,-30.0094307337,-165.8882612555,-26907.1571149896,-448.4526185832,-11226.8727654026,-239.2228476257,-61.7007100657,-405.0122653138,-181.8067747336,-47341.5543077505,-10861.7696239112,-23.2143238368,California,CA,9,61.7694531724,28.3124099080,32.2625612545,63.3138029183,65.9392366308,44.1611446180,92.1063805127,31.2336817151,19.3531578232,52.0599864076,48.1147912182,98.1253263672,8.5598852754,35.4160437794,83.7767623034,95.2520218071,6.7786023570,88.6613290583,53.5138135020,56.0049245976,28.8270859466,89.7745222973,94.2035706464,6.2511191138,43.0185694890,24.7769097248,17.2770098374,9.5647689629,49.9350307593,5.0850465016,20.5837755437,15.4478896201,34.6338200533,14.8104044330,10.3206402564,53.0011626680,7,3,4,7,7,5,10,4,2,6,5,11,1,4,9,11,1,9,6,6,3,9,10,1,5,3,2,1,5,1,3,2,4,2,2,6,44% (61%ile),19% (28%ile),6% (32%ile),3% (63%ile),7% (65%ile),14% (44%ile),3% (31%ile),31% (52%ile),0.033 = fraction pre-1960 (19%ile),43%ile,0.185 ug/m3 (35%ile),24%ile,30 lifetime risk per million (83%ile),17%ile,0.5 (95%ile),9%ile,13 daily vehicles/meters distance (6%ile),49%ile,0.27 toxicity-weighted concentration/meters distance (88%ile),5%ile,0.069 sites/km distance (53%ile),20%ile,0.45 facilities/km distance (56%ile),15%ile,0.2 facilities/km distance (28%ile),34%ile,52.8 ppb (89%ile),14%ile,12.1 ug/m3 (94%ile),10%ile,0.026 facilities/sq km area (6%ile),53%ile,258653359.0000000000,119890.0000000000,0,0,124755.3452199987,427225089.6229769588
|
8028,06061021322,20899,20874,13290,6549,6904,9172,9199,0.4401646012,3881,0.1859250743,825,0.0620767494,225,0.0343563903,1429,0.0683764773,2939,0.1406287382,312,0.0340165722,231,0.0334588644,0.3130448377,0.1552546718,6542.3240634282,3244.6673856589,-896.9052371663,-589.6780917541,0.1849562857,30.0000000000,0.5000000000,12.5173455346,0.2667203153,0.0687928975,0.4515663958,0.2027045525,52.7832287582,12.1102756164,0.0258826940,-30.0094307337,-165.8882612555,-26907.1571149896,-448.4526185832,-11226.8727654026,-239.2228476257,-61.7007100657,-405.0122653138,-181.8067747336,-47341.5543077505,-10861.7696239112,-23.2143238368,California,CA,9,61.7694531724,28.3124099080,32.2625612545,63.3138029183,65.9392366308,44.1611446180,92.1063805127,31.2336817151,19.3531578232,52.0599864076,48.1147912182,98.1253263672,8.5598852754,35.4160437794,83.7767623034,95.2520218071,6.7786023570,88.6613290583,53.5138135020,56.0049245976,28.8270859466,89.7745222973,94.2035706464,6.2511191138,43.0185694890,24.7769097248,17.2770098374,9.5647689629,49.9350307593,5.0850465016,20.5837755437,15.4478896201,34.6338200533,14.8104044330,10.3206402564,53.0011626680,7,3,4,7,7,5,10,4,2,6,5,11,1,4,9,11,1,9,6,6,3,9,10,1,5,3,2,1,5,1,3,2,4,2,2,6,44% (61%ile),19% (28%ile),6% (32%ile),3% (63%ile),7% (65%ile),14% (44%ile),3% (31%ile),31% (52%ile),0.033 = fraction pre-1960 (19%ile),43%ile,0.185 ug/m3 (35%ile),24%ile,30 lifetime risk per million (83%ile),17%ile,0.5 (95%ile),9%ile,13 daily vehicles/meters distance (6%ile),49%ile,0.27 toxicity-weighted concentration/meters distance (88%ile),5%ile,0.069 sites/km distance (53%ile),20%ile,0.45 facilities/km distance (56%ile),15%ile,0.2 facilities/km distance (28%ile),34%ile,52.8 ppb (89%ile),14%ile,12.1 ug/m3 (94%ile),10%ile,0.026 facilities/sq km area (6%ile),53%ile,258653359.0000000000,119890.0000000000,0,0,124755.3452199987,427225089.6229769588
|
||||||
8849,06069000802,3049,3045,2076,955,1119,1493,1247,0.4089865530,747,0.2453201970,307,0.1478805395,31,0.0324607330,240,0.0787143326,468,0.1534929485,93,0.0622906899,390,0.3485254692,0.3271533750,0.1778092173,997.4906403941,542.1403034316,-87.8345013597,-17.2605942492,0.0375346206,20.0000000000,0.2000000000,15.7944927934,,0.0396183204,0.0811927061,0.1674220356,47.0434058824,7.4113546849,0.0102735941,-30.6125607956,-3.2968346872,-1756.6900271942,-17.5669002719,-1387.3013987358,,-3.4798554127,-7.1315208575,-14.7054310128,-4132.0340979390,-650.9726431509,-0.9023760119,California,CA,9,59.1858457424,41.3904741949,69.9513617378,62.0187896062,79.0518001240,52.1216510370,37.3180569516,68.3483551403,67.5701406274,54.3994266601,57.9926859232,26.1831217492,58.7612911558,2.0014414700,32.2031638835,14.4688811492,8.1570460385,,34.5749415665,10.3739430074,25.1131375379,84.5333172848,19.2864164585,4.9410824602,42.8621394303,58.0471933934,56.5430390950,57.0023528116,55.7266348497,,54.6373148803,57.1359685902,54.8116596007,56.2167239668,56.9568759225,56.2801621878,6,5,7,7,8,6,4,7,7,6,6,3,6,1,4,2,1,0,4,2,3,9,2,1,5,6,6,6,6,0,6,6,6,6,6,6,41% (59%ile),25% (41%ile),15% (69%ile),3% (62%ile),8% (79%ile),15% (52%ile),6% (68%ile),33% (54%ile),0.35 = fraction pre-1960 (67%ile),42%ile,0.0375 ug/m3 (2%ile),58%ile,20 lifetime risk per million (32%ile),56%ile,0.2 (14%ile),57%ile,16 daily vehicles/meters distance (8%ile),55%ile,,,0.04 sites/km distance (34%ile),54%ile,0.081 facilities/km distance (10%ile),57%ile,0.17 facilities/km distance (25%ile),54%ile,47 ppb (84%ile),56%ile,7.41 ug/m3 (19%ile),56%ile,0.01 facilities/sq km area (4%ile),56%ile,2987635876.0000000000,3272257.0000000000,1,0,422237.6856758550,4643687820.1565904617
|
8849,06069000802,3049,3045,2076,955,1119,1493,1247,0.4089865530,747,0.2453201970,307,0.1478805395,31,0.0324607330,240,0.0787143326,468,0.1534929485,93,0.0622906899,390,0.3485254692,0.3271533750,0.1778092173,997.4906403941,542.1403034316,-87.8345013597,-17.2605942492,0.0375346206,20.0000000000,0.2000000000,15.7944927934,,0.0396183204,0.0811927061,0.1674220356,47.0434058824,7.4113546849,0.0102735941,-30.6125607956,-3.2968346872,-1756.6900271942,-17.5669002719,-1387.3013987358,,-3.4798554127,-7.1315208575,-14.7054310128,-4132.0340979390,-650.9726431509,-0.9023760119,California,CA,9,59.1858457424,41.3904741949,69.9513617378,62.0187896062,79.0518001240,52.1216510370,37.3180569516,68.3483551403,67.5701406274,54.3994266601,57.9926859232,26.1831217492,58.7612911558,2.0014414700,32.2031638835,14.4688811492,8.1570460385,,34.5749415665,10.3739430074,25.1131375379,84.5333172848,19.2864164585,4.9410824602,42.8621394303,58.0471933934,56.5430390950,57.0023528116,55.7266348497,,54.6373148803,57.1359685902,54.8116596007,56.2167239668,56.9568759225,56.2801621878,6,5,7,7,8,6,4,7,7,6,6,3,6,1,4,2,1,0,4,2,3,9,2,1,5,6,6,6,6,0,6,6,6,6,6,6,41% (59%ile),25% (41%ile),15% (69%ile),3% (62%ile),8% (79%ile),15% (52%ile),6% (68%ile),33% (54%ile),0.35 = fraction pre-1960 (67%ile),42%ile,0.0375 ug/m3 (2%ile),58%ile,20 lifetime risk per million (32%ile),56%ile,0.2 (14%ile),57%ile,16 daily vehicles/meters distance (8%ile),55%ile,,,0.04 sites/km distance (34%ile),54%ile,0.081 facilities/km distance (10%ile),57%ile,0.17 facilities/km distance (25%ile),54%ile,47 ppb (84%ile),56%ile,7.41 ug/m3 (19%ile),56%ile,0.01 facilities/sq km area (4%ile),56%ile,2987635876.0000000000,3272257.0000000000,1,0,422237.6856758550,4643687820.1565904617
|
||||||
|
|
|
|
@ -1,6 +1,8 @@
|
||||||
import pathlib
|
import pathlib
|
||||||
from data_pipeline.tests.sources.example.test_etl import TestETL
|
from data_pipeline.tests.sources.example.test_etl import TestETL
|
||||||
from data_pipeline.etl.sources.persistent_poverty.etl import PersistentPovertyETL
|
from data_pipeline.etl.sources.persistent_poverty.etl import (
|
||||||
|
PersistentPovertyETL,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestPersistentPovertyETL(TestETL):
|
class TestPersistentPovertyETL(TestETL):
|
||||||
|
|
Loading…
Add table
Reference in a new issue