revise merge

This commit is contained in:
Saran Ahluwalia 2021-12-31 15:52:16 -05:00
parent 0e3ca6bf30
commit 0d464e518b
5 changed files with 164 additions and 0 deletions

View file

@ -104,6 +104,11 @@ DATASET_LIST = [
"module_dir": "michigan_ejscreen", "module_dir": "michigan_ejscreen",
"class_name": "MichiganEnviroScreenETL", "class_name": "MichiganEnviroScreenETL",
}, },
{
"name": "maryland_ejscreen",
"module_dir": "maryland_ejscreen",
"class_name": "MarylandEJScreenETL",
},
] ]
CENSUS_INFO = { CENSUS_INFO = {
"name": "census", "name": "census",

View file

@ -0,0 +1,25 @@
# Maryland EJSCREEN
The Maryland EJSCREEN application and tool can be found [here](https://p1.cgis.umd.edu/mdejscreen/).
### Methodology Summary
According to the [documentation](https://p1.cgis.umd.edu/mdejscreen/help.html):
There are two data categories: Pollution Burden and Population Characteristics.
There are two indicators within Pollution Burden: Exposure and Socioeconomic. Within Population Characteristics, there are two indicators: Sensitive and Environmental Effects. Each indicator contains a number of relevant covariates and an averaged score.
The two "Pollution Burden" average scores are then averaged together and the result is multiplied by the average of the "Population Characteristics" categories to get the total EJ Score for each tract.
For each indicator, the percentile is given. For example, a tract with a value of 0.9 for the "Asthma Emergency Discharges" indicator is in the 90th percentile, which means only 10% of tracts in Maryland have higher values. EJ Scores near 1 represent areas of the greatest environmental justice concern.
A study of Bladensburg, MD - located in Prince George's County - demonstrated the application of the MD EJSCREEN (Driver et al., 2019). According to the study, the Bladensburg population is primarily Black (62.7%) and Latinx (33.0%), with 20.1% of the community members living below the federal poverty line. Through an analysis leveraging the Maryland EJSCREEN, the researchers found that Bladensburg has an EJ score higher than 99% of the census tracts in Prince George's County, indicating a higher prevalence of environmental hazards in the region.
Furthermore, it was determined that Bladensburg residents are at a higher risk of developing cancer due to air pollution than 90–100% of the census tracts in the state or county.
5
Source:
Driver, A.; Mehdizadeh, C.; Bara-Garcia, S.; Bodenreider, C.; Lewis, J.; Wilson, S. Utilization of the Maryland Environmental Justice Screening Tool: A Bladensburg, Maryland Case Study. Int. J. Environ. Res. Public Health 2019, 16, 348. https://doi.org/10.3390/ijerph16030348

View file

@ -0,0 +1,115 @@
from glob import glob
import geopandas as gpd
import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
from data_pipeline.config import settings
logger = get_module_logger(__name__)
class MarylandEJScreenETL(ExtractTransformLoad):
    """Maryland EJSCREEN class that ingests dataset represented
    here: https://p1.cgis.umd.edu/mdejscreen/help.html
    Please see the README in this module for further details.

    Pipeline stages:

    * ``extract`` hands the dataset URL and ``TMP_PATH`` to the base class.
    * ``transform`` reads every ``*.shp`` file under ``SHAPE_FILES_PATH``
      (except the county-level one), joins them column-wise on the
      ``Census_Tra`` column, and derives boolean threshold flags from the
      ``EJScore`` column.
    * ``load`` writes the columns listed in ``COLUMNS_TO_KEEP`` to CSV.
    """

    def __init__(self):
        # NOTE(review): TMP_PATH, DATA_PATH and GEOID_TRACT_FIELD_NAME are not
        # defined in this class; presumably they are inherited from
        # ExtractTransformLoad -- confirm against the base class.
        self.MARYLAND_EJSCREEN_URL = (
            settings.AWS_JUSTICE40_DATASOURCES_URL + "/MD_EJScreen.zip"
        )

        # Directory that transform() scans for shapefiles.
        self.SHAPE_FILES_PATH = self.TMP_PATH / "mdejscreen"
        self.OUTPUT_CSV_PATH = self.DATA_PATH / "dataset" / "maryland_ejscreen"

        # The only columns written to the output CSV by load().
        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD,
            field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD,
            field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD,
            field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD,
            field_names.MARYLAND_PERCENTILE_FIELD_NAME,
        ]

        # Populated by transform(); consumed by load().
        self.df: pd.DataFrame

    def extract(self) -> None:
        """Delegate retrieval of the Maryland EJSCREEN archive to the base class."""
        logger.info("Downloading Maryland EJSCREEN Data")
        # NOTE(review): presumably the base implementation downloads and
        # unpacks the archive into TMP_PATH -- confirm in ExtractTransformLoad.
        super().extract(
            self.MARYLAND_EJSCREEN_URL,
            self.TMP_PATH,
        )

    def transform(self) -> None:
        """Combine the tract-level shapefiles and derive threshold flags.

        Stores the resulting frame on ``self.df``.

        Raises:
            ValueError: if no tract-level shapefile is found under
                ``SHAPE_FILES_PATH``.
        """
        logger.info("Transforming Maryland EJSCREEN Data")

        shapefile_paths = glob(str(self.SHAPE_FILES_PATH) + "/*.shp")

        # Ignore counties because this is not the level of measurement
        # that is consistent with our current scoring and ranking methodology.
        #
        # Set the Census tract as the index and drop the geometry column
        # that produces the census tract boundaries. The latter is because
        # Geopandas raises an exception if there are duplicate geometry
        # columns. Moreover, since the unit of measurement is at the tract
        # level we can consistently merge this with other datasets.
        tract_frames = [
            gpd.read_file(path)
            .set_index("Census_Tra")
            .drop("geometry", axis=1)
            for path in shapefile_paths
            if not path.endswith("CountiesEJScore.shp")
        ]

        # pd.concat would raise an opaque "No objects to concatenate" error;
        # fail with a message that points at the actual problem instead.
        if not tract_frames:
            raise ValueError(
                f"No tract-level shapefiles found in {self.SHAPE_FILES_PATH}"
            )

        # pylint: disable=unsubscriptable-object
        combined_df = gpd.GeoDataFrame(pd.concat(tract_frames, axis=1))

        # Reset index so that we no longer have the tract as our index
        combined_df = combined_df.reset_index()

        # Coerce the tract identifier into an integer
        # pylint: disable=unsupported-assignment-operation, unsubscriptable-object
        combined_df["Census_Tra"] = combined_df["Census_Tra"].astype(int)

        # Drop the 10 census tracts that are zero: please see here:
        # https://github.com/usds/justice40-tool/issues/239#issuecomment-995821572
        combined_df = combined_df[combined_df["Census_Tra"] != 0]

        # Set our class instance variable. rename() returns a new frame, so
        # the filtered slice above is never mutated.
        self.df = combined_df.rename(
            columns={
                "Census_Tra": self.GEOID_TRACT_FIELD_NAME,
                "EJScore": field_names.MARYLAND_PERCENTILE_FIELD_NAME,
            },
        )

        # Baseline comparisons with some quartiles and the 90th percentile.
        # Interpretation: The score is greater than or equal to N% of the
        # tracts in the state. The 90% flag is the one used in the
        # comparison tool.
        thresholds = (
            (field_names.MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD, 0.25),
            (field_names.MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD, 0.50),
            (field_names.MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD, 0.75),
            (field_names.MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD, 0.90),
        )
        for flag_field, threshold in thresholds:
            self.df[flag_field] = (
                self.df[field_names.MARYLAND_PERCENTILE_FIELD_NAME]
                >= threshold
            )

    def load(self) -> None:
        """Write the ``COLUMNS_TO_KEEP`` subset of ``self.df`` to CSV."""
        logger.info("Saving Maryland EJSCREEN CSV")

        # write maryland tracts to csv
        self.OUTPUT_CSV_PATH.mkdir(parents=True, exist_ok=True)

        # Restrict the output to the declared columns; previously
        # COLUMNS_TO_KEEP was never applied and every concatenated
        # shapefile column was written out.
        self.df[self.COLUMNS_TO_KEEP].to_csv(
            self.OUTPUT_CSV_PATH / "maryland_ejscreen.csv", index=False
        )

View file

@ -219,6 +219,25 @@ MICHIGAN_EJSCREEN_PRIORITY_COMMUNITY_FIELD: str = (
"Michigan EJSCREEN Priority Community" "Michigan EJSCREEN Priority Community"
) )
# Maryland EJSCREEN Data.
# Column names for the threshold flags: one column per cut-off
# (25%, 50%, 75% and 90%).
MARYLAND_EJSCREEN_TRACT_25_PERCENT_FIELD: str = (
    "Tract is >=25% all other Maryland Tracts"
)
MARYLAND_EJSCREEN_TRACT_50_PERCENT_FIELD: str = (
    "Tract is >=50% all other Maryland Tracts"
)
MARYLAND_EJSCREEN_TRACT_75_PERCENT_FIELD: str = (
    "Tract is >=75% all other Maryland Tracts"
)
MARYLAND_EJSCREEN_TRACT_90_PERCENT_FIELD: str = (
    "Tract is >=90% all other Maryland Tracts"
)
# Column name for the Maryland environmental justice value that the
# threshold flags above are compared against.
MARYLAND_PERCENTILE_FIELD_NAME: str = (
    "Maryland Environmental Justice Percentile"
)
# Child Opportunity Index data # Child Opportunity Index data
# Summer days with maximum temperature above 90F. # Summer days with maximum temperature above 90F.
EXTREME_HEAT_FIELD = "Summer days above 90F" EXTREME_HEAT_FIELD = "Summer days above 90F"