mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 15:41:38 -07:00
wip
This commit is contained in:
parent
cf13036d20
commit
ebe6180f7c
3 changed files with 180 additions and 4 deletions
|
@ -31,6 +31,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.HIGH_SCHOOL_FIELD_NAME = (
|
||||
"Percent individuals age 25 or over with less than high school degree"
|
||||
)
|
||||
self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME = (
|
||||
"Median household income (% of state median household income)"
|
||||
)
|
||||
|
||||
# There's another aggregation level (a second level of "buckets").
|
||||
self.AGGREGATION_POLLUTION = "Pollution Burden"
|
||||
|
@ -145,7 +148,12 @@ class ScoreETL(ExtractTransformLoad):
|
|||
renamed_field="Total population",
|
||||
bucket=None,
|
||||
),
|
||||
# The following data sets have buckets, because they're used in the score
|
||||
DataSet(
|
||||
input_field=self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME,
|
||||
renamed_field=self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME,
|
||||
bucket=None,
|
||||
),
|
||||
# The following data sets have buckets, because they're used in Score C
|
||||
DataSet(
|
||||
input_field="CANCER",
|
||||
renamed_field="Air toxics cancer risk",
|
||||
|
@ -375,9 +383,6 @@ class ScoreETL(ExtractTransformLoad):
|
|||
self.df["Score D"] = self.df[fields_min_max].mean(axis=1)
|
||||
self.df["Score E"] = self.df[fields_percentile].mean(axis=1)
|
||||
|
||||
# Calculate correlations
|
||||
self.df[fields_min_max].corr()
|
||||
|
||||
# Create percentiles for the scores
|
||||
for score_field in [
|
||||
"Score A",
|
||||
|
@ -400,9 +405,40 @@ class ScoreETL(ExtractTransformLoad):
|
|||
>= 1 - threshold
|
||||
)
|
||||
|
||||
# Now for binary (non-index) scores.
|
||||
|
||||
# Calculate "Score F", which uses "either/or" thresholds.
|
||||
ami_and_high_school_field_name = "Low AMI, Low HS graduation"
|
||||
meets_socio_field_name = "Meets socioeconomic criteria"
|
||||
meets_burden_field_name = "Meets burden criteria"
|
||||
|
||||
self.df[ami_and_high_school_field_name] = (
|
||||
self.df[self.MEDIAN_INCOME_AS_PERCENT_OF_STATE_FIELD_NAME] < 0.80
|
||||
) & (self.df[self.HIGH_SCHOOL_FIELD_NAME] > 0.2)
|
||||
|
||||
self.df[meets_socio_field_name] = (
|
||||
self.df[ami_and_high_school_field_name]
|
||||
| (self.df[self.POVERTY_FIELD_NAME] > 0.40)
|
||||
| (self.df[self.LINGUISTIC_ISOLATION_FIELD_NAME] > 0.10)
|
||||
| (self.df[self.HIGH_SCHOOL_FIELD_NAME] > 0.4)
|
||||
)
|
||||
|
||||
self.df[meets_burden_field_name] = (
|
||||
self.df["Particulate matter (PM2.5)"] > 10
|
||||
) | (self.df["Respiratory hazard " "index"] > 0.75)
|
||||
|
||||
self.df["Score F (communities)"] = (
|
||||
self.df[ami_and_high_school_field_name] & self.df[meets_burden_field_name]
|
||||
)
|
||||
|
||||
|
||||
def load(self) -> None:
|
||||
logger.info("Saving Score CSV")
|
||||
|
||||
# write nationwide csv
|
||||
self.SCORE_CSV_PATH.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# TODO: drop the 10,000-row sample export below (usa-10000.csv).
|
||||
self.df[0:10000].to_csv(self.SCORE_CSV_PATH / "usa-10000.csv", index=False)
|
||||
|
||||
self.df.to_csv(self.SCORE_CSV_PATH / "usa.csv", index=False)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue