mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 17:44:20 -08:00
Merge branch 'emma-nechamkin/release/score-narwhal' of github.com:usds/justice40-tool into emma-nechamkin/release/score-narwhal
This commit is contained in:
commit
e78c6d0fef
2 changed files with 28 additions and 3 deletions
|
@ -380,7 +380,8 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
), "Join against national tract list ADDED rows"
|
), "Join against national tract list ADDED rows"
|
||||||
logger.info(
|
logger.info(
|
||||||
"Dropped %s tracts not in the 2010 tract data",
|
"Dropped %s tracts not in the 2010 tract data",
|
||||||
pre_join_len - census_tract_df[field_names.GEOID_TRACT_FIELD].nunique()
|
pre_join_len
|
||||||
|
- census_tract_df[field_names.GEOID_TRACT_FIELD].nunique(),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Now sanity-check the merged df.
|
# Now sanity-check the merged df.
|
||||||
|
@ -551,6 +552,9 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
# For *Non-Natural Space*, we may only want to include tracts that have at least 35 acres, I think. This will
|
# For *Non-Natural Space*, we may only want to include tracts that have at least 35 acres, I think. This will
|
||||||
# get rid of tracts that we think are aberrations statistically. Right now, we have left this out
|
# get rid of tracts that we think are aberrations statistically. Right now, we have left this out
|
||||||
# pending ground-truthing.
|
# pending ground-truthing.
|
||||||
|
#
|
||||||
|
# For *Traffic Barriers*, we want to exclude low-population tracts, which may show a high burden simply because
|
||||||
|
# so few people live there. The low-population threshold is set in the matching branch of the loop below.
|
||||||
|
|
||||||
for numeric_column in numeric_columns:
|
for numeric_column in numeric_columns:
|
||||||
drop_tracts = []
|
drop_tracts = []
|
||||||
|
@ -575,6 +579,17 @@ class ScoreETL(ExtractTransformLoad):
|
||||||
f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation"
|
f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
elif numeric_column == field_names.DOT_TRAVEL_BURDEN_FIELD:
|
||||||
|
# Having few or no people appears to be correlated with transit burden, but it does not represent
|
||||||
|
# on-the-ground need. For now, we remove these tracts from the percentile calculation. (To be QAed live)
|
||||||
|
low_population = 20
|
||||||
|
drop_tracts = df_copy[
|
||||||
|
df_copy[field_names.TOTAL_POP_FIELD] <= low_population
|
||||||
|
][field_names.GEOID_TRACT_FIELD].to_list()
|
||||||
|
logger.info(
|
||||||
|
f"Dropping {len(drop_tracts)} tracts from DOT traffic burden"
|
||||||
|
)
|
||||||
|
|
||||||
df_copy = self._add_percentiles_to_df(
|
df_copy = self._add_percentiles_to_df(
|
||||||
df=df_copy,
|
df=df_copy,
|
||||||
input_column_name=numeric_column,
|
input_column_name=numeric_column,
|
||||||
|
|
|
@ -60,6 +60,7 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
field_names.GEOID_TRACT_FIELD
|
field_names.GEOID_TRACT_FIELD
|
||||||
]
|
]
|
||||||
self.GEOMETRY_FIELD_NAME = "geometry"
|
self.GEOMETRY_FIELD_NAME = "geometry"
|
||||||
|
self.LAND_FIELD_NAME = "ALAND10"
|
||||||
|
|
||||||
# We will adjust this upwards while there is some fractional value
|
# We will adjust this upwards while there is some fractional value
|
||||||
# in the score. This is a starting value.
|
# in the score. This is a starting value.
|
||||||
|
@ -86,13 +87,22 @@ class GeoScoreETL(ExtractTransformLoad):
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Reading US GeoJSON (~6 minutes)")
|
logger.info("Reading US GeoJSON (~6 minutes)")
|
||||||
self.geojson_usa_df = gpd.read_file(
|
full_geojson_usa_df = gpd.read_file(
|
||||||
self.CENSUS_USA_GEOJSON,
|
self.CENSUS_USA_GEOJSON,
|
||||||
dtype={self.GEOID_FIELD_NAME: "string"},
|
dtype={self.GEOID_FIELD_NAME: "string"},
|
||||||
usecols=[self.GEOID_FIELD_NAME, self.GEOMETRY_FIELD_NAME],
|
usecols=[
|
||||||
|
self.GEOID_FIELD_NAME,
|
||||||
|
self.GEOMETRY_FIELD_NAME,
|
||||||
|
self.LAND_FIELD_NAME,
|
||||||
|
],
|
||||||
low_memory=False,
|
low_memory=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# We only want to keep tracts to visualize that have non-0 land
|
||||||
|
self.geojson_usa_df = full_geojson_usa_df[
|
||||||
|
full_geojson_usa_df[self.LAND_FIELD_NAME] > 0
|
||||||
|
]
|
||||||
|
|
||||||
logger.info("Reading score CSV")
|
logger.info("Reading score CSV")
|
||||||
self.score_usa_df = pd.read_csv(
|
self.score_usa_df = pd.read_csv(
|
||||||
self.TILE_SCORE_CSV,
|
self.TILE_SCORE_CSV,
|
||||||
|
|
Loading…
Add table
Reference in a new issue