mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 08:21:16 -07:00
Cleaning up quick code (#1349)
Made some quick, mostly cosmetic changes and updates to the quick launch changes. This mostly entailed changing strings to constants and cleaning up some code to make it neater. Changes: PR AMI, updating ag loss, and dropping PR from some threshold counts.
This commit is contained in:
parent
df268d4d91
commit
aea49cbb5a
6 changed files with 341 additions and 348 deletions
|
@ -22,7 +22,6 @@ DATA_CENSUS_CSV_DIR = DATA_CENSUS_DIR / "csv"
|
|||
DATA_CENSUS_CSV_FILE_PATH = DATA_CENSUS_CSV_DIR / "us.csv"
|
||||
DATA_CENSUS_CSV_STATE_FILE_PATH = DATA_CENSUS_CSV_DIR / "fips_states_2010.csv"
|
||||
|
||||
|
||||
# Score paths
|
||||
DATA_SCORE_DIR = DATA_PATH / "score"
|
||||
|
||||
|
@ -66,6 +65,9 @@ CENSUS_COUNTIES_COLUMNS = ["USPS", "GEOID", "NAME"]
|
|||
# Drop FIPS codes from map
|
||||
DROP_FIPS_CODES = ["66", "78"]
|
||||
|
||||
# FIPS codes to exclude when incrementing threshold counts
|
||||
DROP_FIPS_FROM_NON_WTD_THRESHOLDS = "72"
|
||||
|
||||
# Percent prefixes for rounding
|
||||
PERCENT_PREFIXES_SUFFIXES = [
|
||||
"Percent",
|
||||
|
|
|
@ -298,33 +298,20 @@ class ScoreETL(ExtractTransformLoad):
|
|||
] = df[input_column_name].rank(pct=True, ascending=ascending)
|
||||
|
||||
else:
|
||||
# For agricultural loss, we are using whether there is value at all to determine percentile
|
||||
# This is not the most thoughtfully written code, but it works.
|
||||
|
||||
# Take only rows with agrivalue
|
||||
tmp_df = df[df[field_names.AGRICULTURAL_VALUE_BOOL_FIELD] == 1][
|
||||
[input_column_name, field_names.GEOID_TRACT_FIELD]
|
||||
].copy()
|
||||
|
||||
# Construct a percentile only among those tracts
|
||||
tmp_df["temporary_ranking"] = tmp_df[input_column_name].transform(
|
||||
lambda x: x.rank(pct=True, ascending=True)
|
||||
)
|
||||
|
||||
# Create a map for just those tracts and map it onto the df
|
||||
temporary_ranking = tmp_df.set_index(field_names.GEOID_TRACT_FIELD)[
|
||||
"temporary_ranking"
|
||||
].to_dict()
|
||||
|
||||
# For agricultural loss, we are using whether there is value at all to determine percentile and then
|
||||
# filling places where the value is False with 0
|
||||
df[
|
||||
f"{output_column_name_root}"
|
||||
f"{field_names.PERCENTILE_FIELD_SUFFIX}"
|
||||
] = np.where(
|
||||
df[field_names.AGRICULTURAL_VALUE_BOOL_FIELD].isna(),
|
||||
np.nan,
|
||||
df[field_names.GEOID_TRACT_FIELD]
|
||||
.map(temporary_ranking)
|
||||
.fillna(0),
|
||||
] = (
|
||||
df.where(
|
||||
df[field_names.AGRICULTURAL_VALUE_BOOL_FIELD].astype(float)
|
||||
== 1.0
|
||||
)[input_column_name]
|
||||
.rank(ascending=ascending, pct=True)
|
||||
.fillna(
|
||||
df[field_names.AGRICULTURAL_VALUE_BOOL_FIELD].astype(float)
|
||||
)
|
||||
)
|
||||
|
||||
# Create the urban/rural percentiles.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue