mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 09:41:26 -08:00
Add several factors to comparator, including number of disadvantaged tracts and population (#2179)
This commit is contained in:
parent
c3a68cb251
commit
7384cc5fec
1 changed file with 95 additions and 3 deletions
|
@@ -52,7 +52,7 @@ def compare_score(compare_to_version: str):
|
||||||
|
|
||||||
summary = "# Score Comparison Summary\n"
|
summary = "# Score Comparison Summary\n"
|
||||||
summary += f"Hi! I'm the Score Comparator. I compared the score in production (version {compare_to_version}) to the"
|
summary += f"Hi! I'm the Score Comparator. I compared the score in production (version {compare_to_version}) to the"
|
||||||
summary += " freshly calculated score. Here are the results.\n"
|
summary += " locally calculated score. Here are the results.\n"
|
||||||
|
|
||||||
log_title("Compare Score", "Compare production score to local score")
|
log_title("Compare Score", "Compare production score to local score")
|
||||||
|
|
||||||
|
@@ -133,16 +133,108 @@ def compare_score(compare_to_version: str):
|
||||||
else " They don't match.\n"
|
else " They don't match.\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
production_total_population = production_score_df["Total population"].sum()
|
||||||
|
local_total_population = local_score_df["Total population"].sum()
|
||||||
|
|
||||||
|
log_info(
|
||||||
|
f"The total population in all census tracts in production is {production_total_population:,}."
|
||||||
|
)
|
||||||
|
log_info(
|
||||||
|
f"The total population in all census tracts locally is {local_total_population:,}."
|
||||||
|
)
|
||||||
|
log_info(
|
||||||
|
f"The difference in population is {abs(production_total_population - local_total_population):,}."
|
||||||
|
)
|
||||||
|
|
||||||
|
summary += f"* The total population in all census tracts in the production score is {production_total_population:,}."
|
||||||
|
summary += f" The total population in all census tracts locally is {local_total_population:,}."
|
||||||
|
summary += (
|
||||||
|
" They match!\n"
|
||||||
|
if production_total_population == local_total_population
|
||||||
|
else f" The difference is {abs(production_total_population - local_total_population):,}.\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
production_disadvantaged_tracts_df = production_score_df.query(
|
||||||
|
"`Definition N community, including adjacency index tracts` == True"
|
||||||
|
)
|
||||||
|
local_disadvantaged_tracts_df = local_score_df.query(
|
||||||
|
"`Definition N community, including adjacency index tracts` == True"
|
||||||
|
)
|
||||||
|
|
||||||
|
production_disadvantaged_tracts_set = set(
|
||||||
|
production_disadvantaged_tracts_df.index.array
|
||||||
|
)
|
||||||
|
local_disadvantaged_tracts_set = set(
|
||||||
|
local_disadvantaged_tracts_df.index.array
|
||||||
|
)
|
||||||
|
|
||||||
|
production_pct_of_population_represented = (
|
||||||
|
production_disadvantaged_tracts_df["Total population"].sum()
|
||||||
|
/ production_total_population
|
||||||
|
)
|
||||||
|
local_pct_of_population_represented = (
|
||||||
|
local_disadvantaged_tracts_df["Total population"].sum()
|
||||||
|
/ local_total_population
|
||||||
|
)
|
||||||
|
|
||||||
|
log_info(
|
||||||
|
f"There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score."
|
||||||
|
)
|
||||||
|
log_info(
|
||||||
|
f"This represents {production_pct_of_population_represented:.1%} of the total population."
|
||||||
|
)
|
||||||
|
log_info(
|
||||||
|
f"There are {len(local_disadvantaged_tracts_set):,} in the locally generated score."
|
||||||
|
)
|
||||||
|
log_info(
|
||||||
|
f"This represents {local_pct_of_population_represented:.1%} of the total population."
|
||||||
|
)
|
||||||
|
log_info(
|
||||||
|
f"The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set)):,} tract(s)."
|
||||||
|
)
|
||||||
|
|
||||||
|
summary += f"* There are {len(production_disadvantaged_tracts_set):,} disadvantaged tracts in the production score representing"
|
||||||
|
summary += f" {production_pct_of_population_represented:.1%} of the total population, and {len(local_disadvantaged_tracts_set):,}"
|
||||||
|
summary += f" in the locally generated score representing {local_pct_of_population_represented:.1%} of the total population."
|
||||||
|
summary += (
|
||||||
|
" The number of tracts match!\n"
|
||||||
|
if len(production_disadvantaged_tracts_set)
|
||||||
|
== len(local_disadvantaged_tracts_set)
|
||||||
|
else f" The difference is {abs(len(production_disadvantaged_tracts_set) - len(local_disadvantaged_tracts_set)):,} tract(s).\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
removed_tracts = production_disadvantaged_tracts_set.difference(
|
||||||
|
local_disadvantaged_tracts_set
|
||||||
|
)
|
||||||
|
added_tracts = local_disadvantaged_tracts_set.difference(
|
||||||
|
production_disadvantaged_tracts_set
|
||||||
|
)
|
||||||
|
|
||||||
|
log_info(
|
||||||
|
f"There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the prod score that are not disadvantaged in the local score."
|
||||||
|
)
|
||||||
|
log_info(
|
||||||
|
f"There are {len(added_tracts):,} tract(s) marked as disadvantaged in the local score that are not disadvantaged in the prod score."
|
||||||
|
)
|
||||||
|
|
||||||
|
summary += (
|
||||||
|
f"* There are {len(removed_tracts):,} tract(s) marked as disadvantaged in the production score that are not disadvantaged in the locally"
|
||||||
|
" generated score (i.e. disadvantaged tracts that were removed by the new score)."
|
||||||
|
f" There are {len(added_tracts):,} tract(s) marked as disadvantaged in the locally generated score that are not disadvantaged in the"
|
||||||
|
" production score (i.e. disadvantaged tracts that were added by the new score).\n"
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
comparison_results_df = production_score_df.compare(
|
comparison_results_df = production_score_df.compare(
|
||||||
local_score_df, align_axis=1, keep_shape=False, keep_equal=False
|
local_score_df, align_axis=1, keep_shape=False, keep_equal=False
|
||||||
).rename({"self": "Production", "other": "Local"}, axis=1, level=1)
|
).rename({"self": "Production", "other": "Local"}, axis=1, level=1)
|
||||||
|
|
||||||
summary += f"* I compared all of the census tracts. There are {len(comparison_results_df.index):,} tracts with at least one score difference."
|
summary += "* I compared all values across all census tracts."
|
||||||
|
summary += f" There are {len(comparison_results_df.index):,} tracts with at least one difference."
|
||||||
summary += " Please examine the logs or run the score comparison locally to view them all.\n"
|
summary += " Please examine the logs or run the score comparison locally to view them all.\n"
|
||||||
log_info(
|
log_info(
|
||||||
f"There are {len(comparison_results_df.index)} rows with differences"
|
f"There are {len(comparison_results_df.index)} rows with any differences."
|
||||||
)
|
)
|
||||||
|
|
||||||
log_info("Those differences are:")
|
log_info("Those differences are:")
|
||||||
|
|
Loading…
Add table
Reference in a new issue