Definition L updates (#862)

* Changing FEMA risk measure 

* Adding "basic stats" feature to comparison tool 

* Tweaking Definition L
Lucas Merrill Brown 2021-11-05 15:43:52 -04:00 committed by GitHub
parent 0a65060c0c
commit 03e59f2abd
9 changed files with 265 additions and 63 deletions

View file

@@ -291,6 +291,7 @@ class ScoreETL(ExtractTransformLoad):
field_names.LIFE_EXPECTANCY_FIELD,
field_names.ENERGY_BURDEN_FIELD,
field_names.FEMA_RISK_FIELD,
field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD,
field_names.URBAN_HERUISTIC_FIELD,
field_names.AIR_TOXICS_CANCER_RISK_FIELD,
field_names.RESPITORY_HAZARD_FIELD,

View file

@@ -25,10 +25,15 @@ class NationalRiskIndexETL(ExtractTransformLoad):
"FEMA Risk Index Expected Annual Loss Score"
)
self.EXPECTED_ANNUAL_LOSS_RATE = (
"FEMA Risk Index Expected Annual Loss Rate"
)
# Note: also need to edit transform step to add fields to output.
self.COLUMNS_TO_KEEP = [
self.GEOID_FIELD_NAME,
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
self.EXPECTED_ANNUAL_LOSS_RATE,
]
self.df: pd.DataFrame
@@ -37,7 +42,7 @@ class NationalRiskIndexETL(ExtractTransformLoad):
"""Unzips NRI dataset from the FEMA data source and writes the files
to the temporary data folder for use in the transform() method
"""
logger.info("Downloading National Risk Index Data")
logger.info("Downloading 405MB National Risk Index Data")
super().extract(
self.NRI_FTP_URL,
self.TMP_PATH,
@@ -72,11 +77,58 @@ class NationalRiskIndexETL(ExtractTransformLoad):
inplace=True,
)
# Calculate a risk score that does not include FEMA's measure of community vulnerability.
disaster_categories = [
"AVLN", # Avalanche
"CFLD", # Coastal Flooding
"CWAV", # Cold Wave
"DRGT", # Drought
"ERQK", # Earthquake
"HAIL", # Hail
"HWAV", # Heat Wave
"HRCN", # Hurricane
"ISTM", # Ice Storm
"LNDS", # Landslide
"LTNG", # Lightning
"RFLD", # Riverine Flooding
"SWND", # Strong Wind
"TRND", # Tornado
"TSUN", # Tsunami
"VLCN", # Volcanic Activity
"WFIR", # Wildfire
"WNTW", # Winter Weather
]
# Note: I'm not sure why pylint is so upset with this particular dataframe,
# but it may be a known bug. https://github.com/PyCQA/pylint/issues/1498
for category in disaster_categories:
df_nri[ # pylint: disable=unsupported-assignment-operation
f"{category}"
] = (
df_nri[ # pylint: disable=unsubscriptable-object
f"{category}_EALT"
] # Expected Annual Loss - Total
/ df_nri[ # pylint: disable=unsubscriptable-object
f"{category}_EXPT"
]  # Exposure - Total
)
df_nri[ # pylint: disable=unsupported-assignment-operation
self.EXPECTED_ANNUAL_LOSS_RATE
] = df_nri[ # pylint: disable=unsubscriptable-object
disaster_categories
].sum(
axis=1
)
# Reduce columns.
# Note: normally we wait until writing to CSV for this step, but since the file is so huge,
# we move it up here for performance reasons.
df_nri = df_nri[ # pylint: disable=unsubscriptable-object
[self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME, TRACT_COL]
[
self.RISK_INDEX_EXPECTED_ANNUAL_LOSS_SCORE_FIELD_NAME,
self.EXPECTED_ANNUAL_LOSS_RATE,
TRACT_COL,
]
]
# get the full list of Census Block Groups from the ACS data

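The new rate field is just the sum, across all 18 hazard categories, of expected annual loss dollars divided by exposure dollars. A minimal sketch of the same arithmetic on a made-up two-category frame (column names follow the NRI convention above; all numbers are illustrative):

import pandas as pd

# Toy tract-level frame with two hazard categories (made-up values).
df_nri = pd.DataFrame(
    {
        "AVLN_EALT": [12.5, 13.5],  # Expected Annual Loss - Total ($)
        "AVLN_EXPT": [30.5, 31.5],  # Exposure - Total ($)
        "DRGT_EALT": [15.5, 16.5],
        "DRGT_EXPT": [33.5, 34.5],
    }
)

disaster_categories = ["AVLN", "DRGT"]
for category in disaster_categories:
    # Per-category loss rate: expected annual loss as a share of exposure.
    df_nri[category] = df_nri[f"{category}_EALT"] / df_nri[f"{category}_EXPT"]

# The combined rate is the straight sum of the per-category rates.
df_nri["FEMA Risk Index Expected Annual Loss Rate"] = df_nri[
    disaster_categories
].sum(axis=1)
print(df_nri["FEMA Risk Index Expected Annual Loss Rate"].round(6).tolist())
# [0.872523, 0.906832]

Unlike the RISK_SCORE field, this rate deliberately excludes FEMA's measure of community vulnerability.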
View file

@@ -318,6 +318,28 @@
"# )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b74b0bf",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Create a FEMA risk index score\n",
"# Note: this can be deleted at a later date.\n",
"FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (\n",
" \"FEMA Risk Index Expected Annual Loss Rate\"\n",
")\n",
"FEMA_COMMUNITIES = \"FEMA Risk Index (top 30th percentile)\"\n",
"merged_df[FEMA_COMMUNITIES] = (\n",
" merged_df[f\"{FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD} (percentile)\"] > 0.70\n",
")\n",
"\n",
"merged_df[FEMA_COMMUNITIES].describe()"
]
},
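This cell assumes a "(percentile)" column computed upstream in merged_df and only thresholds it: "top 30th percentile" means strictly above the 70th-percentile cutoff. A rough sketch of the same flag on toy data (hypothetical column names; pandas' rank(pct=True) stands in for however the notebook actually derives its percentiles):

import pandas as pd

merged_df = pd.DataFrame({"loss rate": [0.1, 0.4, 0.2, 0.9, 0.5]})

# rank(pct=True) yields each row's percentile rank in (0, 1].
merged_df["loss rate (percentile)"] = merged_df["loss rate"].rank(pct=True)

# Flag rows strictly above the 70th percentile.
merged_df["FEMA Risk Index (top 30th percentile)"] = (
    merged_df["loss rate (percentile)"] > 0.70
)
print(merged_df["FEMA Risk Index (top 30th percentile)"].tolist())
# [False, False, False, True, True]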
{
"cell_type": "code",
"execution_count": null,
@@ -406,6 +428,11 @@
" priority_communities_field=PERSISTENT_POVERTY_CBG_LEVEL_FIELD,\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=FEMA_COMMUNITIES,\n",
" priority_communities_field=FEMA_COMMUNITIES,\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" ]\n",
")\n",
"\n",
@@ -439,11 +466,6 @@
"\n",
"census_tract_indices = [\n",
" Index(\n",
" method_name=\"Persistent Poverty\",\n",
" priority_communities_field=PERSISTENT_POVERTY_TRACT_LEVEL_FIELD,\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
" Index(\n",
" method_name=\"CalEnviroScreen 4.0\",\n",
" priority_communities_field=\"calenviroscreen_priority_community\",\n",
" other_census_tract_fields_to_keep=[\n",
@@ -451,6 +473,27 @@
" CALENVIROSCREEN_PERCENTILE_FIELD,\n",
" ],\n",
" ),\n",
" Index(\n",
" method_name=\"Persistent Poverty\",\n",
" priority_communities_field=PERSISTENT_POVERTY_TRACT_LEVEL_FIELD,\n",
" other_census_tract_fields_to_keep=[],\n",
" ),\n",
"]\n",
"\n",
"# These fields will be used for statistical comparisons.\n",
"comparison_fields = [\n",
" \"Percent of individuals < 100% Federal Poverty Line\",\n",
" \"Percent of individuals < 200% Federal Poverty Line\",\n",
" \"Median household income (% of AMI)\",\n",
" \"Percent of households in linguistic isolation\",\n",
" \"Percent individuals age 25 or over with less than high school degree\",\n",
" \"Linguistic isolation (percent)\",\n",
" \"Unemployed civilians (percent)\",\n",
" \"Median household income in the past 12 months\",\n",
" URBAN_HEURISTIC_FIELD,\n",
" LIFE_EXPECTANCY_FIELD,\n",
" HEALTH_INSURANCE_FIELD,\n",
" BAD_HEALTH_FIELD,\n",
"]"
]
},
@@ -735,7 +778,120 @@
"write_state_distribution_excel(\n",
" state_distribution_df=state_distribution_df,\n",
" file_path=COMPARISON_OUTPUTS_DIR / f\"{file_prefix}.xlsx\",\n",
")"
")\n",
"\n",
"# Note: this is helpful because this file is extremely long-running, so it alerts the user when the first step\n",
"# of data analysis is done. Can be removed when converted into scripts. -LMB.\n",
"import os\n",
"\n",
"os.system(\"say 'state analysis is written.'\")"
]
},
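The say command is macOS-only, so the alert does nothing useful on other platforms. If this notebook is ever run elsewhere, here is a hedged sketch of a portable alternative (not part of this commit; the notify name is hypothetical):

import platform
import subprocess

def notify(message: str) -> None:
    """Alert the user that a long-running step has finished."""
    if platform.system() == "Darwin":
        # macOS text-to-speech, as the notebook uses.
        subprocess.run(["say", message], check=False)
    else:
        # Portable fallback: ring the terminal bell and print the message.
        print(f"\a{message}")

notify("state analysis is written.")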
{
"cell_type": "code",
"execution_count": null,
"id": "c4d0e783",
"metadata": {},
"outputs": [],
"source": [
"directory = COMPARISON_OUTPUTS_DIR / \"cbg_basic_stats\"\n",
"directory.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# TODO: this Excel-writing function is extremely similar to other Excel-writing functions in this notebook.\n",
"# Refactor to use the same Excel-writing function.\n",
"def write_basic_stats_excel(\n",
" basic_stats_df: pd.DataFrame, file_path: pathlib.PosixPath\n",
") -> None:\n",
" \"\"\"Write the dataframe to excel with special formatting.\"\"\"\n",
" # Create a Pandas Excel writer using XlsxWriter as the engine.\n",
" writer = pd.ExcelWriter(file_path, engine=\"xlsxwriter\")\n",
"\n",
" # Convert the dataframe to an XlsxWriter Excel object. We also turn off the\n",
" # index column at the left of the output dataframe.\n",
" basic_stats_df.to_excel(writer, sheet_name=\"Sheet1\", index=False)\n",
"\n",
" # Get the xlsxwriter workbook and worksheet objects.\n",
" workbook = writer.book\n",
" worksheet = writer.sheets[\"Sheet1\"]\n",
" worksheet.autofilter(0, 0, basic_stats_df.shape[0], basic_stats_df.shape[1])\n",
"\n",
" # Set a width parameter for all columns\n",
" # Note: this is parameterized because every call to `set_column` requires setting the width.\n",
" column_width = 15\n",
"\n",
" for column in basic_stats_df.columns:\n",
" # Turn the column index into excel ranges (e.g., column #95 is \"CR\" and the range may be \"CR2:CR53\").\n",
" column_index = basic_stats_df.columns.get_loc(column)\n",
" column_character = get_excel_column_name(column_index)\n",
"\n",
" # Set all columns to larger width\n",
" worksheet.set_column(\n",
" f\"{column_character}:{column_character}\", column_width\n",
" )\n",
"\n",
" # Add green to red conditional formatting.\n",
" column_ranges = (\n",
" f\"{column_character}2:{column_character}{len(basic_stats_df)+1}\"\n",
" )\n",
" worksheet.conditional_format(\n",
" column_ranges,\n",
" # Min: green, max: red.\n",
" {\n",
" \"type\": \"2_color_scale\",\n",
" \"min_color\": \"#00FF7F\",\n",
" \"max_color\": \"#C82538\",\n",
" },\n",
" )\n",
"\n",
" # Special formatting for all percent columns\n",
" # Note: we can't just search for `percent`, because that's included in the word `percentile`.\n",
" if (\n",
" \"percent \" in column\n",
" or \"(percent)\" in column\n",
" or \"Percent \" in column\n",
" ):\n",
" # Make these columns percentages.\n",
" percentage_format = workbook.add_format({\"num_format\": \"0%\"})\n",
" worksheet.set_column(\n",
" f\"{column_character}:{column_character}\",\n",
" column_width,\n",
" percentage_format,\n",
" )\n",
"\n",
" header_format = workbook.add_format(\n",
" {\"bold\": True, \"text_wrap\": True, \"valign\": \"bottom\"}\n",
" )\n",
"\n",
" # Overwrite both the value and the format of each header cell\n",
" # This is because xlsxwriter / pandas has a known bug where it can't wrap text for a dataframe.\n",
" # See https://stackoverflow.com/questions/42562977/xlsxwriter-text-wrap-not-working.\n",
" for col_num, value in enumerate(basic_stats_df.columns.values):\n",
" worksheet.write(0, col_num, value, header_format)\n",
"\n",
" writer.save()\n",
"\n",
"\n",
"for index in census_block_group_indices:\n",
" print(f\"Basic stats for {index.method_name}\")\n",
" temp_df = merged_df\n",
" temp_df[index.priority_communities_field] = (\n",
" temp_df[index.priority_communities_field] == True\n",
" )\n",
"\n",
" # print(sum(temp_df[\"is_a_priority_cbg\"]))\n",
" grouped_df = (\n",
" temp_df.groupby(index.priority_communities_field).mean().reset_index()\n",
" )\n",
" result_df = grouped_df[\n",
" [index.priority_communities_field] + comparison_fields\n",
" ]\n",
" result_df.to_csv(\n",
" directory / f\"{index.method_name} Basic Stats.csv\", index=False\n",
" )\n",
" write_basic_stats_excel(\n",
" basic_stats_df=result_df,\n",
" file_path=directory / f\"{index.method_name} Basic Stats.xlsx\",\n",
" )"
]
},
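write_basic_stats_excel relies on get_excel_column_name, which is defined earlier in the notebook. Judging from the comment above ("column #95 is 'CR'"), it maps a 0-based column index to Excel column letters; a typical implementation looks roughly like this sketch (not necessarily the notebook's actual definition):

def get_excel_column_name(column_index: int) -> str:
    """Convert a 0-based column index to Excel letters (0 -> 'A', 95 -> 'CR')."""
    letters = ""
    n = column_index + 1  # Excel letters behave like a 1-based base-26 system.
    while n > 0:
        n, remainder = divmod(n - 1, 26)
        letters = chr(ord("A") + remainder) + letters
    return letters

assert get_excel_column_name(0) == "A"
assert get_excel_column_name(95) == "CR"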
{
@@ -918,21 +1074,6 @@
" )\n",
"\n",
"\n",
"comparison_fields = [\n",
" \"Percent of individuals < 100% Federal Poverty Line\",\n",
" \"Percent of individuals < 200% Federal Poverty Line\",\n",
" \"Median household income (% of AMI)\",\n",
" \"Percent of households in linguistic isolation\",\n",
" \"Percent individuals age 25 or over with less than high school degree\",\n",
" \"Linguistic isolation (percent)\",\n",
" \"Unemployed civilians (percent)\",\n",
" \"Median household income in the past 12 months\",\n",
" URBAN_HEURISTIC_FIELD,\n",
" LIFE_EXPECTANCY_FIELD,\n",
" HEALTH_INSURANCE_FIELD,\n",
" BAD_HEALTH_FIELD,\n",
"]\n",
"\n",
"for (index_a, index_b) in itertools.combinations(census_block_group_indices, 2):\n",
" print(f\"Comparing {index_a} and {index_b}.\")\n",
" compare_cbg_scores(\n",

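The closing loop hands every unordered pair of block-group indices to compare_cbg_scores, so n indices yield n*(n-1)/2 comparisons. A quick illustration with stand-in method names:

import itertools

method_names = [
    "CalEnviroScreen 4.0",
    "Persistent Poverty",
    "FEMA Risk Index (top 30th percentile)",
]
for index_a, index_b in itertools.combinations(method_names, 2):
    print(f"Comparing {index_a} and {index_b}.")
# 3 names -> 3 pairs; a fourth index would make 6 pairs.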
View file

@@ -57,6 +57,9 @@ AMI_FIELD = "Area Median Income (State or metropolitan)"
# Climate
FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score"
FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD = (
"FEMA Risk Index Expected Annual Loss Rate"
)
# Environment
DIESEL_FIELD = "Diesel particulate matter"

View file

@@ -9,7 +9,7 @@ logger = get_module_logger(__name__)
class ScoreL(Score):
def __init__(self, df: pd.DataFrame) -> None:
self.LOW_INCOME_THRESHOLD: float = 0.60
self.LOW_INCOME_THRESHOLD: float = 0.65
self.ENVIRONMENTAL_BURDEN_THRESHOLD: float = 0.90
super().__init__(df)
@@ -71,7 +71,7 @@ class ScoreL(Score):
> self.LOW_INCOME_THRESHOLD
) & (
self.df[
field_names.FEMA_RISK_FIELD
field_names.FEMA_EXPECTED_ANNUAL_LOSS_RATE_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
@@ -170,13 +170,16 @@ class ScoreL(Score):
# Low income: In 60th percentile or above for percent of block group population
# of households where household income is less than or equal to twice the federal
# poverty level. Source: Census's American Community Survey]
return (
self.df[
field_names.RMP_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX
]
pollution_criteria = (
self.df[field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
) & (
) | (
self.df[field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX]
> self.ENVIRONMENTAL_BURDEN_THRESHOLD
)
return pollution_criteria & (
self.df[
field_names.POVERTY_LESS_THAN_200_FPL_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX

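Each factor in ScoreL has the same shape: a socioeconomic percentile mask ANDed with one or more environmental-burden percentile masks (ORed together when there are several, as in the pollution factor above). A condensed sketch of the updated climate factor, using the thresholds from this diff and shortened stand-in column names:

import pandas as pd

LOW_INCOME_THRESHOLD = 0.65
ENVIRONMENTAL_BURDEN_THRESHOLD = 0.90

df = pd.DataFrame(
    {
        # Toy percentile columns in [0, 1].
        "poverty <200% FPL (percentile)": [0.70, 0.95, 0.50],
        "FEMA EAL rate (percentile)": [0.95, 0.80, 0.95],
    }
)

climate_factor = (
    df["poverty <200% FPL (percentile)"] > LOW_INCOME_THRESHOLD
) & (df["FEMA EAL rate (percentile)"] > ENVIRONMENTAL_BURDEN_THRESHOLD)
print(climate_factor.tolist())  # [True, False, False]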
View file

@@ -1,6 +1,6 @@
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE
40300,05007040300,10.492015,Very Low,15.3494,11.5
20100,05001020100,14.705854,Relatively Low,36.725828,12.5
40500,15007040500,10.234981,Very Low,13.997993,13.5
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5
21101,15001021101,19.434585,Relatively Low,53.392265,15.5
TRACT,TRACTFIPS,RISK_SCORE,RISK_RATNG,RISK_NPCTL,EAL_SCORE,AVLN_EALT,CFLD_EALT,CWAV_EALT,DRGT_EALT,ERQK_EALT,HAIL_EALT,HWAV_EALT,HRCN_EALT,ISTM_EALT,LNDS_EALT,LTNG_EALT,RFLD_EALT,SWND_EALT,TRND_EALT,TSUN_EALT,VLCN_EALT,WFIR_EALT,WNTW_EALT,AVLN_EXPT,CFLD_EXPT,CWAV_EXPT,DRGT_EXPT,ERQK_EXPT,HAIL_EXPT,HWAV_EXPT,HRCN_EXPT,ISTM_EXPT,LNDS_EXPT,LTNG_EXPT,RFLD_EXPT,SWND_EXPT,TRND_EXPT,TSUN_EXPT,VLCN_EXPT,WFIR_EXPT,WNTW_EXPT
40300,05007040300,10.492015,Very Low,15.3494,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5
20100,05001020100,14.705854,Relatively Low,36.725828,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5
40500,15007040500,10.234981,Very Low,13.997993,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5
21010,15001021010,21.537231,Relatively Moderate,59.488033,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5
21101,15001021101,19.434585,Relatively Low,53.392265,15.5,16.5,17.5,18.5,19.5,20.5,21.5,22.5,23.5,24.5,25.5,26.5,27.5,28.5,29.5,30.5,31.5,32.5,33.5,34.5,35.5,36.5,37.5,38.5,39.5,40.5,41.5,42.5,43.5,44.5,45.5,46.5,47.5,48.5,49.5,50.5,51.5


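The block-group fixture values below follow directly from this tract fixture: for the first tract the EALT columns run 12.5 through 29.5 and the EXPT columns 30.5 through 47.5, so the expected rate works out to:

# Sum of per-category EALT / EXPT for the first fixture tract.
rate = sum((12.5 + i) / (30.5 + i) for i in range(18))
print(rate)  # 9.540442348853764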
View file

@@ -1,11 +1,11 @@
GEOID10,FEMA Risk Index Expected Annual Loss Score
050070403001,11.5
050070403002,11.5
050010201001,12.5
050010201002,12.5
150070405001,13.5
150070405002,13.5
150010210101,14.5
150010210102,14.5
150010211011,15.5
150010211012,15.5
GEOID10,FEMA Risk Index Expected Annual Loss Score,FEMA Risk Index Expected Annual Loss Rate
050070403001,11.5,9.540442348853764
050070403002,11.5,9.540442348853764
050010201001,12.5,9.759472262661436
050010201002,12.5,9.759472262661436
150070405001,13.5,9.967264470453644
150070405002,13.5,9.967264470453644
150010210101,14.5,10.16467498073544
150010210102,14.5,10.16467498073544
150010211011,15.5,10.352473850464468
150010211012,15.5,10.352473850464468


View file

@@ -1,11 +1,11 @@
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score
050070403001,05007040300,11.5
050070403002,05007040300,11.5
050010201001,05001020100,12.5
050010201002,05001020100,12.5
150070405001,15007040500,13.5
150070405002,15007040500,13.5
150010210101,15001021010,14.5
150010210102,15001021010,14.5
150010211011,15001021101,15.5
150010211012,15001021101,15.5
GEOID10,GEOID10_TRACT,FEMA Risk Index Expected Annual Loss Score,FEMA Risk Index Expected Annual Loss Rate
050070403001,05007040300,11.5,9.540442348853764
050070403002,05007040300,11.5,9.540442348853764
050010201001,05001020100,12.5,9.759472262661436
050010201002,05001020100,12.5,9.759472262661436
150070405001,15007040500,13.5,9.967264470453644
150070405002,15007040500,13.5,9.967264470453644
150010210101,15001021010,14.5,10.164674980735441
150010210102,15001021010,14.5,10.164674980735441
150010211011,15001021101,15.5,10.352473850464467
150010211012,15001021101,15.5,10.352473850464467


View file

@@ -61,9 +61,10 @@ class TestNationalRiskIndexETL:
)
# execution
etl.transform()
# validation
assert etl.df.shape == (10, 3)
assert etl.df.equals(expected)
assert etl.df.shape == (10, 4)
pd.testing.assert_frame_equal(etl.df, expected)
def test_load(self, mock_etl):
"""Tests the load() method for NationalRiskIndexETL
@@ -89,7 +90,8 @@ class TestNationalRiskIndexETL:
# execution
etl.load()
output = pd.read_csv(output_path, dtype={BLOCK_COL: str})
# validation
assert output_path.exists()
assert output.shape == (10, 2)
assert output.equals(expected)
assert output.shape == (10, 3)
pd.testing.assert_frame_equal(output, expected)
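Replacing DataFrame.equals with pd.testing.assert_frame_equal is a diagnosability improvement: the former fails with a bare AssertionError, while the latter reports the offending column, index, and values. For example:

import pandas as pd

left = pd.DataFrame({"GEOID10": ["050070403001"], "rate": [9.54]})
right = pd.DataFrame({"GEOID10": ["050070403001"], "rate": [9.55]})

# Raises an AssertionError pinpointing the "rate" column and the differing
# values, instead of an opaque `assert left.equals(right)` failure.
pd.testing.assert_frame_equal(left, right)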