From 4c7d729cf7e1a04a0a0c95d29da425efc8f6bbc0 Mon Sep 17 00:00:00 2001 From: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com> Date: Wed, 26 Jan 2022 13:57:45 -0500 Subject: [PATCH] Issue 1140 loss rate rounding (#1170) * updated loss rate rounding * fixing a typo in variable name * fixing typo in variable name * oops, now ready to push * updated pickle with float for loss rate columns * updated a typo, now multiplies all loss rates by 100 consistent with other pcts * updated with final pickles, all tests passing * updated incorporating lucas pr comments * changed literal to field name --- .../data_pipeline/etl/score/constants.py | 12 ++++++++++-- .../data_pipeline/etl/score/etl_score_post.py | 10 ++++++++++ .../snapshots/downloadable_data_expected.pkl | Bin 13685 -> 13511 bytes 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 1bd16abc..d38a56c2 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -69,12 +69,20 @@ PERCENT_PREFIXES_SUFFIXES = [ "percent", "Percentage", "Energy burden", - "loss rate", "greater than or equal to 18 years", field_names.PERCENTILE_FIELD_SUFFIX, ] - TILES_ROUND_NUM_DECIMALS = 2 + +# FEMA rounding columns +FEMA_ROUND_NUM_COLUMNS = [ + field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD, + field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD, + field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD, +] + +TILES_FEMA_ROUND_NUM_DECIMALS = 4 + # Tiles data: full field name, tile index name TILES_SCORE_COLUMNS = { field_names.GEOID_TRACT_FIELD: "GTF", diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index c42d7b84..084ab495 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -259,6 +259,16 @@ class PostScoreETL(ExtractTransformLoad): pd.to_numeric(df_100, errors="coerce") ).astype("Int64") df[column] = df_int + + elif column in constants.FEMA_ROUND_NUM_COLUMNS: + # Convert loss rates by multiplying by 100 (they are percents) + # and then rounding appropriately. + df_100 = df[column] * 100 + df[column] = floor_series( + series=df_100.astype(float64), + number_of_decimals=constants.TILES_FEMA_ROUND_NUM_DECIMALS, + ) + else: # Round all other floats. df[column] = floor_series( diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 5608343f42a9ba3290dcecdc7dc2a7020a3241ce..1043c8bdbc2c1cc4b3efe9c726d167b3eaf05c8e 100644 GIT binary patch delta 239 zcmeyGbv%=`fo19*lZ~u@SSLG(i%l-)(v-G;u)V<~_vkfu`={-b?@s@womw&_DZ|E_ z)0=CuA)o8y4Q!bpwbB_IJ7cG4xZ5vT@6X%f%jj;u?PbN4zn`}Ol~1l=-wINr1XH7B z_UN^Yv#z^6lmRu8-&+7=4U*s2?e9gp~k!`Y&X)*w1 C4O_wh delta 193 zcmX?}`8A8Rfn{oz=|C-a+xFe*+?F-ZmhILtY$