mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 01:51:17 -07:00
CDC SVI Index: Additions to data-pipeline and comparison tool (#1096)
* wip * working * working * rename * documentation * add link * add readme * update fieldnames * typo * add comparison tool * revise wording * variable change for FIPS * wording * wording in readme * cleanup wording * update comparison tool * final tune up * grammar and punctuation in the documentation * period * cleanup comments * added revisions * parallelism * PR feedback from Lucas * remove extraneous fields from comparison tool * style * updates * remove themes * formatting * remove references to percentile rank * remove references to percentile rank * typo in fieldnames * updates based on feedback from Lucas * fieldnames formatting * fix broken markdown link Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
95a14adb35
commit
87e08f5fe1
7 changed files with 337 additions and 36 deletions
|
@ -50,7 +50,9 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"df_raw = pd.read_csv('master_raw_data.csv') #file from the 'Data collection.ipynb'\n",
|
||||
"df_raw = pd.read_csv(\n",
|
||||
" \"master_raw_data.csv\"\n",
|
||||
") # file from the 'Data collection.ipynb'\n",
|
||||
"print(df_raw.shape)\n",
|
||||
"print(df_raw.columns)"
|
||||
]
|
||||
|
@ -296,8 +298,8 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"#More advanced imputation method\n",
|
||||
"df_imputed_x = pd.read_excel('20200420_input_final.xlsx')\n",
|
||||
"# More advanced imputation method\n",
|
||||
"df_imputed_x = pd.read_excel(\"20200420_input_final.xlsx\")\n",
|
||||
"df_imputed_x.head()"
|
||||
]
|
||||
},
|
||||
|
@ -389,22 +391,48 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"data = df_imputed_x.copy() #data\n",
|
||||
"model1 = Model(data) #Initializing class variable\n",
|
||||
"data = df_imputed_x.copy() # data\n",
|
||||
"model1 = Model(data) # Initializing class variable\n",
|
||||
"\n",
|
||||
"# variables list to include in our regression model.\n",
|
||||
"columns_regress = ['HE_FOOD', 'HE_WALK', 'HE_VACANCY', 'HE_SUPRFND','HE_HLTHINS','BINGE_CrudePrev',\n",
|
||||
" 'CHECKUP_CrudePrev','BPHIGH_CrudePrev','SLEEP_CrudePrev', 'STROKE_CrudePrev', 'Drug Test', 'Pedalcyclist']\n",
|
||||
"columns_regress = [\n",
|
||||
" \"HE_FOOD\",\n",
|
||||
" \"HE_WALK\",\n",
|
||||
" \"HE_VACANCY\",\n",
|
||||
" \"HE_SUPRFND\",\n",
|
||||
" \"HE_HLTHINS\",\n",
|
||||
" \"BINGE_CrudePrev\",\n",
|
||||
" \"CHECKUP_CrudePrev\",\n",
|
||||
" \"BPHIGH_CrudePrev\",\n",
|
||||
" \"SLEEP_CrudePrev\",\n",
|
||||
" \"STROKE_CrudePrev\",\n",
|
||||
" \"Drug Test\",\n",
|
||||
" \"Pedalcyclist\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# multipliers for each variable to rescale the variables as higher the value better for the health score.\n",
|
||||
"multiply_cols = {'HE_FOOD': -1,'HE_WALK': 1,'HE_VACANCY': -1,'HE_SUPRFND':-1 , 'HE_HLTHINS': 1 ,'BINGE_CrudePrev': -1 , \n",
|
||||
" 'CHECKUP_CrudePrev': 1,'BPHIGH_CrudePrev': -1,'SLEEP_CrudePrev': -1,'STROKE_CrudePrev' : -1, 'Drug Test' : -1, 'Pedalcyclist' : -1,}\n",
|
||||
"multiply_cols = {\n",
|
||||
" \"HE_FOOD\": -1,\n",
|
||||
" \"HE_WALK\": 1,\n",
|
||||
" \"HE_VACANCY\": -1,\n",
|
||||
" \"HE_SUPRFND\": -1,\n",
|
||||
" \"HE_HLTHINS\": 1,\n",
|
||||
" \"BINGE_CrudePrev\": -1,\n",
|
||||
" \"CHECKUP_CrudePrev\": 1,\n",
|
||||
" \"BPHIGH_CrudePrev\": -1,\n",
|
||||
" \"SLEEP_CrudePrev\": -1,\n",
|
||||
" \"STROKE_CrudePrev\": -1,\n",
|
||||
" \"Drug Test\": -1,\n",
|
||||
" \"Pedalcyclist\": -1,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"#target variable\n",
|
||||
"target = 'life expectancy'\n",
|
||||
"# target variable\n",
|
||||
"target = \"life expectancy\"\n",
|
||||
"\n",
|
||||
"#storing the data and model weights to calculate health score.\n",
|
||||
"multiplied_zscore_data_le, params_le = model1.model_output(columns_regress,target,multiply_cols)"
|
||||
"# storing the data and model weights to calculate health score.\n",
|
||||
"multiplied_zscore_data_le, params_le = model1.model_output(\n",
|
||||
" columns_regress, target, multiply_cols\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -456,16 +484,42 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"columns_regress = ['HE_FOOD', 'HE_WALK', 'HE_VACANCY', 'HE_SUPRFND','HE_HLTHINS','BINGE_CrudePrev',\n",
|
||||
" 'CHECKUP_CrudePrev','BPHIGH_CrudePrev','SLEEP_CrudePrev', 'STROKE_CrudePrev', 'Drug Test', 'Pedalcyclist']\n",
|
||||
"columns_regress = [\n",
|
||||
" \"HE_FOOD\",\n",
|
||||
" \"HE_WALK\",\n",
|
||||
" \"HE_VACANCY\",\n",
|
||||
" \"HE_SUPRFND\",\n",
|
||||
" \"HE_HLTHINS\",\n",
|
||||
" \"BINGE_CrudePrev\",\n",
|
||||
" \"CHECKUP_CrudePrev\",\n",
|
||||
" \"BPHIGH_CrudePrev\",\n",
|
||||
" \"SLEEP_CrudePrev\",\n",
|
||||
" \"STROKE_CrudePrev\",\n",
|
||||
" \"Drug Test\",\n",
|
||||
" \"Pedalcyclist\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"multiply_cols = {'HE_FOOD': -1,'HE_WALK': 1,'HE_VACANCY': -1,'HE_SUPRFND':-1 , 'HE_HLTHINS': 1 ,'BINGE_CrudePrev': -1 , \n",
|
||||
" 'CHECKUP_CrudePrev': 1,'BPHIGH_CrudePrev': -1,'SLEEP_CrudePrev': -1,'STROKE_CrudePrev' : -1, 'Drug Test' : -1, 'Pedalcyclist' : -1,}\n",
|
||||
"multiply_cols = {\n",
|
||||
" \"HE_FOOD\": -1,\n",
|
||||
" \"HE_WALK\": 1,\n",
|
||||
" \"HE_VACANCY\": -1,\n",
|
||||
" \"HE_SUPRFND\": -1,\n",
|
||||
" \"HE_HLTHINS\": 1,\n",
|
||||
" \"BINGE_CrudePrev\": -1,\n",
|
||||
" \"CHECKUP_CrudePrev\": 1,\n",
|
||||
" \"BPHIGH_CrudePrev\": -1,\n",
|
||||
" \"SLEEP_CrudePrev\": -1,\n",
|
||||
" \"STROKE_CrudePrev\": -1,\n",
|
||||
" \"Drug Test\": -1,\n",
|
||||
" \"Pedalcyclist\": -1,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"target = 'PHLTH_CrudePrev'\n",
|
||||
"target = \"PHLTH_CrudePrev\"\n",
|
||||
"target_multiplier = -1\n",
|
||||
"\n",
|
||||
"multiplied_zscore_data_1, params_1 = model1.model_output(columns_regress,target,multiply_cols,target_multiplier = target_multiplier)"
|
||||
"multiplied_zscore_data_1, params_1 = model1.model_output(\n",
|
||||
" columns_regress, target, multiply_cols, target_multiplier=target_multiplier\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -517,20 +571,46 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"#regression of MHLTH_CrudePrev \n",
|
||||
"# regression of MHLTH_CrudePrev\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"columns_regress = ['HE_FOOD', 'HE_WALK', 'HE_VACANCY', 'HE_SUPRFND','HE_HLTHINS','BINGE_CrudePrev',\n",
|
||||
" 'CHECKUP_CrudePrev','BPHIGH_CrudePrev','SLEEP_CrudePrev', 'STROKE_CrudePrev', 'Drug Test', 'Pedalcyclist']\n",
|
||||
"columns_regress = [\n",
|
||||
" \"HE_FOOD\",\n",
|
||||
" \"HE_WALK\",\n",
|
||||
" \"HE_VACANCY\",\n",
|
||||
" \"HE_SUPRFND\",\n",
|
||||
" \"HE_HLTHINS\",\n",
|
||||
" \"BINGE_CrudePrev\",\n",
|
||||
" \"CHECKUP_CrudePrev\",\n",
|
||||
" \"BPHIGH_CrudePrev\",\n",
|
||||
" \"SLEEP_CrudePrev\",\n",
|
||||
" \"STROKE_CrudePrev\",\n",
|
||||
" \"Drug Test\",\n",
|
||||
" \"Pedalcyclist\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"multiply_cols = {'HE_FOOD': -1,'HE_WALK': 1,'HE_VACANCY': -1,'HE_SUPRFND':-1 , 'HE_HLTHINS': 1 ,'BINGE_CrudePrev': -1 , \n",
|
||||
" 'CHECKUP_CrudePrev': 1,'BPHIGH_CrudePrev': -1,'SLEEP_CrudePrev': -1,'STROKE_CrudePrev' : -1, 'Drug Test' : -1, 'Pedalcyclist' : -1,}\n",
|
||||
"multiply_cols = {\n",
|
||||
" \"HE_FOOD\": -1,\n",
|
||||
" \"HE_WALK\": 1,\n",
|
||||
" \"HE_VACANCY\": -1,\n",
|
||||
" \"HE_SUPRFND\": -1,\n",
|
||||
" \"HE_HLTHINS\": 1,\n",
|
||||
" \"BINGE_CrudePrev\": -1,\n",
|
||||
" \"CHECKUP_CrudePrev\": 1,\n",
|
||||
" \"BPHIGH_CrudePrev\": -1,\n",
|
||||
" \"SLEEP_CrudePrev\": -1,\n",
|
||||
" \"STROKE_CrudePrev\": -1,\n",
|
||||
" \"Drug Test\": -1,\n",
|
||||
" \"Pedalcyclist\": -1,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"target = 'MHLTH_CrudePrev'\n",
|
||||
"target = \"MHLTH_CrudePrev\"\n",
|
||||
"target_multiplier = -1\n",
|
||||
"\n",
|
||||
"multiplied_zscore_data_2, params_2 = model1.model_output(columns_regress,target,multiply_cols,target_multiplier = target_multiplier)"
|
||||
"multiplied_zscore_data_2, params_2 = model1.model_output(\n",
|
||||
" columns_regress, target, multiply_cols, target_multiplier=target_multiplier\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -579,10 +659,12 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"health_scores1 = HealthScores(params_1,params_2,params_le,multiplied_zscore_data_1,data['geoid'])\n",
|
||||
"final_data,weights_table = health_scores1.final_scaled_data()\n",
|
||||
"health_scores1 = HealthScores(\n",
|
||||
" params_1, params_2, params_le, multiplied_zscore_data_1, data[\"geoid\"]\n",
|
||||
")\n",
|
||||
"final_data, weights_table = health_scores1.final_scaled_data()\n",
|
||||
"\n",
|
||||
"health_scores1.weights #Can access the weights directly like this."
|
||||
"health_scores1.weights # Can access the weights directly like this."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
@ -276,6 +276,21 @@
|
|||
"mapping_inequality_df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fceb3136",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cdc_svi_index_path = DATA_DIR / \"dataset\" / \"cdc_svi_index\" / \"usa.csv\"\n",
|
||||
"cdc_svi_index_df = pd.read_csv(\n",
|
||||
" cdc_svi_index_path,\n",
|
||||
" dtype={ExtractTransformLoad.GEOID_TRACT_FIELD_NAME: \"string\"},\n",
|
||||
")\n",
|
||||
"cdc_svi_index_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
@ -302,7 +317,7 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load alternative energy-related definition \n",
|
||||
"# Load alternative energy-related definition\n",
|
||||
"energy_definition_alternative_draft_path = (\n",
|
||||
" DATA_DIR / \"dataset\" / \"energy_definition_alternative_draft\" / \"usa.csv\"\n",
|
||||
")\n",
|
||||
|
@ -370,7 +385,8 @@
|
|||
" epa_rsei_aggregate_df,\n",
|
||||
" maryland_ejscreen_df,\n",
|
||||
" energy_definition_alternative_draft_df,\n",
|
||||
" michigan_ejscreen_df\n",
|
||||
" michigan_ejscreen_df,\n",
|
||||
" cdc_svi_index_df,\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"merged_df = functools.reduce(\n",
|
||||
|
@ -493,8 +509,8 @@
|
|||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"EPA RSEI Aggregate Microdata\",\n",
|
||||
" priority_communities_field=field_names.EPA_RSEI_SCORE_THRESHOLD_FIELD\n",
|
||||
" ), \n",
|
||||
" priority_communities_field=field_names.EPA_RSEI_SCORE_THRESHOLD_FIELD,\n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Persistent Poverty\",\n",
|
||||
" priority_communities_field=PERSISTENT_POVERTY_TRACT_LEVEL_FIELD,\n",
|
||||
|
@ -502,15 +518,19 @@
|
|||
" Index(\n",
|
||||
" method_name=\"Maryland EJSCREEN\",\n",
|
||||
" priority_communities_field=field_names.MARYLAND_EJSCREEN_BURDENED_THRESHOLD_FIELD,\n",
|
||||
" ), \n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=field_names.ENERGY_RELATED_COMMUNITIES_DEFINITION_ALTERNATIVE,\n",
|
||||
" priority_communities_field=field_names.ENERGY_RELATED_COMMUNITIES_DEFINITION_ALTERNATIVE,\n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"CDC SVI Index\",\n",
|
||||
" priority_communities_field=field_names.CDC_SVI_INDEX_THEMES_PRIORITY_COMMUNITY,\n",
|
||||
" ),\n",
|
||||
" Index(\n",
|
||||
" method_name=\"Michigan EJSCREEN\",\n",
|
||||
" priority_communities_field=field_names.MICHIGAN_EJSCREEN_PRIORITY_COMMUNITY_FIELD,\n",
|
||||
" ), \n",
|
||||
" ),\n",
|
||||
" ]\n",
|
||||
" # Insert indices for each of the HOLC factors.\n",
|
||||
" # Note: since these involve no renaming, we write them using list comprehension.\n",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue