cdc_places update

This commit is contained in:
Jorge Escobar 2022-01-03 18:14:36 -05:00
parent 006493ab24
commit c9ee6a43c1

View file

@ -2,6 +2,7 @@ import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger, download_file_from_url
from data_pipeline.score import field_names
logger = get_module_logger(__name__)
@ -49,6 +50,20 @@ class CDCPlacesETL(ExtractTransformLoad):
values=self.CDC_VALUE_FIELD_NAME,
)
# Rename the CDC measure columns to the shared score field names so the
# score code can refer to them through `field_names` constants.
# The trailing comment on each entry is the spelled-out label carried over
# from the original code (">=" written as "greater than or equal to");
# presumably it mirrors the constant's value -- verify against field_names.
rename_fields = {
    "Current asthma among adults aged >=18 years": field_names.ASTHMA_FIELD,  # 'Current asthma among adults aged greater than or equal to 18 years'
    # Fixed: was `field.names.HEART_DISEASE_FIELD`, which raises NameError
    # because only `field_names` is imported.
    "Coronary heart disease among adults aged >=18 years": field_names.HEART_DISEASE_FIELD,  # 'Coronary heart disease among adults aged greater than or equal to 18 years'
    "Cancer (excluding skin cancer) among adults aged >=18 years": field_names.CANCER_FIELD,  # 'Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years'
    "Diagnosed diabetes among adults aged >=18 years": field_names.DIABETES_FIELD,  # 'Diagnosed diabetes among adults aged greater than or equal to 18 years'
    "Physical health not good for >=14 days among adults aged >=18 years": field_names.PHYS_HEALTH_NOT_GOOD_FIELD,  # 'Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years'
}
# errors="raise" makes pandas raise a KeyError when any key above is not an
# existing column, instead of silently skipping the rename.
self.df.rename(
    columns=rename_fields,
    inplace=True,
    errors="raise",
)
# Make the index (the census tract ID) a column, not the index.
self.df.reset_index(inplace=True)