passing score

This commit is contained in:
Jorge Escobar 2022-01-04 16:58:08 -05:00
commit a16bf7cf58
3 changed files with 42 additions and 19 deletions

View file

@ -52,11 +52,11 @@ class CDCPlacesETL(ExtractTransformLoad):
# rename columns to be used in score
rename_fields = {
"Current asthma among adults aged >=18 years": field_names.ASTHMA_FIELD, # 'Current asthma among adults aged greater than or equal to 18 years'
"Coronary heart disease among adults aged >=18 years": field.names.HEART_DISEASE_FIELD, # "Coronary heart disease among adults aged greater than or equal to 18 years"
"Cancer (excluding skin cancer) among adults aged >=18 years": field_names.CANCER_FIELD, # 'Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years',
"Diagnosed diabetes among adults aged >=18 years": field_names.DIABETES_FIELD, # 'Diagnosed diabetes among adults aged greater than or equal to 18 years',
"Physical health not good for >=14 days among adults aged >=18 years": field_names.PHYS_HEALTH_NOT_GOOD_FIELD, # 'Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years',
"Current asthma among adults aged >=18 years": field_names.ASTHMA_FIELD,
"Coronary heart disease among adults aged >=18 years": field_names.HEART_DISEASE_FIELD,
"Cancer (excluding skin cancer) among adults aged >=18 years": field_names.CANCER_FIELD,
"Diagnosed diabetes among adults aged >=18 years": field_names.DIABETES_FIELD,
"Physical health not good for >=14 days among adults aged >=18 years": field_names.PHYS_HEALTH_NOT_GOOD_FIELD,
}
self.df.rename(
columns=rename_fields,

View file

@ -5,6 +5,7 @@ from data_pipeline.etl.sources.census_acs.etl_utils import (
retrieve_census_acs_data,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
logger = get_module_logger(__name__)
@ -353,18 +354,29 @@ class CensusACSETL(ExtractTransformLoad):
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
# strip columns
df = df[self.COLUMNS_TO_KEEP]
# Save results to self.
self.df = df
# rename columns to be used in score
rename_fields = {
"Percent of individuals < 200% Federal Poverty Line": field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
}
self.df.rename(
columns=rename_fields,
inplace=True,
errors="raise",
)
def load(self) -> None:
logger.info("Saving Census ACS Data")
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df[self.COLUMNS_TO_KEEP].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
)
self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)
def validate(self) -> None:
logger.info("Validating Census ACS Data")

View file

@ -5,6 +5,7 @@ from data_pipeline.etl.sources.census_acs.etl_utils import (
retrieve_census_acs_data,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
logger = get_module_logger(__name__)
@ -149,15 +150,6 @@ class CensusACS2010ETL(ExtractTransformLoad):
+ df["C17002_007E"]
) / df["C17002_001E"]
# Save results to self.
self.df = df
def load(self) -> None:
logger.info("Saving Census ACS Data")
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
columns_to_include = [
self.GEOID_TRACT_FIELD_NAME,
self.UNEMPLOYED_FIELD_NAME,
@ -166,7 +158,7 @@ class CensusACS2010ETL(ExtractTransformLoad):
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
]
output_df = self.df[columns_to_include]
output_df = df[columns_to_include]
# Add the year to the end of every column, so when it's all joined in the
# score df, it's obvious which year this data is from.
@ -178,7 +170,26 @@ class CensusACS2010ETL(ExtractTransformLoad):
}
)
output_df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)
# rename columns to be used in score
rename_fields = {
"Percent of individuals < 100% Federal Poverty Line in 2010": field_names.CENSUS_POVERTY_LESS_THAN_100_FPL_FIELD_2010,
}
output_df.rename(
columns=rename_fields,
inplace=True,
errors="raise",
)
# Save results to self.
self.df = output_df
def load(self) -> None:
logger.info("Saving Census ACS Data")
# mkdir census
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)
def validate(self) -> None:
logger.info("Validating Census ACS Data")