mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 08:21:16 -07:00
Download column order completed (#1077)
* Download column order completed * Kameron changes * Lucas and Beth column order changes * cdc_places update * passing score * pandas error * checkpoint * score passing * rounding complete - percentages still showing one decimal * fixing tests * fixing percentages * updating comment * int percentages! 🎉🎉 * forgot to pass back to df * passing tests Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
parent
98ff4bd9d8
commit
d686bb856e
13 changed files with 232 additions and 133 deletions
|
@ -5,6 +5,7 @@ from data_pipeline.etl.sources.census_acs.etl_utils import (
|
|||
retrieve_census_acs_data,
|
||||
)
|
||||
from data_pipeline.utils import get_module_logger
|
||||
from data_pipeline.score import field_names
|
||||
|
||||
logger = get_module_logger(__name__)
|
||||
|
||||
|
@ -22,7 +23,7 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.TOTAL_UNEMPLOYED_FIELD,
|
||||
self.TOTAL_IN_LABOR_FORCE,
|
||||
]
|
||||
self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)"
|
||||
self.UNEMPLOYED_FIELD_NAME = "Unemployment (percent)"
|
||||
|
||||
self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
|
||||
self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = (
|
||||
|
@ -353,18 +354,29 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
|
||||
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
|
||||
|
||||
# strip columns
|
||||
df = df[self.COLUMNS_TO_KEEP]
|
||||
|
||||
# Save results to self.
|
||||
self.df = df
|
||||
|
||||
# rename columns to be used in score
|
||||
rename_fields = {
|
||||
"Percent of individuals < 200% Federal Poverty Line": field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
|
||||
}
|
||||
self.df.rename(
|
||||
columns=rename_fields,
|
||||
inplace=True,
|
||||
errors="raise",
|
||||
)
|
||||
|
||||
def load(self) -> None:
|
||||
logger.info("Saving Census ACS Data")
|
||||
|
||||
# mkdir census
|
||||
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.df[self.COLUMNS_TO_KEEP].to_csv(
|
||||
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
|
||||
)
|
||||
self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)
|
||||
|
||||
def validate(self) -> None:
|
||||
logger.info("Validating Census ACS Data")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue