Download column order completed (#1077)

* Download column order completed

* Kameron changes

* Lucas and Beth column order changes

* cdc_places update

* passing score

* pandas error

* checkpoint

* score passing

* rounding complete - percentages still showing one decimal

* fixing tests

* fixing percentages

* updating comment

* int percentages! 🎉🎉

* forgot to pass back to df

* passing tests

Co-authored-by: lucasmbrown-usds <lucas.m.brown@omb.eop.gov>
This commit is contained in:
Jorge Escobar 2022-01-13 15:04:16 -05:00 committed by GitHub
commit d686bb856e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 232 additions and 133 deletions

View file

@ -5,6 +5,7 @@ from data_pipeline.etl.sources.census_acs.etl_utils import (
retrieve_census_acs_data,
)
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
logger = get_module_logger(__name__)
@ -22,7 +23,7 @@ class CensusACSETL(ExtractTransformLoad):
self.TOTAL_UNEMPLOYED_FIELD,
self.TOTAL_IN_LABOR_FORCE,
]
self.UNEMPLOYED_FIELD_NAME = "Unemployed civilians (percent)"
self.UNEMPLOYED_FIELD_NAME = "Unemployment (percent)"
self.LINGUISTIC_ISOLATION_FIELD_NAME = "Linguistic isolation (percent)"
self.LINGUISTIC_ISOLATION_TOTAL_FIELD_NAME = (
@ -353,18 +354,29 @@ class CensusACSETL(ExtractTransformLoad):
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
# strip columns
df = df[self.COLUMNS_TO_KEEP]
# Save results to self.
self.df = df
# rename columns to be used in score
rename_fields = {
"Percent of individuals < 200% Federal Poverty Line": field_names.POVERTY_LESS_THAN_200_FPL_FIELD,
}
self.df.rename(
columns=rename_fields,
inplace=True,
errors="raise",
)
def load(self) -> None:
# Persist the frame held in self.df as "usa.csv" under self.OUTPUT_PATH.
# NOTE(review): this span is a rendered diff hunk whose +/- markers and
# indentation were stripped, so the two to_csv calls below are presumably the
# removed and the added variant of a single write, not two sequential writes.
# TODO confirm against the commit itself before treating either as live code.
logger.info("Saving Census ACS Data")
# Create the output directory (and any missing parents); an existing
# directory is not an error because exist_ok=True.
self.OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
# Variant 1: write only the columns listed in self.COLUMNS_TO_KEEP.
# NOTE(review): the hunk above renames a column on self.df with
# errors="raise"; if COLUMNS_TO_KEEP still lists the old name, this
# selection would raise KeyError. Presumably this is the removed side of
# the diff; verify.
self.df[self.COLUMNS_TO_KEEP].to_csv(
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False
)
# Variant 2: write every column of self.df, without the index column.
self.df.to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)
def validate(self) -> None:
logger.info("Validating Census ACS Data")