mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 08:21:16 -07:00
Initial refactor for Score ETL (#618)
* WIP refactor * Extract score calculations into their own methods * do all initial df prep in single method * Fix error in docs for running etl for single dataset * WIP understanding HUD and linguistic iso data * Add comments from initial group review on PR Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
This commit is contained in:
parent
470c474367
commit
ac62933d16
4 changed files with 200 additions and 141 deletions
|
@ -21,11 +21,11 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
"Linguistic isolation (total)"
|
||||
)
|
||||
self.LINGUISTIC_ISOLATION_FIELDS = [
|
||||
"C16002_001E",
|
||||
"C16002_004E",
|
||||
"C16002_007E",
|
||||
"C16002_010E",
|
||||
"C16002_013E",
|
||||
"C16002_001E", # Estimate!!Total
|
||||
"C16002_004E", # Estimate!!Total!!Spanish!!Limited English speaking household
|
||||
"C16002_007E", # Estimate!!Total!!Other Indo-European languages!!Limited English speaking household
|
||||
"C16002_010E", # Estimate!!Total!!Asian and Pacific Island languages!!Limited English speaking household
|
||||
"C16002_013E", # Estimate!!Total!!Other languages!!Limited English speaking household
|
||||
]
|
||||
self.MEDIAN_INCOME_FIELD = "B19013_001E"
|
||||
self.MEDIAN_INCOME_FIELD_NAME = (
|
||||
|
|
|
@ -272,6 +272,12 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
- self.df[RENTER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1)
|
||||
)
|
||||
|
||||
|
||||
self.df["DENOM INCL NOT COMPUTED"] = (
|
||||
self.df[OWNER_OCCUPIED_POPULATION_FIELD]
|
||||
+ self.df[RENTER_OCCUPIED_POPULATION_FIELD]
|
||||
)
|
||||
|
||||
# TODO: add small sample size checks
|
||||
self.df[self.HOUSING_BURDEN_FIELD_NAME] = self.df[
|
||||
self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME
|
||||
|
@ -293,5 +299,6 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME,
|
||||
self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME,
|
||||
self.HOUSING_BURDEN_FIELD_NAME,
|
||||
"DENOM INCL NOT COMPUTED",
|
||||
]
|
||||
].to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue