Initial refactor for Score ETL (#618)

* WIP refactor

* Extract score calculations into their own methods

* Do all initial df prep in a single method

* Fix error in docs for running ETL for a single dataset

* WIP understanding HUD and linguistic iso data

* Add comments from initial group review on PR

Co-authored-by: Shelby Switzer <shelby.switzer@cms.hhs.gov>
This commit is contained in:
Shelby Switzer 2021-09-10 10:34:34 -04:00 committed by GitHub
commit ac62933d16
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 200 additions and 141 deletions

View file

@ -272,6 +272,12 @@ class HudHousingETL(ExtractTransformLoad):
- self.df[RENTER_OCCUPIED_NOT_COMPUTED_FIELDS].sum(axis=1)
)
self.df["DENOM INCL NOT COMPUTED"] = (
self.df[OWNER_OCCUPIED_POPULATION_FIELD]
+ self.df[RENTER_OCCUPIED_POPULATION_FIELD]
)
# TODO: add small sample size checks
self.df[self.HOUSING_BURDEN_FIELD_NAME] = self.df[
self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME
@ -293,5 +299,6 @@ class HudHousingETL(ExtractTransformLoad):
self.HOUSING_BURDEN_NUMERATOR_FIELD_NAME,
self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME,
self.HOUSING_BURDEN_FIELD_NAME,
"DENOM INCL NOT COMPUTED",
]
].to_csv(path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False)