mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
added indoor plumbing to chas
This commit is contained in:
parent
bbb5bbc60a
commit
cac1e0406f
1 changed files with 48 additions and 94 deletions
|
@ -19,6 +19,9 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
self.HOUSING_BURDEN_DENOMINATOR_FIELD_NAME = (
|
||||
"HOUSING_BURDEN_DENOMINATOR"
|
||||
)
|
||||
self.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD_NAME = (
|
||||
"No kitchen or indoor plumbing (percent)"
|
||||
)
|
||||
|
||||
# Note: some variable definitions.
|
||||
# HUD-adjusted median family income (HAMFI).
|
||||
|
@ -27,7 +30,8 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
# - incomplete plumbing facilities,
|
||||
# - more than 1 person per room,
|
||||
# - cost burden greater than 30%.
|
||||
# Table 8 is the desired table.
|
||||
# Table 8 is the desired table for housing burden
|
||||
# Table 3 is the desired table for no kitchen or indoor plumbing
|
||||
|
||||
self.df: pd.DataFrame
|
||||
|
||||
|
@ -38,124 +42,74 @@ class HudHousingETL(ExtractTransformLoad):
|
|||
self.HOUSING_ZIP_FILE_DIR,
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
logger.info("Transforming HUD Housing Data")
|
||||
|
||||
def _read_chas_table(self, file_name):
|
||||
# New file name:
|
||||
tmp_csv_file_path = self.HOUSING_ZIP_FILE_DIR / "140" / "Table8.csv"
|
||||
self.df = pd.read_csv(
|
||||
tmp_csv_file_path = self.HOUSING_ZIP_FILE_DIR / "140" / file_name
|
||||
tmp_df = pd.read_csv(
|
||||
filepath_or_buffer=tmp_csv_file_path,
|
||||
encoding="latin-1",
|
||||
)
|
||||
|
||||
# Rename and reformat block group ID
|
||||
self.df.rename(
|
||||
columns={"geoid": self.GEOID_TRACT_FIELD_NAME}, inplace=True
|
||||
)
|
||||
|
||||
# The CHAS data has census tract ids such as `14000US01001020100`
|
||||
# Whereas the rest of our data uses, for the same tract, `01001020100`.
|
||||
# the characters before `US`:
|
||||
self.df[self.GEOID_TRACT_FIELD_NAME] = self.df[
|
||||
self.GEOID_TRACT_FIELD_NAME
|
||||
].str.replace(r"^.*?US", "", regex=True)
|
||||
# This reformats and renames this field.
|
||||
tmp_df[self.GEOID_TRACT_FIELD_NAME] = tmp_df["geoid"].str.replace(
|
||||
r"^.*?US", "", regex=True
|
||||
)
|
||||
|
||||
return tmp_df
|
||||
|
||||
def transform(self) -> None:
|
||||
logger.info("Transforming HUD Housing Data")
|
||||
|
||||
table_8 = self._read_chas_table("Table8.csv")
|
||||
table_3 = self._read_chas_table("Table3.csv")
|
||||
|
||||
self.df = table_8.merge(
|
||||
table_3, how="outer", on=self.GEOID_TRACT_FIELD_NAME
|
||||
)
|
||||
|
||||
# Calculate share that lacks indoor plumbing or kitchen
|
||||
# This is computed as
|
||||
# (
|
||||
# owner occupied without plumbing + renter occupied without plumbing
|
||||
# ) / (
|
||||
# total of owner and renter occupied
|
||||
# )
|
||||
self.df[self.NO_KITCHEN_OR_INDOOR_PLUMBING_FIELD_NAME] = (
|
||||
# T3_est3: owner-occupied lacking complete plumbing or kitchen facilities for all levels of income
|
||||
# T3_est46: subtotal: renter-occupied lacking complete plumbing or kitchen facilities for all levels of income
|
||||
# T3_est2: subtotal: owner-occupied for all levels of income
|
||||
# T3_est45: subtotal: renter-occupied for all levels of income
|
||||
self.df["T3_est3"]
|
||||
+ self.df["T3_est46"]
|
||||
) / (self.df["T3_est2"] + self.df["T3_est45"])
|
||||
|
||||
# Calculate housing burden
|
||||
# This is quite a number of steps. It does not appear to be accessible nationally in a simpler format, though.
|
||||
# See "CHAS data dictionary 12-16.xlsx"
|
||||
|
||||
# Owner occupied numerator fields
|
||||
OWNER_OCCUPIED_NUMERATOR_FIELDS = [
|
||||
# Column Name
|
||||
# Line_Type
|
||||
# Tenure
|
||||
# Household income
|
||||
# Cost burden
|
||||
# Facilities
|
||||
"T8_est7",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# less than or equal to 30% of HAMFI
|
||||
# greater than 30% but less than or equal to 50%
|
||||
# All
|
||||
"T8_est10",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# less than or equal to 30% of HAMFI
|
||||
# greater than 50%
|
||||
# All
|
||||
"T8_est20",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 30% but less than or equal to 50% of HAMFI
|
||||
# greater than 30% but less than or equal to 50%
|
||||
# All
|
||||
"T8_est23",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 30% but less than or equal to 50% of HAMFI
|
||||
# greater than 50%
|
||||
# All
|
||||
"T8_est33",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 50% but less than or equal to 80% of HAMFI
|
||||
# greater than 30% but less than or equal to 50%
|
||||
# All
|
||||
"T8_est36",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 50% but less than or equal to 80% of HAMFI
|
||||
# greater than 50%
|
||||
# All
|
||||
"T8_est7", # Owner, less than or equal to 30% of HAMFI, greater than 30% but less than or equal to 50%
|
||||
"T8_est10", # Owner, less than or equal to 30% of HAMFI, greater than 50%
|
||||
"T8_est20", # Owner, greater than 30% but less than or equal to 50% of HAMFI, greater than 30% but less than or equal to 50%
|
||||
"T8_est23", # Owner, greater than 30% but less than or equal to 50% of HAMFI, greater than 50%
|
||||
"T8_est33", # Owner, greater than 50% but less than or equal to 80% of HAMFI, greater than 30% but less than or equal to 50%
|
||||
"T8_est36", # Owner, greater than 50% but less than or equal to 80% of HAMFI, greater than 50%
|
||||
]
|
||||
|
||||
# These rows have the values where HAMFI was not computed, b/c of no or negative income.
|
||||
# They are in the same order as the rows above
|
||||
OWNER_OCCUPIED_NOT_COMPUTED_FIELDS = [
|
||||
# Column Name
|
||||
# Line_Type
|
||||
# Tenure
|
||||
# Household income
|
||||
# Cost burden
|
||||
# Facilities
|
||||
"T8_est13",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# less than or equal to 30% of HAMFI
|
||||
# not computed (no/negative income)
|
||||
# All
|
||||
"T8_est26",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 30% but less than or equal to 50% of HAMFI
|
||||
# not computed (no/negative income)
|
||||
# All
|
||||
"T8_est39",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 50% but less than or equal to 80% of HAMFI
|
||||
# not computed (no/negative income)
|
||||
# All
|
||||
"T8_est52",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 80% but less than or equal to 100% of HAMFI
|
||||
# not computed (no/negative income)
|
||||
# All
|
||||
"T8_est65",
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# greater than 100% of HAMFI
|
||||
# not computed (no/negative income)
|
||||
# All
|
||||
]
|
||||
|
||||
# This represents all owner-occupied housing units
|
||||
OWNER_OCCUPIED_POPULATION_FIELD = "T8_est2"
|
||||
# Subtotal
|
||||
# Owner occupied
|
||||
# All
|
||||
# All
|
||||
# All
|
||||
|
||||
# Renter occupied numerator fields
|
||||
RENTER_OCCUPIED_NUMERATOR_FIELDS = [
|
||||
|
|
Loading…
Add table
Reference in a new issue