Mirror of https://github.com/DOI-DO/j40-cejst-2.git (synced 2025-07-28 05:31:17 -07:00)
Add median house value to Definition L (#882)
* Added median house value to the ETL
* Added median house value to the score formula and comparison tool
parent 54bdda0f02
commit 05ebf9b48c

7 changed files with 90 additions and 48 deletions
@@ -50,6 +50,11 @@ class CensusACSETL(ExtractTransformLoad):
             "Percent of individuals < 200% Federal Poverty Line"
         )

+        self.MEDIAN_HOUSE_VALUE_FIELD = "B25077_001E"
+        self.MEDIAN_HOUSE_VALUE_FIELD_NAME = (
+            "Median value ($) of owner-occupied housing units"
+        )
+
         self.STATE_GEOID_FIELD_NAME = "GEOID2"
         self.df: pd.DataFrame

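For reference, B25077_001E is the ACS 5-year estimate of the median value of owner-occupied housing units. Below is a minimal sketch (not part of this commit) of pulling that variable straight from the public Census ACS API; the year, state, and geography are arbitrary choices for illustration, and the real ETL retrieves its variables in its own extract step rather than this way.

# Minimal sketch (not part of the commit): peek at the new ACS variable
# B25077_001E via the public Census ACS 5-year API. Year, state, and geography
# are arbitrary choices for illustration only.
import pandas as pd
import requests

ACS_URL = "https://api.census.gov/data/2019/acs/acs5"  # year chosen for illustration

rows = requests.get(
    ACS_URL,
    params={
        "get": "NAME,B25077_001E",
        "for": "county:*",
        "in": "state:06",  # California, picked arbitrarily
    },
    timeout=30,
).json()

# The API returns a header row followed by data rows.
df = pd.DataFrame(rows[1:], columns=rows[0])
df["B25077_001E"] = pd.to_numeric(df["B25077_001E"])
print(df.head())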
@@ -78,7 +83,10 @@ class CensusACSETL(ExtractTransformLoad):
                         # Emploment fields
                         "B23025_005E",
                         "B23025_003E",
+                        # Income field
                         self.MEDIAN_INCOME_FIELD,
+                        # House value
+                        self.MEDIAN_HOUSE_VALUE_FIELD,
                     ]
                     + self.LINGUISTIC_ISOLATION_FIELDS
                     + self.POVERTY_FIELDS,
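Note that the leading "+" on the last two lines above is Python's list concatenation operator inside the variable-list expression, not a diff marker. A minimal sketch of the same construct, with illustrative stand-ins for the pre-defined field lists (including the assumed median-income code B19013_001E):

# Minimal sketch (not part of the commit): the ACS variable list is a literal
# list concatenated with other pre-defined field lists. The lists and codes
# below are illustrative stand-ins, not the ETL's actual definitions.
MEDIAN_INCOME_FIELD = "B19013_001E"
MEDIAN_HOUSE_VALUE_FIELD = "B25077_001E"
LINGUISTIC_ISOLATION_FIELDS = ["C16002_001E"]  # illustrative subset
POVERTY_FIELDS = ["C17002_001E", "C17002_002E", "C17002_003E"]  # illustrative subset

variables = [
    # Employment fields
    "B23025_005E",
    "B23025_003E",
    # Income field
    MEDIAN_INCOME_FIELD,
    # House value
    MEDIAN_HOUSE_VALUE_FIELD,
] + LINGUISTIC_ISOLATION_FIELDS + POVERTY_FIELDS

print(variables)  # one flat list of ACS variable codes to request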
@@ -94,22 +102,27 @@ class CensusACSETL(ExtractTransformLoad):
     def transform(self) -> None:
         logger.info("Starting Census ACS Transform")

-        # Rename median income
-        self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
-            self.MEDIAN_INCOME_FIELD
-        ]
+        # Rename two fields.
+        self.df = self.df.rename(
+            columns={
+                self.MEDIAN_HOUSE_VALUE_FIELD: self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
+                self.MEDIAN_INCOME_FIELD: self.MEDIAN_INCOME_FIELD_NAME,
+            }
+        )

-        # Handle null values for CBG median income, which are `-666666666`.
-        missing_value_count = sum(
-            self.df[self.MEDIAN_INCOME_FIELD_NAME] == -666666666
-        )
-        logger.info(
-            f"There are {missing_value_count} ({int(100*missing_value_count/self.df[self.MEDIAN_INCOME_FIELD_NAME].count())}%) values of "
-            + f"`{self.MEDIAN_INCOME_FIELD_NAME}` being marked as null values."
-        )
-        self.df[self.MEDIAN_INCOME_FIELD_NAME] = self.df[
-            self.MEDIAN_INCOME_FIELD_NAME
-        ].replace(to_replace=-666666666, value=None)
+        # Handle null values for various fields, which are `-666666666`.
+        for field in [
+            self.MEDIAN_INCOME_FIELD_NAME,
+            self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
+        ]:
+            missing_value_count = sum(self.df[field] == -666666666)
+            logger.info(
+                f"There are {missing_value_count} ({int(100*missing_value_count/self.df[field].count())}%) values of "
+                + f"`{field}` being marked as null values."
+            )
+            self.df[field] = self.df[field].replace(
+                to_replace=-666666666, value=None
+            )

         # Calculate percent unemployment.
         # TODO: remove small-sample data that should be `None` instead of a high-variance fraction.
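The rewritten null handling folds the old median-income-only logic into a loop over every sentinel-coded field, including the new house value column. A minimal sketch of that pattern on toy data follows; column names and values are illustrative, and np.nan stands in for the None the ETL passes to .replace().

# Minimal sketch (not part of the commit): rename raw ACS codes to readable
# names and replace the Census sentinel -666666666 with a true null so it
# cannot skew downstream statistics.
import numpy as np
import pandas as pd

MEDIAN_INCOME_NAME = "Median household income (illustrative name)"
MEDIAN_HOUSE_VALUE_NAME = "Median value ($) of owner-occupied housing units"

df = pd.DataFrame(
    {
        "B19013_001E": [55_000, -666666666, 72_500],
        "B25077_001E": [-666666666, 310_000, 185_000],
    }
).rename(
    columns={
        "B19013_001E": MEDIAN_INCOME_NAME,
        "B25077_001E": MEDIAN_HOUSE_VALUE_NAME,
    }
)

for field in [MEDIAN_INCOME_NAME, MEDIAN_HOUSE_VALUE_NAME]:
    missing_value_count = int((df[field] == -666666666).sum())
    print(f"{missing_value_count} value(s) of `{field}` marked as null")
    df[field] = df[field].replace(-666666666, np.nan)

print(df.mean())  # sentinels no longer drag the averages to huge negatives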
@@ -133,8 +146,6 @@ class CensusACSETL(ExtractTransformLoad):
             / self.df["C16002_001E"]
         )

-        self.df[self.LINGUISTIC_ISOLATION_FIELD_NAME].describe()
-
         # Calculate percent at different poverty thresholds
         self.df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME] = (
             self.df["C17002_002E"] + self.df["C17002_003E"]
@@ -170,6 +181,7 @@ class CensusACSETL(ExtractTransformLoad):
             self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
             self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
             self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
+            self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
         ]

         self.df[columns_to_include].to_csv(
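The last hunk adds the new human-readable column to the set written out by the load step. A minimal sketch of that select-and-write pattern; the output path, GEOID column, and values are assumptions for illustration.

# Minimal sketch (not part of the commit): keep only the curated output columns
# and write them to CSV, mirroring the columns_to_include -> to_csv pattern.
from pathlib import Path

import pandas as pd

OUTPUT_CSV = Path("census_acs_example.csv")  # illustrative path

df = pd.DataFrame(
    {
        "GEOID10": ["010010201001", "010010201002"],  # illustrative block group IDs
        "Percent of individuals < 200% Federal Poverty Line": [0.31, 0.18],
        "Median value ($) of owner-occupied housing units": [118_000.0, 96_500.0],
        "B25077_001E": [118_000.0, 96_500.0],  # raw code column, intentionally dropped
    }
)

columns_to_include = [
    "GEOID10",
    "Percent of individuals < 200% Federal Poverty Line",
    "Median value ($) of owner-occupied housing units",
]

# Only the curated columns reach the CSV; raw ACS variable codes stay behind.
df[columns_to_include].to_csv(path_or_buf=OUTPUT_CSV, index=False)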