mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-22 01:31:25 -08:00
Low-Income Poverty Calculation
This commit is contained in:
parent
ba2e5eca45
commit
44f5aae0ca
8 changed files with 5858 additions and 73 deletions
|
@ -56,14 +56,33 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.MEDIAN_INCOME_FIELD_NAME = (
|
||||
"Median household income in the past 12 months"
|
||||
)
|
||||
|
||||
self.POVERTY_DATASET_TOTAL = "C17002_001E" # Estimate!!Total,
|
||||
self.POVERTY_UNDER_50PCT = "C17002_002E" # Estimate!!Total!!Under .50
|
||||
self.POVERTY_50PCT_TO_99PCT = (
|
||||
"C17002_003E" # Estimate!!Total!!.50 to .99
|
||||
)
|
||||
self.POVERTY_100PCT_TO_124PCT = (
|
||||
"C17002_004E" # Estimate!!Total!!1.00 to 1.24
|
||||
)
|
||||
self.POVERTY_125PCT_TO_149PCT = (
|
||||
"C17002_005E" # Estimate!!Total!!1.25 to 1.49
|
||||
)
|
||||
self.POVERTY_150PCT_TO_184PCT = (
|
||||
"C17002_006E" # Estimate!!Total!!1.50 to 1.84
|
||||
)
|
||||
self.POVERTY_185PCT_TO_199PCT = (
|
||||
"C17002_007E" # Estimate!!Total!!1.85 to 1.99
|
||||
)
|
||||
|
||||
self.POVERTY_FIELDS = [
|
||||
"C17002_001E", # Estimate!!Total,
|
||||
"C17002_002E", # Estimate!!Total!!Under .50
|
||||
"C17002_003E", # Estimate!!Total!!.50 to .99
|
||||
"C17002_004E", # Estimate!!Total!!1.00 to 1.24
|
||||
"C17002_005E", # Estimate!!Total!!1.25 to 1.49
|
||||
"C17002_006E", # Estimate!!Total!!1.50 to 1.84
|
||||
"C17002_007E", # Estimate!!Total!!1.85 to 1.99
|
||||
self.POVERTY_DATASET_TOTAL,
|
||||
self.POVERTY_UNDER_50PCT,
|
||||
self.POVERTY_50PCT_TO_99PCT,
|
||||
self.POVERTY_100PCT_TO_124PCT,
|
||||
self.POVERTY_125PCT_TO_149PCT,
|
||||
self.POVERTY_150PCT_TO_184PCT,
|
||||
self.POVERTY_185PCT_TO_199PCT,
|
||||
]
|
||||
|
||||
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME = (
|
||||
|
@ -75,19 +94,30 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
|
||||
"Percent of individuals < 200% Federal Poverty Line"
|
||||
)
|
||||
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
|
||||
"Percent of individuals < 200% Federal Poverty Line, imputed"
|
||||
self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME = (
|
||||
"Total population of individuals < 200% Federal Poverty Line"
|
||||
)
|
||||
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
|
||||
"Percent of individuals < 200% Federal Poverty Line," + " imputed"
|
||||
)
|
||||
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME = (
|
||||
"Total population of individuals < 200% Federal Poverty Line,"
|
||||
+ " imputed"
|
||||
)
|
||||
self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME = (
|
||||
"Total population of individuals < 100% Federal Poverty Line"
|
||||
)
|
||||
self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME = (
|
||||
"Total population of individuals < 100% Federal Poverty Line,"
|
||||
+ " imputed"
|
||||
)
|
||||
|
||||
self.ADJUSTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
|
||||
"Adjusted percent of individuals < 200% Federal Poverty Line"
|
||||
)
|
||||
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME_PRELIMINARY = (
|
||||
"Preliminary adjusted percent of individuals < 200% Federal Poverty Line,"
|
||||
+ " imputed"
|
||||
)
|
||||
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
|
||||
"Adjusted percent of individuals < 200% Federal Poverty Line,"
|
||||
+ " imputed"
|
||||
|
@ -148,32 +178,102 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
)
|
||||
self.HIGH_SCHOOL_ED_FIELD = "Percent individuals age 25 or over with less than high school degree"
|
||||
|
||||
# College attendance fields
|
||||
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED = (
|
||||
"B14004_001E" # Estimate!!Total
|
||||
)
|
||||
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC = "B14004_003E" # Estimate!!Total!!Male!!Enrolled in public college or graduate school
|
||||
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE = "B14004_008E" # Estimate!!Total!!Male!!Enrolled in private college or graduate school
|
||||
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC = "B14004_019E" # Estimate!!Total!!Female!!Enrolled in public college or graduate school
|
||||
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE = "B14004_024E" # Estimate!!Total!!Female!!Enrolled in private college or graduate school
|
||||
## Off-Campus University Student Poverty Fields
|
||||
# Estimate!!Total:!!Income in the past 12 months below the poverty level:!!
|
||||
# Enrolled in school:!!Enrolled in college undergraduate years
|
||||
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE = "B14006_009E"
|
||||
# Estimate!!Total:!!Income in the past 12 months below the poverty level:!!
|
||||
# Enrolled in school:!!Enrolled in graduate or professional school
|
||||
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE = "B14006_010E"
|
||||
# Estimate!!Total:!!Income in the past 12 months at or above the poverty level:!!
|
||||
# Enrolled in school:!!Enrolled in college undergraduate years
|
||||
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE = "B14006_019E"
|
||||
# Estimate!!Total:!!Income in the past 12 months at or above the poverty level:!!
|
||||
# Enrolled in school:!!Enrolled in graduate or professional school
|
||||
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE = "B14006_020E"
|
||||
|
||||
self.COLLEGE_ATTENDANCE_FIELDS = [
|
||||
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED,
|
||||
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC,
|
||||
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE,
|
||||
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC,
|
||||
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE,
|
||||
self.UNIVERSITY_POVERTY_FIELDS = [
|
||||
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE,
|
||||
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE,
|
||||
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE,
|
||||
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE,
|
||||
]
|
||||
|
||||
self.COLLEGE_ATTENDANCE_FIELD = (
|
||||
self.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD = (
|
||||
"Population below poverty line enrolled in an undergraduate program"
|
||||
+ " (excluding students living in university housing)"
|
||||
)
|
||||
self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD = (
|
||||
"Population below poverty line enrolled in an undergraduate program"
|
||||
+ " (excluding students living in university housing), imputed"
|
||||
)
|
||||
self.OFFCAMPUS_UNDERGRADUATE_FIELD = (
|
||||
"Population enrolled in an undergraduate program"
|
||||
+ " (excluding students living in university housing)"
|
||||
)
|
||||
self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_FIELD = (
|
||||
"Population enrolled in an undergraduate program"
|
||||
+ " (excluding students living in university housing), imputed"
|
||||
)
|
||||
self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD = (
|
||||
"Population below poverty line enrolled in an undergraduate, graduate, or professional program"
|
||||
+ " (excluding students living in university housing)"
|
||||
)
|
||||
self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD = (
|
||||
"Population below poverty line enrolled in an undergraduate, graduate, or professional program"
|
||||
+ " (excluding students living in university housing), imputed"
|
||||
)
|
||||
self.OFFCAMPUS_UNIVERSITY_FIELD = (
|
||||
"Population enrolled in an undergraduate, graduate, or professional program"
|
||||
+ " (excluding students living in university housing)"
|
||||
)
|
||||
self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD = (
|
||||
"Population enrolled in an undergraduate, graduate, or professional program"
|
||||
+ " (excluding students living in university housing), imputed"
|
||||
)
|
||||
self.IMPUTED_POVERTY_DATASET_TOTAL = (
|
||||
"Total population in poverty dataset (all income levels)"
|
||||
+ ", imputed"
|
||||
)
|
||||
self.OVERALL_RATIO_200FPL_TO_100FPL = (
|
||||
"Ratio <200% FPL to <100% FPL, overall"
|
||||
)
|
||||
self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL = "Estimated population count of off-campus university students <200% FPL"
|
||||
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE = (
|
||||
"Estimated population count of people in a househould with income <200% FPL"
|
||||
+ ", excluding all university students"
|
||||
)
|
||||
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY = (
|
||||
"Everyone in poverty dataset"
|
||||
+ ", minus all off-campus university students"
|
||||
)
|
||||
|
||||
# University Enrollment Rates (15+ population, includes students in dorms)
|
||||
self.UNIVERSITY_ATTENDANCE_TOTAL_POPULATION_ASKED = (
|
||||
"B14004_001E" # Estimate!!Total
|
||||
)
|
||||
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PUBLIC = "B14004_003E" # Estimate!!Total!!Male!!Enrolled in public college or graduate school
|
||||
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PRIVATE = "B14004_008E" # Estimate!!Total!!Male!!Enrolled in private college or graduate school
|
||||
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PUBLIC = "B14004_019E" # Estimate!!Total!!Female!!Enrolled in public college or graduate school
|
||||
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PRIVATE = "B14004_024E" # Estimate!!Total!!Female!!Enrolled in private college or graduate school
|
||||
|
||||
self.UNIVERSITY_ATTENDANCE_FIELDS = [
|
||||
self.UNIVERSITY_ATTENDANCE_TOTAL_POPULATION_ASKED,
|
||||
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PUBLIC,
|
||||
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PRIVATE,
|
||||
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PUBLIC,
|
||||
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PRIVATE,
|
||||
]
|
||||
|
||||
self.UNIVERSITY_ATTENDANCE_FIELD = (
|
||||
"Percent enrollment in college or graduate school"
|
||||
)
|
||||
|
||||
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD = (
|
||||
self.IMPUTED_UNIVERSITY_ATTENDANCE_FIELD = (
|
||||
"Percent enrollment in college or graduate school, imputed"
|
||||
)
|
||||
|
||||
self.COLLEGE_NON_ATTENDANCE_FIELD = "Percent of population not currently enrolled in college or graduate school"
|
||||
self.UNIVERSITY_NON_ATTENDANCE_FIELD = "Percent of population not currently enrolled in college or graduate school"
|
||||
|
||||
self.RE_FIELDS = [
|
||||
"B02001_001E",
|
||||
|
@ -295,11 +395,29 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
|
||||
self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
|
||||
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
|
||||
self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
|
||||
self.HIGH_SCHOOL_ED_FIELD,
|
||||
self.COLLEGE_ATTENDANCE_FIELD,
|
||||
self.COLLEGE_NON_ATTENDANCE_FIELD,
|
||||
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
|
||||
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE,
|
||||
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE,
|
||||
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE,
|
||||
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE,
|
||||
self.OVERALL_RATIO_200FPL_TO_100FPL,
|
||||
self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL,
|
||||
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE,
|
||||
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY,
|
||||
self.UNIVERSITY_ATTENDANCE_FIELD,
|
||||
self.UNIVERSITY_NON_ATTENDANCE_FIELD,
|
||||
self.IMPUTED_UNIVERSITY_ATTENDANCE_FIELD,
|
||||
self.OFFCAMPUS_UNIVERSITY_FIELD,
|
||||
self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD,
|
||||
self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
|
||||
self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
|
||||
self.POVERTY_DATASET_TOTAL,
|
||||
self.IMPUTED_POVERTY_DATASET_TOTAL,
|
||||
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
|
||||
]
|
||||
+ self.RE_OUTPUT_FIELDS
|
||||
|
@ -315,6 +433,7 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
)
|
||||
|
||||
self.df: pd.DataFrame
|
||||
self.geo_df: gpd.GeoDataFrame
|
||||
|
||||
def get_data_sources(self) -> [DataSource]:
|
||||
# Define the variables to retrieve
|
||||
|
@ -328,7 +447,8 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
+ self.POVERTY_FIELDS
|
||||
+ self.EDUCATIONAL_FIELDS
|
||||
+ self.RE_FIELDS
|
||||
+ self.COLLEGE_ATTENDANCE_FIELDS
|
||||
+ self.UNIVERSITY_POVERTY_FIELDS
|
||||
+ self.UNIVERSITY_ATTENDANCE_FIELDS
|
||||
+ self.AGE_INPUT_FIELDS
|
||||
)
|
||||
|
||||
|
@ -383,11 +503,7 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
dtype={field_names.GEOID_TRACT_FIELD: "string"},
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
df = self.df
|
||||
|
||||
# Here we join the geometry of the US to the dataframe so that we can impute
|
||||
# The income of neighbors. first this looks locally; if there's no local
|
||||
# Load the census GeoJSON. First this looks locally; if there's no local
|
||||
# geojson file for all of the US, this will read it off of S3
|
||||
logger.debug("Reading in geojson for the country")
|
||||
if not os.path.exists(
|
||||
|
@ -400,13 +516,18 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
self.DATA_PATH,
|
||||
)
|
||||
|
||||
geo_df = gpd.read_file(
|
||||
self.geo_df = gpd.read_file(
|
||||
self.DATA_PATH / "census" / "geojson" / "us.json",
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
df = self.df
|
||||
|
||||
# Here we join the geometry of the US to the dataframe so that we can impute
|
||||
# The income of neighbors.
|
||||
df = CensusACSETL.merge_geojson(
|
||||
df=df,
|
||||
usa_geo_df=geo_df,
|
||||
usa_geo_df=self.geo_df,
|
||||
)
|
||||
|
||||
# Rename some fields.
|
||||
|
@ -455,24 +576,57 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
|
||||
# Calculate percent at different poverty thresholds
|
||||
df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME] = (
|
||||
df["C17002_002E"] + df["C17002_003E"]
|
||||
) / df["C17002_001E"]
|
||||
df[self.POVERTY_UNDER_50PCT] + df[self.POVERTY_50PCT_TO_99PCT]
|
||||
) / df[self.POVERTY_DATASET_TOTAL]
|
||||
|
||||
df[self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME] = (
|
||||
df["C17002_002E"]
|
||||
+ df["C17002_003E"]
|
||||
+ df["C17002_004E"]
|
||||
+ df["C17002_005E"]
|
||||
) / df["C17002_001E"]
|
||||
df[self.POVERTY_UNDER_50PCT]
|
||||
+ df[self.POVERTY_50PCT_TO_99PCT]
|
||||
+ df[self.POVERTY_100PCT_TO_124PCT]
|
||||
+ df[self.POVERTY_125PCT_TO_149PCT]
|
||||
) / df[self.POVERTY_DATASET_TOTAL]
|
||||
|
||||
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME] = (
|
||||
df["C17002_002E"]
|
||||
+ df["C17002_003E"]
|
||||
+ df["C17002_004E"]
|
||||
+ df["C17002_005E"]
|
||||
+ df["C17002_006E"]
|
||||
+ df["C17002_007E"]
|
||||
) / df["C17002_001E"]
|
||||
df[self.POVERTY_UNDER_50PCT]
|
||||
+ df[self.POVERTY_50PCT_TO_99PCT]
|
||||
+ df[self.POVERTY_100PCT_TO_124PCT]
|
||||
+ df[self.POVERTY_125PCT_TO_149PCT]
|
||||
+ df[self.POVERTY_150PCT_TO_184PCT]
|
||||
+ df[self.POVERTY_185PCT_TO_199PCT]
|
||||
) / df[self.POVERTY_DATASET_TOTAL]
|
||||
|
||||
# COUNT of Poverty less than 200%
|
||||
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME] = (
|
||||
df[self.POVERTY_UNDER_50PCT]
|
||||
+ df[self.POVERTY_50PCT_TO_99PCT]
|
||||
+ df[self.POVERTY_100PCT_TO_124PCT]
|
||||
+ df[self.POVERTY_125PCT_TO_149PCT]
|
||||
+ df[self.POVERTY_150PCT_TO_184PCT]
|
||||
+ df[self.POVERTY_185PCT_TO_199PCT]
|
||||
)
|
||||
|
||||
df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME] = (
|
||||
df[self.POVERTY_UNDER_50PCT] + df[self.POVERTY_50PCT_TO_99PCT]
|
||||
)
|
||||
|
||||
# Off-Campus University Fields:
|
||||
df[self.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD] = df[
|
||||
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE
|
||||
]
|
||||
df[self.OFFCAMPUS_UNDERGRADUATE_FIELD] = (
|
||||
df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE]
|
||||
+ df[self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE]
|
||||
)
|
||||
df[self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD] = (
|
||||
df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE]
|
||||
+ df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE]
|
||||
)
|
||||
df[self.OFFCAMPUS_UNIVERSITY_FIELD] = (
|
||||
df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE]
|
||||
+ df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE]
|
||||
+ df[self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE]
|
||||
+ df[self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE]
|
||||
)
|
||||
|
||||
# Calculate educational attainment
|
||||
educational_numerator_fields = [
|
||||
|
@ -596,16 +750,16 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
df[sum_columns].sum(axis=1) / df[field_names.TOTAL_POP_FIELD]
|
||||
)
|
||||
|
||||
# Calculate college attendance and adjust low income
|
||||
df[self.COLLEGE_ATTENDANCE_FIELD] = (
|
||||
df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC]
|
||||
+ df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE]
|
||||
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC]
|
||||
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
|
||||
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
|
||||
# Calculate university attendance and adjust low income
|
||||
df[self.UNIVERSITY_ATTENDANCE_FIELD] = (
|
||||
df[self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PUBLIC]
|
||||
+ df[self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PRIVATE]
|
||||
+ df[self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PUBLIC]
|
||||
+ df[self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
|
||||
) / df[self.UNIVERSITY_ATTENDANCE_TOTAL_POPULATION_ASKED]
|
||||
|
||||
df[self.COLLEGE_NON_ATTENDANCE_FIELD] = (
|
||||
1 - df[self.COLLEGE_ATTENDANCE_FIELD]
|
||||
df[self.UNIVERSITY_NON_ATTENDANCE_FIELD] = (
|
||||
1 - df[self.UNIVERSITY_ATTENDANCE_FIELD]
|
||||
)
|
||||
|
||||
# we impute income for both income measures
|
||||
|
@ -618,8 +772,36 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.COLLEGE_ATTENDANCE_FIELD,
|
||||
imputed_field_name=self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
|
||||
raw_field_name=self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD,
|
||||
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.OFFCAMPUS_UNDERGRADUATE_FIELD,
|
||||
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_FIELD,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
|
||||
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.OFFCAMPUS_UNIVERSITY_FIELD,
|
||||
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.UNIVERSITY_ATTENDANCE_FIELD,
|
||||
imputed_field_name=self.IMPUTED_UNIVERSITY_ATTENDANCE_FIELD,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.POVERTY_DATASET_TOTAL,
|
||||
imputed_field_name=self.IMPUTED_POVERTY_DATASET_TOTAL,
|
||||
),
|
||||
CensusACSETL.ImputeVariables(
|
||||
raw_field_name=self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
|
||||
),
|
||||
],
|
||||
geo_df=df,
|
||||
|
@ -629,21 +811,156 @@ class CensusACSETL(ExtractTransformLoad):
|
|||
|
||||
logger.debug("Calculating with imputed values")
|
||||
|
||||
# pylint: disable=pointless-string-statement
|
||||
"""
|
||||
POVERTY CALCULATION
|
||||
|
||||
Goal: Calculate the portion of people in households where income
|
||||
is less than or equal to twice the federal poverty level,
|
||||
not including students enrolled in higher ed.
|
||||
|
||||
Approach: To do this, we must make an adjustment to remove off-campus university students
|
||||
from numbers reported by the ACS. We use the "interpolated" method to estimate
|
||||
the number of off-campus university students actually included in the unadjusted numerator.
|
||||
|
||||
Interpolated Poverty Calculation, Step-by-Step Methodology
|
||||
|
||||
Step 1: Estimate ratio of overall population <200% FPL : overall population <100% FPL
|
||||
Overall ratio 200:100 FPL =
|
||||
max(
|
||||
max[
|
||||
Total population <200% FPL,
|
||||
1
|
||||
]
|
||||
/
|
||||
max[
|
||||
Total population <100% FPL,
|
||||
1
|
||||
],
|
||||
1)
|
||||
|
||||
Step 2: Interpolate the number of off-campus university students <200% FPL
|
||||
Estimated university population <200% FPL =
|
||||
min(
|
||||
max[
|
||||
University population <100% FPL x Overall ratio 200:100 FPL,
|
||||
0 # nb: actual lower bound is the population university <100%, because ratio is clipped at 1
|
||||
],
|
||||
Total number of off-campus university students
|
||||
)
|
||||
|
||||
Step 3: Subtract off-campus university students from both numerator and denominator of the unadjusted poverty rate
|
||||
Adjusted poverty rate =
|
||||
min(
|
||||
max [
|
||||
(
|
||||
max[
|
||||
Overall <200% FPL population - Estimated university population <200% FPL,
|
||||
0
|
||||
]
|
||||
/
|
||||
max[
|
||||
Everyone in poverty dataset - University total population,
|
||||
1
|
||||
],
|
||||
),
|
||||
0
|
||||
],
|
||||
1
|
||||
)
|
||||
"""
|
||||
# pylint: enable=pointless-string-statement
|
||||
|
||||
### Add fields for poverty calculation numerator
|
||||
|
||||
# Step 1: Estimate ratio of overall population <200% FPL : overall population <100% FPL
|
||||
df[self.OVERALL_RATIO_200FPL_TO_100FPL] = (
|
||||
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME]
|
||||
.fillna(
|
||||
df[
|
||||
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME
|
||||
]
|
||||
# Use clip for consistency with denominator
|
||||
)
|
||||
.clip(lower=1)
|
||||
/ df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME]
|
||||
.fillna(
|
||||
df[
|
||||
self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME
|
||||
]
|
||||
# Use clip to ensure we never divide by 0
|
||||
)
|
||||
.clip(lower=1)
|
||||
# Use clip to ensure that the ratio of poverty <200%:<100% is not lower than 1
|
||||
).clip(lower=1)
|
||||
|
||||
# Step 2: Interpolate the number of off-campus university students <200% FPL
|
||||
df[self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL] = (
|
||||
df[self.OVERALL_RATIO_200FPL_TO_100FPL]
|
||||
* (
|
||||
df[
|
||||
self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD
|
||||
].fillna( # corresponds to <100% FPL
|
||||
df[self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD]
|
||||
)
|
||||
)
|
||||
# ensure that estimated count of university <200% is between 0 and the total number of university students
|
||||
# nb: actual lower bound is university <100%, because ratio is clipped at 1
|
||||
).clip(
|
||||
lower=0,
|
||||
upper=df[self.OFFCAMPUS_UNIVERSITY_FIELD].fillna(
|
||||
df[self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD]
|
||||
),
|
||||
)
|
||||
|
||||
# Step 3a: Subtract off-campus university students from numerator of the unadjusted poverty rate
|
||||
df[
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
|
||||
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE
|
||||
] = (
|
||||
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME].fillna(
|
||||
df[self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME]
|
||||
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME].fillna(
|
||||
df[
|
||||
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME
|
||||
]
|
||||
)
|
||||
- df[self.COLLEGE_ATTENDANCE_FIELD].fillna(
|
||||
df[self.IMPUTED_COLLEGE_ATTENDANCE_FIELD]
|
||||
)
|
||||
# Use clip to ensure that the values are not negative if college attendance
|
||||
# is very high
|
||||
- df[self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL]
|
||||
# Use clip as extra precaution against negative values
|
||||
).clip(
|
||||
lower=0
|
||||
)
|
||||
|
||||
### Add denominator field for poverty calculation
|
||||
# Step 3b: Subtract off-campus university students from denominator of the unadjusted poverty rate
|
||||
df[
|
||||
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY
|
||||
] = (
|
||||
df[self.POVERTY_DATASET_TOTAL].fillna(
|
||||
df[self.IMPUTED_POVERTY_DATASET_TOTAL]
|
||||
)
|
||||
- df[self.OFFCAMPUS_UNIVERSITY_FIELD].fillna(
|
||||
df[self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD]
|
||||
)
|
||||
# Use clip as extra precaution against values <=0
|
||||
).clip(
|
||||
lower=1
|
||||
)
|
||||
|
||||
# Step 3c: Put the numerator and denominator together to calculate the final adjusted poverty rate
|
||||
# NB: numerator and denominator are both already imputed and clipped
|
||||
df[
|
||||
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
|
||||
] = (
|
||||
df[
|
||||
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE
|
||||
]
|
||||
/ df[
|
||||
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY
|
||||
]
|
||||
# Clip to ensure percentage is between 0 and 1
|
||||
).clip(
|
||||
lower=0, upper=1
|
||||
)
|
||||
|
||||
## CHECK OUTPUT AND SAVE RESULTS
|
||||
# All values should have a value at this point
|
||||
assert (
|
||||
# For tracts with >0 population
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,25 @@
|
|||
# How to generate the sample data in this folder
|
||||
|
||||
The sample data in this folder can be easily generated by debugging the `data_pipeline/etl/sources/census_acs/etl.py` file
|
||||
and exporting data using the debugger console. Examples of this exporting are below.
|
||||
|
||||
## Why in pickle format?
|
||||
|
||||
Exporting as a Pickle file keeps all the metadata about the columns, including the data types. If we were to export as CSV, we would need
|
||||
to code the data types in the test fixtures for all the columns for the comparison to be correct.
|
||||
|
||||
### Transform tests input files
|
||||
- `acs_transform_input.pkl` - this file contains downloaded Census data that is used as input.
|
||||
- `acs_transform_geojson.geojson` - this file contains the Census GeoJSON data that is used as input.
|
||||
1. Place a breakpoint in `data_pipeline/etl/sources/census_acs/etl.py` in the `transform` method right at the beginning
|
||||
and start the debugger running the ETL run command for Census ACS (`etl-run -d census_acs`).
|
||||
1. Partially export the `self.df` and `self.geo_df` data to files once the debugger pauses at the breakpoint. Use these
|
||||
sample commands in the debugger console.
|
||||
```python
|
||||
t_list = ['01073001100', '01073001400', '01073002000', '01073003802', '01073004000']
|
||||
self.geo_df[self.geo_df['GEOID10'].isin(t_list)].to_file('data_pipeline/tests/sources/census_acs/data/transform/acs_transform_geojson.geojson')
|
||||
test_df = self.df[self.df['GEOID10_TRACT'].isin(t_list)].copy()
|
||||
# Setting this one row to N/A allows the imputations code to succeed
|
||||
test_df.at[4, self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE] = pd.NA
|
||||
test_df.to_pickle('data_pipeline/tests/sources/census_acs/data/transform/acs_transform_input.pkl')
|
||||
```
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"type": "FeatureCollection",
|
||||
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
|
||||
"features": [
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "001400", "GEOID10": "01073001400", "NAME10": "14", "NAMELSAD10": "Census Tract 14", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 2241287, "AWATER10": 0, "INTPTLAT10": "+33.5261497", "INTPTLON10": "-086.8351469" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.840884, 33.527586 ], [ -86.837824, 33.528871 ], [ -86.83693, 33.530023 ], [ -86.83679, 33.530205 ], [ -86.83639, 33.530805 ], [ -86.835352, 33.531179 ], [ -86.833843, 33.532003 ], [ -86.832035, 33.532595 ], [ -86.831245, 33.532854 ], [ -86.829135, 33.533924 ], [ -86.827029, 33.534708 ], [ -86.826575, 33.534968 ], [ -86.825634, 33.535508 ], [ -86.824369, 33.536271 ], [ -86.823926, 33.536515 ], [ -86.823758, 33.536606 ], [ -86.823683, 33.536184 ], [ -86.823591, 33.535542 ], [ -86.823596, 33.53486 ], [ -86.823637, 33.534404 ], [ -86.823739, 33.533793 ], [ -86.82384, 33.533389 ], [ -86.824083, 33.532657 ], [ -86.824469, 33.531855 ], [ -86.825589, 33.530001 ], [ -86.825945, 33.529412 ], [ -86.826278, 33.528861 ], [ -86.82662, 33.528272 ], [ -86.826703, 33.528081 ], [ -86.826785, 33.5278 ], [ -86.826911, 33.527375 ], [ -86.827001, 33.526685 ], [ -86.827025, 33.525543 ], [ -86.827091, 33.524614 ], [ -86.827086, 33.523811 ], [ -86.827086, 33.523767 ], [ -86.827091, 33.523677 ], [ -86.827125, 33.523051 ], [ -86.827117, 33.522607 ], [ -86.827088, 33.522283 ], [ -86.827006, 33.521836 ], [ -86.826987, 33.521701 ], [ -86.826807, 33.521107 ], [ -86.829003, 33.520829 ], [ -86.829532, 33.520498 ], [ -86.830151, 33.52015 ], [ -86.830507, 33.520037 ], [ -86.830666, 33.519986 ], [ -86.831415, 33.519802 ], [ -86.831968, 33.519782 ], [ -86.83261, 33.519835 ], [ -86.834123, 33.520139 ], [ -86.834279, 33.52017 ], [ -86.834582, 33.520223 ], [ -86.836148, 33.520497 ], [ -86.837263, 33.520682 ], [ -86.837708, 33.52073 ], [ -86.837983, 33.520761 ], [ -86.838571, 33.520791 ], [ -86.839295, 33.520792 ], [ -86.83942, 33.520792 ], [ 
-86.839596, 33.520792 ], [ -86.840901, 33.520808 ], [ -86.842445, 33.520826 ], [ -86.843989, 33.520846 ], [ -86.845617, 33.520865 ], [ -86.846194, 33.520873 ], [ -86.84942, 33.520908 ], [ -86.849421, 33.521133 ], [ -86.84944, 33.521596 ], [ -86.849499, 33.521692 ], [ -86.849433, 33.522393 ], [ -86.849582, 33.523085 ], [ -86.849667, 33.523435 ], [ -86.849748, 33.523804 ], [ -86.849783, 33.523978 ], [ -86.849812, 33.52412 ], [ -86.84985, 33.52431 ], [ -86.849984, 33.524884 ], [ -86.850106, 33.525442 ], [ -86.85023, 33.525973 ], [ -86.850303, 33.526624 ], [ -86.850312, 33.526707 ], [ -86.850132, 33.526701 ], [ -86.84979, 33.526605 ], [ -86.849721, 33.526576 ], [ -86.849042, 33.526293 ], [ -86.848434, 33.526035 ], [ -86.847961, 33.525834 ], [ -86.847891, 33.525805 ], [ -86.847787, 33.525787 ], [ -86.845591, 33.525405 ], [ -86.842991, 33.526505 ], [ -86.840884, 33.527586 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "001100", "GEOID10": "01073001100", "NAME10": "11", "NAMELSAD10": "Census Tract 11", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 11428313, "AWATER10": 3862, "INTPTLAT10": "+33.5423337", "INTPTLON10": "-086.8765161" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.882435, 33.552326 ], [ -86.881871, 33.552684 ], [ -86.881177, 33.552987 ], [ -86.880549, 33.553159 ], [ -86.880024, 33.553247 ], [ -86.879772, 33.553268 ], [ -86.879039, 33.553269 ], [ -86.877437, 33.553193 ], [ -86.873225, 33.553055 ], [ -86.871055, 33.55294 ], [ -86.870488, 33.552953 ], [ -86.870028, 33.552996 ], [ -86.869495, 33.553081 ], [ -86.86902, 33.553198 ], [ -86.868549, 33.553418 ], [ -86.868064, 33.553783 ], [ -86.867614, 33.554299 ], [ -86.867234, 33.554811 ], [ -86.866179, 33.556229 ], [ -86.865886, 33.556531 ], [ -86.865428, 33.556909 ], [ -86.8651, 33.557135 ], [ -86.863343, 33.558141 ], [ -86.861725, 33.55911 ], [ -86.860721, 33.559702 ], [ -86.859731, 33.560316 ], [ -86.858524, 33.561035 ], [ -86.858002, 33.561372 ], [ -86.857324, 33.561719 ], [ -86.856471, 33.562004 ], [ -86.856129, 33.562064 ], [ -86.851595, 33.559404 ], [ -86.848891, 33.559805 ], [ -86.852991, 33.552104 ], [ -86.853223, 33.551239 ], [ -86.855191, 33.543905 ], [ -86.855694, 33.538151 ], [ -86.85606, 33.537889 ], [ -86.856269, 33.537719 ], [ -86.856472, 33.53752 ], [ -86.856779, 33.53719 ], [ -86.857174, 33.536763 ], [ -86.85758, 33.536346 ], [ -86.857795, 33.536127 ], [ -86.858137, 33.535851 ], [ -86.858444, 33.535632 ], [ -86.858851, 33.53539 ], [ -86.85921, 33.535203 ], [ -86.860817, 33.53405 ], [ -86.860936, 33.534028 ], [ -86.864589, 33.533319 ], [ -86.86658, 33.532927 ], [ -86.867357, 33.53278 ], [ -86.867868, 33.532651 ], [ -86.868475, 33.53246 ], [ -86.870603, 33.531691 ], [ -86.870854, 33.531601 ], [ -86.875846, 33.529909 ], [ -86.878638, 33.528917 ], [ -86.878921, 33.528817 ], [ -86.879007, 33.528787 ], [ 
-86.878999, 33.529822 ], [ -86.8796, 33.529851 ], [ -86.88017, 33.529879 ], [ -86.880793, 33.529909 ], [ -86.880872, 33.529919 ], [ -86.881324, 33.529935 ], [ -86.88209, 33.529947 ], [ -86.882375, 33.529953 ], [ -86.882779, 33.529958 ], [ -86.88349, 33.529994 ], [ -86.884213, 33.530005 ], [ -86.885074, 33.530017 ], [ -86.885649, 33.530026 ], [ -86.885884, 33.530034 ], [ -86.886535, 33.530037 ], [ -86.887886, 33.530039 ], [ -86.887968, 33.530043 ], [ -86.889407, 33.530064 ], [ -86.889415, 33.529614 ], [ -86.889422, 33.529159 ], [ -86.890877, 33.529167 ], [ -86.891569, 33.52917 ], [ -86.891643, 33.52917 ], [ -86.891832, 33.528964 ], [ -86.892208, 33.528556 ], [ -86.893389, 33.527586 ], [ -86.894533, 33.526558 ], [ -86.897192, 33.528505 ], [ -86.896992, 33.530504 ], [ -86.901104, 33.532548 ], [ -86.903492, 33.533105 ], [ -86.908792, 33.535505 ], [ -86.910592, 33.536105 ], [ -86.909292, 33.538305 ], [ -86.909392, 33.539005 ], [ -86.908162, 33.539693 ], [ -86.908095, 33.539893 ], [ -86.907692, 33.540605 ], [ -86.907192, 33.542205 ], [ -86.905392, 33.541305 ], [ -86.903103, 33.542516 ], [ -86.901996, 33.543221 ], [ -86.901608, 33.543456 ], [ -86.901167, 33.54412 ], [ -86.900913, 33.544384 ], [ -86.899614, 33.545658 ], [ -86.899456, 33.545794 ], [ -86.896838, 33.547888 ], [ -86.896206, 33.548394 ], [ -86.895817, 33.548705 ], [ -86.895692, 33.548805 ], [ -86.894818, 33.548368 ], [ -86.894292, 33.548105 ], [ -86.893192, 33.548205 ], [ -86.893149, 33.548341 ], [ -86.892544, 33.550241 ], [ -86.892492, 33.550404 ], [ -86.889392, 33.550505 ], [ -86.888392, 33.549304 ], [ -86.886592, 33.550205 ], [ -86.884997, 33.550205 ], [ -86.8849, 33.551105 ], [ -86.883737, 33.551726 ], [ -86.883615, 33.551775 ], [ -86.883463, 33.551713 ], [ -86.883374, 33.551707 ], [ -86.883263, 33.551733 ], [ -86.883102, 33.551828 ], [ -86.882615, 33.552213 ], [ -86.882435, 33.552326 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "002000", "GEOID10": "01073002000", "NAME10": "20", "NAMELSAD10": "Census Tract 20", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 3605025, "AWATER10": 0, "INTPTLAT10": "+33.5591908", "INTPTLON10": "-086.7233518" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.713902, 33.539302 ], [ -86.714347, 33.539121 ], [ -86.714768, 33.539008 ], [ -86.715254, 33.538933 ], [ -86.715891, 33.538922 ], [ -86.716497, 33.538825 ], [ -86.716288, 33.539004 ], [ -86.719691, 33.540211 ], [ -86.720436, 33.543861 ], [ -86.72065, 33.544904 ], [ -86.72064, 33.545552 ], [ -86.720624, 33.545854 ], [ -86.720641, 33.546602 ], [ -86.720666, 33.547704 ], [ -86.720671, 33.548361 ], [ -86.720685, 33.549495 ], [ -86.720702, 33.550497 ], [ -86.720715, 33.551287 ], [ -86.720184, 33.551274 ], [ -86.720099, 33.551271 ], [ -86.719672, 33.551264 ], [ -86.719527, 33.551266 ], [ -86.718937, 33.55126 ], [ -86.718379, 33.551245 ], [ -86.717792, 33.551257 ], [ -86.717277, 33.551242 ], [ -86.716713, 33.551233 ], [ -86.716142, 33.551223 ], [ -86.715577, 33.551214 ], [ -86.715006, 33.5512 ], [ -86.714442, 33.551191 ], [ -86.713877, 33.551181 ], [ -86.713881, 33.552963 ], [ -86.713323, 33.552953 ], [ -86.712752, 33.552939 ], [ -86.71275, 33.553417 ], [ -86.712753, 33.554307 ], [ -86.713324, 33.554333 ], [ -86.713888, 33.554326 ], [ -86.714462, 33.55435 ], [ -86.71507, 33.554366 ], [ -86.715588, 33.554385 ], [ -86.71612, 33.554384 ], [ -86.716664, 33.554437 ], [ -86.717222, 33.554653 ], [ -86.717741, 33.554882 ], [ -86.71829, 33.55513 ], [ -86.718989, 33.555448 ], [ -86.719231, 33.555565 ], [ -86.719603, 33.555718 ], [ -86.72003, 33.555922 ], [ -86.720245, 33.556011 ], [ -86.720876, 33.556274 ], [ -86.721511, 33.556456 ], [ -86.7218, 33.556658 ], [ -86.722005, 33.556801 ], [ -86.722554, 33.557118 ], [ -86.723614, 33.556025 ], [ -86.723496, 33.555943 ], [ -86.723105, 33.55567 ], [ -86.72257, 33.555315 ], [ 
-86.723206, 33.554639 ], [ -86.723346, 33.554398 ], [ -86.72355, 33.554328 ], [ -86.723773, 33.55433 ], [ -86.724149, 33.554569 ], [ -86.724693, 33.554905 ], [ -86.725189, 33.555287 ], [ -86.725711, 33.555637 ], [ -86.726233, 33.555992 ], [ -86.726803, 33.556395 ], [ -86.727325, 33.556739 ], [ -86.727847, 33.557094 ], [ -86.728377, 33.557456 ], [ -86.728891, 33.557804 ], [ -86.729406, 33.558156 ], [ -86.729942, 33.55852 ], [ -86.730464, 33.558875 ], [ -86.731032, 33.559263 ], [ -86.729985, 33.560323 ], [ -86.728915, 33.561436 ], [ -86.728813, 33.56155 ], [ -86.727701, 33.563268 ], [ -86.726796, 33.564729 ], [ -86.72602, 33.565979 ], [ -86.725629, 33.56581 ], [ -86.725297, 33.565678 ], [ -86.725149, 33.565606 ], [ -86.724716, 33.565426 ], [ -86.724148, 33.565125 ], [ -86.72354, 33.564846 ], [ -86.722958, 33.56459 ], [ -86.722363, 33.564322 ], [ -86.721769, 33.564055 ], [ -86.721173, 33.563787 ], [ -86.720598, 33.563519 ], [ -86.719989, 33.563246 ], [ -86.719394, 33.562973 ], [ -86.718812, 33.562717 ], [ -86.718355, 33.56251 ], [ -86.718218, 33.562438 ], [ -86.717693, 33.562221 ], [ -86.717621, 33.562149 ], [ -86.717117, 33.56196 ], [ -86.716665, 33.56175 ], [ -86.716194, 33.561544 ], [ -86.715728, 33.561336 ], [ -86.715252, 33.56112 ], [ -86.714781, 33.560924 ], [ -86.714634, 33.561079 ], [ -86.714179, 33.560881 ], [ -86.712726, 33.560236 ], [ -86.712584, 33.560436 ], [ -86.710637, 33.559646 ], [ -86.709203, 33.559076 ], [ -86.708861, 33.558979 ], [ -86.708777, 33.558949 ], [ -86.707958, 33.558616 ], [ -86.707954, 33.558597 ], [ -86.707908, 33.558385 ], [ -86.707745, 33.558329 ], [ -86.70031, 33.555477 ], [ -86.700624, 33.554907 ], [ -86.701888, 33.552604 ], [ -86.702782, 33.55136 ], [ -86.703551, 33.550291 ], [ -86.703873, 33.549843 ], [ -86.704188, 33.549404 ], [ -86.707152, 33.54685 ], [ -86.707466, 33.546616 ], [ -86.707683, 33.546405 ], [ -86.707985, 33.545992 ], [ -86.708486, 33.54538 ], [ -86.709318, 33.544365 ], [ -86.709708, 33.543917 ], [ -86.712316, 
33.541302 ], [ -86.712613, 33.541023 ], [ -86.712812, 33.540799 ], [ -86.713745, 33.539422 ], [ -86.713902, 33.539302 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "004000", "GEOID10": "01073004000", "NAME10": "40", "NAMELSAD10": "Census Tract 40", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 2364675, "AWATER10": 0, "INTPTLAT10": "+33.4953245", "INTPTLON10": "-086.8516236" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.854631, 33.487542 ], [ -86.855535, 33.486967 ], [ -86.856538, 33.486322 ], [ -86.857046, 33.485979 ], [ -86.859584, 33.484308 ], [ -86.860059, 33.484021 ], [ -86.86053, 33.483714 ], [ -86.860755, 33.484005 ], [ -86.861035, 33.484352 ], [ -86.861374, 33.484803 ], [ -86.861571, 33.485078 ], [ -86.861742, 33.485286 ], [ -86.862107, 33.48576 ], [ -86.862507, 33.486283 ], [ -86.862427, 33.486486 ], [ -86.861595, 33.487797 ], [ -86.861383, 33.488126 ], [ -86.861291, 33.488236 ], [ -86.861169, 33.488332 ], [ -86.860211, 33.488939 ], [ -86.859816, 33.489189 ], [ -86.859266, 33.489539 ], [ -86.85833, 33.490136 ], [ -86.857567, 33.490628 ], [ -86.856969, 33.491013 ], [ -86.857431, 33.491536 ], [ -86.857855, 33.492015 ], [ -86.858305, 33.492492 ], [ -86.858765, 33.492994 ], [ -86.859221, 33.493495 ], [ -86.859706, 33.494023 ], [ -86.860181, 33.494549 ], [ -86.860593, 33.49501 ], [ -86.860637, 33.495047 ], [ -86.861002, 33.495451 ], [ -86.861354, 33.495838 ], [ -86.861804, 33.495554 ], [ -86.861962, 33.495725 ], [ -86.862138, 33.495924 ], [ -86.862286, 33.496091 ], [ -86.862342, 33.496145 ], [ -86.862401, 33.496183 ], [ -86.86248, 33.496203 ], [ -86.862523, 33.496203 ], [ -86.862657, 33.496178 ], [ -86.862747, 33.496129 ], [ -86.865424, 33.494422 ], [ -86.865495, 33.494501 ], [ -86.865766, 33.494783 ], [ -86.86583, 33.49488 ], [ -86.865848, 33.494895 ], [ -86.865876, 33.494905 ], [ -86.8659, 33.494906 ], [ -86.865968, 33.494895 ], [ -86.866332, 33.494779 ], [ -86.866881, 33.494588 ], [ -86.866885, 33.495165 ], [ -86.866882, 33.495367 ], [ -86.862491, 33.497006 ], [ -86.860903, 33.497855 ], [ -86.856845, 33.500199 ], 
[ -86.854735, 33.500131 ], [ -86.853969, 33.50011 ], [ -86.852812, 33.500079 ], [ -86.851686, 33.500051 ], [ -86.851194, 33.500034 ], [ -86.850371, 33.500006 ], [ -86.849077, 33.499969 ], [ -86.848377, 33.499947 ], [ -86.848378, 33.500321 ], [ -86.848378, 33.500493 ], [ -86.848378, 33.500678 ], [ -86.84838, 33.50083 ], [ -86.848379, 33.501047 ], [ -86.848371, 33.501387 ], [ -86.848002, 33.501458 ], [ -86.847752, 33.50152 ], [ -86.847574, 33.501633 ], [ -86.847191, 33.501852 ], [ -86.847057, 33.501953 ], [ -86.847392, 33.502364 ], [ -86.847897, 33.502965 ], [ -86.848046, 33.503141 ], [ -86.848257, 33.503394 ], [ -86.848336, 33.503486 ], [ -86.848157, 33.503554 ], [ -86.847219, 33.503858 ], [ -86.846292, 33.504089 ], [ -86.843699, 33.504587 ], [ -86.843203, 33.504334 ], [ -86.843156, 33.504274 ], [ -86.842845, 33.503874 ], [ -86.842556, 33.503511 ], [ -86.842525, 33.503473 ], [ -86.842194, 33.503061 ], [ -86.841884, 33.502679 ], [ -86.841545, 33.50226 ], [ -86.84134, 33.502009 ], [ -86.841222, 33.501863 ], [ -86.840803, 33.50135 ], [ -86.840537, 33.501444 ], [ -86.840278, 33.501514 ], [ -86.840164, 33.501534 ], [ -86.839567, 33.50155 ], [ -86.839587, 33.500984 ], [ -86.839566, 33.500861 ], [ -86.8395, 33.500695 ], [ -86.839428, 33.500655 ], [ -86.839565, 33.500597 ], [ -86.839661, 33.500445 ], [ -86.839589, 33.500419 ], [ -86.839586, 33.499992 ], [ -86.839591, 33.499212 ], [ -86.83959, 33.498823 ], [ -86.839596, 33.498071 ], [ -86.839592, 33.497372 ], [ -86.839593, 33.496633 ], [ -86.840423, 33.496212 ], [ -86.841274, 33.495781 ], [ -86.842156, 33.495302 ], [ -86.843044, 33.494824 ], [ -86.844948, 33.493727 ], [ -86.846142, 33.493001 ], [ -86.84733, 33.492275 ], [ -86.849147, 33.49107 ], [ -86.85242, 33.488964 ], [ -86.85329, 33.488405 ], [ -86.853724, 33.488135 ], [ -86.854431, 33.48767 ], [ -86.854631, 33.487542 ] ] ] } },
|
||||
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "003802", "GEOID10": "01073003802", "NAME10": "38.02", "NAMELSAD10": "Census Tract 38.02", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 3245083, "AWATER10": 0, "INTPTLAT10": "+33.4785702", "INTPTLON10": "-086.8900020" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.903171, 33.471768 ], [ -86.902839, 33.472127 ], [ -86.902225, 33.47278 ], [ -86.901954, 33.473078 ], [ -86.901641, 33.473416 ], [ -86.90141, 33.473658 ], [ -86.900796, 33.474323 ], [ -86.900673, 33.474449 ], [ -86.898775, 33.47648 ], [ -86.898283, 33.477004 ], [ -86.897626, 33.477708 ], [ -86.896539, 33.478865 ], [ -86.896263, 33.479165 ], [ -86.895093, 33.48043 ], [ -86.894581, 33.480994 ], [ -86.894456, 33.481132 ], [ -86.894021, 33.481637 ], [ -86.893961, 33.48169 ], [ -86.893382, 33.482321 ], [ -86.893258, 33.482453 ], [ -86.892746, 33.482886 ], [ -86.892337, 33.483302 ], [ -86.891263, 33.484422 ], [ -86.889966, 33.485827 ], [ -86.888649, 33.487226 ], [ -86.887767, 33.488186 ], [ -86.887057, 33.488954 ], [ -86.886728, 33.489294 ], [ -86.886305, 33.489638 ], [ -86.885219, 33.490482 ], [ -86.884374, 33.491114 ], [ -86.883539, 33.491751 ], [ -86.882698, 33.492396 ], [ -86.881861, 33.493024 ], [ -86.881309, 33.49345 ], [ -86.881062, 33.49364 ], [ -86.880805, 33.493833 ], [ -86.880411, 33.494131 ], [ -86.88002, 33.494431 ], [ -86.879626, 33.494732 ], [ -86.878975, 33.495227 ], [ -86.878498, 33.494588 ], [ -86.878186, 33.494157 ], [ -86.87756, 33.493268 ], [ -86.877271, 33.49286 ], [ -86.876969, 33.492421 ], [ -86.876655, 33.491999 ], [ -86.87634, 33.491571 ], [ -86.876027, 33.491146 ], [ -86.875717, 33.49072 ], [ -86.875757, 33.490693 ], [ -86.876011, 33.49057 ], [ -86.876337, 33.490403 ], [ -86.876675, 33.490237 ], [ -86.877239, 33.489948 ], [ -86.877115, 33.489769 ], [ -86.87693, 33.489514 ], [ -86.876672, 33.48915 ], [ -86.876629, 33.48908 ], [ -86.876559, 33.488988 ], [ -86.87702, 33.488297 ], [ 
-86.878015, 33.486803 ], [ -86.878888, 33.485874 ], [ -86.879365, 33.485176 ], [ -86.879779, 33.484583 ], [ -86.880179, 33.484014 ], [ -86.880814, 33.483217 ], [ -86.884791, 33.478206 ], [ -86.889113, 33.472352 ], [ -86.889455, 33.47189 ], [ -86.890077, 33.471066 ], [ -86.89052, 33.47046 ], [ -86.890747, 33.470166 ], [ -86.891276, 33.469468 ], [ -86.891699, 33.468898 ], [ -86.892196, 33.4683 ], [ -86.895724, 33.464729 ], [ -86.897917, 33.46251 ], [ -86.899914, 33.460456 ], [ -86.90018, 33.460195 ], [ -86.900652, 33.459741 ], [ -86.900614, 33.460009 ], [ -86.900652, 33.460414 ], [ -86.900791, 33.461207 ], [ -86.900777, 33.463217 ], [ -86.900792, 33.464061 ], [ -86.900791, 33.464861 ], [ -86.900368, 33.466247 ], [ -86.900487, 33.467212 ], [ -86.900642, 33.468474 ], [ -86.900602, 33.469456 ], [ -86.900561, 33.471007 ], [ -86.900554, 33.471353 ], [ -86.900515, 33.471735 ], [ -86.902983, 33.471766 ], [ -86.903171, 33.471768 ] ] ] } }
|
||||
]
|
||||
}
|
Binary file not shown.
|
@ -0,0 +1,119 @@
|
|||
import pytest
|
||||
import pandas as pd
|
||||
import geopandas as gpd
|
||||
from pathlib import Path
|
||||
from data_pipeline.etl.sources.census_acs.etl import CensusACSETL
|
||||
|
||||
|
||||
def _check_fields_exist(df: pd.DataFrame, field_names: list):
|
||||
for field in field_names:
|
||||
assert field in df.columns
|
||||
|
||||
|
||||
@pytest.fixture
def transform_census_input_fixture() -> pd.DataFrame:
    """
    Read the pickled Census sample used as input by the transform tests.

    Returns:
        DataFrame: the contents of ``data/transform/acs_transform_input.pkl``,
        resolved relative to this test module
    """
    # The sample data lives next to this module, under data/transform.
    sample_dir = Path(__file__).parents[0] / "data" / "transform"
    return pd.read_pickle(sample_dir / "acs_transform_input.pkl")
|
||||
|
||||
|
||||
@pytest.fixture
def transform_census_geojson_fixture() -> gpd.GeoDataFrame:
    """
    Read the Census GeoJSON sample used as input by the transform tests.

    Returns:
        GeoDataFrame: the contents of
        ``data/transform/acs_transform_geojson.geojson``, resolved relative
        to this test module
    """
    # The sample data lives next to this module, under data/transform.
    sample_dir = Path(__file__).parents[0] / "data" / "transform"
    return gpd.read_file(sample_dir / "acs_transform_geojson.geojson")
|
||||
|
||||
|
||||
@pytest.fixture
def transformed_data_fixture(
    transform_census_input_fixture: pd.DataFrame,
    transform_census_geojson_fixture: gpd.GeoDataFrame,
) -> pd.DataFrame:
    """
    Transform the test input data.

    Builds a ``CensusACSETL``, hands it the sample Census dataframe and the
    sample GeoJSON dataframe in place of downloaded data, and runs its
    ``transform`` step.

    Returns:
        DataFrame: the ETL's ``output_df`` after ``transform`` has run
    """
    etl = CensusACSETL()

    # Inject the fixtures directly onto the ETL object before transforming.
    etl.df = transform_census_input_fixture
    etl.geo_df = transform_census_geojson_fixture

    etl.transform()
    return etl.output_df
|
||||
|
||||
|
||||
#################
|
||||
# Transform tests
|
||||
#################
|
||||
def test_poverty_fields(transformed_data_fixture: pd.DataFrame):
    """Check the poverty count columns and their values in the first two rows."""
    etl = CensusACSETL()
    below_200_count = etl.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME
    below_100_count = etl.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME

    # Test that the poverty count fields were added.
    _check_fields_exist(
        transformed_data_fixture, [below_200_count, below_100_count]
    )

    # Each entry is (row position, column name, expected count).
    expectations = [
        (0, below_200_count, 1743),
        (0, below_100_count, 700),
        (1, below_200_count, 941),
        (1, below_100_count, 548),
    ]
    for row, column, expected in expectations:
        assert transformed_data_fixture.iloc[row][column] == expected
|
||||
|
||||
|
||||
def test_college_undergrad_fields(transformed_data_fixture: pd.DataFrame):
    """Check the off-campus college columns and their values in the first two rows."""
    etl = CensusACSETL()
    undergrad_poverty = etl.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD
    undergrad = etl.OFFCAMPUS_UNDERGRADUATE_FIELD
    university_poverty = etl.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD
    university = etl.OFFCAMPUS_UNIVERSITY_FIELD

    # Test that the off-campus undergraduate and university fields were added.
    _check_fields_exist(
        transformed_data_fixture,
        [undergrad_poverty, undergrad, university_poverty, university],
    )

    # Each entry is (row position, column name, expected value).
    expectations = [
        (0, undergrad_poverty, 0),
        (0, undergrad, 296),
        (0, university_poverty, 44),
        (0, university, 340),
        (1, undergrad_poverty, 45),
        (1, undergrad, 97),
        (1, university_poverty, 45),
        (1, university, 128),
    ]
    for row, column, expected in expectations:
        assert transformed_data_fixture.iloc[row][column] == expected
|
Loading…
Add table
Reference in a new issue