Low-Income Poverty Calculation

This commit is contained in:
alene 2024-12-10 14:28:54 -05:00 committed by Carlos Felix
parent ba2e5eca45
commit 44f5aae0ca
8 changed files with 5858 additions and 73 deletions

View file

@ -56,14 +56,33 @@ class CensusACSETL(ExtractTransformLoad):
self.MEDIAN_INCOME_FIELD_NAME = (
"Median household income in the past 12 months"
)
self.POVERTY_DATASET_TOTAL = "C17002_001E" # Estimate!!Total,
self.POVERTY_UNDER_50PCT = "C17002_002E" # Estimate!!Total!!Under .50
self.POVERTY_50PCT_TO_99PCT = (
"C17002_003E" # Estimate!!Total!!.50 to .99
)
self.POVERTY_100PCT_TO_124PCT = (
"C17002_004E" # Estimate!!Total!!1.00 to 1.24
)
self.POVERTY_125PCT_TO_149PCT = (
"C17002_005E" # Estimate!!Total!!1.25 to 1.49
)
self.POVERTY_150PCT_TO_184PCT = (
"C17002_006E" # Estimate!!Total!!1.50 to 1.84
)
self.POVERTY_185PCT_TO_199PCT = (
"C17002_007E" # Estimate!!Total!!1.85 to 1.99
)
self.POVERTY_FIELDS = [
"C17002_001E", # Estimate!!Total,
"C17002_002E", # Estimate!!Total!!Under .50
"C17002_003E", # Estimate!!Total!!.50 to .99
"C17002_004E", # Estimate!!Total!!1.00 to 1.24
"C17002_005E", # Estimate!!Total!!1.25 to 1.49
"C17002_006E", # Estimate!!Total!!1.50 to 1.84
"C17002_007E", # Estimate!!Total!!1.85 to 1.99
self.POVERTY_DATASET_TOTAL,
self.POVERTY_UNDER_50PCT,
self.POVERTY_50PCT_TO_99PCT,
self.POVERTY_100PCT_TO_124PCT,
self.POVERTY_125PCT_TO_149PCT,
self.POVERTY_150PCT_TO_184PCT,
self.POVERTY_185PCT_TO_199PCT,
]
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME = (
@ -75,19 +94,30 @@ class CensusACSETL(ExtractTransformLoad):
self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
"Percent of individuals < 200% Federal Poverty Line"
)
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
"Percent of individuals < 200% Federal Poverty Line, imputed"
self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME = (
"Total population of individuals < 200% Federal Poverty Line"
)
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
"Percent of individuals < 200% Federal Poverty Line," + " imputed"
)
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME = (
"Total population of individuals < 200% Federal Poverty Line,"
+ " imputed"
)
self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME = (
"Total population of individuals < 100% Federal Poverty Line"
)
self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME = (
"Total population of individuals < 100% Federal Poverty Line,"
+ " imputed"
)
self.ADJUSTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
"Adjusted percent of individuals < 200% Federal Poverty Line"
)
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME_PRELIMINARY = (
"Preliminary adjusted percent of individuals < 200% Federal Poverty Line,"
+ " imputed"
)
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME = (
"Adjusted percent of individuals < 200% Federal Poverty Line,"
+ " imputed"
@ -148,32 +178,102 @@ class CensusACSETL(ExtractTransformLoad):
)
self.HIGH_SCHOOL_ED_FIELD = "Percent individuals age 25 or over with less than high school degree"
# College attendance fields
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED = (
"B14004_001E" # Estimate!!Total
)
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC = "B14004_003E" # Estimate!!Total!!Male!!Enrolled in public college or graduate school
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE = "B14004_008E" # Estimate!!Total!!Male!!Enrolled in private college or graduate school
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC = "B14004_019E" # Estimate!!Total!!Female!!Enrolled in public college or graduate school
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE = "B14004_024E" # Estimate!!Total!!Female!!Enrolled in private college or graduate school
## Off-Campus University Student Poverty Fields
# Estimate!!Total:!!Income in the past 12 months below the poverty level:!!
# Enrolled in school:!!Enrolled in college undergraduate years
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE = "B14006_009E"
# Estimate!!Total:!!Income in the past 12 months below the poverty level:!!
# Enrolled in school:!!Enrolled in graduate or professional school
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE = "B14006_010E"
# Estimate!!Total:!!Income in the past 12 months at or above the poverty level:!!
# Enrolled in school:!!Enrolled in college undergraduate years
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE = "B14006_019E"
# Estimate!!Total:!!Income in the past 12 months at or above the poverty level:!!
# Enrolled in school:!!Enrolled in graduate or professional school
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE = "B14006_020E"
self.COLLEGE_ATTENDANCE_FIELDS = [
self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED,
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC,
self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE,
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC,
self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE,
self.UNIVERSITY_POVERTY_FIELDS = [
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE,
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE,
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE,
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE,
]
self.COLLEGE_ATTENDANCE_FIELD = (
self.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD = (
"Population below poverty line enrolled in an undergraduate program"
+ " (excluding students living in university housing)"
)
self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD = (
"Population below poverty line enrolled in an undergraduate program"
+ " (excluding students living in university housing), imputed"
)
self.OFFCAMPUS_UNDERGRADUATE_FIELD = (
"Population enrolled in an undergraduate program"
+ " (excluding students living in university housing)"
)
self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_FIELD = (
"Population enrolled in an undergraduate program"
+ " (excluding students living in university housing), imputed"
)
self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD = (
"Population below poverty line enrolled in an undergraduate, graduate, or professional program"
+ " (excluding students living in university housing)"
)
self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD = (
"Population below poverty line enrolled in an undergraduate, graduate, or professional program"
+ " (excluding students living in university housing), imputed"
)
self.OFFCAMPUS_UNIVERSITY_FIELD = (
"Population enrolled in an undergraduate, graduate, or professional program"
+ " (excluding students living in university housing)"
)
self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD = (
"Population enrolled in an undergraduate, graduate, or professional program"
+ " (excluding students living in university housing), imputed"
)
self.IMPUTED_POVERTY_DATASET_TOTAL = (
"Total population in poverty dataset (all income levels)"
+ ", imputed"
)
self.OVERALL_RATIO_200FPL_TO_100FPL = (
"Ratio <200% FPL to <100% FPL, overall"
)
self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL = "Estimated population count of off-campus university students <200% FPL"
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE = (
"Estimated population count of people in a househould with income <200% FPL"
+ ", excluding all university students"
)
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY = (
"Everyone in poverty dataset"
+ ", minus all off-campus university students"
)
# University Enrollment Rates (15+ population, includes students in dorms)
self.UNIVERSITY_ATTENDANCE_TOTAL_POPULATION_ASKED = (
"B14004_001E" # Estimate!!Total
)
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PUBLIC = "B14004_003E" # Estimate!!Total!!Male!!Enrolled in public college or graduate school
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PRIVATE = "B14004_008E" # Estimate!!Total!!Male!!Enrolled in private college or graduate school
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PUBLIC = "B14004_019E" # Estimate!!Total!!Female!!Enrolled in public college or graduate school
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PRIVATE = "B14004_024E" # Estimate!!Total!!Female!!Enrolled in private college or graduate school
self.UNIVERSITY_ATTENDANCE_FIELDS = [
self.UNIVERSITY_ATTENDANCE_TOTAL_POPULATION_ASKED,
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PUBLIC,
self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PRIVATE,
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PUBLIC,
self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PRIVATE,
]
self.UNIVERSITY_ATTENDANCE_FIELD = (
"Percent enrollment in college or graduate school"
)
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD = (
self.IMPUTED_UNIVERSITY_ATTENDANCE_FIELD = (
"Percent enrollment in college or graduate school, imputed"
)
self.COLLEGE_NON_ATTENDANCE_FIELD = "Percent of population not currently enrolled in college or graduate school"
self.UNIVERSITY_NON_ATTENDANCE_FIELD = "Percent of population not currently enrolled in college or graduate school"
self.RE_FIELDS = [
"B02001_001E",
@ -295,11 +395,29 @@ class CensusACSETL(ExtractTransformLoad):
self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME,
self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME,
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
self.MEDIAN_HOUSE_VALUE_FIELD_NAME,
self.HIGH_SCHOOL_ED_FIELD,
self.COLLEGE_ATTENDANCE_FIELD,
self.COLLEGE_NON_ATTENDANCE_FIELD,
self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE,
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE,
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE,
self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE,
self.OVERALL_RATIO_200FPL_TO_100FPL,
self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL,
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE,
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY,
self.UNIVERSITY_ATTENDANCE_FIELD,
self.UNIVERSITY_NON_ATTENDANCE_FIELD,
self.IMPUTED_UNIVERSITY_ATTENDANCE_FIELD,
self.OFFCAMPUS_UNIVERSITY_FIELD,
self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD,
self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
self.POVERTY_DATASET_TOTAL,
self.IMPUTED_POVERTY_DATASET_TOTAL,
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
]
+ self.RE_OUTPUT_FIELDS
@ -315,6 +433,7 @@ class CensusACSETL(ExtractTransformLoad):
)
self.df: pd.DataFrame
self.geo_df: gpd.GeoDataFrame
def get_data_sources(self) -> [DataSource]:
# Define the variables to retrieve
@ -328,7 +447,8 @@ class CensusACSETL(ExtractTransformLoad):
+ self.POVERTY_FIELDS
+ self.EDUCATIONAL_FIELDS
+ self.RE_FIELDS
+ self.COLLEGE_ATTENDANCE_FIELDS
+ self.UNIVERSITY_POVERTY_FIELDS
+ self.UNIVERSITY_ATTENDANCE_FIELDS
+ self.AGE_INPUT_FIELDS
)
@ -383,11 +503,7 @@ class CensusACSETL(ExtractTransformLoad):
dtype={field_names.GEOID_TRACT_FIELD: "string"},
)
def transform(self) -> None:
df = self.df
# Here we join the geometry of the US to the dataframe so that we can impute
# The income of neighbors. first this looks locally; if there's no local
# Load the census GeoJSON. First this looks locally; if there's no local
# geojson file for all of the US, this will read it off of S3
logger.debug("Reading in geojson for the country")
if not os.path.exists(
@ -400,13 +516,18 @@ class CensusACSETL(ExtractTransformLoad):
self.DATA_PATH,
)
geo_df = gpd.read_file(
self.geo_df = gpd.read_file(
self.DATA_PATH / "census" / "geojson" / "us.json",
)
def transform(self) -> None:
df = self.df
# Here we join the geometry of the US to the dataframe so that we can impute
# The income of neighbors.
df = CensusACSETL.merge_geojson(
df=df,
usa_geo_df=geo_df,
usa_geo_df=self.geo_df,
)
# Rename some fields.
@ -455,24 +576,57 @@ class CensusACSETL(ExtractTransformLoad):
# Calculate percent at different poverty thresholds
df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_FIELD_NAME] = (
df["C17002_002E"] + df["C17002_003E"]
) / df["C17002_001E"]
df[self.POVERTY_UNDER_50PCT] + df[self.POVERTY_50PCT_TO_99PCT]
) / df[self.POVERTY_DATASET_TOTAL]
df[self.POVERTY_LESS_THAN_150_PERCENT_FPL_FIELD_NAME] = (
df["C17002_002E"]
+ df["C17002_003E"]
+ df["C17002_004E"]
+ df["C17002_005E"]
) / df["C17002_001E"]
df[self.POVERTY_UNDER_50PCT]
+ df[self.POVERTY_50PCT_TO_99PCT]
+ df[self.POVERTY_100PCT_TO_124PCT]
+ df[self.POVERTY_125PCT_TO_149PCT]
) / df[self.POVERTY_DATASET_TOTAL]
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME] = (
df["C17002_002E"]
+ df["C17002_003E"]
+ df["C17002_004E"]
+ df["C17002_005E"]
+ df["C17002_006E"]
+ df["C17002_007E"]
) / df["C17002_001E"]
df[self.POVERTY_UNDER_50PCT]
+ df[self.POVERTY_50PCT_TO_99PCT]
+ df[self.POVERTY_100PCT_TO_124PCT]
+ df[self.POVERTY_125PCT_TO_149PCT]
+ df[self.POVERTY_150PCT_TO_184PCT]
+ df[self.POVERTY_185PCT_TO_199PCT]
) / df[self.POVERTY_DATASET_TOTAL]
# COUNT of Poverty less than 200%
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME] = (
df[self.POVERTY_UNDER_50PCT]
+ df[self.POVERTY_50PCT_TO_99PCT]
+ df[self.POVERTY_100PCT_TO_124PCT]
+ df[self.POVERTY_125PCT_TO_149PCT]
+ df[self.POVERTY_150PCT_TO_184PCT]
+ df[self.POVERTY_185PCT_TO_199PCT]
)
df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME] = (
df[self.POVERTY_UNDER_50PCT] + df[self.POVERTY_50PCT_TO_99PCT]
)
# Off-Campus University Fields:
df[self.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD] = df[
self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE
]
df[self.OFFCAMPUS_UNDERGRADUATE_FIELD] = (
df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE]
+ df[self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE]
)
df[self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD] = (
df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE]
+ df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE]
)
df[self.OFFCAMPUS_UNIVERSITY_FIELD] = (
df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE]
+ df[self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_GRADUATE]
+ df[self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_UNDERGRADUATE]
+ df[self.OFFCAMPUS_UNIVERSITY_ABOVE_POVERTY_GRADUATE]
)
# Calculate educational attainment
educational_numerator_fields = [
@ -596,16 +750,16 @@ class CensusACSETL(ExtractTransformLoad):
df[sum_columns].sum(axis=1) / df[field_names.TOTAL_POP_FIELD]
)
# Calculate college attendance and adjust low income
df[self.COLLEGE_ATTENDANCE_FIELD] = (
df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC]
+ df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PRIVATE]
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PUBLIC]
+ df[self.COLLEGE_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
) / df[self.COLLEGE_ATTENDANCE_TOTAL_POPULATION_ASKED]
# Calculate university attendance and adjust low income
df[self.UNIVERSITY_ATTENDANCE_FIELD] = (
df[self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PUBLIC]
+ df[self.UNIVERSITY_ATTENDANCE_MALE_ENROLLED_PRIVATE]
+ df[self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PUBLIC]
+ df[self.UNIVERSITY_ATTENDANCE_FEMALE_ENROLLED_PRIVATE]
) / df[self.UNIVERSITY_ATTENDANCE_TOTAL_POPULATION_ASKED]
df[self.COLLEGE_NON_ATTENDANCE_FIELD] = (
1 - df[self.COLLEGE_ATTENDANCE_FIELD]
df[self.UNIVERSITY_NON_ATTENDANCE_FIELD] = (
1 - df[self.UNIVERSITY_ATTENDANCE_FIELD]
)
# we impute income for both income measures
@ -618,8 +772,36 @@ class CensusACSETL(ExtractTransformLoad):
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.COLLEGE_ATTENDANCE_FIELD,
imputed_field_name=self.IMPUTED_COLLEGE_ATTENDANCE_FIELD,
raw_field_name=self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD,
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.OFFCAMPUS_UNDERGRADUATE_FIELD,
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNDERGRADUATE_FIELD,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.OFFCAMPUS_UNIVERSITY_FIELD,
imputed_field_name=self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.UNIVERSITY_ATTENDANCE_FIELD,
imputed_field_name=self.IMPUTED_UNIVERSITY_ATTENDANCE_FIELD,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.POVERTY_DATASET_TOTAL,
imputed_field_name=self.IMPUTED_POVERTY_DATASET_TOTAL,
),
CensusACSETL.ImputeVariables(
raw_field_name=self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
imputed_field_name=self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME,
),
],
geo_df=df,
@ -629,21 +811,156 @@ class CensusACSETL(ExtractTransformLoad):
logger.debug("Calculating with imputed values")
# pylint: disable=pointless-string-statement
"""
POVERTY CALCULATION
Goal: Calculate the portion of people in households where income
is less than or equal to twice the federal poverty level,
not including students enrolled in higher ed.
Approach: To do this, we must make an adjustment to remove off-campus university students
from numbers reported by the ACS. We use the "interpolated" method to estimate
the number of off-campus university students actually included in the unadjusted numerator.
Interpolated Poverty Calculation, Step-by-Step Methodology
Step 1: Estimate ratio of overall population <200% FPL : overall population <100% FPL
Overall ratio 200:100 FPL =
max(
max[
Total population <200% FPL,
1
]
/
max[
Total population <100% FPL,
1
],
1)
Step 2: Interpolate the number of off-campus university students <200% FPL
Estimated university population <200% FPL =
min(
max[
University population <100% FPL x Overall ratio 200:100 FPL,
0 # nb: actual lower bound is the university population <100%, because ratio is clipped at 1
],
Total number of off-campus university students
)
Step 3: Subtract off-campus university students from both numerator and denominator of the unadjusted poverty rate
Adjusted poverty rate =
min(
max [
(
max[
Overall <200% FPL population - Estimated university population <200% FPL,
0
]
/
max[
Everyone in poverty dataset - University total population,
1
],
),
0
],
1
)
"""
# pylint: enable=pointless-string-statement
### Add fields for poverty calculation numerator
# Step 1: Estimate ratio of overall population <200% FPL : overall population <100% FPL
df[self.OVERALL_RATIO_200FPL_TO_100FPL] = (
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME]
.fillna(
df[
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME
]
# Use clip for consistency with denominator
)
.clip(lower=1)
/ df[self.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME]
.fillna(
df[
self.IMPUTED_POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME
]
# Use clip to ensure we never divide by 0
)
.clip(lower=1)
# Use clip to ensure that the ratio of poverty <200%:<100% is not lower than 1
).clip(lower=1)
# Step 2: Interpolate the number of off-campus university students <200% FPL
df[self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL] = (
df[self.OVERALL_RATIO_200FPL_TO_100FPL]
* (
df[
self.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD
].fillna( # corresponds to <100% FPL
df[self.IMPUTED_OFFCAMPUS_UNIVERSITY_POVERTY_FIELD]
)
)
# ensure that estimated count of university <200% is between 0 and the total number of university students
# nb: actual lower bound is university <100%, because ratio is clipped at 1
).clip(
lower=0,
upper=df[self.OFFCAMPUS_UNIVERSITY_FIELD].fillna(
df[self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD]
),
)
# Step 3a: Subtract off-campus university students from numerator of the unadjusted poverty rate
df[
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE
] = (
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME].fillna(
df[self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME]
df[self.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME].fillna(
df[
self.IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME
]
)
- df[self.COLLEGE_ATTENDANCE_FIELD].fillna(
df[self.IMPUTED_COLLEGE_ATTENDANCE_FIELD]
)
# Use clip to ensure that the values are not negative if college attendance
# is very high
- df[self.OFFCAMPUS_UNIVERSITY_POPULATION_COUNT_UNDER_200PCT_FPL]
# Use clip as extra precaution against values <=0
).clip(
lower=0
)
### Add denominator field for poverty calculation
# Step 3b: Subtract off-campus university students from denominator of the unadjusted poverty rate
df[
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY
] = (
df[self.POVERTY_DATASET_TOTAL].fillna(
df[self.IMPUTED_POVERTY_DATASET_TOTAL]
)
- df[self.OFFCAMPUS_UNIVERSITY_FIELD].fillna(
df[self.IMPUTED_OFFCAMPUS_UNIVERSITY_FIELD]
)
# Use clip as extra precaution against values <=0
).clip(
lower=1
)
# Step 3c: Put the numerator and denominator together to calculate the final adjusted poverty rate
# NB: numerator and denominator are both already imputed and clipped
df[
self.ADJUSTED_AND_IMPUTED_POVERTY_LESS_THAN_200_PERCENT_FPL_FIELD_NAME
] = (
df[
self.POPULATION_COUNT_UNDER_200PCT_FPL_MINUS_OFFCAMPUS_UNIVERSITY_ESTIMATE
]
/ df[
self.POPULATION_TOTAL_IN_POVERTY_DATASET_MINUS_OFFCAMPUS_UNVERSITY
]
# Clip to ensure percentage is between 0 and 1
).clip(
lower=0, upper=1
)
## CHECK OUTPUT AND SAVE RESULTS
# All values should have a value at this point
assert (
# For tracts with >0 population

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,25 @@
# How to generate the sample data in this folder
The sample data in this folder can be easily generated by debugging the `data_pipeline/etl/sources/census_acs/etl.py` file
and exporting data using the debugger console. Examples of this exporting are below.
## Why in pickle format?
Exporting as a Pickle file keeps all the metadata about the columns including the data types. If we were to export as CSV, we would need
to code the data types in the test fixtures for all the columns for the comparison to be correct.
### Transform tests input files
- `acs_transform_input.pkl` - this file contains downloaded Census data that is used as input.
- `acs_transform_geojson.geojson` - this file contains the Census GeoJSON data that is used as input.
1. Place a breakpoint in `data_pipeline/etl/sources/census_acs/etl.py` in the `transform` method right at the beginning
and start the debugger running the ETL run command for Census ACS (`etl-run -d census_acs`).
1. Partially export the `self.df` and `self.geo_df` data to files once the debugger pauses at the breakpoint. Use these
sample commands in the debugger console.
```python
t_list = ['01073001100', '01073001400', '01073002000', '01073003802', '01073004000']
self.geo_df[self.geo_df['GEOID10'].isin(t_list)].to_file('data_pipeline/tests/sources/census_acs/data/transform/acs_transform_geojson.geojson')
test_df = self.df[self.df['GEOID10_TRACT'].isin(t_list)].copy()
# Setting this one row to N/A allows the imputations code to succeed
test_df.at[4, self.OFFCAMPUS_UNIVERSITY_BELOW_POVERTY_UNDERGRADUATE] = pd.NA
test_df.to_pickle('data_pipeline/tests/sources/census_acs/data/transform/acs_transform_input.pkl')
```

View file

@ -0,0 +1,11 @@
{
"type": "FeatureCollection",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "001400", "GEOID10": "01073001400", "NAME10": "14", "NAMELSAD10": "Census Tract 14", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 2241287, "AWATER10": 0, "INTPTLAT10": "+33.5261497", "INTPTLON10": "-086.8351469" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.840884, 33.527586 ], [ -86.837824, 33.528871 ], [ -86.83693, 33.530023 ], [ -86.83679, 33.530205 ], [ -86.83639, 33.530805 ], [ -86.835352, 33.531179 ], [ -86.833843, 33.532003 ], [ -86.832035, 33.532595 ], [ -86.831245, 33.532854 ], [ -86.829135, 33.533924 ], [ -86.827029, 33.534708 ], [ -86.826575, 33.534968 ], [ -86.825634, 33.535508 ], [ -86.824369, 33.536271 ], [ -86.823926, 33.536515 ], [ -86.823758, 33.536606 ], [ -86.823683, 33.536184 ], [ -86.823591, 33.535542 ], [ -86.823596, 33.53486 ], [ -86.823637, 33.534404 ], [ -86.823739, 33.533793 ], [ -86.82384, 33.533389 ], [ -86.824083, 33.532657 ], [ -86.824469, 33.531855 ], [ -86.825589, 33.530001 ], [ -86.825945, 33.529412 ], [ -86.826278, 33.528861 ], [ -86.82662, 33.528272 ], [ -86.826703, 33.528081 ], [ -86.826785, 33.5278 ], [ -86.826911, 33.527375 ], [ -86.827001, 33.526685 ], [ -86.827025, 33.525543 ], [ -86.827091, 33.524614 ], [ -86.827086, 33.523811 ], [ -86.827086, 33.523767 ], [ -86.827091, 33.523677 ], [ -86.827125, 33.523051 ], [ -86.827117, 33.522607 ], [ -86.827088, 33.522283 ], [ -86.827006, 33.521836 ], [ -86.826987, 33.521701 ], [ -86.826807, 33.521107 ], [ -86.829003, 33.520829 ], [ -86.829532, 33.520498 ], [ -86.830151, 33.52015 ], [ -86.830507, 33.520037 ], [ -86.830666, 33.519986 ], [ -86.831415, 33.519802 ], [ -86.831968, 33.519782 ], [ -86.83261, 33.519835 ], [ -86.834123, 33.520139 ], [ -86.834279, 33.52017 ], [ -86.834582, 33.520223 ], [ -86.836148, 33.520497 ], [ -86.837263, 33.520682 ], [ -86.837708, 33.52073 ], [ -86.837983, 33.520761 ], [ -86.838571, 33.520791 ], [ -86.839295, 33.520792 ], [ -86.83942, 33.520792 ], [ 
-86.839596, 33.520792 ], [ -86.840901, 33.520808 ], [ -86.842445, 33.520826 ], [ -86.843989, 33.520846 ], [ -86.845617, 33.520865 ], [ -86.846194, 33.520873 ], [ -86.84942, 33.520908 ], [ -86.849421, 33.521133 ], [ -86.84944, 33.521596 ], [ -86.849499, 33.521692 ], [ -86.849433, 33.522393 ], [ -86.849582, 33.523085 ], [ -86.849667, 33.523435 ], [ -86.849748, 33.523804 ], [ -86.849783, 33.523978 ], [ -86.849812, 33.52412 ], [ -86.84985, 33.52431 ], [ -86.849984, 33.524884 ], [ -86.850106, 33.525442 ], [ -86.85023, 33.525973 ], [ -86.850303, 33.526624 ], [ -86.850312, 33.526707 ], [ -86.850132, 33.526701 ], [ -86.84979, 33.526605 ], [ -86.849721, 33.526576 ], [ -86.849042, 33.526293 ], [ -86.848434, 33.526035 ], [ -86.847961, 33.525834 ], [ -86.847891, 33.525805 ], [ -86.847787, 33.525787 ], [ -86.845591, 33.525405 ], [ -86.842991, 33.526505 ], [ -86.840884, 33.527586 ] ] ] } },
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "001100", "GEOID10": "01073001100", "NAME10": "11", "NAMELSAD10": "Census Tract 11", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 11428313, "AWATER10": 3862, "INTPTLAT10": "+33.5423337", "INTPTLON10": "-086.8765161" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.882435, 33.552326 ], [ -86.881871, 33.552684 ], [ -86.881177, 33.552987 ], [ -86.880549, 33.553159 ], [ -86.880024, 33.553247 ], [ -86.879772, 33.553268 ], [ -86.879039, 33.553269 ], [ -86.877437, 33.553193 ], [ -86.873225, 33.553055 ], [ -86.871055, 33.55294 ], [ -86.870488, 33.552953 ], [ -86.870028, 33.552996 ], [ -86.869495, 33.553081 ], [ -86.86902, 33.553198 ], [ -86.868549, 33.553418 ], [ -86.868064, 33.553783 ], [ -86.867614, 33.554299 ], [ -86.867234, 33.554811 ], [ -86.866179, 33.556229 ], [ -86.865886, 33.556531 ], [ -86.865428, 33.556909 ], [ -86.8651, 33.557135 ], [ -86.863343, 33.558141 ], [ -86.861725, 33.55911 ], [ -86.860721, 33.559702 ], [ -86.859731, 33.560316 ], [ -86.858524, 33.561035 ], [ -86.858002, 33.561372 ], [ -86.857324, 33.561719 ], [ -86.856471, 33.562004 ], [ -86.856129, 33.562064 ], [ -86.851595, 33.559404 ], [ -86.848891, 33.559805 ], [ -86.852991, 33.552104 ], [ -86.853223, 33.551239 ], [ -86.855191, 33.543905 ], [ -86.855694, 33.538151 ], [ -86.85606, 33.537889 ], [ -86.856269, 33.537719 ], [ -86.856472, 33.53752 ], [ -86.856779, 33.53719 ], [ -86.857174, 33.536763 ], [ -86.85758, 33.536346 ], [ -86.857795, 33.536127 ], [ -86.858137, 33.535851 ], [ -86.858444, 33.535632 ], [ -86.858851, 33.53539 ], [ -86.85921, 33.535203 ], [ -86.860817, 33.53405 ], [ -86.860936, 33.534028 ], [ -86.864589, 33.533319 ], [ -86.86658, 33.532927 ], [ -86.867357, 33.53278 ], [ -86.867868, 33.532651 ], [ -86.868475, 33.53246 ], [ -86.870603, 33.531691 ], [ -86.870854, 33.531601 ], [ -86.875846, 33.529909 ], [ -86.878638, 33.528917 ], [ -86.878921, 33.528817 ], [ -86.879007, 33.528787 ], [ 
-86.878999, 33.529822 ], [ -86.8796, 33.529851 ], [ -86.88017, 33.529879 ], [ -86.880793, 33.529909 ], [ -86.880872, 33.529919 ], [ -86.881324, 33.529935 ], [ -86.88209, 33.529947 ], [ -86.882375, 33.529953 ], [ -86.882779, 33.529958 ], [ -86.88349, 33.529994 ], [ -86.884213, 33.530005 ], [ -86.885074, 33.530017 ], [ -86.885649, 33.530026 ], [ -86.885884, 33.530034 ], [ -86.886535, 33.530037 ], [ -86.887886, 33.530039 ], [ -86.887968, 33.530043 ], [ -86.889407, 33.530064 ], [ -86.889415, 33.529614 ], [ -86.889422, 33.529159 ], [ -86.890877, 33.529167 ], [ -86.891569, 33.52917 ], [ -86.891643, 33.52917 ], [ -86.891832, 33.528964 ], [ -86.892208, 33.528556 ], [ -86.893389, 33.527586 ], [ -86.894533, 33.526558 ], [ -86.897192, 33.528505 ], [ -86.896992, 33.530504 ], [ -86.901104, 33.532548 ], [ -86.903492, 33.533105 ], [ -86.908792, 33.535505 ], [ -86.910592, 33.536105 ], [ -86.909292, 33.538305 ], [ -86.909392, 33.539005 ], [ -86.908162, 33.539693 ], [ -86.908095, 33.539893 ], [ -86.907692, 33.540605 ], [ -86.907192, 33.542205 ], [ -86.905392, 33.541305 ], [ -86.903103, 33.542516 ], [ -86.901996, 33.543221 ], [ -86.901608, 33.543456 ], [ -86.901167, 33.54412 ], [ -86.900913, 33.544384 ], [ -86.899614, 33.545658 ], [ -86.899456, 33.545794 ], [ -86.896838, 33.547888 ], [ -86.896206, 33.548394 ], [ -86.895817, 33.548705 ], [ -86.895692, 33.548805 ], [ -86.894818, 33.548368 ], [ -86.894292, 33.548105 ], [ -86.893192, 33.548205 ], [ -86.893149, 33.548341 ], [ -86.892544, 33.550241 ], [ -86.892492, 33.550404 ], [ -86.889392, 33.550505 ], [ -86.888392, 33.549304 ], [ -86.886592, 33.550205 ], [ -86.884997, 33.550205 ], [ -86.8849, 33.551105 ], [ -86.883737, 33.551726 ], [ -86.883615, 33.551775 ], [ -86.883463, 33.551713 ], [ -86.883374, 33.551707 ], [ -86.883263, 33.551733 ], [ -86.883102, 33.551828 ], [ -86.882615, 33.552213 ], [ -86.882435, 33.552326 ] ] ] } },
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "002000", "GEOID10": "01073002000", "NAME10": "20", "NAMELSAD10": "Census Tract 20", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 3605025, "AWATER10": 0, "INTPTLAT10": "+33.5591908", "INTPTLON10": "-086.7233518" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.713902, 33.539302 ], [ -86.714347, 33.539121 ], [ -86.714768, 33.539008 ], [ -86.715254, 33.538933 ], [ -86.715891, 33.538922 ], [ -86.716497, 33.538825 ], [ -86.716288, 33.539004 ], [ -86.719691, 33.540211 ], [ -86.720436, 33.543861 ], [ -86.72065, 33.544904 ], [ -86.72064, 33.545552 ], [ -86.720624, 33.545854 ], [ -86.720641, 33.546602 ], [ -86.720666, 33.547704 ], [ -86.720671, 33.548361 ], [ -86.720685, 33.549495 ], [ -86.720702, 33.550497 ], [ -86.720715, 33.551287 ], [ -86.720184, 33.551274 ], [ -86.720099, 33.551271 ], [ -86.719672, 33.551264 ], [ -86.719527, 33.551266 ], [ -86.718937, 33.55126 ], [ -86.718379, 33.551245 ], [ -86.717792, 33.551257 ], [ -86.717277, 33.551242 ], [ -86.716713, 33.551233 ], [ -86.716142, 33.551223 ], [ -86.715577, 33.551214 ], [ -86.715006, 33.5512 ], [ -86.714442, 33.551191 ], [ -86.713877, 33.551181 ], [ -86.713881, 33.552963 ], [ -86.713323, 33.552953 ], [ -86.712752, 33.552939 ], [ -86.71275, 33.553417 ], [ -86.712753, 33.554307 ], [ -86.713324, 33.554333 ], [ -86.713888, 33.554326 ], [ -86.714462, 33.55435 ], [ -86.71507, 33.554366 ], [ -86.715588, 33.554385 ], [ -86.71612, 33.554384 ], [ -86.716664, 33.554437 ], [ -86.717222, 33.554653 ], [ -86.717741, 33.554882 ], [ -86.71829, 33.55513 ], [ -86.718989, 33.555448 ], [ -86.719231, 33.555565 ], [ -86.719603, 33.555718 ], [ -86.72003, 33.555922 ], [ -86.720245, 33.556011 ], [ -86.720876, 33.556274 ], [ -86.721511, 33.556456 ], [ -86.7218, 33.556658 ], [ -86.722005, 33.556801 ], [ -86.722554, 33.557118 ], [ -86.723614, 33.556025 ], [ -86.723496, 33.555943 ], [ -86.723105, 33.55567 ], [ -86.72257, 33.555315 ], [ 
-86.723206, 33.554639 ], [ -86.723346, 33.554398 ], [ -86.72355, 33.554328 ], [ -86.723773, 33.55433 ], [ -86.724149, 33.554569 ], [ -86.724693, 33.554905 ], [ -86.725189, 33.555287 ], [ -86.725711, 33.555637 ], [ -86.726233, 33.555992 ], [ -86.726803, 33.556395 ], [ -86.727325, 33.556739 ], [ -86.727847, 33.557094 ], [ -86.728377, 33.557456 ], [ -86.728891, 33.557804 ], [ -86.729406, 33.558156 ], [ -86.729942, 33.55852 ], [ -86.730464, 33.558875 ], [ -86.731032, 33.559263 ], [ -86.729985, 33.560323 ], [ -86.728915, 33.561436 ], [ -86.728813, 33.56155 ], [ -86.727701, 33.563268 ], [ -86.726796, 33.564729 ], [ -86.72602, 33.565979 ], [ -86.725629, 33.56581 ], [ -86.725297, 33.565678 ], [ -86.725149, 33.565606 ], [ -86.724716, 33.565426 ], [ -86.724148, 33.565125 ], [ -86.72354, 33.564846 ], [ -86.722958, 33.56459 ], [ -86.722363, 33.564322 ], [ -86.721769, 33.564055 ], [ -86.721173, 33.563787 ], [ -86.720598, 33.563519 ], [ -86.719989, 33.563246 ], [ -86.719394, 33.562973 ], [ -86.718812, 33.562717 ], [ -86.718355, 33.56251 ], [ -86.718218, 33.562438 ], [ -86.717693, 33.562221 ], [ -86.717621, 33.562149 ], [ -86.717117, 33.56196 ], [ -86.716665, 33.56175 ], [ -86.716194, 33.561544 ], [ -86.715728, 33.561336 ], [ -86.715252, 33.56112 ], [ -86.714781, 33.560924 ], [ -86.714634, 33.561079 ], [ -86.714179, 33.560881 ], [ -86.712726, 33.560236 ], [ -86.712584, 33.560436 ], [ -86.710637, 33.559646 ], [ -86.709203, 33.559076 ], [ -86.708861, 33.558979 ], [ -86.708777, 33.558949 ], [ -86.707958, 33.558616 ], [ -86.707954, 33.558597 ], [ -86.707908, 33.558385 ], [ -86.707745, 33.558329 ], [ -86.70031, 33.555477 ], [ -86.700624, 33.554907 ], [ -86.701888, 33.552604 ], [ -86.702782, 33.55136 ], [ -86.703551, 33.550291 ], [ -86.703873, 33.549843 ], [ -86.704188, 33.549404 ], [ -86.707152, 33.54685 ], [ -86.707466, 33.546616 ], [ -86.707683, 33.546405 ], [ -86.707985, 33.545992 ], [ -86.708486, 33.54538 ], [ -86.709318, 33.544365 ], [ -86.709708, 33.543917 ], [ -86.712316, 
33.541302 ], [ -86.712613, 33.541023 ], [ -86.712812, 33.540799 ], [ -86.713745, 33.539422 ], [ -86.713902, 33.539302 ] ] ] } },
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "004000", "GEOID10": "01073004000", "NAME10": "40", "NAMELSAD10": "Census Tract 40", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 2364675, "AWATER10": 0, "INTPTLAT10": "+33.4953245", "INTPTLON10": "-086.8516236" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.854631, 33.487542 ], [ -86.855535, 33.486967 ], [ -86.856538, 33.486322 ], [ -86.857046, 33.485979 ], [ -86.859584, 33.484308 ], [ -86.860059, 33.484021 ], [ -86.86053, 33.483714 ], [ -86.860755, 33.484005 ], [ -86.861035, 33.484352 ], [ -86.861374, 33.484803 ], [ -86.861571, 33.485078 ], [ -86.861742, 33.485286 ], [ -86.862107, 33.48576 ], [ -86.862507, 33.486283 ], [ -86.862427, 33.486486 ], [ -86.861595, 33.487797 ], [ -86.861383, 33.488126 ], [ -86.861291, 33.488236 ], [ -86.861169, 33.488332 ], [ -86.860211, 33.488939 ], [ -86.859816, 33.489189 ], [ -86.859266, 33.489539 ], [ -86.85833, 33.490136 ], [ -86.857567, 33.490628 ], [ -86.856969, 33.491013 ], [ -86.857431, 33.491536 ], [ -86.857855, 33.492015 ], [ -86.858305, 33.492492 ], [ -86.858765, 33.492994 ], [ -86.859221, 33.493495 ], [ -86.859706, 33.494023 ], [ -86.860181, 33.494549 ], [ -86.860593, 33.49501 ], [ -86.860637, 33.495047 ], [ -86.861002, 33.495451 ], [ -86.861354, 33.495838 ], [ -86.861804, 33.495554 ], [ -86.861962, 33.495725 ], [ -86.862138, 33.495924 ], [ -86.862286, 33.496091 ], [ -86.862342, 33.496145 ], [ -86.862401, 33.496183 ], [ -86.86248, 33.496203 ], [ -86.862523, 33.496203 ], [ -86.862657, 33.496178 ], [ -86.862747, 33.496129 ], [ -86.865424, 33.494422 ], [ -86.865495, 33.494501 ], [ -86.865766, 33.494783 ], [ -86.86583, 33.49488 ], [ -86.865848, 33.494895 ], [ -86.865876, 33.494905 ], [ -86.8659, 33.494906 ], [ -86.865968, 33.494895 ], [ -86.866332, 33.494779 ], [ -86.866881, 33.494588 ], [ -86.866885, 33.495165 ], [ -86.866882, 33.495367 ], [ -86.862491, 33.497006 ], [ -86.860903, 33.497855 ], [ -86.856845, 33.500199 ], 
[ -86.854735, 33.500131 ], [ -86.853969, 33.50011 ], [ -86.852812, 33.500079 ], [ -86.851686, 33.500051 ], [ -86.851194, 33.500034 ], [ -86.850371, 33.500006 ], [ -86.849077, 33.499969 ], [ -86.848377, 33.499947 ], [ -86.848378, 33.500321 ], [ -86.848378, 33.500493 ], [ -86.848378, 33.500678 ], [ -86.84838, 33.50083 ], [ -86.848379, 33.501047 ], [ -86.848371, 33.501387 ], [ -86.848002, 33.501458 ], [ -86.847752, 33.50152 ], [ -86.847574, 33.501633 ], [ -86.847191, 33.501852 ], [ -86.847057, 33.501953 ], [ -86.847392, 33.502364 ], [ -86.847897, 33.502965 ], [ -86.848046, 33.503141 ], [ -86.848257, 33.503394 ], [ -86.848336, 33.503486 ], [ -86.848157, 33.503554 ], [ -86.847219, 33.503858 ], [ -86.846292, 33.504089 ], [ -86.843699, 33.504587 ], [ -86.843203, 33.504334 ], [ -86.843156, 33.504274 ], [ -86.842845, 33.503874 ], [ -86.842556, 33.503511 ], [ -86.842525, 33.503473 ], [ -86.842194, 33.503061 ], [ -86.841884, 33.502679 ], [ -86.841545, 33.50226 ], [ -86.84134, 33.502009 ], [ -86.841222, 33.501863 ], [ -86.840803, 33.50135 ], [ -86.840537, 33.501444 ], [ -86.840278, 33.501514 ], [ -86.840164, 33.501534 ], [ -86.839567, 33.50155 ], [ -86.839587, 33.500984 ], [ -86.839566, 33.500861 ], [ -86.8395, 33.500695 ], [ -86.839428, 33.500655 ], [ -86.839565, 33.500597 ], [ -86.839661, 33.500445 ], [ -86.839589, 33.500419 ], [ -86.839586, 33.499992 ], [ -86.839591, 33.499212 ], [ -86.83959, 33.498823 ], [ -86.839596, 33.498071 ], [ -86.839592, 33.497372 ], [ -86.839593, 33.496633 ], [ -86.840423, 33.496212 ], [ -86.841274, 33.495781 ], [ -86.842156, 33.495302 ], [ -86.843044, 33.494824 ], [ -86.844948, 33.493727 ], [ -86.846142, 33.493001 ], [ -86.84733, 33.492275 ], [ -86.849147, 33.49107 ], [ -86.85242, 33.488964 ], [ -86.85329, 33.488405 ], [ -86.853724, 33.488135 ], [ -86.854431, 33.48767 ], [ -86.854631, 33.487542 ] ] ] } },
{ "type": "Feature", "properties": { "STATEFP10": "01", "COUNTYFP10": "073", "TRACTCE10": "003802", "GEOID10": "01073003802", "NAME10": "38.02", "NAMELSAD10": "Census Tract 38.02", "MTFCC10": "G5020", "FUNCSTAT10": "S", "ALAND10": 3245083, "AWATER10": 0, "INTPTLAT10": "+33.4785702", "INTPTLON10": "-086.8900020" }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -86.903171, 33.471768 ], [ -86.902839, 33.472127 ], [ -86.902225, 33.47278 ], [ -86.901954, 33.473078 ], [ -86.901641, 33.473416 ], [ -86.90141, 33.473658 ], [ -86.900796, 33.474323 ], [ -86.900673, 33.474449 ], [ -86.898775, 33.47648 ], [ -86.898283, 33.477004 ], [ -86.897626, 33.477708 ], [ -86.896539, 33.478865 ], [ -86.896263, 33.479165 ], [ -86.895093, 33.48043 ], [ -86.894581, 33.480994 ], [ -86.894456, 33.481132 ], [ -86.894021, 33.481637 ], [ -86.893961, 33.48169 ], [ -86.893382, 33.482321 ], [ -86.893258, 33.482453 ], [ -86.892746, 33.482886 ], [ -86.892337, 33.483302 ], [ -86.891263, 33.484422 ], [ -86.889966, 33.485827 ], [ -86.888649, 33.487226 ], [ -86.887767, 33.488186 ], [ -86.887057, 33.488954 ], [ -86.886728, 33.489294 ], [ -86.886305, 33.489638 ], [ -86.885219, 33.490482 ], [ -86.884374, 33.491114 ], [ -86.883539, 33.491751 ], [ -86.882698, 33.492396 ], [ -86.881861, 33.493024 ], [ -86.881309, 33.49345 ], [ -86.881062, 33.49364 ], [ -86.880805, 33.493833 ], [ -86.880411, 33.494131 ], [ -86.88002, 33.494431 ], [ -86.879626, 33.494732 ], [ -86.878975, 33.495227 ], [ -86.878498, 33.494588 ], [ -86.878186, 33.494157 ], [ -86.87756, 33.493268 ], [ -86.877271, 33.49286 ], [ -86.876969, 33.492421 ], [ -86.876655, 33.491999 ], [ -86.87634, 33.491571 ], [ -86.876027, 33.491146 ], [ -86.875717, 33.49072 ], [ -86.875757, 33.490693 ], [ -86.876011, 33.49057 ], [ -86.876337, 33.490403 ], [ -86.876675, 33.490237 ], [ -86.877239, 33.489948 ], [ -86.877115, 33.489769 ], [ -86.87693, 33.489514 ], [ -86.876672, 33.48915 ], [ -86.876629, 33.48908 ], [ -86.876559, 33.488988 ], [ -86.87702, 33.488297 ], [ 
-86.878015, 33.486803 ], [ -86.878888, 33.485874 ], [ -86.879365, 33.485176 ], [ -86.879779, 33.484583 ], [ -86.880179, 33.484014 ], [ -86.880814, 33.483217 ], [ -86.884791, 33.478206 ], [ -86.889113, 33.472352 ], [ -86.889455, 33.47189 ], [ -86.890077, 33.471066 ], [ -86.89052, 33.47046 ], [ -86.890747, 33.470166 ], [ -86.891276, 33.469468 ], [ -86.891699, 33.468898 ], [ -86.892196, 33.4683 ], [ -86.895724, 33.464729 ], [ -86.897917, 33.46251 ], [ -86.899914, 33.460456 ], [ -86.90018, 33.460195 ], [ -86.900652, 33.459741 ], [ -86.900614, 33.460009 ], [ -86.900652, 33.460414 ], [ -86.900791, 33.461207 ], [ -86.900777, 33.463217 ], [ -86.900792, 33.464061 ], [ -86.900791, 33.464861 ], [ -86.900368, 33.466247 ], [ -86.900487, 33.467212 ], [ -86.900642, 33.468474 ], [ -86.900602, 33.469456 ], [ -86.900561, 33.471007 ], [ -86.900554, 33.471353 ], [ -86.900515, 33.471735 ], [ -86.902983, 33.471766 ], [ -86.903171, 33.471768 ] ] ] } }
]
}

View file

@ -0,0 +1,119 @@
import pytest
import pandas as pd
import geopandas as gpd
from pathlib import Path
from data_pipeline.etl.sources.census_acs.etl import CensusACSETL
def _check_fields_exist(df: pd.DataFrame, field_names: list):
    """Assert that every name in ``field_names`` is a column of ``df``.

    Args:
        df: the dataframe whose columns are inspected
        field_names: the column names expected to be present

    Raises:
        AssertionError: at the first name that is not a column of ``df``
    """
    available_columns = df.columns
    for expected_name in field_names:
        assert expected_name in available_columns
@pytest.fixture
def transform_census_input_fixture() -> pd.DataFrame:
    """
    Read the pickled Census sample used as input by the transform tests.

    The file is looked up relative to this test module, under
    ``data/transform``.

    Returns:
        DataFrame: the Census input data
    """
    transform_dir = Path(__file__).parent / "data" / "transform"
    return pd.read_pickle(transform_dir / "acs_transform_input.pkl")
@pytest.fixture
def transform_census_geojson_fixture() -> gpd.GeoDataFrame:
    """
    Read the Census GeoJSON sample used as input by the transform tests.

    The file is looked up relative to this test module, under
    ``data/transform``.

    Returns:
        GeoDataFrame: the Census GeoJSON input data
    """
    transform_dir = Path(__file__).parent / "data" / "transform"
    return gpd.read_file(transform_dir / "acs_transform_geojson.geojson")
@pytest.fixture
def transformed_data_fixture(
    transform_census_input_fixture: pd.DataFrame,
    transform_census_geojson_fixture: gpd.GeoDataFrame,
) -> pd.DataFrame:
    """
    Run the Census ACS transform against the sample input data.

    The ETL's ``df`` and ``geo_df`` attributes are assigned directly from
    the input fixtures before ``transform`` is called.

    Returns:
        DataFrame: the transformed data (the ETL's ``output_df``)
    """
    etl = CensusACSETL()
    etl.df = transform_census_input_fixture
    etl.geo_df = transform_census_geojson_fixture
    etl.transform()
    transformed = etl.output_df
    return transformed
#################
# Transform tests
#################
def test_poverty_fields(transformed_data_fixture: pd.DataFrame):
    """Check the poverty count columns exist and hold the expected values."""
    result = transformed_data_fixture
    acs = CensusACSETL()
    below_200_count = acs.POVERTY_LESS_THAN_200_PERCENT_FPL_COUNT_FIELD_NAME
    below_100_count = acs.POVERTY_LESS_THAN_100_PERCENT_FPL_COUNT_FIELD_NAME

    # Both count columns must be present in the transformed output.
    _check_fields_exist(result, [below_200_count, below_100_count])

    # (row position, column, expected value) for the first two output rows.
    expectations = [
        (0, below_200_count, 1743),
        (0, below_100_count, 700),
        (1, below_200_count, 941),
        (1, below_100_count, 548),
    ]
    for row, column, expected in expectations:
        assert result.iloc[row][column] == expected
def test_college_undergrad_fields(transformed_data_fixture: pd.DataFrame):
    """Check the off-campus student columns exist and hold the expected values."""
    result = transformed_data_fixture
    acs = CensusACSETL()
    columns = [
        acs.OFFCAMPUS_UNDERGRADUATE_POVERTY_FIELD,
        acs.OFFCAMPUS_UNDERGRADUATE_FIELD,
        acs.OFFCAMPUS_UNIVERSITY_POVERTY_FIELD,
        acs.OFFCAMPUS_UNIVERSITY_FIELD,
    ]

    # All four columns must be present in the transformed output.
    _check_fields_exist(result, columns)

    # Expected values per output row, listed in the same order as `columns`.
    expected_by_row = [
        [0, 296, 44, 340],
        [45, 97, 45, 128],
    ]
    for row, expected_values in enumerate(expected_by_row):
        for column, expected in zip(columns, expected_values):
            assert result.iloc[row][column] == expected