Add tests to make sure each source makes it to the score correctly (#1878)

* Remove unused persistent poverty from score (#1835)

* Test a few datasets for overlap in the final score (#1835)

* Add remaining data sources (#1853)

* Apply code-review feedback (#1835)

* Rearrange a little for readability (#1835)

* Add tract test (#1835)

* Add test for score values (#1835)

* Check for unmatched source tracts (#1835)

* Clean up numeric code to plaintext (#1835)

* Make import more obvious (#1835)
This commit is contained in:
Matt Bowen 2022-09-06 15:10:19 -04:00 committed by GitHub
commit d41153d89d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 328 additions and 18 deletions

View file

@ -42,7 +42,6 @@ class ScoreETL(ExtractTransformLoad):
self.doe_energy_burden_df: pd.DataFrame
self.national_risk_index_df: pd.DataFrame
self.geocorr_urban_rural_df: pd.DataFrame
self.persistent_poverty_df: pd.DataFrame
self.census_decennial_df: pd.DataFrame
self.census_2010_df: pd.DataFrame
self.national_tract_df: pd.DataFrame
@ -159,16 +158,6 @@ class ScoreETL(ExtractTransformLoad):
low_memory=False,
)
# Load persistent poverty
persistent_poverty_csv = (
constants.DATA_PATH / "dataset" / "persistent_poverty" / "usa.csv"
)
self.persistent_poverty_df = pd.read_csv(
persistent_poverty_csv,
dtype={self.GEOID_TRACT_FIELD_NAME: "string"},
low_memory=False,
)
# Load decennial census data
census_decennial_csv = (
constants.DATA_PATH
@ -359,7 +348,6 @@ class ScoreETL(ExtractTransformLoad):
self.doe_energy_burden_df,
self.ejscreen_df,
self.geocorr_urban_rural_df,
self.persistent_poverty_df,
self.national_risk_index_df,
self.census_acs_median_incomes_df,
self.census_decennial_df,
@ -484,7 +472,6 @@ class ScoreETL(ExtractTransformLoad):
non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
field_names.PERSISTENT_POVERTY_FIELD,
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
]