mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-07-28 23:11:16 -07:00
adding median income field and running black
This commit is contained in:
parent
6c986adfe4
commit
4ae7eff4c4
6 changed files with 33 additions and 10 deletions
|
@@ -59,7 +59,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
# Load census data
|
||||
census_csv = self.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
|
||||
self.census_df = pd.read_csv(
|
||||
census_csv, dtype={self.GEOID_FIELD_NAME: "string"}, low_memory=False,
|
||||
census_csv,
|
||||
dtype={self.GEOID_FIELD_NAME: "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
|
||||
# Load housing and transportation data
|
||||
|
@@ -121,7 +123,8 @@ class ScoreETL(ExtractTransformLoad):
|
|||
|
||||
# Define a named tuple that will be used for each data set input.
|
||||
DataSet = collections.namedtuple(
|
||||
typename="DataSet", field_names=["input_field", "renamed_field", "bucket"],
|
||||
typename="DataSet",
|
||||
field_names=["input_field", "renamed_field", "bucket"],
|
||||
)
|
||||
|
||||
data_sets = [
|
||||
|
@@ -138,7 +141,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
bucket=None,
|
||||
),
|
||||
DataSet(
|
||||
input_field="ACSTOTPOP", renamed_field="Total population", bucket=None,
|
||||
input_field="ACSTOTPOP",
|
||||
renamed_field="Total population",
|
||||
bucket=None,
|
||||
),
|
||||
# The following data sets have buckets, because they're used in the score
|
||||
DataSet(
|
||||
|
@@ -244,7 +249,9 @@ class ScoreETL(ExtractTransformLoad):
|
|||
}
|
||||
|
||||
self.df.rename(
|
||||
columns=renaming_dict, inplace=True, errors="raise",
|
||||
columns=renaming_dict,
|
||||
inplace=True,
|
||||
errors="raise",
|
||||
)
|
||||
|
||||
columns_to_keep = [data_set.renamed_field for data_set in data_sets]
|
||||
|
|
|
@@ -46,7 +46,9 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
|
||||
logger.info("Reading score CSV")
|
||||
self.score_usa_df = pd.read_csv(
|
||||
self.TILE_SCORE_CSV, dtype={"GEOID10": "string"}, low_memory=False,
|
||||
self.TILE_SCORE_CSV,
|
||||
dtype={"GEOID10": "string"},
|
||||
low_memory=False,
|
||||
)
|
||||
|
||||
def transform(self) -> None:
|
||||
|
@@ -68,7 +70,8 @@ class GeoScoreETL(ExtractTransformLoad):
|
|||
].reset_index(drop=True)
|
||||
|
||||
usa_simplified.rename(
|
||||
columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO}, inplace=True,
|
||||
columns={self.TARGET_SCORE_NAME: self.TARGET_SCORE_RENAME_TO},
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
logger.info("Aggregating into tracts (~5 minutes)")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue