Mirror of https://github.com/DOI-DO/j40-cejst-2.git, synced 2025-07-28 08:01:17 -07:00
Score tests (#1847)
* update Python version on README; tuple typing fix

* Alaska tribal points fix (#1821)

* Bump mistune from 0.8.4 to 2.0.3 in /data/data-pipeline (#1777)

  Bumps [mistune](https://github.com/lepture/mistune) from 0.8.4 to 2.0.3.
  - [Release notes](https://github.com/lepture/mistune/releases)
  - [Changelog](https://github.com/lepture/mistune/blob/master/docs/changes.rst)
  - [Commits](https://github.com/lepture/mistune/compare/v0.8.4...v2.0.3)

  ---
  updated-dependencies:
  - dependency-name: mistune
    dependency-type: indirect
  ...

  Signed-off-by: dependabot[bot] <support@github.com>
  Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* poetry update

* initial pass of score tests

* add threshold tests

* added ses threshold (not donut, not island)

* testing suite -- stopping for the day

* added test for lead proxy indicator

* Refactor score tests to make them less verbose and more direct (#1865)

* Cleanup tests slightly before refactor (#1846)

* Refactor score calculations tests

* Feedback from review

* Refactor output tests like calculation tests (#1846) (#1870)

* Reorganize files (#1846)

* Switch from lru_cache to fixture scopes (#1846)

* Add tests for all factors (#1846)

* Mark smoketests and run as part of backend deploy (#1846)

* Update renamed var (#1846)

* Switch from named tuple to dataclass (#1846)

  This is annoying, but pylint in python3.8 was crashing parsing the named tuple. We weren't using any namedtuple-specific features, so I made the type a dataclass just to get pylint to behave.

* Add default timeout to requests (#1846)

* Fix type (#1846)

* Fix merge mistake on poetry.lock (#1846)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Matt Bowen <83967628+mattbowen-usds@users.noreply.github.com>
Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
parent e539db86ab
commit 1c4d3e4142

19 changed files with 1425 additions and 29 deletions
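The test files added by this commit are not reproduced in the excerpt below. As context for the "Switch from lru_cache to fixture scopes" and "Mark smoketests" items in the commit message, here is a minimal sketch of that pattern, assuming a session-scoped pytest fixture and a custom `smoketest` marker; the fixture name, marker name, and CSV path are illustrative, not code from this commit.

```python
import pandas as pd
import pytest


@pytest.fixture(scope="session")
def final_score_df() -> pd.DataFrame:
    # One load per test session replaces memoizing the loader with
    # functools.lru_cache; the path below is illustrative.
    return pd.read_csv("data/score/csv/full/usa.csv")


@pytest.mark.smoketest  # register the marker in pytest config to avoid warnings
def test_score_output_is_not_empty(final_score_df: pd.DataFrame) -> None:
    # The backend deploy can select only these checks with `pytest -m smoketest`.
    assert not final_score_df.empty
```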
@@ -1,5 +1,5 @@
 import functools
-from collections import namedtuple
+from dataclasses import dataclass
 
 import numpy as np
 import pandas as pd
@@ -496,10 +496,11 @@ class ScoreETL(ExtractTransformLoad):
         # >= some threshold.
         # TODO: Add more fields here.
         # https://github.com/usds/justice40-tool/issues/970
-        ReversePercentile = namedtuple(
-            typename="ReversePercentile",
-            field_names=["field_name", "low_field_name"],
-        )
+        @dataclass
+        class ReversePercentile:
+            field_name: str
+            low_field_name: str
+
         reverse_percentiles = [
             # This dictionary follows the format:
             # <field name> : <field name for low values>
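As the commit message notes, the namedtuple was swapped for a dataclass only because pylint on Python 3.8 crashed while parsing it; the surrounding code constructs instances with keyword arguments and reads attributes, which both types support identically. A minimal sketch of that equivalence (the field values below are illustrative, not fields from the score):

```python
from dataclasses import dataclass


@dataclass
class ReversePercentile:
    field_name: str
    low_field_name: str


# Keyword construction and attribute access work exactly as they did with
# the namedtuple, so no call sites needed to change.
rp = ReversePercentile(
    field_name="Example (percentile)",
    low_field_name="Example (low raw value)",
)
assert rp.field_name == "Example (percentile)"
assert rp.low_field_name == "Example (low raw value)"
```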
@@ -51,7 +51,7 @@ class GeoScoreETL(ExtractTransformLoad):
 
         ## TODO: We really should not have this any longer changing
         self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
-            field_names.SCORE_N
+            field_names.FINAL_SCORE_N_BOOLEAN
         ]
         self.TARGET_SCORE_RENAME_TO = "SCORE"
 
@@ -1,4 +1,4 @@
-from typing import List, NamedTuple, Tuple
+from typing import Any, List, NamedTuple, Tuple
 
 import pandas as pd
 import geopandas as gpd
@@ -41,7 +41,7 @@ def _prepare_dataframe_for_imputation(
     impute_var_named_tup_list: List[NamedTuple],
     geo_df: gpd.GeoDataFrame,
     geoid_field: str = "GEOID10_TRACT",
-) -> Tuple[list, gpd.GeoDataFrame]:
+) -> Tuple[Any, gpd.GeoDataFrame]:
     imputing_cols = [
         impute_var_pair.raw_field_name
         for impute_var_pair in impute_var_named_tup_list
@@ -282,12 +282,20 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
 
         # Download MSA median incomes
         logger.info("Starting download of MSA median incomes.")
-        download = requests.get(self.MSA_MEDIAN_INCOME_URL, verify=None)
+        download = requests.get(
+            self.MSA_MEDIAN_INCOME_URL,
+            verify=None,
+            timeout=settings.REQUESTS_DEFAULT_TIMOUT,
+        )
         self.msa_median_incomes = json.loads(download.content)
 
         # Download state median incomes
         logger.info("Starting download of state median incomes.")
-        download_state = requests.get(self.STATE_MEDIAN_INCOME_URL, verify=None)
+        download_state = requests.get(
+            self.STATE_MEDIAN_INCOME_URL,
+            verify=None,
+            timeout=settings.REQUESTS_DEFAULT_TIMOUT,
+        )
         self.state_median_incomes = json.loads(download_state.content)
         ## NOTE we already have PR's MI here
 
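Without a `timeout` argument, `requests.get` can block indefinitely on a connection that stops responding; with one, it raises `requests.exceptions.Timeout` instead. A minimal sketch of the behavior this hunk adds, assuming `settings.REQUESTS_DEFAULT_TIMOUT` is a number of seconds defined in `data_pipeline.config` (its value is not shown in this diff; the 30 and the URL below are illustrative):

```python
import requests

REQUESTS_DEFAULT_TIMOUT = 30  # illustrative value; name spelled as in the codebase

try:
    response = requests.get(
        "https://example.com/median-income.json",  # illustrative URL
        timeout=REQUESTS_DEFAULT_TIMOUT,
    )
    response.raise_for_status()
except requests.exceptions.Timeout:
    # Before this change the download could hang forever; now it fails fast.
    print("Median income download timed out")
```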
@@ -7,6 +7,7 @@ import pandas as pd
 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.utils import get_module_logger
 from data_pipeline.score import field_names
+from data_pipeline.config import settings
 
 pd.options.mode.chained_assignment = "raise"
 
@@ -270,7 +271,8 @@ class CensusDecennialETL(ExtractTransformLoad):
                     island["var_list"],
                     island["fips"],
                     county,
-                )
+                ),
+                timeout=settings.REQUESTS_DEFAULT_TIMOUT,
             )
 
             df = json.loads(download.content)
@@ -3,6 +3,7 @@ import requests
 
 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.utils import get_module_logger
+from data_pipeline.config import settings
 
 logger = get_module_logger(__name__)
 
@@ -26,7 +27,11 @@ class HudRecapETL(ExtractTransformLoad):
 
     def extract(self) -> None:
        logger.info("Downloading HUD Recap Data")
-        download = requests.get(self.HUD_RECAP_CSV_URL, verify=None)
+        download = requests.get(
+            self.HUD_RECAP_CSV_URL,
+            verify=None,
+            timeout=settings.REQUESTS_DEFAULT_TIMOUT,
+        )
         file_contents = download.content
         csv_file = open(self.HUD_RECAP_CSV, "wb")
         csv_file.write(file_contents)
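The same `requests.get(..., verify=None, timeout=settings.REQUESTS_DEFAULT_TIMOUT)` pattern now appears in the median income, decennial census, and HUD recap ETLs. The commit keeps the calls inline; purely as a sketch (not part of this change), a shared helper could centralize the default so future ETLs cannot forget it:

```python
import requests

from data_pipeline.config import settings


def fetch(url: str, **kwargs) -> requests.Response:
    # Hypothetical helper, not in this commit: apply the project-wide default
    # timeout unless the caller overrides it, then fail on HTTP errors.
    kwargs.setdefault("timeout", settings.REQUESTS_DEFAULT_TIMOUT)
    response = requests.get(url, **kwargs)
    response.raise_for_status()
    return response
```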