Score tests (#1847)

* update Python version on README; tuple typing fix

* Alaska tribal points fix (#1821)

* Bump mistune from 0.8.4 to 2.0.3 in /data/data-pipeline (#1777)

Bumps [mistune](https://github.com/lepture/mistune) from 0.8.4 to 2.0.3.
- [Release notes](https://github.com/lepture/mistune/releases)
- [Changelog](https://github.com/lepture/mistune/blob/master/docs/changes.rst)
- [Commits](https://github.com/lepture/mistune/compare/v0.8.4...v2.0.3)

---
updated-dependencies:
- dependency-name: mistune
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* poetry update

* initial pass of score tests

* add threshold tests

* added ses threshold (not donut, not island)

* testing suite -- stopping for the day

* added test for lead proxy indicator

* Refactor score tests to make them less verbose and more direct (#1865)

* Cleanup tests slightly before refactor (#1846)

* Refactor score calculations tests

* Feedback from review

* Refactor output tests like calculation tests (#1846) (#1870)

* Reorganize files (#1846)

* Switch from lru_cache to fixture scopes (#1846)

* Add tests for all factors (#1846)

* Mark smoketests and run as part of BE deploy (#1846)

* Update renamed var (#1846)

* Switch from named tuple to dataclass (#1846)

This is annoying, but pylint in python3.8 was crashing parsing the named
tuple. We weren't using any namedtuple-specific features, so I made the
type a dataclass just to get pylint to behave.

* Add default timeout to requests (#1846)

* Fix type (#1846)

* Fix merge mistake on poetry.lock (#1846)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Jorge Escobar <jorge.e.escobar@omb.eop.gov>
Co-authored-by: Jorge Escobar <83969469+esfoobar-usds@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Matt Bowen <83967628+mattbowen-usds@users.noreply.github.com>
Co-authored-by: matt bowen <matthew.r.bowen@omb.eop.gov>
This commit is contained in:
Emma Nechamkin 2022-08-26 15:23:20 -04:00 committed by GitHub
commit 1c4d3e4142
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 1425 additions and 29 deletions

View file

@ -1,5 +1,5 @@
import functools
from collections import namedtuple
from dataclasses import dataclass
import numpy as np
import pandas as pd
@ -496,10 +496,11 @@ class ScoreETL(ExtractTransformLoad):
# >= some threshold.
# TODO: Add more fields here.
# https://github.com/usds/justice40-tool/issues/970
ReversePercentile = namedtuple(
typename="ReversePercentile",
field_names=["field_name", "low_field_name"],
)
@dataclass
class ReversePercentile:
field_name: str
low_field_name: str
reverse_percentiles = [
# This dictionary follows the format:
# <field name> : <field name for low values>

View file

@ -51,7 +51,7 @@ class GeoScoreETL(ExtractTransformLoad):
## TODO: We really should not have this any longer changing
self.TARGET_SCORE_SHORT_FIELD = constants.TILES_SCORE_COLUMNS[
field_names.SCORE_N
field_names.FINAL_SCORE_N_BOOLEAN
]
self.TARGET_SCORE_RENAME_TO = "SCORE"

View file

@ -1,4 +1,4 @@
from typing import List, NamedTuple, Tuple
from typing import Any, List, NamedTuple, Tuple
import pandas as pd
import geopandas as gpd
@ -41,7 +41,7 @@ def _prepare_dataframe_for_imputation(
impute_var_named_tup_list: List[NamedTuple],
geo_df: gpd.GeoDataFrame,
geoid_field: str = "GEOID10_TRACT",
) -> Tuple[list, gpd.GeoDataFrame]:
) -> Tuple[Any, gpd.GeoDataFrame]:
imputing_cols = [
impute_var_pair.raw_field_name
for impute_var_pair in impute_var_named_tup_list

View file

@ -282,12 +282,20 @@ class CensusACSMedianIncomeETL(ExtractTransformLoad):
# Download MSA median incomes
logger.info("Starting download of MSA median incomes.")
download = requests.get(self.MSA_MEDIAN_INCOME_URL, verify=None)
download = requests.get(
self.MSA_MEDIAN_INCOME_URL,
verify=None,
timeout=settings.REQUESTS_DEFAULT_TIMOUT,
)
self.msa_median_incomes = json.loads(download.content)
# Download state median incomes
logger.info("Starting download of state median incomes.")
download_state = requests.get(self.STATE_MEDIAN_INCOME_URL, verify=None)
download_state = requests.get(
self.STATE_MEDIAN_INCOME_URL,
verify=None,
timeout=settings.REQUESTS_DEFAULT_TIMOUT,
)
self.state_median_incomes = json.loads(download_state.content)
## NOTE we already have PR's MI here

View file

@ -7,6 +7,7 @@ import pandas as pd
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.score import field_names
from data_pipeline.config import settings
pd.options.mode.chained_assignment = "raise"
@ -270,7 +271,8 @@ class CensusDecennialETL(ExtractTransformLoad):
island["var_list"],
island["fips"],
county,
)
),
timeout=settings.REQUESTS_DEFAULT_TIMOUT,
)
df = json.loads(download.content)

View file

@ -3,6 +3,7 @@ import requests
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.utils import get_module_logger
from data_pipeline.config import settings
logger = get_module_logger(__name__)
@ -26,7 +27,11 @@ class HudRecapETL(ExtractTransformLoad):
def extract(self) -> None:
logger.info("Downloading HUD Recap Data")
download = requests.get(self.HUD_RECAP_CSV_URL, verify=None)
download = requests.get(
self.HUD_RECAP_CSV_URL,
verify=None,
timeout=settings.REQUESTS_DEFAULT_TIMOUT,
)
file_contents = download.content
csv_file = open(self.HUD_RECAP_CSV, "wb")
csv_file.write(file_contents)