updating pylint

lucasmbrown-usds 2022-09-28 13:35:52 -04:00
commit 5ff988ab29
25 changed files with 154 additions and 101 deletions

View file

@@ -18,14 +18,17 @@ repos:
            "--ignore-init-module-imports",
          ]
-  - repo: https://github.com/asottile/reorder_python_imports
-    rev: v3.8.3
-    hooks:
-      - id: reorder-python-imports
-        language_version: python3.9
-        args:
-          [
-            "--application-directories=.",
-          ]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
+        name: isort (python)
+        args:
+          [
+            "--force-single-line-imports",
+            "--profile=black",
+            "--line-length=80",
+            "--src-path=.:data/data-pipeline"
+          ]
  - repo: https://github.com/ambv/black
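The hook swap above drives most of the import rewrites in the files that follow: isort's --force-single-line-imports expands grouped imports to one import per line, and --profile=black keeps the result compatible with the black hook configured next. A minimal sketch of that behavior, on an assumed stdlib example rather than code from this repository:

# Before isort:
#     from collections import (
#         Counter,
#         OrderedDict,
#     )
#
# After isort with --force-single-line-imports:
from collections import Counter
from collections import OrderedDict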

View file

@@ -16,7 +16,7 @@
 "import matplotlib.pyplot as plt\n",
 "\n",
 "from data_pipeline.score import field_names\n",
-"from data_pipeline.comparison_tool.src import utils \n",
+"from data_pipeline.comparison_tool.src import utils\n",
 "\n",
 "pd.options.display.float_format = \"{:,.3f}\".format\n",
 "%load_ext lab_black"
@@ -128,9 +128,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"utils.validate_new_data(\n",
-"    file_path=COMPARATOR_FILE, score_col=COMPARATOR_COLUMN\n",
-")"
+"utils.validate_new_data(file_path=COMPARATOR_FILE, score_col=COMPARATOR_COLUMN)"
 ]
 },
 {
@@ -148,20 +146,25 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"comparator_cols = [COMPARATOR_COLUMN] + OTHER_COMPARATOR_COLUMNS if OTHER_COMPARATOR_COLUMNS else [COMPARATOR_COLUMN]\n",
+"comparator_cols = (\n",
+"    [COMPARATOR_COLUMN] + OTHER_COMPARATOR_COLUMNS\n",
+"    if OTHER_COMPARATOR_COLUMNS\n",
+"    else [COMPARATOR_COLUMN]\n",
+")\n",
 "\n",
-"#papermill_description=Loading_data\n",
+"# papermill_description=Loading_data\n",
 "joined_df = pd.concat(\n",
 "    [\n",
 "        utils.read_file(\n",
 "            file_path=SCORE_FILE,\n",
-"            columns=[TOTAL_POPULATION_COLUMN, SCORE_COLUMN] + ADDITIONAL_DEMO_COLUMNS,\n",
+"            columns=[TOTAL_POPULATION_COLUMN, SCORE_COLUMN]\n",
+"            + ADDITIONAL_DEMO_COLUMNS,\n",
 "            geoid=GEOID_COLUMN,\n",
 "        ),\n",
 "        utils.read_file(\n",
 "            file_path=COMPARATOR_FILE,\n",
 "            columns=comparator_cols,\n",
-"            geoid=GEOID_COLUMN\n",
+"            geoid=GEOID_COLUMN,\n",
 "        ),\n",
 "        utils.read_file(\n",
 "            file_path=DEMOGRAPHIC_FILE,\n",
@@ -196,13 +199,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"#papermill_description=Summary_stats\n",
+"# papermill_description=Summary_stats\n",
 "population_df = utils.produce_summary_stats(\n",
 "    joined_df=joined_df,\n",
 "    comparator_column=COMPARATOR_COLUMN,\n",
 "    score_column=SCORE_COLUMN,\n",
 "    population_column=TOTAL_POPULATION_COLUMN,\n",
-"    geoid_column=GEOID_COLUMN\n",
+"    geoid_column=GEOID_COLUMN,\n",
 ")\n",
 "population_df"
 ]
@@ -224,18 +227,18 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"#papermill_description=Tract_stats\n",
+"# papermill_description=Tract_stats\n",
 "tract_level_by_identification_df = pd.concat(\n",
 "    [\n",
 "        utils.get_demo_series(\n",
 "            grouping_column=COMPARATOR_COLUMN,\n",
 "            joined_df=joined_df,\n",
-"            demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS\n",
+"            demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n",
 "        ),\n",
 "        utils.get_demo_series(\n",
 "            grouping_column=SCORE_COLUMN,\n",
 "            joined_df=joined_df,\n",
-"            demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS\n",
+"            demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n",
 "        ),\n",
 "    ],\n",
 "    axis=1,\n",
@@ -256,17 +259,25 @@
 "    y=\"Variable\",\n",
 "    x=\"Avg in tracts\",\n",
 "    hue=\"Definition\",\n",
-"    data=tract_level_by_identification_df.sort_values(by=COMPARATOR_COLUMN, ascending=False)\n",
+"    data=tract_level_by_identification_df.sort_values(\n",
+"        by=COMPARATOR_COLUMN, ascending=False\n",
+"    )\n",
 "    .stack()\n",
 "    .reset_index()\n",
 "    .rename(\n",
-"        columns={\"level_0\": \"Variable\", \"level_1\": \"Definition\", 0: \"Avg in tracts\"}\n",
+"        columns={\n",
+"            \"level_0\": \"Variable\",\n",
+"            \"level_1\": \"Definition\",\n",
+"            0: \"Avg in tracts\",\n",
+"        }\n",
 "    ),\n",
 "    palette=\"Blues\",\n",
 ")\n",
 "plt.xlim(0, 1)\n",
 "plt.title(\"Tract level averages by identification strategy\")\n",
-"plt.savefig(os.path.join(OUTPUT_DATA_PATH, \"tract_lvl_avg.jpg\"), bbox_inches='tight')"
+"plt.savefig(\n",
+"    os.path.join(OUTPUT_DATA_PATH, \"tract_lvl_avg.jpg\"), bbox_inches=\"tight\"\n",
+")"
 ]
 },
 {
@@ -276,13 +287,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"#papermill_description=Tract_stats_grouped\n",
+"# papermill_description=Tract_stats_grouped\n",
 "tract_level_by_grouping_df = utils.get_tract_level_grouping(\n",
 "    joined_df=joined_df,\n",
 "    score_column=SCORE_COLUMN,\n",
 "    comparator_column=COMPARATOR_COLUMN,\n",
 "    demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n",
-"    keep_missing_values=KEEP_MISSING_VALUES_FOR_SEGMENTATION\n",
+"    keep_missing_values=KEEP_MISSING_VALUES_FOR_SEGMENTATION,\n",
 ")\n",
 "\n",
 "tract_level_by_grouping_formatted_df = utils.format_multi_index_for_excel(\n",
@@ -315,7 +326,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"#papermill_description=Population_stats\n",
+"# papermill_description=Population_stats\n",
 "population_weighted_stats_df = pd.concat(\n",
 "    [\n",
 "        utils.construct_weighted_statistics(\n",
@@ -363,7 +374,7 @@
 "comparator_and_cejst_proportion_series, states = utils.get_final_summary_info(\n",
 "    population=population_df,\n",
 "    comparator_file=COMPARATOR_FILE,\n",
-"    geoid_col=GEOID_COLUMN\n",
+"    geoid_col=GEOID_COLUMN,\n",
 ")"
 ]
 },
@@ -393,7 +404,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"#papermill_description=Writing_excel\n",
+"# papermill_description=Writing_excel\n",
 "utils.write_single_comparison_excel(\n",
 "    output_excel=OUTPUT_EXCEL,\n",
 "    population_df=population_df,\n",
@@ -401,7 +412,7 @@
 "    population_weighted_stats_df=population_weighted_stats_df,\n",
 "    tract_level_by_grouping_formatted_df=tract_level_by_grouping_formatted_df,\n",
 "    comparator_and_cejst_proportion_series=comparator_and_cejst_proportion_series,\n",
-"    states_text=states_text\n",
+"    states_text=states_text,\n",
 ")"
 ]
 }

View file

@@ -1,7 +1,8 @@
-from dynaconf import Dynaconf
-import data_pipeline
 import pathlib
+
+import data_pipeline
+from dynaconf import Dynaconf
 
 settings = Dynaconf(
     envvar_prefix="DYNACONF",
     settings_files=["settings.toml", ".secrets.toml"],

View file

@@ -427,7 +427,9 @@
 }
 ],
 "source": [
-"for col in [col for col in download_codebook.index.to_list() if \"(percentile)\" in col]:\n",
+"for col in [\n",
+"    col for col in download_codebook.index.to_list() if \"(percentile)\" in col\n",
+"]:\n",
 "    print(f\" - column_name: {col}\")\n",
 "    if \"Low\" not in col:\n",
 "        print(\n",

View file

@@ -11,9 +11,7 @@ from data_pipeline.etl.sources.dot_travel_composite.etl import (
     TravelCompositeETL,
 )
 from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL
-from data_pipeline.etl.sources.fsf_flood_risk.etl import (
-    FloodRiskETL,
-)
+from data_pipeline.etl.sources.fsf_flood_risk.etl import FloodRiskETL
 from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
 from data_pipeline.etl.sources.national_risk_index.etl import (
     NationalRiskIndexETL,

View file

@@ -9,9 +9,7 @@ from data_pipeline.content.schemas.download_schemas import CSVConfig
 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.etl.score import constants
 from data_pipeline.etl.score.etl_utils import check_score_data_source
-from data_pipeline.etl.sources.census.etl_utils import (
-    check_census_data_source,
-)
+from data_pipeline.etl.sources.census.etl_utils import check_census_data_source
 from data_pipeline.score import field_names
 from data_pipeline.utils import get_module_logger
 from data_pipeline.utils import load_dict_from_yaml_object_fields

View file

@@ -9,9 +9,7 @@ from data_pipeline.content.schemas.download_schemas import ExcelConfig
 from data_pipeline.etl.base import ExtractTransformLoad
 from data_pipeline.etl.score.etl_utils import create_codebook
 from data_pipeline.etl.score.etl_utils import floor_series
-from data_pipeline.etl.sources.census.etl_utils import (
-    check_census_data_source,
-)
+from data_pipeline.etl.sources.census.etl_utils import check_census_data_source
 from data_pipeline.score import field_names
 from data_pipeline.utils import column_list_from_yaml_object_fields
 from data_pipeline.utils import get_module_logger

View file

@@ -96,7 +96,7 @@ def floor_series(series: pd.Series, number_of_decimals: int) -> pd.Series:
     if series.isin(unacceptable_values).any():
         series.replace(mapping, regex=False, inplace=True)
-    multiplication_factor = 10 ** number_of_decimals
+    multiplication_factor = 10**number_of_decimals
     # In order to safely cast NaNs
     # First coerce series to float type: series.astype(float)

View file

@@ -5,9 +5,7 @@ from pathlib import Path
 import pandas.api.types as ptypes
 import pandas.testing as pdt
-from data_pipeline.content.schemas.download_schemas import (
-    CSVConfig,
-)
+from data_pipeline.content.schemas.download_schemas import CSVConfig
 from data_pipeline.etl.score import constants
 from data_pipeline.utils import load_yaml_dict_from_file

View file

@@ -4,7 +4,6 @@ from data_pipeline.utils import get_module_logger
 from data_pipeline.utils import remove_all_from_dir
 from data_pipeline.utils import remove_files_from_dir
-
 logger = get_module_logger(__name__)

View file

@@ -211,7 +211,9 @@
 }
 ],
 "source": [
-"tmp = sns.FacetGrid(data=score_m, col=\"Urban Heuristic Flag\", col_wrap=2, height=7)\n",
+"tmp = sns.FacetGrid(\n",
+"    data=score_m, col=\"Urban Heuristic Flag\", col_wrap=2, height=7\n",
+")\n",
 "tmp.map(\n",
 "    sns.distplot,\n",
 "    \"Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)\",\n",
@@ -250,7 +252,9 @@
 ")\n",
 "\n",
 "nri_with_flag[\"total_ag_loss\"] = nri_with_flag.filter(like=\"EALA\").sum(axis=1)\n",
-"nri_with_flag[\"total_ag_loss_pctile\"] = nri_with_flag[\"total_ag_loss\"].rank(pct=True)\n",
+"nri_with_flag[\"total_ag_loss_pctile\"] = nri_with_flag[\"total_ag_loss\"].rank(\n",
+"    pct=True\n",
+")\n",
 "\n",
 "nri_with_flag.groupby(\"Urban Heuristic Flag\")[\"total_ag_loss_pctile\"].mean()"
 ]
@@ -779,9 +783,9 @@
 "    \"Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?\"\n",
 "].astype(int)\n",
 "\n",
-"score_m_adjusted_tracts = set(score_m[score_m[\"adjusted\"] > 0][\"GEOID10_TRACT\"]).union(\n",
-"    all_ag_loss_tracts\n",
-")\n",
+"score_m_adjusted_tracts = set(\n",
+"    score_m[score_m[\"adjusted\"] > 0][\"GEOID10_TRACT\"]\n",
+").union(all_ag_loss_tracts)\n",
 "display(len(set(all_scorem_tracts).difference(score_m_adjusted_tracts)))"
 ]
 },
@@ -832,7 +836,11 @@
 "    left_clip = nri_with_flag[nri_with_flag[\"Urban Heuristic Flag\"] == 0][\n",
 "        \"AGRIVALUE\"\n",
 "    ].quantile(threshold)\n",
-"    print(\"At threshold {:.2f}, minimum value is ${:,.0f}\".format(threshold, left_clip))\n",
+"    print(\n",
+"        \"At threshold {:.2f}, minimum value is ${:,.0f}\".format(\n",
+"            threshold, left_clip\n",
+"        )\n",
+"    )\n",
 "    tmp_value = nri_with_flag[\"AGRIVALUE\"].clip(lower=left_clip)\n",
 "    nri_with_flag[\"total_ag_loss_pctile_{:.2f}\".format(threshold)] = (\n",
 "        nri_with_flag[\"total_ag_loss\"] / tmp_value\n",
@@ -889,7 +897,9 @@
 "    .set_index(\"Left clip value\")[[\"Rural\", \"Urban\"]]\n",
 "    .stack()\n",
 "    .reset_index()\n",
-"    .rename(columns={\"level_1\": \"Tract classification\", 0: \"Average percentile\"})\n",
+"    .rename(\n",
+"        columns={\"level_1\": \"Tract classification\", 0: \"Average percentile\"}\n",
+"    )\n",
 ")"
 ]
 },

View file

@@ -21,6 +21,7 @@
 "source": [
 "import os\n",
 "import sys\n",
+"\n",
 "module_path = os.path.abspath(os.path.join(\"../..\"))\n",
 "if module_path not in sys.path:\n",
 "    sys.path.append(module_path)"
@@ -94,9 +95,13 @@
 "bia_aian_supplemental_geojson = (\n",
 "    GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_AIAN_Supplemental.json\"\n",
 ")\n",
-"bia_tsa_geojson_geojson = GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_TSA.json\"\n",
+"bia_tsa_geojson_geojson = (\n",
+"    GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_TSA.json\"\n",
+")\n",
 "alaska_native_villages_geojson = (\n",
-"    GEOJSON_BASE_PATH / \"alaska_native_villages\" / \"AlaskaNativeVillages.gdb.geojson\"\n",
+"    GEOJSON_BASE_PATH\n",
+"    / \"alaska_native_villages\"\n",
+"    / \"AlaskaNativeVillages.gdb.geojson\"\n",
 ")"
 ]
 },
@@ -131,7 +136,9 @@
 "len(\n",
 "    sorted(\n",
 "        list(\n",
-"            bia_national_lar_df.LARName.str.replace(r\"\\(.*\\) \", \"\", regex=True).unique()\n",
+"            bia_national_lar_df.LARName.str.replace(\n",
+"                r\"\\(.*\\) \", \"\", regex=True\n",
+"            ).unique()\n",
 "        )\n",
 "    )\n",
 ")"

View file

@@ -45,6 +45,7 @@
 "source": [
 "# Read in the score geojson file\n",
 "from data_pipeline.etl.score.constants import DATA_SCORE_CSV_TILES_FILE_PATH\n",
+"\n",
 "nation = gpd.read_file(DATA_SCORE_CSV_TILES_FILE_PATH)"
 ]
 },
@@ -93,10 +94,14 @@
 "    random_tile_features = json.loads(f.read())\n",
 "\n",
 "# Flatten data around the features key:\n",
-"flatten_features = pd.json_normalize(random_tile_features, record_path=[\"features\"])\n",
+"flatten_features = pd.json_normalize(\n",
+"    random_tile_features, record_path=[\"features\"]\n",
+")\n",
 "\n",
 "# index into the feature properties, get keys and turn into a sorted list\n",
-"random_tile = sorted(list(flatten_features[\"features\"][0][0][\"properties\"].keys()))"
+"random_tile = sorted(\n",
+"    list(flatten_features[\"features\"][0][0][\"properties\"].keys())\n",
+")"
 ]
 },
 {
@@ -291,8 +296,8 @@
 }
 ],
 "source": [
-"nation_HRS_GEO = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'FUDS_ET']]\n",
-"nation_HRS_GEO.loc[nation_HRS_GEO['FUDS_ET'] == '0']"
+"nation_HRS_GEO = nation[[\"GEOID10\", \"SF\", \"CF\", \"HRS_ET\", \"AML_ET\", \"FUDS_ET\"]]\n",
+"nation_HRS_GEO.loc[nation_HRS_GEO[\"FUDS_ET\"] == \"0\"]"
 ]
 },
 {
@@ -321,7 +326,7 @@
 }
 ],
 "source": [
-"nation['HRS_ET'].unique()"
+"nation[\"HRS_ET\"].unique()"
 ]
 }
 ],

View file

@@ -18,7 +18,10 @@
 "    sys.path.append(module_path)\n",
 "\n",
 "from data_pipeline.config import settings\n",
-"from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries, get_tract_geojson\n"
+"from data_pipeline.etl.sources.geo_utils import (\n",
+"    add_tracts_for_geometries,\n",
+"    get_tract_geojson,\n",
+")"
 ]
 },
 {
@@ -655,9 +658,9 @@
 }
 ],
 "source": [
-"adjacent_tracts.groupby(\"ORIGINAL_TRACT\")[[\"included\"]].mean().reset_index().rename(\n",
-"    columns={\"ORIGINAL_TRACT\": \"GEOID10_TRACT\"}\n",
-")"
+"adjacent_tracts.groupby(\"ORIGINAL_TRACT\")[\n",
+"    [\"included\"]\n",
+"].mean().reset_index().rename(columns={\"ORIGINAL_TRACT\": \"GEOID10_TRACT\"})"
 ]
 },
 {

View file

@@ -65,7 +65,8 @@
 "tmp_path.mkdir(parents=True, exist_ok=True)\n",
 "\n",
 "eamlis_path_in_s3 = (\n",
-"    settings.AWS_JUSTICE40_DATASOURCES_URL + \"/eAMLIS export of all data.tsv.zip\"\n",
+"    settings.AWS_JUSTICE40_DATASOURCES_URL\n",
+"    + \"/eAMLIS export of all data.tsv.zip\"\n",
 ")\n",
 "\n",
 "unzip_file_from_url(\n",

View file

@@ -460,7 +460,9 @@
 "outputs": [],
 "source": [
 "object_ids_to_keep = set(\n",
-"    merged_exaple_data[merged_exaple_data[\"_merge\"] == \"both\"].OBJECTID.astype(\"int\")\n",
+"    merged_exaple_data[merged_exaple_data[\"_merge\"] == \"both\"].OBJECTID.astype(\n",
+"        \"int\"\n",
+"    )\n",
 ")\n",
 "features = []\n",
 "for feature in raw_fuds_geojson[\"features\"]:\n",
@@ -476,7 +478,11 @@
 "outputs": [],
 "source": [
 "def make_fake_feature(\n",
-"    state: str, has_projects: bool, is_eligible: bool, latitude: float, longitude: float\n",
+"    state: str,\n",
+"    has_projects: bool,\n",
+"    is_eligible: bool,\n",
+"    latitude: float,\n",
+"    longitude: float,\n",
 "):\n",
 "    \"\"\"For tracts where we don't have a FUDS, fake one.\"\"\"\n",
 "    make_fake_feature._object_id += 1\n",
@@ -537,7 +543,9 @@
 "# Create FUDS in CA for each tract that doesn't have a FUDS\n",
 "for tract_id, point in points.items():\n",
 "    for bools in [(True, True), (True, False), (False, False)]:\n",
-"        features.append(make_fake_feature(\"CA\", bools[0], bools[1], point.y, point.x))"
+"        features.append(\n",
+"            make_fake_feature(\"CA\", bools[0], bools[1], point.y, point.x)\n",
+"        )"
 ]
 },
 {
@@ -596,9 +604,9 @@
 }
 ],
 "source": [
-"test_frame_with_tracts_full = test_frame_with_tracts = add_tracts_for_geometries(\n",
-"    test_frame\n",
-")"
+"test_frame_with_tracts_full = (\n",
+"    test_frame_with_tracts\n",
+") = add_tracts_for_geometries(test_frame)"
 ]
 },
 {
@@ -680,7 +688,9 @@
 }
 ],
 "source": [
-"tracts = test_frame_with_tracts_full[[\"GEOID10_TRACT\", \"geometry\"]].drop_duplicates()\n",
+"tracts = test_frame_with_tracts_full[\n",
+"    [\"GEOID10_TRACT\", \"geometry\"]\n",
+"].drop_duplicates()\n",
 "tracts[\"lat_long\"] = test_frame_with_tracts_full.geometry.apply(\n",
 "    lambda point: (point.x, point.y)\n",
 ")\n",

View file

@@ -13,7 +13,7 @@
 "import geopandas as gpd\n",
 "\n",
 "# Read in the above json file\n",
-"nation=gpd.read_file(\"/Users/vims/Downloads/usa-high-1822-637b.json\")"
+"nation = gpd.read_file(\"/Users/vims/Downloads/usa-high-1822-637b.json\")"
 ]
 },
 {
@@ -45,7 +45,7 @@
 }
 ],
 "source": [
-"nation['FUDS_RAW']"
+"nation[\"FUDS_RAW\"]"
 ]
 },
 {
@@ -248,7 +248,18 @@
 }
 ],
 "source": [
-"nation_new_ind = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'AML_RAW','FUDS_ET', 'FUDS_RAW']]\n",
+"nation_new_ind = nation[\n",
+"    [\n",
+"        \"GEOID10\",\n",
+"        \"SF\",\n",
+"        \"CF\",\n",
+"        \"HRS_ET\",\n",
+"        \"AML_ET\",\n",
+"        \"AML_RAW\",\n",
+"        \"FUDS_ET\",\n",
+"        \"FUDS_RAW\",\n",
+"    ]\n",
+"]\n",
 "nation_new_ind"
 ]
 },
@@ -270,7 +281,7 @@
 }
 ],
 "source": [
-"nation_new_ind['HRS_ET'].unique()"
+"nation_new_ind[\"HRS_ET\"].unique()"
 ]
 },
 {
@@ -293,7 +304,7 @@
 }
 ],
 "source": [
-"nation_new_ind['HRS_ET'].value_counts()"
+"nation_new_ind[\"HRS_ET\"].value_counts()"
 ]
 },
 {
@@ -314,7 +325,7 @@
 }
 ],
 "source": [
-"nation_new_ind['AML_ET'].unique()"
+"nation_new_ind[\"AML_ET\"].unique()"
 ]
 },
 {
@@ -337,7 +348,7 @@
 }
 ],
 "source": [
-"nation_new_ind['AML_ET'].value_counts()"
+"nation_new_ind[\"AML_ET\"].value_counts()"
 ]
 },
 {
@@ -358,7 +369,7 @@
 }
 ],
 "source": [
-"nation_new_ind['AML_RAW'].unique()"
+"nation_new_ind[\"AML_RAW\"].unique()"
 ]
 },
 {
@@ -380,7 +391,7 @@
 }
 ],
 "source": [
-"nation_new_ind['AML_RAW'].value_counts()"
+"nation_new_ind[\"AML_RAW\"].value_counts()"
 ]
 },
 {
@@ -401,7 +412,7 @@
 }
 ],
 "source": [
-"nation_new_ind['FUDS_ET'].unique()"
+"nation_new_ind[\"FUDS_ET\"].unique()"
 ]
 },
 {
@@ -424,7 +435,7 @@
 }
 ],
 "source": [
-"nation_new_ind['FUDS_ET'].value_counts()"
+"nation_new_ind[\"FUDS_ET\"].value_counts()"
 ]
 },
 {
@@ -445,7 +456,7 @@
 }
 ],
 "source": [
-"nation_new_ind['FUDS_RAW'].unique()"
+"nation_new_ind[\"FUDS_RAW\"].unique()"
 ]
 },
 {
@@ -468,7 +479,7 @@
 }
 ],
 "source": [
-"nation_new_ind['FUDS_RAW'].value_counts()"
+"nation_new_ind[\"FUDS_RAW\"].value_counts()"
 ]
 }
 ],

View file

@@ -36,8 +36,8 @@
 "    engine=\"pyogrio\",\n",
 ")\n",
 "end = time.time()\n",
-"    \n",
-"print(\"Time taken to execute the function using pyogrio is\", end-begin)"
+"\n",
+"print(\"Time taken to execute the function using pyogrio is\", end - begin)"
 ]
 },
 {
@@ -59,11 +59,13 @@
 "census_tract_gdf = gpd.read_file(\n",
 "    CensusETL.NATIONAL_TRACT_JSON_PATH,\n",
 "    engine=\"fiona\",\n",
-"    include_fields=[\"GEOID10\"]\n",
+"    include_fields=[\"GEOID10\"],\n",
 ")\n",
 "end2 = time.time()\n",
-"    \n",
-"print(\"Time taken to execute the function using include fields is\", end2-begin2)"
+"\n",
+"print(\n",
+"    \"Time taken to execute the function using include fields is\", end2 - begin2\n",
+")"
 ]
 },
 {

View file

@@ -1369,7 +1369,9 @@
 "\n",
 "results = results.reset_index()\n",
 "\n",
-"results.to_csv(\"~/Downloads/tribal_area_as_a_share_of_tract_area.csv\", index=False)\n",
+"results.to_csv(\n",
+"    \"~/Downloads/tribal_area_as_a_share_of_tract_area.csv\", index=False\n",
+")\n",
 "\n",
 "# Printing results\n",
 "print(results)"

View file

@@ -11,7 +11,6 @@ from data_pipeline.etl.score.constants import TILES_ISLAND_AREA_FIPS_CODES
 from data_pipeline.score import field_names
 from data_pipeline.score.field_names import GEOID_TRACT_FIELD
-
 pytestmark = pytest.mark.smoketest
 UNMATCHED_TRACT_THRESHOLD = 1000

View file

@@ -1,9 +1,7 @@
 # pylint: disable=protected-access
 import pathlib
-from data_pipeline.etl.sources.doe_energy_burden.etl import (
-    DOEEnergyBurden,
-)
+from data_pipeline.etl.sources.doe_energy_burden.etl import DOEEnergyBurden
 from data_pipeline.tests.sources.example.test_etl import TestETL
 from data_pipeline.utils import get_module_logger

View file

@@ -3,9 +3,7 @@ import pathlib
 from unittest import mock
 from data_pipeline.etl.base import ValidGeoLevel
-from data_pipeline.etl.sources.eamlis.etl import (
-    AbandonedMineETL,
-)
+from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL
 from data_pipeline.tests.sources.example.test_etl import TestETL
 from data_pipeline.utils import get_module_logger

View file

@@ -3,9 +3,7 @@ import pathlib
 from unittest import mock
 from data_pipeline.etl.base import ValidGeoLevel
-from data_pipeline.etl.sources.us_army_fuds.etl import (
-    USArmyFUDS,
-)
+from data_pipeline.etl.sources.us_army_fuds.etl import USArmyFUDS
 from data_pipeline.tests.sources.example.test_etl import TestETL
 from data_pipeline.utils import get_module_logger

View file

@@ -20,7 +20,6 @@ from data_pipeline.content.schemas.download_schemas import ExcelConfig
 from marshmallow import ValidationError
 from marshmallow_dataclass import class_schema
-
 ## zlib is not available on all systems
 try:
     import zlib  # noqa # pylint: disable=unused-import

View file

@@ -93,6 +93,8 @@ disable = [
   "R0801", # Disables duplicate code. There are a couple places we have similar code and
   # unfortunately you can't disable this rule for individual lines or files, it's a
   # known bug. https://github.com/PyCQA/pylint/issues/214#
+  "C0411", # Disables wrong-import-order. Import order is now enforced by isort as a
+  # pre-commit hook.
 ]
 
 [tool.pylint.FORMAT]