From 5ff988ab292506608ce6c8f4110b74b8ff3b3c74 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Wed, 28 Sep 2022 13:35:52 -0400 Subject: [PATCH] updating pylint --- data/data-pipeline/.pre-commit-config.yaml | 13 +++-- .../src/tract_comparison__template.ipynb | 55 +++++++++++-------- data/data-pipeline/data_pipeline/config.py | 5 +- .../content/config/scratch.ipynb | 4 +- .../data_pipeline/etl/score/etl_score.py | 4 +- .../data_pipeline/etl/score/etl_score_geo.py | 4 +- .../data_pipeline/etl/score/etl_score_post.py | 4 +- .../data_pipeline/etl/score/etl_utils.py | 2 +- .../etl/score/tests/test_score_post.py | 4 +- .../etl/sources/tribal/etl_utils.py | 1 - .../ipython/agricultural_loss_indicator.ipynb | 24 +++++--- .../ipython/check_tribal_count.ipynb | 13 ++++- .../compare_tiles_and_geoJson_files.ipynb | 15 +++-- .../ipython/explore_adjacency.ipynb | 11 ++-- .../ipython/explore_eamlis.ipynb | 3 +- .../ipython/generate_fuds_test_data.ipynb | 24 +++++--- .../ipython/geojson_compare_tiles.ipynb | 37 ++++++++----- .../ipython/geopandas_speed_test.ipynb | 12 ++-- .../ipython/tribal_and_tracts_overlap.ipynb | 4 +- .../data_pipeline/tests/score/test_output.py | 1 - .../sources/doe_energy_burden/test_etl.py | 4 +- .../tests/sources/eamlis/test_etl.py | 4 +- .../tests/sources/us_army_fuds/test_etl.py | 4 +- data/data-pipeline/data_pipeline/utils.py | 1 - data/data-pipeline/pyproject.toml | 2 + 25 files changed, 154 insertions(+), 101 deletions(-) diff --git a/data/data-pipeline/.pre-commit-config.yaml b/data/data-pipeline/.pre-commit-config.yaml index 46cefd3b..afc79e54 100644 --- a/data/data-pipeline/.pre-commit-config.yaml +++ b/data/data-pipeline/.pre-commit-config.yaml @@ -18,14 +18,17 @@ repos: "--ignore-init-module-imports", ] -- repo: https://github.com/asottile/reorder_python_imports - rev: v3.8.3 +- repo: https://github.com/pycqa/isort + rev: 5.10.1 hooks: - - id: reorder-python-imports - language_version: python3.9 + - id: isort + name: isort (python) args: [ - "--application-directories=.", + "--force-single-line-imports", + "--profile=black", + "--line-length=80", + "--src-path=.:data/data-pipeline" ] - repo: https://github.com/ambv/black diff --git a/data/data-pipeline/data_pipeline/comparison_tool/src/tract_comparison__template.ipynb b/data/data-pipeline/data_pipeline/comparison_tool/src/tract_comparison__template.ipynb index 10fc8616..915b84fa 100644 --- a/data/data-pipeline/data_pipeline/comparison_tool/src/tract_comparison__template.ipynb +++ b/data/data-pipeline/data_pipeline/comparison_tool/src/tract_comparison__template.ipynb @@ -16,7 +16,7 @@ "import matplotlib.pyplot as plt\n", "\n", "from data_pipeline.score import field_names\n", - "from data_pipeline.comparison_tool.src import utils \n", + "from data_pipeline.comparison_tool.src import utils\n", "\n", "pd.options.display.float_format = \"{:,.3f}\".format\n", "%load_ext lab_black" @@ -128,9 +128,7 @@ "metadata": {}, "outputs": [], "source": [ - "utils.validate_new_data(\n", - " file_path=COMPARATOR_FILE, score_col=COMPARATOR_COLUMN\n", - ")" + "utils.validate_new_data(file_path=COMPARATOR_FILE, score_col=COMPARATOR_COLUMN)" ] }, { @@ -148,20 +146,25 @@ "metadata": {}, "outputs": [], "source": [ - "comparator_cols = [COMPARATOR_COLUMN] + OTHER_COMPARATOR_COLUMNS if OTHER_COMPARATOR_COLUMNS else [COMPARATOR_COLUMN]\n", + "comparator_cols = (\n", + " [COMPARATOR_COLUMN] + OTHER_COMPARATOR_COLUMNS\n", + " if OTHER_COMPARATOR_COLUMNS\n", + " else [COMPARATOR_COLUMN]\n", + ")\n", "\n", - "#papermill_description=Loading_data\n", + "# papermill_description=Loading_data\n", "joined_df = pd.concat(\n", " [\n", " utils.read_file(\n", " file_path=SCORE_FILE,\n", - " columns=[TOTAL_POPULATION_COLUMN, SCORE_COLUMN] + ADDITIONAL_DEMO_COLUMNS,\n", + " columns=[TOTAL_POPULATION_COLUMN, SCORE_COLUMN]\n", + " + ADDITIONAL_DEMO_COLUMNS,\n", " geoid=GEOID_COLUMN,\n", " ),\n", " utils.read_file(\n", " file_path=COMPARATOR_FILE,\n", " columns=comparator_cols,\n", - " geoid=GEOID_COLUMN\n", + " geoid=GEOID_COLUMN,\n", " ),\n", " utils.read_file(\n", " file_path=DEMOGRAPHIC_FILE,\n", @@ -196,13 +199,13 @@ "metadata": {}, "outputs": [], "source": [ - "#papermill_description=Summary_stats\n", + "# papermill_description=Summary_stats\n", "population_df = utils.produce_summary_stats(\n", " joined_df=joined_df,\n", " comparator_column=COMPARATOR_COLUMN,\n", " score_column=SCORE_COLUMN,\n", " population_column=TOTAL_POPULATION_COLUMN,\n", - " geoid_column=GEOID_COLUMN\n", + " geoid_column=GEOID_COLUMN,\n", ")\n", "population_df" ] @@ -224,18 +227,18 @@ "metadata": {}, "outputs": [], "source": [ - "#papermill_description=Tract_stats\n", + "# papermill_description=Tract_stats\n", "tract_level_by_identification_df = pd.concat(\n", " [\n", " utils.get_demo_series(\n", " grouping_column=COMPARATOR_COLUMN,\n", " joined_df=joined_df,\n", - " demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS\n", + " demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n", " ),\n", " utils.get_demo_series(\n", " grouping_column=SCORE_COLUMN,\n", " joined_df=joined_df,\n", - " demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS\n", + " demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n", " ),\n", " ],\n", " axis=1,\n", @@ -256,17 +259,25 @@ " y=\"Variable\",\n", " x=\"Avg in tracts\",\n", " hue=\"Definition\",\n", - " data=tract_level_by_identification_df.sort_values(by=COMPARATOR_COLUMN, ascending=False)\n", + " data=tract_level_by_identification_df.sort_values(\n", + " by=COMPARATOR_COLUMN, ascending=False\n", + " )\n", " .stack()\n", " .reset_index()\n", " .rename(\n", - " columns={\"level_0\": \"Variable\", \"level_1\": \"Definition\", 0: \"Avg in tracts\"}\n", + " columns={\n", + " \"level_0\": \"Variable\",\n", + " \"level_1\": \"Definition\",\n", + " 0: \"Avg in tracts\",\n", + " }\n", " ),\n", " palette=\"Blues\",\n", ")\n", "plt.xlim(0, 1)\n", "plt.title(\"Tract level averages by identification strategy\")\n", - "plt.savefig(os.path.join(OUTPUT_DATA_PATH, \"tract_lvl_avg.jpg\"), bbox_inches='tight')" + "plt.savefig(\n", + " os.path.join(OUTPUT_DATA_PATH, \"tract_lvl_avg.jpg\"), bbox_inches=\"tight\"\n", + ")" ] }, { @@ -276,13 +287,13 @@ "metadata": {}, "outputs": [], "source": [ - "#papermill_description=Tract_stats_grouped\n", + "# papermill_description=Tract_stats_grouped\n", "tract_level_by_grouping_df = utils.get_tract_level_grouping(\n", " joined_df=joined_df,\n", " score_column=SCORE_COLUMN,\n", " comparator_column=COMPARATOR_COLUMN,\n", " demo_columns=ADDITIONAL_DEMO_COLUMNS + DEMOGRAPHIC_COLUMNS,\n", - " keep_missing_values=KEEP_MISSING_VALUES_FOR_SEGMENTATION\n", + " keep_missing_values=KEEP_MISSING_VALUES_FOR_SEGMENTATION,\n", ")\n", "\n", "tract_level_by_grouping_formatted_df = utils.format_multi_index_for_excel(\n", @@ -315,7 +326,7 @@ "metadata": {}, "outputs": [], "source": [ - "#papermill_description=Population_stats\n", + "# papermill_description=Population_stats\n", "population_weighted_stats_df = pd.concat(\n", " [\n", " utils.construct_weighted_statistics(\n", @@ -363,7 +374,7 @@ "comparator_and_cejst_proportion_series, states = utils.get_final_summary_info(\n", " population=population_df,\n", " comparator_file=COMPARATOR_FILE,\n", - " geoid_col=GEOID_COLUMN\n", + " geoid_col=GEOID_COLUMN,\n", ")" ] }, @@ -393,7 +404,7 @@ "metadata": {}, "outputs": [], "source": [ - "#papermill_description=Writing_excel\n", + "# papermill_description=Writing_excel\n", "utils.write_single_comparison_excel(\n", " output_excel=OUTPUT_EXCEL,\n", " population_df=population_df,\n", @@ -401,7 +412,7 @@ " population_weighted_stats_df=population_weighted_stats_df,\n", " tract_level_by_grouping_formatted_df=tract_level_by_grouping_formatted_df,\n", " comparator_and_cejst_proportion_series=comparator_and_cejst_proportion_series,\n", - " states_text=states_text\n", + " states_text=states_text,\n", ")" ] } diff --git a/data/data-pipeline/data_pipeline/config.py b/data/data-pipeline/data_pipeline/config.py index 89a0f1fd..ee75e535 100644 --- a/data/data-pipeline/data_pipeline/config.py +++ b/data/data-pipeline/data_pipeline/config.py @@ -1,7 +1,8 @@ -from dynaconf import Dynaconf -import data_pipeline import pathlib +import data_pipeline +from dynaconf import Dynaconf + settings = Dynaconf( envvar_prefix="DYNACONF", settings_files=["settings.toml", ".secrets.toml"], diff --git a/data/data-pipeline/data_pipeline/content/config/scratch.ipynb b/data/data-pipeline/data_pipeline/content/config/scratch.ipynb index e2535b7e..2ad828b2 100644 --- a/data/data-pipeline/data_pipeline/content/config/scratch.ipynb +++ b/data/data-pipeline/data_pipeline/content/config/scratch.ipynb @@ -427,7 +427,9 @@ } ], "source": [ - "for col in [col for col in download_codebook.index.to_list() if \"(percentile)\" in col]:\n", + "for col in [\n", + " col for col in download_codebook.index.to_list() if \"(percentile)\" in col\n", + "]:\n", " print(f\" - column_name: {col}\")\n", " if \"Low\" not in col:\n", " print(\n", diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index faf533c5..1f035276 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -11,9 +11,7 @@ from data_pipeline.etl.sources.dot_travel_composite.etl import ( TravelCompositeETL, ) from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL -from data_pipeline.etl.sources.fsf_flood_risk.etl import ( - FloodRiskETL, -) +from data_pipeline.etl.sources.fsf_flood_risk.etl import FloodRiskETL from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL from data_pipeline.etl.sources.national_risk_index.etl import ( NationalRiskIndexETL, diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py index 89a783a3..3f28b7ee 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_geo.py @@ -9,9 +9,7 @@ from data_pipeline.content.schemas.download_schemas import CSVConfig from data_pipeline.etl.base import ExtractTransformLoad from data_pipeline.etl.score import constants from data_pipeline.etl.score.etl_utils import check_score_data_source -from data_pipeline.etl.sources.census.etl_utils import ( - check_census_data_source, -) +from data_pipeline.etl.sources.census.etl_utils import check_census_data_source from data_pipeline.score import field_names from data_pipeline.utils import get_module_logger from data_pipeline.utils import load_dict_from_yaml_object_fields diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index 5013ef21..74e64f62 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -9,9 +9,7 @@ from data_pipeline.content.schemas.download_schemas import ExcelConfig from data_pipeline.etl.base import ExtractTransformLoad from data_pipeline.etl.score.etl_utils import create_codebook from data_pipeline.etl.score.etl_utils import floor_series -from data_pipeline.etl.sources.census.etl_utils import ( - check_census_data_source, -) +from data_pipeline.etl.sources.census.etl_utils import check_census_data_source from data_pipeline.score import field_names from data_pipeline.utils import column_list_from_yaml_object_fields from data_pipeline.utils import get_module_logger diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_utils.py b/data/data-pipeline/data_pipeline/etl/score/etl_utils.py index 1817a4f1..66ee7131 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_utils.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_utils.py @@ -96,7 +96,7 @@ def floor_series(series: pd.Series, number_of_decimals: int) -> pd.Series: if series.isin(unacceptable_values).any(): series.replace(mapping, regex=False, inplace=True) - multiplication_factor = 10 ** number_of_decimals + multiplication_factor = 10**number_of_decimals # In order to safely cast NaNs # First coerce series to float type: series.astype(float) diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py index 01d699f6..408a7072 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py @@ -5,9 +5,7 @@ from pathlib import Path import pandas.api.types as ptypes import pandas.testing as pdt -from data_pipeline.content.schemas.download_schemas import ( - CSVConfig, -) +from data_pipeline.content.schemas.download_schemas import CSVConfig from data_pipeline.etl.score import constants from data_pipeline.utils import load_yaml_dict_from_file diff --git a/data/data-pipeline/data_pipeline/etl/sources/tribal/etl_utils.py b/data/data-pipeline/data_pipeline/etl/sources/tribal/etl_utils.py index 8efe14d8..ea97db1c 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/tribal/etl_utils.py +++ b/data/data-pipeline/data_pipeline/etl/sources/tribal/etl_utils.py @@ -4,7 +4,6 @@ from data_pipeline.utils import get_module_logger from data_pipeline.utils import remove_all_from_dir from data_pipeline.utils import remove_files_from_dir - logger = get_module_logger(__name__) diff --git a/data/data-pipeline/data_pipeline/ipython/agricultural_loss_indicator.ipynb b/data/data-pipeline/data_pipeline/ipython/agricultural_loss_indicator.ipynb index b7a0efb1..0463d27a 100644 --- a/data/data-pipeline/data_pipeline/ipython/agricultural_loss_indicator.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/agricultural_loss_indicator.ipynb @@ -211,7 +211,9 @@ } ], "source": [ - "tmp = sns.FacetGrid(data=score_m, col=\"Urban Heuristic Flag\", col_wrap=2, height=7)\n", + "tmp = sns.FacetGrid(\n", + " data=score_m, col=\"Urban Heuristic Flag\", col_wrap=2, height=7\n", + ")\n", "tmp.map(\n", " sns.distplot,\n", " \"Expected agricultural loss rate (Natural Hazards Risk Index) (percentile)\",\n", @@ -250,7 +252,9 @@ ")\n", "\n", "nri_with_flag[\"total_ag_loss\"] = nri_with_flag.filter(like=\"EALA\").sum(axis=1)\n", - "nri_with_flag[\"total_ag_loss_pctile\"] = nri_with_flag[\"total_ag_loss\"].rank(pct=True)\n", + "nri_with_flag[\"total_ag_loss_pctile\"] = nri_with_flag[\"total_ag_loss\"].rank(\n", + " pct=True\n", + ")\n", "\n", "nri_with_flag.groupby(\"Urban Heuristic Flag\")[\"total_ag_loss_pctile\"].mean()" ] @@ -779,9 +783,9 @@ " \"Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?\"\n", "].astype(int)\n", "\n", - "score_m_adjusted_tracts = set(score_m[score_m[\"adjusted\"] > 0][\"GEOID10_TRACT\"]).union(\n", - " all_ag_loss_tracts\n", - ")\n", + "score_m_adjusted_tracts = set(\n", + " score_m[score_m[\"adjusted\"] > 0][\"GEOID10_TRACT\"]\n", + ").union(all_ag_loss_tracts)\n", "display(len(set(all_scorem_tracts).difference(score_m_adjusted_tracts)))" ] }, @@ -832,7 +836,11 @@ " left_clip = nri_with_flag[nri_with_flag[\"Urban Heuristic Flag\"] == 0][\n", " \"AGRIVALUE\"\n", " ].quantile(threshold)\n", - " print(\"At threshold {:.2f}, minimum value is ${:,.0f}\".format(threshold, left_clip))\n", + " print(\n", + " \"At threshold {:.2f}, minimum value is ${:,.0f}\".format(\n", + " threshold, left_clip\n", + " )\n", + " )\n", " tmp_value = nri_with_flag[\"AGRIVALUE\"].clip(lower=left_clip)\n", " nri_with_flag[\"total_ag_loss_pctile_{:.2f}\".format(threshold)] = (\n", " nri_with_flag[\"total_ag_loss\"] / tmp_value\n", @@ -889,7 +897,9 @@ " .set_index(\"Left clip value\")[[\"Rural\", \"Urban\"]]\n", " .stack()\n", " .reset_index()\n", - " .rename(columns={\"level_1\": \"Tract classification\", 0: \"Average percentile\"})\n", + " .rename(\n", + " columns={\"level_1\": \"Tract classification\", 0: \"Average percentile\"}\n", + " )\n", ")" ] }, diff --git a/data/data-pipeline/data_pipeline/ipython/check_tribal_count.ipynb b/data/data-pipeline/data_pipeline/ipython/check_tribal_count.ipynb index 3451f325..7ea4814b 100644 --- a/data/data-pipeline/data_pipeline/ipython/check_tribal_count.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/check_tribal_count.ipynb @@ -21,6 +21,7 @@ "source": [ "import os\n", "import sys\n", + "\n", "module_path = os.path.abspath(os.path.join(\"../..\"))\n", "if module_path not in sys.path:\n", " sys.path.append(module_path)" @@ -94,9 +95,13 @@ "bia_aian_supplemental_geojson = (\n", " GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_AIAN_Supplemental.json\"\n", ")\n", - "bia_tsa_geojson_geojson = GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_TSA.json\"\n", + "bia_tsa_geojson_geojson = (\n", + " GEOJSON_BASE_PATH / \"bia_national_lar\" / \"BIA_TSA.json\"\n", + ")\n", "alaska_native_villages_geojson = (\n", - " GEOJSON_BASE_PATH / \"alaska_native_villages\" / \"AlaskaNativeVillages.gdb.geojson\"\n", + " GEOJSON_BASE_PATH\n", + " / \"alaska_native_villages\"\n", + " / \"AlaskaNativeVillages.gdb.geojson\"\n", ")" ] }, @@ -131,7 +136,9 @@ "len(\n", " sorted(\n", " list(\n", - " bia_national_lar_df.LARName.str.replace(r\"\\(.*\\) \", \"\", regex=True).unique()\n", + " bia_national_lar_df.LARName.str.replace(\n", + " r\"\\(.*\\) \", \"\", regex=True\n", + " ).unique()\n", " )\n", " )\n", ")" diff --git a/data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb b/data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb index f3585578..0eabedc0 100644 --- a/data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/compare_tiles_and_geoJson_files.ipynb @@ -45,6 +45,7 @@ "source": [ "# Read in the score geojson file\n", "from data_pipeline.etl.score.constants import DATA_SCORE_CSV_TILES_FILE_PATH\n", + "\n", "nation = gpd.read_file(DATA_SCORE_CSV_TILES_FILE_PATH)" ] }, @@ -93,10 +94,14 @@ " random_tile_features = json.loads(f.read())\n", "\n", "# Flatten data around the features key:\n", - "flatten_features = pd.json_normalize(random_tile_features, record_path=[\"features\"])\n", + "flatten_features = pd.json_normalize(\n", + " random_tile_features, record_path=[\"features\"]\n", + ")\n", "\n", "# index into the feature properties, get keys and turn into a sorted list\n", - "random_tile = sorted(list(flatten_features[\"features\"][0][0][\"properties\"].keys()))" + "random_tile = sorted(\n", + " list(flatten_features[\"features\"][0][0][\"properties\"].keys())\n", + ")" ] }, { @@ -291,8 +296,8 @@ } ], "source": [ - "nation_HRS_GEO = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'FUDS_ET']]\n", - "nation_HRS_GEO.loc[nation_HRS_GEO['FUDS_ET'] == '0']" + "nation_HRS_GEO = nation[[\"GEOID10\", \"SF\", \"CF\", \"HRS_ET\", \"AML_ET\", \"FUDS_ET\"]]\n", + "nation_HRS_GEO.loc[nation_HRS_GEO[\"FUDS_ET\"] == \"0\"]" ] }, { @@ -321,7 +326,7 @@ } ], "source": [ - "nation['HRS_ET'].unique()" + "nation[\"HRS_ET\"].unique()" ] } ], diff --git a/data/data-pipeline/data_pipeline/ipython/explore_adjacency.ipynb b/data/data-pipeline/data_pipeline/ipython/explore_adjacency.ipynb index e0e5191c..af099cbc 100644 --- a/data/data-pipeline/data_pipeline/ipython/explore_adjacency.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/explore_adjacency.ipynb @@ -18,7 +18,10 @@ " sys.path.append(module_path)\n", "\n", "from data_pipeline.config import settings\n", - "from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries, get_tract_geojson\n" + "from data_pipeline.etl.sources.geo_utils import (\n", + " add_tracts_for_geometries,\n", + " get_tract_geojson,\n", + ")" ] }, { @@ -655,9 +658,9 @@ } ], "source": [ - "adjacent_tracts.groupby(\"ORIGINAL_TRACT\")[[\"included\"]].mean().reset_index().rename(\n", - " columns={\"ORIGINAL_TRACT\": \"GEOID10_TRACT\"}\n", - ")" + "adjacent_tracts.groupby(\"ORIGINAL_TRACT\")[\n", + " [\"included\"]\n", + "].mean().reset_index().rename(columns={\"ORIGINAL_TRACT\": \"GEOID10_TRACT\"})" ] }, { diff --git a/data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb b/data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb index 87107be2..71612a7d 100644 --- a/data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb @@ -65,7 +65,8 @@ "tmp_path.mkdir(parents=True, exist_ok=True)\n", "\n", "eamlis_path_in_s3 = (\n", - " settings.AWS_JUSTICE40_DATASOURCES_URL + \"/eAMLIS export of all data.tsv.zip\"\n", + " settings.AWS_JUSTICE40_DATASOURCES_URL\n", + " + \"/eAMLIS export of all data.tsv.zip\"\n", ")\n", "\n", "unzip_file_from_url(\n", diff --git a/data/data-pipeline/data_pipeline/ipython/generate_fuds_test_data.ipynb b/data/data-pipeline/data_pipeline/ipython/generate_fuds_test_data.ipynb index f14fdd6f..4e1109c3 100644 --- a/data/data-pipeline/data_pipeline/ipython/generate_fuds_test_data.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/generate_fuds_test_data.ipynb @@ -460,7 +460,9 @@ "outputs": [], "source": [ "object_ids_to_keep = set(\n", - " merged_exaple_data[merged_exaple_data[\"_merge\"] == \"both\"].OBJECTID.astype(\"int\")\n", + " merged_exaple_data[merged_exaple_data[\"_merge\"] == \"both\"].OBJECTID.astype(\n", + " \"int\"\n", + " )\n", ")\n", "features = []\n", "for feature in raw_fuds_geojson[\"features\"]:\n", @@ -476,7 +478,11 @@ "outputs": [], "source": [ "def make_fake_feature(\n", - " state: str, has_projects: bool, is_eligible: bool, latitude: float, longitude: float\n", + " state: str,\n", + " has_projects: bool,\n", + " is_eligible: bool,\n", + " latitude: float,\n", + " longitude: float,\n", "):\n", " \"\"\"For tracts where we don't have a FUDS, fake one.\"\"\"\n", " make_fake_feature._object_id += 1\n", @@ -537,7 +543,9 @@ "# Create FUDS in CA for each tract that doesn't have a FUDS\n", "for tract_id, point in points.items():\n", " for bools in [(True, True), (True, False), (False, False)]:\n", - " features.append(make_fake_feature(\"CA\", bools[0], bools[1], point.y, point.x))" + " features.append(\n", + " make_fake_feature(\"CA\", bools[0], bools[1], point.y, point.x)\n", + " )" ] }, { @@ -596,9 +604,9 @@ } ], "source": [ - "test_frame_with_tracts_full = test_frame_with_tracts = add_tracts_for_geometries(\n", - " test_frame\n", - ")" + "test_frame_with_tracts_full = (\n", + " test_frame_with_tracts\n", + ") = add_tracts_for_geometries(test_frame)" ] }, { @@ -680,7 +688,9 @@ } ], "source": [ - "tracts = test_frame_with_tracts_full[[\"GEOID10_TRACT\", \"geometry\"]].drop_duplicates()\n", + "tracts = test_frame_with_tracts_full[\n", + " [\"GEOID10_TRACT\", \"geometry\"]\n", + "].drop_duplicates()\n", "tracts[\"lat_long\"] = test_frame_with_tracts_full.geometry.apply(\n", " lambda point: (point.x, point.y)\n", ")\n", diff --git a/data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb b/data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb index f134f9a6..79fa3336 100644 --- a/data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/geojson_compare_tiles.ipynb @@ -13,7 +13,7 @@ "import geopandas as gpd\n", "\n", "# Read in the above json file\n", - "nation=gpd.read_file(\"/Users/vims/Downloads/usa-high-1822-637b.json\")" + "nation = gpd.read_file(\"/Users/vims/Downloads/usa-high-1822-637b.json\")" ] }, { @@ -45,7 +45,7 @@ } ], "source": [ - "nation['FUDS_RAW']" + "nation[\"FUDS_RAW\"]" ] }, { @@ -248,7 +248,18 @@ } ], "source": [ - "nation_new_ind = nation[['GEOID10', 'SF', 'CF', 'HRS_ET', 'AML_ET', 'AML_RAW','FUDS_ET', 'FUDS_RAW']]\n", + "nation_new_ind = nation[\n", + " [\n", + " \"GEOID10\",\n", + " \"SF\",\n", + " \"CF\",\n", + " \"HRS_ET\",\n", + " \"AML_ET\",\n", + " \"AML_RAW\",\n", + " \"FUDS_ET\",\n", + " \"FUDS_RAW\",\n", + " ]\n", + "]\n", "nation_new_ind" ] }, @@ -270,7 +281,7 @@ } ], "source": [ - "nation_new_ind['HRS_ET'].unique()" + "nation_new_ind[\"HRS_ET\"].unique()" ] }, { @@ -293,7 +304,7 @@ } ], "source": [ - "nation_new_ind['HRS_ET'].value_counts()" + "nation_new_ind[\"HRS_ET\"].value_counts()" ] }, { @@ -314,7 +325,7 @@ } ], "source": [ - "nation_new_ind['AML_ET'].unique()" + "nation_new_ind[\"AML_ET\"].unique()" ] }, { @@ -337,7 +348,7 @@ } ], "source": [ - "nation_new_ind['AML_ET'].value_counts()" + "nation_new_ind[\"AML_ET\"].value_counts()" ] }, { @@ -358,7 +369,7 @@ } ], "source": [ - "nation_new_ind['AML_RAW'].unique()" + "nation_new_ind[\"AML_RAW\"].unique()" ] }, { @@ -380,7 +391,7 @@ } ], "source": [ - "nation_new_ind['AML_RAW'].value_counts()" + "nation_new_ind[\"AML_RAW\"].value_counts()" ] }, { @@ -401,7 +412,7 @@ } ], "source": [ - "nation_new_ind['FUDS_ET'].unique()" + "nation_new_ind[\"FUDS_ET\"].unique()" ] }, { @@ -424,7 +435,7 @@ } ], "source": [ - "nation_new_ind['FUDS_ET'].value_counts()" + "nation_new_ind[\"FUDS_ET\"].value_counts()" ] }, { @@ -445,7 +456,7 @@ } ], "source": [ - "nation_new_ind['FUDS_RAW'].unique()" + "nation_new_ind[\"FUDS_RAW\"].unique()" ] }, { @@ -468,7 +479,7 @@ } ], "source": [ - "nation_new_ind['FUDS_RAW'].value_counts()" + "nation_new_ind[\"FUDS_RAW\"].value_counts()" ] } ], diff --git a/data/data-pipeline/data_pipeline/ipython/geopandas_speed_test.ipynb b/data/data-pipeline/data_pipeline/ipython/geopandas_speed_test.ipynb index 568e0a49..01e121ef 100644 --- a/data/data-pipeline/data_pipeline/ipython/geopandas_speed_test.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/geopandas_speed_test.ipynb @@ -36,8 +36,8 @@ " engine=\"pyogrio\",\n", ")\n", "end = time.time()\n", - " \n", - "print(\"Time taken to execute the function using pyogrio is\", end-begin)" + "\n", + "print(\"Time taken to execute the function using pyogrio is\", end - begin)" ] }, { @@ -59,11 +59,13 @@ "census_tract_gdf = gpd.read_file(\n", " CensusETL.NATIONAL_TRACT_JSON_PATH,\n", " engine=\"fiona\",\n", - " include_fields=[\"GEOID10\"]\n", + " include_fields=[\"GEOID10\"],\n", ")\n", "end2 = time.time()\n", - " \n", - "print(\"Time taken to execute the function using include fields is\", end2-begin2)" + "\n", + "print(\n", + " \"Time taken to execute the function using include fields is\", end2 - begin2\n", + ")" ] }, { diff --git a/data/data-pipeline/data_pipeline/ipython/tribal_and_tracts_overlap.ipynb b/data/data-pipeline/data_pipeline/ipython/tribal_and_tracts_overlap.ipynb index effeed69..0560b545 100644 --- a/data/data-pipeline/data_pipeline/ipython/tribal_and_tracts_overlap.ipynb +++ b/data/data-pipeline/data_pipeline/ipython/tribal_and_tracts_overlap.ipynb @@ -1369,7 +1369,9 @@ "\n", "results = results.reset_index()\n", "\n", - "results.to_csv(\"~/Downloads/tribal_area_as_a_share_of_tract_area.csv\", index=False)\n", + "results.to_csv(\n", + " \"~/Downloads/tribal_area_as_a_share_of_tract_area.csv\", index=False\n", + ")\n", "\n", "# Printing results\n", "print(results)" diff --git a/data/data-pipeline/data_pipeline/tests/score/test_output.py b/data/data-pipeline/data_pipeline/tests/score/test_output.py index a443a4b1..3297e883 100644 --- a/data/data-pipeline/data_pipeline/tests/score/test_output.py +++ b/data/data-pipeline/data_pipeline/tests/score/test_output.py @@ -11,7 +11,6 @@ from data_pipeline.etl.score.constants import TILES_ISLAND_AREA_FIPS_CODES from data_pipeline.score import field_names from data_pipeline.score.field_names import GEOID_TRACT_FIELD - pytestmark = pytest.mark.smoketest UNMATCHED_TRACT_THRESHOLD = 1000 diff --git a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py index bb24ba3e..dc10cd42 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/doe_energy_burden/test_etl.py @@ -1,9 +1,7 @@ # pylint: disable=protected-access import pathlib -from data_pipeline.etl.sources.doe_energy_burden.etl import ( - DOEEnergyBurden, -) +from data_pipeline.etl.sources.doe_energy_burden.etl import DOEEnergyBurden from data_pipeline.tests.sources.example.test_etl import TestETL from data_pipeline.utils import get_module_logger diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py index 6d0400f0..f9994977 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py @@ -3,9 +3,7 @@ import pathlib from unittest import mock from data_pipeline.etl.base import ValidGeoLevel -from data_pipeline.etl.sources.eamlis.etl import ( - AbandonedMineETL, -) +from data_pipeline.etl.sources.eamlis.etl import AbandonedMineETL from data_pipeline.tests.sources.example.test_etl import TestETL from data_pipeline.utils import get_module_logger diff --git a/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py index d178f95b..58e75566 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/us_army_fuds/test_etl.py @@ -3,9 +3,7 @@ import pathlib from unittest import mock from data_pipeline.etl.base import ValidGeoLevel -from data_pipeline.etl.sources.us_army_fuds.etl import ( - USArmyFUDS, -) +from data_pipeline.etl.sources.us_army_fuds.etl import USArmyFUDS from data_pipeline.tests.sources.example.test_etl import TestETL from data_pipeline.utils import get_module_logger diff --git a/data/data-pipeline/data_pipeline/utils.py b/data/data-pipeline/data_pipeline/utils.py index b8927e40..55289475 100644 --- a/data/data-pipeline/data_pipeline/utils.py +++ b/data/data-pipeline/data_pipeline/utils.py @@ -20,7 +20,6 @@ from data_pipeline.content.schemas.download_schemas import ExcelConfig from marshmallow import ValidationError from marshmallow_dataclass import class_schema - ## zlib is not available on all systems try: import zlib # noqa # pylint: disable=unused-import diff --git a/data/data-pipeline/pyproject.toml b/data/data-pipeline/pyproject.toml index ab7a7110..8e1a40bc 100644 --- a/data/data-pipeline/pyproject.toml +++ b/data/data-pipeline/pyproject.toml @@ -93,6 +93,8 @@ disable = [ "R0801", # Disables duplicate code. There are a couple places we have similar code and # unfortunately you can't disable this rule for individual lines or files, it's a # known bug. https://github.com/PyCQA/pylint/issues/214# + "C0411", # Disables wrong-import-order. Import order is now enforced by isort as a + # pre-commit hook. ] [tool.pylint.FORMAT]