{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-intro",
   "metadata": {},
   "source": [
    "# EJSCREEN score generation\n",
    "\n",
    "Reads the nationwide EJSCREEN 2020 CSV, ranks the `LESSHSPCT` and `LOWINCPCT` columns as percentiles, and combines them into two scores:\n",
    "\n",
    "- `score_a`: mean of the two percentiles\n",
    "- `score_b`: product of the two percentiles\n",
    "\n",
    "The score percentiles and top-25% flags are written to one nationwide CSV and to one CSV per state FIPS code.\n",
    "\n",
    "The notebook is meant to be run top to bottom (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a664f981",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# All paths are resolved relative to the parent of the current working directory.\n",
    "data_path = Path.cwd().parent / \"data\"\n",
    "fips_csv_path = data_path / \"fips_states_2010.csv\"\n",
    "csv_path = data_path / \"score\" / \"csv\"\n",
    "\n",
    "# Rows whose score percentile is at or above this cutoff get the top-25% flag.\n",
    "TOP_PERCENTILE_CUTOFF = 0.75\n",
    "\n",
    "# Columns kept in the CSV files that are written out.\n",
    "OUTPUT_COLUMNS = [\n",
    "    \"ID\",\n",
    "    \"score_a_percentile\",\n",
    "    \"score_b_percentile\",\n",
    "    \"score_a_top_percentile_25\",\n",
    "    \"score_b_top_percentile_25\",\n",
    "]\n",
    "\n",
    "# to_csv does not create missing directories, so make sure the output folder exists.\n",
    "csv_path.mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-load",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7df430cb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# EJSCREEN CSV load\n",
    "# ID is read as a string so its first two characters can be compared\n",
    "# with the FIPS codes when the per-state files are written.\n",
    "ejscreen_csv = data_path / \"dataset\" / \"ejscreen_2020\" / \"usa.csv\"\n",
    "ejscreen_df = pd.read_csv(ejscreen_csv, dtype={\"ID\": \"string\"}, low_memory=False)\n",
    "ejscreen_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-percentiles",
   "metadata": {},
   "source": [
    "## Percentiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27677132",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# calculate percentiles\n",
    "# assign() returns a new frame, so the loaded data stays untouched and\n",
    "# this cell can be re-run at any time.\n",
    "percentile_df = ejscreen_df.assign(\n",
    "    lesshs_percentile=ejscreen_df[\"LESSHSPCT\"].rank(pct=True),\n",
    "    lowin_percentile=ejscreen_df[\"LOWINCPCT\"].rank(pct=True),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-scores",
   "metadata": {},
   "source": [
    "## Scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f7b864f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# calculate scores\n",
    "scored_df = percentile_df.assign(\n",
    "    score_a=percentile_df[[\"lesshs_percentile\", \"lowin_percentile\"]].mean(axis=1),\n",
    "    score_b=percentile_df[\"lesshs_percentile\"] * percentile_df[\"lowin_percentile\"],\n",
    ")\n",
    "\n",
    "# Create percentiles for the scores\n",
    "scored_df[\"score_a_percentile\"] = scored_df[\"score_a\"].rank(pct=True)\n",
    "scored_df[\"score_b_percentile\"] = scored_df[\"score_b\"].rank(pct=True)\n",
    "\n",
    "# Flag the rows at or above the cutoff percentile of each score\n",
    "scored_df[\"score_a_top_percentile_25\"] = scored_df[\"score_a_percentile\"] >= TOP_PERCENTILE_CUTOFF\n",
    "scored_df[\"score_b_top_percentile_25\"] = scored_df[\"score_b_percentile\"] >= TOP_PERCENTILE_CUTOFF\n",
    "scored_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91755bcf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# strip calculations\n",
    "# Stored under a new name so the cells above can still be re-run.\n",
    "score_df = scored_df[OUTPUT_COLUMNS]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-output",
   "metadata": {},
   "source": [
    "## Write output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3a65af4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write nationwide csv\n",
    "score_df.to_csv(csv_path / \"usa.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58ddd8b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write per state csvs\n",
    "# The two-character ID prefix does not depend on the state, so compute it once.\n",
    "state_prefix = score_df[\"ID\"].str[:2]\n",
    "\n",
    "# newline=\"\" is what the csv module documents for files passed to csv.reader\n",
    "with open(fips_csv_path, newline=\"\") as csv_file:\n",
    "    csv_reader = csv.reader(csv_file, delimiter=\",\")\n",
    "    next(csv_reader, None)  # skip the header row\n",
    "\n",
    "    for row in csv_reader:\n",
    "        if not row:\n",
    "            # csv.reader yields [] for a blank line; row[0] would raise IndexError\n",
    "            continue\n",
    "        fips = row[0].strip()\n",
    "        print(f\"Generating data{fips} csv\")\n",
    "        state_df = score_df[state_prefix == fips]\n",
    "        # we need to name the file data01.csv for ogr2ogr csv merge to work\n",
    "        state_df.to_csv(csv_path / f\"data{fips}.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}