In [None]:
from pathlib import Path
import pandas as pd
import csv

# Locations are resolved relative to the parent of the current working directory.
data_path = Path.cwd().parent.joinpath("data")
# CSV read by the per-state export cell (first column is used as the file/ID prefix).
fips_csv_path = data_path.joinpath("fips_states_2010.csv")
# Directory that receives the generated score CSVs.
csv_path = data_path.joinpath("score", "csv")

In [None]:
# Load the EJSCREEN 2020 nationwide CSV.
# The ID column is read as a string so it is never parsed as a number.
ejscreen_csv = data_path.joinpath("dataset", "ejscreen_2020", "usa.csv")
df = pd.read_csv(
    ejscreen_csv,
    dtype={"ID": "string"},
    low_memory=False,
)
df.head()

In [None]:
# Percentile rank (fraction in (0, 1]) of each input indicator across all rows.
for source_col, percentile_col in (
    ("LESSHSPCT", "lesshs_percentile"),
    ("LOWINCPCT", "lowin_percentile"),
):
    df[percentile_col] = df[source_col].rank(pct=True)

In [None]:
# Two candidate scores built from the indicator percentiles:
# score_a is their row-wise mean, score_b is their product.
indicator_percentiles = ["lesshs_percentile", "lowin_percentile"]
df["score_a"] = df[indicator_percentiles].mean(axis=1)
df["score_b"] = df["lesshs_percentile"].mul(df["lowin_percentile"])

# Percentile-rank each score, then flag rows at or above the 75th percentile.
top_quartile_cutoff = 0.75
score_names = ("score_a", "score_b")
for score_name in score_names:
    df[f"{score_name}_percentile"] = df[score_name].rank(pct=True)
for score_name in score_names:
    df[f"{score_name}_top_percentile_25"] = (
        df[f"{score_name}_percentile"] >= top_quartile_cutoff
    )
df.head()

In [None]:
# Drop the inputs and intermediate columns; keep only the ID and the final
# score percentiles / top-25% flags that get written out.
output_columns = [
    "ID",
    "score_a_percentile",
    "score_b_percentile",
    "score_a_top_percentile_25",
    "score_b_top_percentile_25",
]
df = df[output_columns]

In [None]:
# write nationwide csv
# Create the output directory first so a fresh checkout does not fail on a
# missing data/score/csv folder.
csv_path.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path / "usa.csv", index=False)

In [None]:
# write per state csvs
# One output file is produced per row of the FIPS CSV, containing the rows of
# df whose ID starts with that row's first-column value.

# Make sure the output directory exists before writing into it.
csv_path.mkdir(parents=True, exist_ok=True)

# newline="" is what the csv module requires for correct newline handling.
with open(fips_csv_path, newline="") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=",")
    next(csv_reader, None)  # skip the header row (no-op if the file is empty)

    for row in csv_reader:
        # Blank lines come back as empty lists; skip them instead of
        # raising IndexError on row[0].
        if not row:
            continue
        fips = row[0].strip()
        if not fips:
            continue
        print(f"Generating data{fips} csv")
        # Compare the first two characters of ID against the code from the file.
        state_df = df[df.ID.str[:2] == fips]
        # we need to name the file data01.csv for ogr2ogr csv merge to work
        state_df.to_csv(csv_path / f"data{fips}.csv", index=False)