In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import csv
import sys
import os

# Put the parent of the current working directory on the import path so the
# project-local imports that follow (`etl...`, `utils`) can be resolved.
module_path = os.path.abspath(os.pardir)
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)

from etl.sources.census.etl_utils import get_state_fips_codes
from utils import unzip_file_from_url, remove_all_from_dir

# Root data directory: a "data" folder next to the current working directory.
DATA_PATH = Path.cwd().parent.joinpath("data")
# Scratch directory passed to the download/unzip helper and cleared at the end.
TMP_PATH = DATA_PATH.joinpath("tmp")
# Location of the zipped EJSCREEN 2020 CSV (served over HTTPS despite the name).
EJSCREEN_FTP_URL = "https://gaftp.epa.gov/EJSCREEN/2020/EJSCREEN_2020_StatePctile.csv.zip"
# Where the extracted CSV is expected to be found inside TMP_PATH.
EJSCREEN_CSV = TMP_PATH.joinpath("EJSCREEN_2020_StatePctile.csv")
# Output directory for the CSV files this notebook writes.
CSV_PATH = DATA_PATH.joinpath("dataset", "ejscreen_2020")
# Echo the resolved data directory so the reader can verify the working location.
print(DATA_PATH)

In [None]:
# Download the zipped EJSCREEN CSV from EJSCREEN_FTP_URL via the project helper.
# NOTE(review): TMP_PATH is passed twice — presumably once as the download
# directory and once as the extraction directory; confirm against
# `utils.unzip_file_from_url`. The next cell reads EJSCREEN_CSV from TMP_PATH.
unzip_file_from_url(EJSCREEN_FTP_URL, TMP_PATH, TMP_PATH)

In [None]:
# Load the extracted EJSCREEN CSV into a DataFrame.
ejscreen_read_options = {
    # Keep ID as text: the per-state split compares its first two characters
    # against FIPS codes, which a numeric dtype could not support.
    "dtype": {"ID": "string"},
    # EJSCREEN writes the word "None" for NA data.
    "na_values": ["None"],
    # Parse the file in a single pass rather than in internal chunks.
    "low_memory": False,
}
df = pd.read_csv(EJSCREEN_CSV, **ejscreen_read_options)

In [None]:
# Write the complete, nationwide table as a single CSV.
# Create the output directory (and any missing parents) first; `exist_ok=True`
# keeps a re-run of this cell from failing when the directory already exists.
CSV_PATH.mkdir(parents=True, exist_ok=True)
# `index=False` leaves the DataFrame's row index out of the file.
df.to_csv(CSV_PATH / "usa.csv", index=False)

In [None]:
# Write one CSV per state, keeping the rows whose ID starts with that state's
# FIPS code.
# The two-character ID prefix is the same for every iteration, so compute it
# once instead of re-slicing the whole column for each state.
id_prefix = df["ID"].str[:2]
for fips in get_state_fips_codes(DATA_PATH):
    print(f"Generating data{fips} csv")
    state_rows = df[id_prefix == fips]
    # The file must be named like data01.csv for the ogr2ogr csv merge to work.
    state_rows.to_csv(CSV_PATH / f"data{fips}.csv", index=False)

In [None]:
# Clean up the temporary directory that held the downloaded EJSCREEN files.
# NOTE(review): exactly what `remove_all_from_dir` deletes (and whether it keeps
# the directory itself) is defined in `utils` — confirm before relying on it.
remove_all_from_dir(TMP_PATH)