[ISS-751] Updating comments for geocorr ETL (#913)

This commit is contained in:
Vincent La 2021-12-03 10:10:05 -05:00 committed by GitHub
parent 0873d79254
commit 84874ee4a5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 8 additions and 0 deletions

View file

@ -15,6 +15,12 @@ class GeoCorrETL(ExtractTransformLoad):
self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr"
# Need to change hyperlink to S3
# Note that this CSV was generated by this notebook:
# https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb
# The source data for this notebook was downloaded from GeoCorr;
# the instructions for generating the source data are here:
# https://github.com/usds/justice40-tool/issues/355#issuecomment-920241787
self.GEOCORR_PLACES_URL = "https://justice40-data.s3.amazonaws.com/data-sources/geocorr_urban_rural.csv.zip"
self.GEOCORR_GEOID_FIELD_NAME = "GEOID10_TRACT"
self.URBAN_HEURISTIC_FIELD_NAME = "Urban Heuristic Flag"

View file

@ -100,6 +100,8 @@
"metadata": {},
"outputs": [],
"source": [
"# CSV was manually generated\n",
"# Instructions for how to generate the CSV from Geocorr are here: https://github.com/usds/justice40-tool/issues/355#issuecomment-920241787\n",
"geocorr_urban_rural_map = pd.read_csv(\n",
" os.path.join(GEOCORR_DATA_DIR, \"geocorr2014_2125804280.csv\"),\n",
" encoding=\"ISO-8859-1\",\n",