diff --git a/data/data-pipeline/data_pipeline/etl/sources/geocorr/etl.py b/data/data-pipeline/data_pipeline/etl/sources/geocorr/etl.py index c9c45993..58e296fd 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/geocorr/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/geocorr/etl.py @@ -15,6 +15,9 @@ class GeoCorrETL(ExtractTransformLoad): self.OUTPUT_PATH = self.DATA_PATH / "dataset" / "geocorr" # Need to change hyperlink to S3 + + # Note that this CSV was generated by this notebook: https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/ipython/urban_vs_rural.ipynb + # The source data for this notebook was downloaded from GeoCorr; the instructions for generating the source data are here: https://github.com/usds/justice40-tool/issues/355#issuecomment-920241787 self.GEOCORR_PLACES_URL = "https://justice40-data.s3.amazonaws.com/data-sources/geocorr_urban_rural.csv.zip" self.GEOCORR_GEOID_FIELD_NAME = "GEOID10_TRACT" self.URBAN_HERUISTIC_FIELD_NAME = "Urban Heuristic Flag"