mirror of
https://github.com/DOI-DO/j40-cejst-2.git
synced 2025-02-23 01:54:18 -08:00
Editing etl script
This commit is contained in:
parent
d49c28ca25
commit
c2081937b3
2 changed files with 26 additions and 28 deletions
|
@ -12,7 +12,6 @@ class EJScreenAreasOfConcernETL(ExtractTransformLoad):
|
|||
|
||||
# TODO: Load from the actual source; this dataset is not publicly available for now
|
||||
self.LOCAL_CSV_PATH = self.DATA_PATH / "local"
|
||||
self.GEOID_CBG_FIELD_NAME = "GEOID10_CBG"
|
||||
self.df: pd.DataFrame
|
||||
|
||||
def extract(self) -> None:
|
||||
|
@ -22,7 +21,7 @@ class EJScreenAreasOfConcernETL(ExtractTransformLoad):
|
|||
filepath_or_buffer=self.LOCAL_CSV_PATH
|
||||
/ "ejscreen_areas_of_concerns_indicators.csv",
|
||||
dtype={
|
||||
self.GEOID_CBG_FIELD_NAME: "string",
|
||||
self.GEOID_FIELD_NAME: "string",
|
||||
},
|
||||
low_memory=False,
|
||||
)
|
||||
|
|
|
@ -74,7 +74,6 @@
|
|||
"GEOID_FIELD_NAME = \"GEOID10\"\n",
|
||||
"GEOID_TRACT_FIELD_NAME = \"GEOID10_TRACT\"\n",
|
||||
"GEOID_STATE_FIELD_NAME = \"GEOID10_STATE\"\n",
|
||||
"GEOID_CBG_FIELD_NAME = \"GEOID10_CBG\"\n",
|
||||
"COUNTRY_FIELD_NAME = \"Country\"\n",
|
||||
"CENSUS_BLOCK_GROUP_POPULATION_FIELD = \"Total population\"\n",
|
||||
"\n",
|
||||
|
@ -96,7 +95,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "67cf1993",
|
||||
"id": "186c15bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### National"
|
||||
|
@ -105,7 +104,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "c6d89263",
|
||||
"id": "4843efbd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -123,7 +122,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "3110606b",
|
||||
"id": "0a146972",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -152,9 +151,9 @@
|
|||
" )\n",
|
||||
" df['EXCEED_COUNT'] = pd.to_numeric(df['EXCEED_COUNT'])\n",
|
||||
"\n",
|
||||
" df.rename(columns={'ID': GEOID_CBG_FIELD_NAME}, inplace=True)\n",
|
||||
" df.rename(columns={'ID': GEOID_FIELD_NAME}, inplace=True)\n",
|
||||
" df['percentile'] = percentile\n",
|
||||
" df = df[[GEOID_CBG_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
|
||||
" df = df[[GEOID_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
|
||||
" dfs.append(df)\n",
|
||||
" \n",
|
||||
"df = pd.concat(dfs)"
|
||||
|
@ -163,7 +162,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "7f3bd094",
|
||||
"id": "65622cbd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -248,11 +247,11 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "f85dc722",
|
||||
"id": "75e2d572",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_reshaped_nat = df.pivot(index=GEOID_CBG_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
|
||||
"df_reshaped_nat = df.pivot(index=GEOID_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
|
||||
"df_reshaped_nat.columns = \\\n",
|
||||
" ['EJSCREEN Areas of Concern, National, {}th percentile'.format(p) for p in df_reshaped_nat.columns]\n",
|
||||
"df_reshaped_nat.fillna(0, inplace=True)\n",
|
||||
|
@ -487,7 +486,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "1a1c0ef9",
|
||||
"id": "7eedff74",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -784,7 +783,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "ecb5a9da",
|
||||
"id": "428b94f3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -977,7 +976,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eca4ebf1",
|
||||
"id": "7bc0f71c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### State"
|
||||
|
@ -986,7 +985,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "d8d21a3d",
|
||||
"id": "2de68aa5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@ -1004,7 +1003,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "7c4f0a6d",
|
||||
"id": "fccb416e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1033,9 +1032,9 @@
|
|||
" )\n",
|
||||
" df['EXCEED_COUNT'] = pd.to_numeric(df['EXCEED_COUNT'])\n",
|
||||
"\n",
|
||||
" df.rename(columns={'ID': GEOID_CBG_FIELD_NAME}, inplace=True)\n",
|
||||
" df.rename(columns={'ID': GEOID_FIELD_NAME}, inplace=True)\n",
|
||||
" df['percentile'] = percentile\n",
|
||||
" df = df[[GEOID_CBG_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
|
||||
" df = df[[GEOID_FIELD_NAME, 'percentile', 'EXCEED_COUNT']]\n",
|
||||
" dfs.append(df)\n",
|
||||
" \n",
|
||||
"df = pd.concat(dfs)"
|
||||
|
@ -1044,7 +1043,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "0c40712a",
|
||||
"id": "8300e454",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1129,11 +1128,11 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "acce11ae",
|
||||
"id": "5be30b4f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_reshaped_sta = df.pivot(index=GEOID_CBG_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
|
||||
"df_reshaped_sta = df.pivot(index=GEOID_FIELD_NAME, columns='percentile', values='EXCEED_COUNT')\n",
|
||||
"df_reshaped_sta.columns = ['EJSCREEN Areas of Concern, State, {}th percentile'.format(p) for p in df_reshaped_sta.columns]\n",
|
||||
"df_reshaped_sta.fillna(0, inplace=True)\n",
|
||||
"\n",
|
||||
|
@ -1145,7 +1144,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "5160b8cd",
|
||||
"id": "9206132b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1367,7 +1366,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "533b81a9",
|
||||
"id": "b551a4df",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1664,7 +1663,7 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "e1cc26bc",
|
||||
"id": "c3cb5696",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -1858,19 +1857,19 @@
|
|||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "ee632477",
|
||||
"id": "099cca8c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_reshaped = df_reshaped_nat.merge(\n",
|
||||
" df_reshaped_sta,\n",
|
||||
" on=GEOID_CBG_FIELD_NAME)"
|
||||
" on=GEOID_FIELD_NAME)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "36905865",
|
||||
"id": "23097787",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
|
@ -2259,7 +2258,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4260b002",
|
||||
"id": "403dfbc6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Next Steps / Questions\n",
|
||||
|
|
Loading…
Add table
Reference in a new issue