8000 update model card example to create better pre-processing function · davidtrinhsas/python-sasctl@5306aee · GitHub
[go: up one dir, main page]

Skip to content

Commit 5306aee

Browse files
committed
update model card example to create better pre-processing function
1 parent e2bd4e5 commit 5306aee

File tree

1 file changed

+16
-2
lines changed

1 file changed

+16
-2
lines changed

examples/pzmm_generate_complete_model_card.ipynb

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -568,7 +568,8 @@
568568
" df.columns = df.columns.str.replace(' ', '')\n",
569569
" df.columns = df.columns.str.replace('-', '_')\n",
570570
" df = df.drop(['Sex_Male'], axis=1)\n",
571-
" df = pd.concat([df, cat_vals], axis=1).drop('index', axis=1)\n",
571+
" if 'index' in df.columns or 'index' in cat_vals.columns:\n",
572+
" df = pd.concat([df, cat_vals], axis=1).drop('index', axis=1)\n",
572573
" # For the model to score correctly, all OHE columns must exist\n",
573574
" input_cols = [\n",
574575
" \"Education_9th\", \"Education_10th\", \"Education_11th\", \"Education_12th\", \"Education_Assoc_voc\", \"Education_Assoc_acdm\", \"Education_Masters\", \"Education_Prof_school\",\n",
@@ -579,9 +580,20 @@
579580
" 'Relationship_Not_in_family', 'Relationship_Own_child', 'Relationship_Unmarried', 'Relationship_Wife', 'Relationship_Other_relative', 'WorkClass_Private',\n",
580581
" 'Education_Bachelors'\n",
581582
" ]\n",
583+
"    # Final predictor columns to keep; the raw OHE columns are dropped once they have been combined\n",
584+
" predictor_columns = ['Age', 'HoursPerWeek', 'WorkClass_Private', 'WorkClass_Self', 'WorkClass_Gov', \n",
585+
" 'WorkClass_Other', 'Education_HS_grad', 'Education_Some_HS', 'Education_Assoc', 'Education_Some_college',\n",
586+
" 'Education_Bachelors', 'Education_Adv_Degree', 'Education_No_HS', 'MartialStatus_Married_civ_spouse',\n",
587+
" 'MartialStatus_Never_married', 'MartialStatus_Divorced', 'MartialStatus_Separated', 'MartialStatus_Widowed',\n",
588+
" 'MartialStatus_Other', 'Relationship_Husband', 'Relationship_Not_in_family', 'Relationship_Own_child', 'Relationship_Unmarried',\n",
589+
" 'Relationship_Wife', 'Relationship_Other_relative', 'Race_White', 'Race_Black', 'Race_Asian_Pac_Islander',\n",
590+
" 'Race_Amer_Indian_Eskimo', 'Race_Other', 'Sex_Female']\n",
591+
"\n",
582592
" for col in input_cols:\n",
583593
" if col not in df.columns:\n",
584594
" df[col] = 0\n",
595+
" \n",
596+
"\n",
585597
" df[\"Education_Some_HS\"] = df[\"Education_9th\"] | df[\"Education_10th\"] | df[\"Education_11th\"] | df[\"Education_12th\"]\n",
586598
" df[\"Education_Assoc\"] = df[\"Education_Assoc_voc\"] | df[\"Education_Assoc_acdm\"]\n",
587599
" df[\"Education_Adv_Degree\"] = df[\"Education_Masters\"] | df[\"Education_Prof_school\"] | df[\"Education_Doctorate\"]\n",
@@ -593,6 +605,8 @@
593605
"\n",
594606
" df[\"MartialStatus_Other\"] = df[\"MartialStatus_Married_spouse_absent\"] | df[\"MartialStatus_Married_AF_spouse\"]\n",
595607
"\n",
608+
" df = df[predictor_columns]\n",
609+
"\n",
596610
" return df"
597611
]
598612
},
@@ -1772,7 +1786,7 @@
17721786
],
17731787
"metadata": {
17741788
"kernelspec": {
1775-
"display_name": "Python 3",
1789+
"display_name": "pandatwo",
17761790
"language": "python",
17771791
"name": "python3"
17781792
},

0 commit comments

Comments
 (0)
0