DOC use notebook-style in ensemble/plot_adaboost_hastie_10_2.py #23184
Changes from all commits
@@ -3,7 +3,7 @@
 Discrete versus Real AdaBoost
 =============================
 
-This example is based on Figure 10.2 from Hastie et al 2009 [1]_ and
+This notebook is based on Figure 10.2 from Hastie et al 2009 [1]_ and
 illustrates the difference in performance between the discrete SAMME [2]_
 boosting algorithm and real SAMME.R boosting algorithm. Both algorithms are
 evaluated on a binary classification task where the target Y is a non-linear
@@ -15,32 +15,44 @@
 .. [1] T. Hastie, R. Tibshirani and J. Friedman, "Elements of Statistical
     Learning Ed. 2", Springer, 2009.
 
-.. [2] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009.
+.. [2] J Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost",
+    Statistics and Its Interface, 2009.
 
 """
 
-# Author: Peter Prettenhofer <peter.prettenhofer@gmail.com>,
-#         Noel Dawe <noel.dawe@gmail.com>
+# %%
+# Preparing the data and baseline models
+# --------------------------------------
+# We start by generating the binary classification dataset
+# used in Hastie et al. 2009, Example 10.2.
+
+# Authors: Peter Prettenhofer <peter.prettenhofer@gmail.com>,
+#          Noel Dawe <noel.dawe@gmail.com>
 #
 # License: BSD 3 clause
 
-import numpy as np
-import matplotlib.pyplot as plt
-
 from sklearn import datasets
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.metrics import zero_one_loss
-from sklearn.ensemble import AdaBoostClassifier
 
+X, y = datasets.make_hastie_10_2(n_samples=12_000, random_state=1)
+
+# %%
+# Now, we set the hyperparameters for our AdaBoost classifiers.
+# Be aware, a learning rate of 1.0 may not be optimal for both SAMME and SAMME.R
+
 n_estimators = 400
-# A learning rate of 1. may not be optimal for both SAMME and SAMME.R
 learning_rate = 1.0
 
-X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
+# %%
+# We split the data into a training and a test set.
+# Then, we train our baseline classifiers, a `DecisionTreeClassifier` with `depth=9`
+# and a "stump" `DecisionTreeClassifier` with `depth=1` and compute the test error.
 
-X_test, y_test = X[2000:], y[2000:]
-X_train, y_train = X[:2000], y[:2000]
+from sklearn.model_selection import train_test_split
+from sklearn.tree import DecisionTreeClassifier
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=2_000, shuffle=False
+)
 
 dt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)
 dt_stump.fit(X_train, y_train)
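The hunk above replaces manual slicing with `train_test_split(..., shuffle=False)`. As a side note (not part of the diff), here is a minimal self-contained sketch of how that call partitions ordered data: with `shuffle=False`, the first `n_samples - test_size` rows become the training set and the last `test_size` rows become the test set.

```python
# Illustration only, independent of the example's data.
import numpy as np
from sklearn.model_selection import train_test_split

X_demo = np.arange(10).reshape(-1, 1)
y_demo = np.arange(10)

X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=2, shuffle=False)
print(y_tr)  # [0 1 2 3 4 5 6 7] -> the first 8 samples are kept for training
print(y_te)  # [8 9]             -> the last 2 samples form the test set
```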
@@ -50,6 +62,14 @@
 dt.fit(X_train, y_train)
 dt_err = 1.0 - dt.score(X_test, y_test)
 
+# %%
+# Adaboost with discrete SAMME and real SAMME.R
+# ---------------------------------------------
+# We now define the discrete and real AdaBoost classifiers
+# and fit them to the training set.
+
+from sklearn.ensemble import AdaBoostClassifier
+
 ada_discrete = AdaBoostClassifier(
     base_estimator=dt_stump,
     learning_rate=learning_rate,
@@ -58,6 +78,8 @@
 )
 ada_discrete.fit(X_train, y_train)
 
+# %%
+
 ada_real = AdaBoostClassifier(
     base_estimator=dt_stump,
     learning_rate=learning_rate,
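The two hunks above are truncated before the `algorithm` argument of each constructor. For orientation only, a hedged sketch of how a discrete and a real AdaBoost ensemble are typically configured, assuming a scikit-learn release contemporary with this PR where `AdaBoostClassifier` accepts `base_estimator` and both `algorithm` options:

```python
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

dt_stump = DecisionTreeClassifier(max_depth=1, min_samples_leaf=1)

# Discrete AdaBoost (SAMME) reweights samples from the predicted class labels.
ada_discrete = AdaBoostClassifier(
    base_estimator=dt_stump,
    learning_rate=1.0,
    n_estimators=400,
    algorithm="SAMME",
)

# Real AdaBoost (SAMME.R) uses the predicted class probabilities instead.
ada_real = AdaBoostClassifier(
    base_estimator=dt_stump,
    learning_rate=1.0,
    n_estimators=400,
    algorithm="SAMME.R",
)
```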
@@ -66,11 +88,13 @@
 )
 ada_real.fit(X_train, y_train)
 
-fig = plt.figure()
-ax = fig.add_subplot(111)
+# %%
+# Now, let's compute the test error of the discrete and
+# real AdaBoost classifiers for each new stump in `n_estimators`
+# added to the ensemble.
 
-ax.plot([1, n_estimators], [dt_stump_err] * 2, "k-", label="Decision Stump Error")
-ax.plot([1, n_estimators], [dt_err] * 2, "k--", label="Decision Tree Error")
+import numpy as np
+from sklearn.metrics import zero_one_loss
 
 ada_discrete_err = np.zeros((n_estimators,))
 for i, y_pred in enumerate(ada_discrete.staged_predict(X_test)):
@@ -88,36 +112,60 @@
 for i, y_pred in enumerate(ada_real.staged_predict(X_train)):
     ada_real_err_train[i] = zero_one_loss(y_pred, y_train)
 
+# %%
+# Plotting the results
+# --------------------
+# Finally, we plot the train and test errors of our baselines
+# and of the discrete and real AdaBoost classifiers
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+fig = plt.figure()
+ax = fig.add_subplot(111)
+
+ax.plot([1, n_estimators], [dt_stump_err] * 2, "k-", label="Decision Stump Error")
+ax.plot([1, n_estimators], [dt_err] * 2, "k--", label="Decision Tree Error")
+
Review thread on this hunk:
- In the figure below, we can remove the …
- Good point!
- I have to improvise a bit here since the default colors include both red and green, which is the most frequent type of color-blindness according to matplotlib.
- Thanks for looking at this.
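As a side note on the palette choice discussed above, a small sketch (not part of the diff) for inspecting the seaborn "colorblind" palette before picking entries for each curve:

```python
import seaborn as sns

colors = sns.color_palette("colorblind")  # a list-like palette of RGB tuples
print(len(colors))      # 10 entries by default
print(colors.as_hex())  # the same colors as hex strings
```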
+colors = sns.color_palette("colorblind")
+
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_discrete_err,
     label="Discrete AdaBoost Test Error",
-    color="red",
+    color=colors[0],
 )
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_discrete_err_train,
     label="Discrete AdaBoost Train Error",
-    color="blue",
+    color=colors[1],
 )
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_real_err,
     label="Real AdaBoost Test Error",
-    color="orange",
+    color=colors[2],
 )
 ax.plot(
     np.arange(n_estimators) + 1,
     ada_real_err_train,
     label="Real AdaBoost Train Error",
-    color="green",
+    color=colors[4],
 )
 
 ax.set_ylim((0.0, 0.5))
-ax.set_xlabel("n_estimators")
+ax.set_xlabel("Number of weak learners")
 ax.set_ylabel("error rate")
 
 leg = ax.legend(loc="upper right", fancybox=True)
 leg.get_frame().set_alpha(0.7)
Review thread on this hunk:
- In l.151, can you change it to `ax.set_xlabel("Number of weak learners")`?
- Sounds good, will do
 
 plt.show()
+
+# %%
+#
+# Concluding remarks
+# ------------------
+#
+# We observe that the error rate for both train and test sets of real AdaBoost
+# is lower than that of discrete AdaBoost.
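A small follow-up cell one could append to the example to back this observation with numbers; it is a sketch only and assumes the error arrays computed earlier in the script (via `staged_predict`) are in scope:

```python
# %%
# Sketch only: relies on the ada_*_err arrays computed above.
print(f"Discrete AdaBoost: train error {ada_discrete_err_train[-1]:.3f}, "
      f"test error {ada_discrete_err[-1]:.3f}")
print(f"Real AdaBoost:     train error {ada_real_err_train[-1]:.3f}, "
      f"test error {ada_real_err[-1]:.3f}")
```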
Review thread:
- Can you add a marker `# %%` in l.78 to get a diagram for both models?
- Good catch. Yes
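For context on the request above: sphinx-gallery treats `# %%` as a cell break in notebook-style examples, the comment block right after the marker is rendered as narrative text, and each cell gets its own output block, which is presumably how the reviewer gets a separate diagram for each fitted model. A minimal standalone sketch of the convention, unrelated to the example's data:

```python
# %%
# First cell
# ----------
# Comment lines directly after the marker are rendered as text.

print("output of the first cell")

# %%
# A second `# %%` marker starts a new cell with its own output block.

print("output of the second cell")
```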