DOC Address numpydoc validation to GradientBoosting* (#15477) · sstalley/scikit-learn@7b69794 · GitHub


Commit 7b69794

xun-tang, rth, mvdoc, and thomasjpfan authored
DOC Address numpydoc validation to GradientBoosting* (scikit-learn#15477)
Co-authored-by: Roman Yurchak <rth.yurchak@gmail.com>
Co-authored-by: Matteo Visconti dOC <mvdoc@berkeley.edu>
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
1 parent 0fa3263 commit 7b69794

File tree

2 files changed: +49 −33 lines changed


maint_tools/test_docstrings.py

Lines changed: 2 additions & 0 deletions
@@ -14,6 +14,8 @@
     "LogisticRegression.decision_function",
     "Birch.predict",
     "Birch.transform",
+    "GradientBoostingClassifier",
+    "GradientBoostingRegressor",
     "LinearDiscriminantAnalysis.decision_function",
     "LinearSVC.decision_function",
     "LogisticRegressionCV.decision_function",

sklearn/ensemble/_gb.py

Lines changed: 47 additions & 33 deletions
@@ -771,13 +771,13 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     Parameters
     ----------
     loss : {'deviance', 'exponential'}, default='deviance'
-        loss function to be optimized. 'deviance' refers to
+        The loss function to be optimized. 'deviance' refers to
         deviance (= logistic regression) for classification
         with probabilistic outputs. For loss 'exponential' gradient
         boosting recovers the AdaBoost algorithm.

     learning_rate : float, default=0.1
-        learning rate shrinks the contribution of each tree by `learning_rate`.
+        Learning rate shrinks the contribution of each tree by `learning_rate`.
         There is a trade-off between learning_rate and n_estimators.

     n_estimators : int, default=100
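As background for the `loss` and `learning_rate` descriptions above, the following is an illustrative sketch of the learning_rate / n_estimators trade-off the docstring mentions; the dataset and parameter values are arbitrary choices, not taken from the commit.

# A smaller learning_rate usually needs more boosting stages to reach a
# comparable fit; both settings below are assumptions for the demo.
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

fast = GradientBoostingClassifier(learning_rate=1.0, n_estimators=50,
                                  random_state=0).fit(X_train, y_train)
slow = GradientBoostingClassifier(learning_rate=0.1, n_estimators=500,
                                  random_state=0).fit(X_train, y_train)

print("learning_rate=1.0, 50 trees :", fast.score(X_test, y_test))
print("learning_rate=0.1, 500 trees:", slow.score(X_test, y_test))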
@@ -834,7 +834,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         equal weight when sample_weight is not provided.

     max_depth : int, default=3
-        maximum depth of the individual regression estimators. The maximum
+        The maximum depth of the individual regression estimators. The maximum
         depth limits the number of nodes in the tree. Tune this parameter
         for best performance; the best value depends on the interaction
         of the input variables.
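The `max_depth` description above recommends tuning the parameter; a small grid search is one way to do that. The grid and cross-validation settings below are illustrative assumptions, not part of the commit.

# Tune max_depth by cross-validated grid search; values and cv are assumptions.
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

X, y = make_classification(n_samples=500, random_state=0)

search = GridSearchCV(
    GradientBoostingClassifier(random_state=0),
    param_grid={"max_depth": [1, 2, 3, 5]},
    cv=5,
)
search.fit(X, y)
print(search.best_params_)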
@@ -1006,6 +1006,23 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     max_features_ : int
         The inferred value of max_features.

+
+    See Also
+    --------
+    sklearn.ensemble.HistGradientBoostingClassifier : Histogram-based Gradient
+        Boosting Classification Tree.
+    sklearn.tree.DecisionTreeClassifier : A decision tree classifier.
+    sklearn.ensemble.RandomForestClassifier : A meta-estimator that fits a
+        number of decision tree classifiers on various sub-samples of the
+        dataset and uses averaging to improve the predictive accuracy and
+        control over-fitting.
+    sklearn.ensemble.AdaBoostClassifier : A meta-estimator that begins by
+        fitting a classifier on the original dataset and then fits additional
+        copies of the classifier on the same dataset where the weights of
+        incorrectly classified instances are adjusted such that subsequent
+        classifiers focus more on difficult cases.
+
+
     Notes
     -----
     The features are always randomly permuted at each split. Therefore,
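The new See Also section cross-references the histogram-based and ensemble alternatives. The sketch below compares them on a toy problem; sizes and settings are assumptions kept small so it runs quickly, and on scikit-learn releases before 1.0 HistGradientBoostingClassifier additionally requires `from sklearn.experimental import enable_hist_gradient_boosting`.

# Rough side-by-side of the estimators named in the See Also section.
from sklearn.datasets import make_classification
from sklearn.ensemble import (GradientBoostingClassifier,
                              HistGradientBoostingClassifier,
                              RandomForestClassifier, AdaBoostClassifier)
from sklearn.model_selection import cross_val_score

X, y = make_classification(n_samples=2000, random_state=0)  # toy data (assumption)

for est in (GradientBoostingClassifier(random_state=0),
            HistGradientBoostingClassifier(random_state=0),
            RandomForestClassifier(random_state=0),
            AdaBoostClassifier(random_state=0)):
    scores = cross_val_score(est, X, y, cv=3)
    print(type(est).__name__, scores.mean())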
@@ -1015,28 +1032,6 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     split. To obtain a deterministic behaviour during fitting,
     ``random_state`` has to be fixed.

-    Examples
-    --------
-    >>> from sklearn.datasets import make_classification
-    >>> from sklearn.ensemble import GradientBoostingClassifier
-    >>> from sklearn.model_selection import train_test_split
-    >>> X, y = make_classification(random_state=0)
-    >>> X_train, X_test, y_train, y_test = train_test_split(
-    ...     X, y, random_state=0)
-    >>> clf = GradientBoostingClassifier(random_state=0)
-    >>> clf.fit(X_train, y_train)
-    GradientBoostingClassifier(random_state=0)
-    >>> clf.predict(X_test[:2])
-    array([1, 0])
-    >>> clf.score(X_test, y_test)
-    0.88
-
-    See also
-    --------
-    sklearn.ensemble.HistGradientBoostingClassifier,
-    sklearn.tree.DecisionTreeClassifier, RandomForestClassifier
-    AdaBoostClassifier
-
     References
     ----------
     J. Friedman, Greedy Function Approximation: A Gradient Boosting
@@ -1046,6 +1041,23 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):

     T. Hastie, R. Tibshirani and J. Friedman.
     Elements of Statistical Learning Ed. 2, Springer, 2009.
+
+    Examples
+    --------
+    The following example shows how to fit a gradient boosting classifier with
+    100 decision stumps as weak learners.
+
+    >>> from sklearn.datasets import make_hastie_10_2
+    >>> from sklearn.ensemble import GradientBoostingClassifier
+
+    >>> X, y = make_hastie_10_2(random_state=0)
+    >>> X_train, X_test = X[:2000], X[2000:]
+    >>> y_train, y_test = y[:2000], y[2000:]
+
+    >>> clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
+    ...     max_depth=1, random_state=0).fit(X_train, y_train)
+    >>> clf.score(X_test, y_test)
+    0.913...
     """

     _SUPPORTED_LOSS = ('deviance', 'exponential')
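The restored Examples section fits 100 depth-1 stumps. One way to see the boosting behaviour behind that 0.913 score is to inspect the staged predictions; the sketch below reuses the same data and settings and is illustrative only, not part of the commit.

# staged_predict yields the ensemble's prediction after each boosting stage,
# so the test accuracy of the stump ensemble can be tracked as stages accumulate.
from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

X, y = make_hastie_10_2(random_state=0)
X_train, X_test = X[:2000], X[2000:]
y_train, y_test = y[:2000], y[2000:]

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                 max_depth=1, random_state=0)
clf.fit(X_train, y_train)

for i, y_pred in enumerate(clf.staged_predict(X_test), start=1):
    if i % 25 == 0:  # print every 25 stages to keep the output short
        print(f"after {i:3d} stages: {accuracy_score(y_test, y_pred):.3f}")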
@@ -1274,14 +1286,14 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     Parameters
     ----------
     loss : {'ls', 'lad', 'huber', 'quantile'}, default='ls'
-        loss function to be optimized. 'ls' refers to least squares
+        Loss function to be optimized. 'ls' refers to least squares
         regression. 'lad' (least absolute deviation) is a highly robust
         loss function solely based on order information of the input
         variables. 'huber' is a combination of the two. 'quantile'
         allows quantile regression (use `alpha` to specify the quantile).

     learning_rate : float, default=0.1
-        learning rate shrinks the contribution of each tree by `learning_rate`.
+        Learning rate shrinks the contribution of each tree by `learning_rate`.
         There is a trade-off between learning_rate and n_estimators.

     n_estimators : int, default=100
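The `loss` description for the regressor mentions quantile regression via `alpha`. The sketch below shows that usage with the loss name valid at the time of this commit ('quantile'); the synthetic data is an assumption for illustration.

# With loss='quantile' and alpha=0.9 the model estimates the 90th percentile
# of y given X; alpha=0.5 gives a median regressor. Data are synthetic.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(0)
X = rng.uniform(0, 10, size=(500, 1))
y = np.sin(X).ravel() + rng.normal(scale=0.5, size=500)

upper = GradientBoostingRegressor(loss='quantile', alpha=0.9,
                                  random_state=0).fit(X, y)
median = GradientBoostingRegressor(loss='quantile', alpha=0.5,
                                   random_state=0).fit(X, y)

X_new = np.array([[5.0]])
print("90th percentile:", upper.predict(X_new))
print("median         :", median.predict(X_new))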
@@ -1338,7 +1350,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         equal weight when sample_weight is not provided.

     max_depth : int, default=3
-        maximum depth of the individual regression estimators. The maximum
+        Maximum depth of the individual regression estimators. The maximum
         depth limits the number of nodes in the tree. Tune this parameter
         for best performance; the best value depends on the interaction
         of the input variables.
@@ -1508,6 +1520,13 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     max_features_ : int
         The inferred value of max_features.

+    See Also
+    --------
+    sklearn.ensemble.HistGradientBoostingRegressor : Histogram-based
+        Gradient Boosting Regression Tree.
+    sklearn.tree.DecisionTreeRegressor : A decision tree regressor.
+    sklearn.ensemble.RandomForestRegressor : A random forest regressor.
+
     Notes
     -----
     The features are always randomly permuted at each split. Therefore,
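The regressor's new See Also section points to HistGradientBoostingRegressor as the histogram-based counterpart. The sketch below contrasts the two on a synthetic regression task; the dataset size is an assumption kept small so it runs quickly, and pre-1.0 releases need the experimental enable import mentioned earlier.

# Fit both regressors on the same synthetic data and compare R^2 on a held-out set.
from sklearn.datasets import make_regression
from sklearn.ensemble import (GradientBoostingRegressor,
                              HistGradientBoostingRegressor)
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=5000, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

for est in (GradientBoostingRegressor(random_state=0),
            HistGradientBoostingRegressor(random_state=0)):
    est.fit(X_train, y_train)
    print(type(est).__name__, round(est.score(X_test, y_test), 3))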
@@ -1533,11 +1552,6 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     >>> reg.score(X_test, y_test)
     0.4...

-    See also
-    --------
-    sklearn.ensemble.HistGradientBoostingRegressor,
-    sklearn.tree.DecisionTreeRegressor, RandomForestRegressor
-
     References
     ----------
     J. Friedman, Greedy Function Approximation: A Gradient Boosting

0 commit comments
