[MRG+3] Add mean absolute error splitting criterion to DecisionTreeRegressor #6667
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
RandomForestRegressor — criterion docstring:

```diff
@@ -947,8 +947,10 @@ class RandomForestRegressor(ForestRegressor):
         The number of trees in the forest.
 
     criterion : string, optional (default="mse")
-        The function to measure the quality of a split. The only supported
-        criterion is "mse" for the mean squared error.
+        The function to measure the quality of a split. Supported criteria
+        are "mse" for the mean squared error, which is equal to variance
+        reduction as feature selection criterion, and "mae" for the mean
+        absolute error.
 
     max_features : int, float, string or None, optional (default="auto")
        The number of features to consider when looking for the best split:
```
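As a quick illustration of the documented behaviour (my own hedged sketch, not part of the PR): the new criterion only needs to be passed as a string. Note that "mae" is the spelling used at the time of this PR; later scikit-learn releases rename it to "absolute_error".

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=300, n_features=8, noise=10.0, random_state=0)

# "mse" remains the default; "mae" is the option added by this PR
# (renamed to "absolute_error" in later scikit-learn releases).
for criterion in ("mse", "mae"):
    forest = RandomForestRegressor(n_estimators=50, criterion=criterion,
                                   random_state=0)
    forest.fit(X, y)
    print(criterion, forest.score(X, y))
```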
ExtraTreesRegressor — criterion docstring (same wording change):

```diff
@@ -1299,8 +1301,10 @@ class ExtraTreesRegressor(ForestRegressor):
         The number of trees in the forest.
 
     criterion : string, optional (default="mse")
-        The function to measure the quality of a split. The only supported
-        criterion is "mse" for the mean squared error.
+        The function to measure the quality of a split. Supported criteria
+        are "mse" for the mean squared error, which is equal to variance
+        reduction as feature selection criterion, and "mae" for the mean
+        absolute error.
 
     max_features : int, float, string or None, optional (default="auto")
        The number of features to consider when looking for the best split:
```

Review comment on this hunk: I think this should have ``versionadded`` for `mae`.
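One way the requested annotation could look, as a sketch only; the release number 0.18 is my assumption based on the PR's timeframe and is not stated anywhere in the diff:

```python
# Hypothetical numpydoc fragment addressing the "versionadded" review comments.
# The release number 0.18 is an assumption, not taken from the diff.
criterion_doc = """\
criterion : string, optional (default="mse")
    The function to measure the quality of a split. Supported criteria
    are "mse" for the mean squared error, which is equal to variance
    reduction as feature selection criterion, and "mae" for the mean
    absolute error.

    .. versionadded:: 0.18
       Mean Absolute Error (MAE) criterion.
"""
print(criterion_doc)
```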
Gradient boosting — BaseGradientBoosting now accepts and stores the criterion:

```diff
@@ -720,15 +720,16 @@ class BaseGradientBoosting(six.with_metaclass(ABCMeta, BaseEnsemble,
     """Abstract base class for Gradient Boosting. """
 
     @abstractmethod
-    def __init__(self, loss, learning_rate, n_estimators, min_samples_split,
-                 min_samples_leaf, min_weight_fraction_leaf,
+    def __init__(self, loss, learning_rate, n_estimators, criterion,
+                 min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
                  max_depth, init, subsample, max_features,
                  random_state, alpha=0.9, verbose=0, max_leaf_nodes=None,
                  warm_start=False, presort='auto'):
 
         self.n_estimators = n_estimators
         self.learning_rate = learning_rate
         self.loss = loss
+        self.criterion = criterion
         self.min_samples_split = min_samples_split
         self.min_samples_leaf = min_samples_leaf
         self.min_weight_fraction_leaf = min_weight_fraction_leaf
```
_fit_stage uses the configured criterion for the per-stage tree instead of hard-coding "friedman_mse":

```diff
@@ -762,7 +763,7 @@ def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask,
 
         # induce regression tree on residuals
         tree = DecisionTreeRegressor(
-            criterion='friedman_mse',
+            criterion=self.criterion,
             splitter='best',
             max_depth=self.max_depth,
             min_samples_split=self.min_samples_split,
```
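For readers unfamiliar with _fit_stage, the sketch below (a simplified illustration of the idea, not library code) shows what the hunk touches: each boosting stage fits a shallow DecisionTreeRegressor on the current residuals, and with this PR the split criterion of that tree is whatever the ensemble was configured with. "mae" is the spelling introduced here; newer scikit-learn releases spell it "absolute_error".

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.uniform(-3, 3, size=(200, 2))
y = np.sin(X[:, 0]) + 0.1 * rng.randn(200)

# One simplified boosting stage: start from the mean prediction and fit a
# shallow tree on the residuals, using the user-chosen split criterion.
criterion = "mae"          # "absolute_error" in recent scikit-learn releases
learning_rate = 0.1

y_pred = np.full_like(y, y.mean())
residuals = y - y_pred
stage_tree = DecisionTreeRegressor(criterion=criterion, splitter="best",
                                   max_depth=3, random_state=0)
stage_tree.fit(X, residuals)
y_pred = y_pred + learning_rate * stage_tree.predict(X)

print("mean absolute residual after one stage:", np.mean(np.abs(y - y_pred)))
```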
GradientBoostingClassifier — documentation for the new criterion parameter:

```diff
@@ -1296,6 +1297,14 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
         of the input variables.
         Ignored if ``max_leaf_nodes`` is not None.
 
+    criterion : string, optional (default="friedman_mse")
+        The function to measure the quality of a split. Supported criteria
+        are "friedman_mse" for the mean squared error with improvement
+        score by Friedman, "mse" for mean squared error, and "mae" for
+        the mean absolute error. The default value of "friedman_mse" is
+        generally the best as it can provide a better approximation in
+        some cases.
+
     min_samples_split : int, float, optional (default=2)
         The minimum number of samples required to split an internal node:
 
```

Review thread on this entry:

- Reviewer: Sorry for being late to the party, but this should have a ``versionadded``.
- Author: yes it should, i'll add that
- Reviewer: thanks :)
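A hedged usage sketch of the documented options (my example, not from the PR). "friedman_mse" stays the default, and "mae" became "absolute_error" in later releases:

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=400, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# The default is unchanged; the other criteria only need to be passed
# explicitly ("mae" was later renamed "absolute_error").
for criterion in ("friedman_mse", "mse", "mae"):
    clf = GradientBoostingClassifier(criterion=criterion, n_estimators=100,
                                     random_state=0)
    clf.fit(X_train, y_train)
    print(criterion, clf.score(X_test, y_test))
```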
GradientBoostingClassifier — the parameter is exposed in __init__ and forwarded to the base class:

```diff
@@ -1426,7 +1435,7 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin):
     _SUPPORTED_LOSS = ('deviance', 'exponential')
 
     def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100,
-                 subsample=1.0, min_samples_split=2,
+                 subsample=1.0, criterion='friedman_mse', min_samples_split=2,
                  min_samples_leaf=1, min_weight_fraction_leaf=0.,
                  max_depth=3, init=None, random_state=None,
                  max_features=None, verbose=0,
@@ -1435,7 +1444,7 @@ def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100,
 
         super(GradientBoostingClassifier, self).__init__(
             loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,
-            min_samples_split=min_samples_split,
+            criterion=criterion, min_samples_split=min_samples_split,
             min_samples_leaf=min_samples_leaf,
             min_weight_fraction_leaf=min_weight_fraction_leaf,
             max_depth=max_depth, init=init, subsample=subsample,
```
GradientBoostingRegressor — documentation for the new criterion parameter:

```diff
@@ -1643,6 +1652,14 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
         of the input variables.
         Ignored if ``max_leaf_nodes`` is not None.
 
+    criterion : string, optional (default="friedman_mse")
+        The function to measure the quality of a split. Supported criteria
+        are "friedman_mse" for the mean squared error with improvement
+        score by Friedman, "mse" for mean squared error, and "mae" for
+        the mean absolute error. The default value of "friedman_mse" is
+        generally the best as it can provide a better approximation in
+        some cases.
+
     min_samples_split : int, float, optional (default=2)
         The minimum number of samples required to split an internal node:
 
```

Review comment on this entry: ``versionadded``
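A hedged sketch comparing two of the criteria on data with injected outliers, where MAE-based splitting is often cited as the more robust choice (my example, not from the PR; "mae" was later renamed "absolute_error"):

```python
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=10, noise=5.0, random_state=0)

# Inject heavy-tailed noise into a handful of targets.
rng = np.random.RandomState(0)
outliers = rng.choice(len(y), size=25, replace=False)
y[outliers] += 200 * rng.randn(25)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

for criterion in ("friedman_mse", "mae"):   # "mae" -> "absolute_error" in newer releases
    gbr = GradientBoostingRegressor(criterion=criterion, n_estimators=100,
                                    random_state=0)
    gbr.fit(X_train, y_train)
    print(criterion, mean_absolute_error(y_test, gbr.predict(X_test)))
```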
GradientBoostingRegressor — the parameter is exposed in __init__ and forwarded to the base class:

```diff
@@ -1772,15 +1789,15 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin):
     _SUPPORTED_LOSS = ('ls', 'lad', 'huber', 'quantile')
 
     def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100,
-                 subsample=1.0, min_samples_split=2,
+                 subsample=1.0, criterion='friedman_mse', min_samples_split=2,
                  min_samples_leaf=1, min_weight_fraction_leaf=0.,
                  max_depth=3, init=None, random_state=None,
                  max_features=None, alpha=0.9, verbose=0, max_leaf_nodes=None,
                  warm_start=False, presort='auto'):
 
         super(GradientBoostingRegressor, self).__init__(
             loss=loss, learning_rate=learning_rate, n_estimators=n_estimators,
-            min_samples_split=min_samples_split,
+            criterion=criterion, min_samples_split=min_samples_split,
             min_samples_leaf=min_samples_leaf,
             min_weight_fraction_leaf=min_weight_fraction_leaf,
             max_depth=max_depth, init=init, subsample=subsample,
```