FIX reverted method order · scikit-learn/scikit-learn@d2cf102 · GitHub

Commit d2cf102

FIX reverted method order
1 parent 830cb13 commit d2cf102

File tree

1 file changed: +167 -167 lines changed


sklearn/ensemble/gradient_boosting.py

Lines changed: 167 additions & 167 deletions
@@ -732,6 +732,173 @@ def __init__(self, loss, learning_rate, n_estimators, min_samples_split,

        self.estimators_ = np.empty((0, 0), dtype=np.object)

+    def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask,
+                   criterion, splitter, random_state):
+        """Fit another stage of ``n_classes_`` trees to the boosting model. """
+
+        assert sample_mask.dtype == np.bool
+        loss = self.loss_
+        original_y = y
+
+        for k in range(loss.K):
+            if loss.is_multi_class:
+                y = np.array(original_y == k, dtype=np.float64)
+
+            residual = loss.negative_gradient(y, y_pred, k=k,
+                                              sample_weight=sample_weight)
+
+            # induce regression tree on residuals
+            tree = DecisionTreeRegressor(
+                criterion=criterion,
+                splitter=splitter,
+                max_depth=self.max_depth,
+                min_samples_split=self.min_samples_split,
+                min_samples_leaf=self.min_samples_leaf,
+                min_weight_fraction_leaf=self.min_weight_fraction_leaf,
+                max_features=self.max_features,
+                max_leaf_nodes=self.max_leaf_nodes,
+                random_state=random_state)
+
+            if self.subsample < 1.0:
+                # no inplace multiplication!
+                sample_weight = sample_weight * sample_mask.astype(np.float64)
+
+            tree.fit(X, residual, sample_weight=sample_weight,
+                     check_input=False)
+
+            # update tree leaves
+            loss.update_terminal_regions(tree.tree_, X, y, residual, y_pred,
+                                         sample_weight, sample_mask,
+                                         self.learning_rate, k=k)
+
+            # add tree to ensemble
+            self.estimators_[i, k] = tree
+
+        return y_pred
+
+    def _check_params(self):
+        """Check validity of parameters and raise ValueError if not valid. """
+        if self.n_estimators <= 0:
+            raise ValueError("n_estimators must be greater than 0 but "
+                             "was %r" % self.n_estimators)
+
+        if self.learning_rate <= 0.0:
+            raise ValueError("learning_rate must be greater than 0 but "
+                             "was %r" % self.learning_rate)
+
+        if (self.loss not in self._SUPPORTED_LOSS
+                or self.loss not in LOSS_FUNCTIONS):
+            raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))
+
+        if self.loss == 'deviance':
+            loss_class = (MultinomialDeviance
+                          if len(self.classes_) > 2
+                          else BinomialDeviance)
+        else:
+            loss_class = LOSS_FUNCTIONS[self.loss]
+
+        if self.loss in ('huber', 'quantile'):
+            self.loss_ = loss_class(self.n_classes_, self.alpha)
+        else:
+            self.loss_ = loss_class(self.n_classes_)
+
+        if not (0.0 < self.subsample <= 1.0):
+            raise ValueError("subsample must be in (0,1] but "
+                             "was %r" % self.subsample)
+
+        if self.init is not None:
+            if isinstance(self.init, six.string_types):
+                if self.init not in INIT_ESTIMATORS:
+                    raise ValueError('init="%s" is not supported' % self.init)
+            else:
+                if (not hasattr(self.init, 'fit')
+                        or not hasattr(self.init, 'predict')):
+                    raise ValueError("init=%r must be valid BaseEstimator "
+                                     "and support both fit and "
+                                     "predict" % self.init)
+
+        if not (0.0 < self.alpha < 1.0):
+            raise ValueError("alpha must be in (0.0, 1.0) but "
+                             "was %r" % self.alpha)
+
+        if isinstance(self.max_features, six.string_types):
+            if self.max_features == "auto":
+                # if is_classification
+                if self.n_classes_ > 1:
+                    max_features = max(1, int(np.sqrt(self.n_features)))
+                else:
+                    # is regression
+                    max_features = self.n_features
+            elif self.max_features == "sqrt":
+                max_features = max(1, int(np.sqrt(self.n_features)))
+            elif self.max_features == "log2":
+                max_features = max(1, int(np.log2(self.n_features)))
+            else:
+                raise ValueError("Invalid value for max_features: %r. "
+                                 "Allowed string values are 'auto', 'sqrt' "
+                                 "or 'log2'." % self.max_features)
+        elif self.max_features is None:
+            max_features = self.n_features
+        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
+            max_features = self.max_features
+        else:  # float
+            if 0. < self.max_features <= 1.:
+                max_features = max(int(self.max_features * self.n_features), 1)
+            else:
+                raise ValueError("max_features must be in (0, n_features]")
+
+        self.max_features_ = max_features
+
+    def _init_state(self):
+        """Initialize model state and allocate model state data structures. """
+
+        if self.init is None:
+            self.init_ = self.loss_.init_estimator()
+        elif isinstance(self.init, six.string_types):
+            self.init_ = INIT_ESTIMATORS[self.init]()
+        else:
+            self.init_ = self.init
+
+        self.estimators_ = np.empty((self.n_estimators, self.loss_.K),
+                                    dtype=np.object)
+        self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)
+        # do oob?
+        if self.subsample < 1.0:
+            self.oob_improvement_ = np.zeros((self.n_estimators),
+                                             dtype=np.float64)
+
+    def _clear_state(self):
+        """Clear the state of the gradient boosting model. """
+        if hasattr(self, 'estimators_'):
+            self.estimators_ = np.empty((0, 0), dtype=np.object)
+        if hasattr(self, 'train_score_'):
+            del self.train_score_
+        if hasattr(self, 'oob_improvement_'):
+            del self.oob_improvement_
+        if hasattr(self, 'init_'):
+            del self.init_
+
+    def _resize_state(self):
+        """Add additional ``n_estimators`` entries to all attributes. """
+        # self.n_estimators is the number of additional est to fit
+        total_n_estimators = self.n_estimators
+        if total_n_estimators < self.estimators_.shape[0]:
+            raise ValueError('resize with smaller n_estimators %d < %d' %
+                             (total_n_estimators, self.estimators_[0]))
+
+        self.estimators_.resize((total_n_estimators, self.loss_.K))
+        self.train_score_.resize(total_n_estimators)
+        if (self.subsample < 1 or hasattr(self, 'oob_improvement_')):
+            # if do oob resize arrays or create new if not available
+            if hasattr(self, 'oob_improvement_'):
+                self.oob_improvement_.resize(total_n_estimators)
+            else:
+                self.oob_improvement_ = np.zeros((total_n_estimators,),
+                                                 dtype=np.float64)
+
+    def _is_initialized(self):
+        return len(getattr(self, 'estimators_', [])) > 0
+
    def fit(self, X, y, sample_weight=None, monitor=None):
        """Fit the gradient boosting model.

@@ -935,173 +1102,6 @@ def _fit_stages(self, X, y, y_pred, sample_weight, random_state,
                    break
        return i + 1

-    def _fit_stage(self, i, X, y, y_pred, sample_weight, sample_mask,
-                   criterion, splitter, random_state):
-        """Fit another stage of ``n_classes_`` trees to the boosting model. """
-
-        assert sample_mask.dtype == np.bool
-        loss = self.loss_
-        original_y = y
-
-        for k in range(loss.K):
-            if loss.is_multi_class:
-                y = np.array(original_y == k, dtype=np.float64)
-
-            residual = loss.negative_gradient(y, y_pred, k=k,
-                                              sample_weight=sample_weight)
-
-            # induce regression tree on residuals
-            tree = DecisionTreeRegressor(
-                criterion=criterion,
-                splitter=splitter,
-                max_depth=self.max_depth,
-                min_samples_split=self.min_samples_split,
-                min_samples_leaf=self.min_samples_leaf,
-                min_weight_fraction_leaf=self.min_weight_fraction_leaf,
-                max_features=self.max_features,
-                max_leaf_nodes=self.max_leaf_nodes,
-                random_state=random_state)
-
-            if self.subsample < 1.0:
-                # no inplace multiplication!
-                sample_weight = sample_weight * sample_mask.astype(np.float64)
-
-            tree.fit(X, residual, sample_weight=sample_weight,
-                     check_input=False)
-
-            # update tree leaves
-            loss.update_terminal_regions(tree.tree_, X, y, residual, y_pred,
-                                         sample_weight, sample_mask,
-                                         self.learning_rate, k=k)
-
-            # add tree to ensemble
-            self.estimators_[i, k] = tree
-
-        return y_pred
-
-    def _check_params(self):
-        """Check validity of parameters and raise ValueError if not valid. """
-        if self.n_estimators <= 0:
-            raise ValueError("n_estimators must be greater than 0 but "
-                             "was %r" % self.n_estimators)
-
-        if self.learning_rate <= 0.0:
-            raise ValueError("learning_rate must be greater than 0 but "
-                             "was %r" % self.learning_rate)
-
-        if (self.loss not in self._SUPPORTED_LOSS
-                or self.loss not in LOSS_FUNCTIONS):
-            raise ValueError("Loss '{0:s}' not supported. ".format(self.loss))
-
-        if self.loss == 'deviance':
-            loss_class = (MultinomialDeviance
-                          if len(self.classes_) > 2
-                          else BinomialDeviance)
-        else:
-            loss_class = LOSS_FUNCTIONS[self.loss]
-
-        if self.loss in ('huber', 'quantile'):
-            self.loss_ = loss_class(self.n_classes_, self.alpha)
-        else:
-            self.loss_ = loss_class(self.n_classes_)
-
-        if not (0.0 < self.subsample <= 1.0):
-            raise ValueError("subsample must be in (0,1] but "
-                             "was %r" % self.subsample)
-
-        if self.init is not None:
-            if isinstance(self.init, six.string_types):
-                if self.init not in INIT_ESTIMATORS:
-                    raise ValueError('init="%s" is not supported' % self.init)
-            else:
-                if (not hasattr(self.init, 'fit')
-                        or not hasattr(self.init, 'predict')):
-                    raise ValueError("init=%r must be valid BaseEstimator "
-                                     "and support both fit and "
-                                     "predict" % self.init)
-
-        if not (0.0 < self.alpha < 1.0):
-            raise ValueError("alpha must be in (0.0, 1.0) but "
-                             "was %r" % self.alpha)
-
-        if isinstance(self.max_features, six.string_types):
-            if self.max_features == "auto":
-                # if is_classification
-                if self.n_classes_ > 1:
-                    max_features = max(1, int(np.sqrt(self.n_features)))
-                else:
-                    # is regression
-                    max_features = self.n_features
-            elif self.max_features == "sqrt":
-                max_features = max(1, int(np.sqrt(self.n_features)))
-            elif self.max_features == "log2":
-                max_features = max(1, int(np.log2(self.n_features)))
-            else:
-                raise ValueError("Invalid value for max_features: %r. "
-                                 "Allowed string values are 'auto', 'sqrt' "
-                                 "or 'log2'." % self.max_features)
-        elif self.max_features is None:
-            max_features = self.n_features
-        elif isinstance(self.max_features, (numbers.Integral, np.integer)):
-            max_features = self.max_features
-        else:  # float
-            if 0. < self.max_features <= 1.:
-                max_features = max(int(self.max_features * self.n_features), 1)
-            else:
-                raise ValueError("max_features must be in (0, n_features]")
-
-        self.max_features_ = max_features
-
-    def _init_state(self):
-        """Initialize model state and allocate model state data structures. """
-
-        if self.init is None:
-            self.init_ = self.loss_.init_estimator()
-        elif isinstance(self.init, six.string_types):
-            self.init_ = INIT_ESTIMATORS[self.init]()
-        else:
-            self.init_ = self.init
-
-        self.estimators_ = np.empty((self.n_estimators, self.loss_.K),
-                                    dtype=np.object)
-        self.train_score_ = np.zeros((self.n_estimators,), dtype=np.float64)
-        # do oob?
-        if self.subsample < 1.0:
-            self.oob_improvement_ = np.zeros((self.n_estimators),
-                                             dtype=np.float64)
-
-    def _clear_state(self):
-        """Clear the state of the gradient boosting model. """
-        if hasattr(self, 'estimators_'):
-            self.estimators_ = np.empty((0, 0), dtype=np.object)
-        if hasattr(self, 'train_score_'):
-            del self.train_score_
-        if hasattr(self, 'oob_improvement_'):
-            del self.oob_improvement_
-        if hasattr(self, 'init_'):
-            del self.init_
-
-    def _resize_state(self):
-        """Add additional ``n_estimators`` entries to all attributes. """
-        # self.n_estimators is the number of additional est to fit
-        total_n_estimators = self.n_estimators
-        if total_n_estimators < self.estimators_.shape[0]:
-            raise ValueError('resize with smaller n_estimators %d < %d' %
-                             (total_n_estimators, self.estimators_[0]))
-
-        self.estimators_.resize((total_n_estimators, self.loss_.K))
-        self.train_score_.resize(total_n_estimators)
-        if (self.subsample < 1 or hasattr(self, 'oob_improvement_')):
-            # if do oob resize arrays or create new if not available
-            if hasattr(self, 'oob_improvement_'):
-                self.oob_improvement_.resize(total_n_estimators)
-            else:
-                self.oob_improvement_ = np.zeros((total_n_estimators,),
-                                                 dtype=np.float64)
-
-    def _is_initialized(self):
-        return len(getattr(self, 'estimators_', [])) > 0
-
    def _make_estimator(self, append=True):
        # we don't need _make_estimator
        raise NotImplementedError()
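For orientation, `_fit_stage` (the first of the helpers moved above `fit` in this commit) performs one boosting iteration: it fits a `DecisionTreeRegressor` to the negative gradient of the loss (the residuals) and folds that tree's contribution back into `y_pred`. The snippet below is a minimal, self-contained sketch of that idea for plain least-squares boosting; the synthetic data, tree depth, and mean-valued initial prediction are illustrative assumptions, and it is not the scikit-learn code path shown in the diff.

# Minimal least-squares boosting sketch (illustrative only, not the
# scikit-learn implementation): each stage fits a regression tree to the
# current residuals and adds a shrunken copy of its predictions to y_pred,
# mirroring what _fit_stage does per stage in the diff above.
import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(200, 3)
y = 2.0 * X[:, 0] + np.sin(4.0 * X[:, 1]) + 0.1 * rng.randn(200)

learning_rate = 0.1
n_estimators = 50
y_pred = np.full_like(y, y.mean())     # analogue of the init_ estimator
trees = []

for i in range(n_estimators):
    residual = y - y_pred              # negative gradient of squared error
    tree = DecisionTreeRegressor(max_depth=3, random_state=i)
    tree.fit(X, residual)
    y_pred += learning_rate * tree.predict(X)
    trees.append(tree)

print("training MSE: %.4f" % np.mean((y - y_pred) ** 2))

Judging from the helpers in the diff, a fresh model would additionally pass through `_check_params` and `_init_state` before the first stage, while `_resize_state` only matters when growing an already-fitted ensemble.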

0 commit comments
