ENH: precompute X_argsorted when possible · seckcoder/scikit-learn@4a76b06 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4a76b06

Browse files
committed
ENH: precompute X_argsorted when possible
1 parent 9f1c346 commit 4a76b06

File tree

1 file changed

+31
-15
lines changed

1 file changed

+31
-15
lines changed

sklearn/ensemble/weight_boosting.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from abc import ABCMeta, abstractmethod
2323

24+
import inspect
2425
import numpy as np
2526
from numpy.core.umath_tests import inner1d
2627

@@ -108,12 +109,20 @@ def fit(self, X, y, sample_weight=None):
108109
self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float)
109110
self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float)
110111

112+
# Create argsorted X for fast tree induction
113+
X_argsorted = None
114+
115+
if "X_argsorted" in inspect.getargspec(self.base_estimator.fit).args:
116+
X_argsorted = np.asfortranarray(
117+
np.argsort(X.T, axis=1).astype(np.int32).T)
118+
111119
for iboost in xrange(self.n_estimators):
112120
# Boosting step
113121
sample_weight, estimator_weight, estimator_error = self._boost(
114122
iboost,
115123
X, y,
116-
sample_weight)
124+
sample_weight,
125+
X_argsorted=X_argsorted)
117126

118127
# Early termination
119128
if sample_weight is None:
@@ -139,7 +148,7 @@ def fit(self, X, y, sample_weight=None):
139148
return self
140149

141150
@abstractmethod
142-
def _boost(self, iboost, X, y, sample_weight):
151+
def _boost(self, iboost, X, y, sample_weight, X_argsorted=None):
143152
"""Implement a single boost.
144153
145154
Warning: This method needs to be overriden by subclasses.
@@ -367,7 +376,7 @@ def fit(self, X, y, sample_weight=None):
367376

368377
return super(AdaBoostClassifier, self).fit(X, y, sample_weight)
369378

370-
def _boost(self, iboost, X, y, sample_weight):
379+
def _boost(self, iboost, X, y, sample_weight, X_argsorted=None):
371380
"""Implement a single boost.
372381
373382
Perform a single boost according to the real multi-class SAMME.R
@@ -403,17 +412,21 @@ def _boost(self, iboost, X, y, sample_weight):
403412
If None then boosting has terminated early.
404413
"""
405414
if self.algorithm == 'SAMME.R':
406-
return self._boost_real(iboost, X, y, sample_weight)
415+
return self._boost_real(iboost, X, y, sample_weight, X_argsorted=X_argsorted)
407416

408417
else: # elif self.algorithm == "SAMME":
409-
return self._boost_discrete(iboost, X, y, sample_weight)
418+
return self._boost_discrete(iboost, X, y, sample_weight, X_argsorted=X_argsorted)
410419

411-
def _boost_real(self, iboost, X, y, sample_weight):
420+
def _boost_real(self, iboost, X, y, sample_weight, X_argsorted=None):
412421
"""Implement a single boost using the SAMME.R real algorithm."""
413422
estimator = self._make_estimator()
414423

415-
y_predict_proba = estimator.fit(
416-
X, y, sample_weight=sample_weight).predict_proba(X)
424+
if X_argsorted is not None:
425+
estimator.fit(X, y, sample_weight=sample_weight, X_argsorted=X_argsorted)
426+
else:
427+
estimator.fit(X, y, sample_weight=sample_weight)
428+
429+
y_predict_proba = estimator.predict_proba(X)
417430

418431
if iboost == 0:
419432
self.classes_ = getattr(estimator, 'classes_', None)
@@ -464,12 +477,16 @@ def _boost_real(self, iboost, X, y, sample_weight):
464477

465478
return sample_weight, 1., estimator_error
466479

467-
def _boost_discrete(self, iboost, X, y, sample_weight):
480+
def _boost_discrete(self, iboost, X, y, sample_weight, X_argsorted=None):
468481
"""Implement a single boost using the SAMME discrete algorithm."""
469482
estimator = self._make_estimator()
470483

471-
y_predict = estimator.fit(
472-
X, y, sample_weight=sample_weight).predict(X)
484+
if X_argsorted is not None:
485+
estimator.fit(X, y, sample_weight=sample_weight, X_argsorted=X_argsorted)
486+
else:
487+
estimator.fit(X, y, sample_weight=sample_weight)
488+
489+
y_predict = estimator.predict(X)
473490

474491
if iboost == 0:
475492
self.classes_ = getattr(estimator, 'classes_', None)
@@ -875,7 +892,7 @@ def fit(self, X, y, sample_weight=None):
875892
# Fit
876893
return super(AdaBoostRegressor, self).fit(X, y, sample_weight)
877894

878-
def _boost(self, iboost, X, y, sample_weight):
895+
def _boost(self, iboost, X, y, sample_weight, X_argsorted=None):
879896
"""Implement a single boost for regression
880897
881898
Perform a single boost according to the AdaBoost.R2 algorithm and
@@ -925,8 +942,8 @@ def _boost(self, iboost, X, y, sample_weight):
925942

926943
# Fit on the bootstrapped sample and obtain a prediction
927944
# for all samples in the training set
928-
y_predict = estimator.fit(
929-
X[bootstrap_idx], y[bootstrap_idx]).predict(X)
945+
estimator.fit(X[bootstrap_idx], y[bootstrap_idx])
946+
y_predict = estimator.predict(X)
930947

931948
error_vect = np.abs(y_predict - y)
932949
error_max = error_vect.max()
@@ -965,7 +982,6 @@ def _boost(self, iboost, X, y, sample_weight):
965982
return sample_weight, estimator_weight, estimator_error
966983

967984
def _get_median_predict(self, X, limit=-1):
968-
969985
if not self.estimators_:
970986
raise RuntimeError(
971987
("{0} is not initialized. "

0 commit comments

Comments
 (0)
0