21
21
22
22
from abc import ABCMeta , abstractmethod
23
23
24
+ import inspect
24
25
import numpy as np
25
26
from numpy .core .umath_tests import inner1d
26
27
@@ -108,12 +109,20 @@ def fit(self, X, y, sample_weight=None):
108
109
self .estimator_weights_ = np .zeros (self .n_estimators , dtype = np .float )
109
110
self .estimator_errors_ = np .ones (self .n_estimators , dtype = np .float )
110
111
112
+ # Create argsorted X for fast tree induction
113
+ X_argsorted = None
114
+
115
+ if "X_argsorted" in inspect .getargspec (self .base_estimator .fit ).args :
116
+ X_argsorted = np .asfortranarray (
117
+ np .argsort (X .T , axis = 1 ).astype (np .int32 ).T )
118
+
111
119
for iboost in xrange (self .n_estimators ):
112
120
# Boosting step
113
121
sample_weight , estimator_weight , estimator_error = self ._boost (
114
122
iboost ,
115
123
X , y ,
116
- sample_weight )
124
+ sample_weight ,
125
+ X_argsorted = X_argsorted )
117
126
118
127
# Early termination
119
128
if sample_weight is None :
@@ -139,7 +148,7 @@ def fit(self, X, y, sample_weight=None):
139
148
return self
140
149
141
150
@abstractmethod
142
- def _boost (self , iboost , X , y , sample_weight ):
151
+ def _boost (self , iboost , X , y , sample_weight , X_argsorted = None ):
143
152
"""Implement a single boost.
144
153
145
154
Warning: This method needs to be overridden by subclasses.
@@ -367,7 +376,7 @@ def fit(self, X, y, sample_weight=None):
367
376
368
377
return super (AdaBoostClassifier , self ).fit (X , y , sample_weight )
369
378
370
- def _boost (self , iboost , X , y , sample_weight ):
379
+ def _boost (self , iboost , X , y , sample_weight , X_argsorted = None ):
371
380
"""Implement a single boost.
372
381
373
382
Perform a single boost according to the real multi-class SAMME.R
@@ -403,17 +412,21 @@ def _boost(self, iboost, X, y, sample_weight):
403
412
If None then boosting has terminated early.
404
413
"""
405
414
if self .algorithm == 'SAMME.R' :
406
- return self ._boost_real (iboost , X , y , sample_weight )
415
+ return self ._boost_real (iboost , X , y , sample_weight , X_argsorted = X_argsorted )
407
416
408
417
else : # elif self.algorithm == "SAMME":
409
- return self ._boost_discrete (iboost , X , y , sample_weight )
418
+ return self ._boost_discrete (iboost , X , y , sample_weight , X_argsorted = X_argsorted )
410
419
411
- def _boost_real (self , iboost , X , y , sample_weight ):
420
+ def _boost_real (self , iboost , X , y , sample_weight , X_argsorted = None ):
412
421
"""Implement a single boost using the SAMME.R real algorithm."""
413
422
estimator = self ._make_estimator ()
414
423
415
- y_predict_proba = estimator .fit (
416
- X , y , sample_weight = sample_weight ).predict_proba (X )
424
+ if X_argsorted is not None :
425
+ estimator .fit (X , y , sample_weight = sample_weight , X_argsorted = X_argsorted )
426
+ else :
427
+ estimator .fit (X , y , sample_weight = sample_weight )
428
+
429
+ y_predict_proba = estimator .predict_proba (X )
417
430
418
431
if iboost == 0 :
419
432
self .classes_ = getattr (estimator , 'classes_' , None )
@@ -464,12 +477,16 @@ def _boost_real(self, iboost, X, y, sample_weight):
464
477
465
478
return sample_weight , 1. , estimator_error
466
479
467
- def _boost_discrete (self , iboost , X , y , sample_weight ):
480
+ def _boost_discrete (self , iboost , X , y , sample_weight , X_argsorted = None ):
468
481
"""Implement a single boost using the SAMME discrete algorithm."""
469
482
estimator = self ._make_estimator ()
470
483
471
- y_predict = estimator .fit (
472
- X , y , sample_weight = sample_weight ).predict (X )
484
+ if X_argsorted is not None :
485
+ estimator .fit (X , y , sample_weight = sample_weight , X_argsorted = X_argsorted )
486
+ else :
487
+ estimator .fit (X , y , sample_weight = sample_weight )
488
+
489
+ y_predict = estimator .predict (X )
473
490
474
491
if iboost == 0 :
475
492
self .classes_ = getattr (estimator , 'classes_' , None )
@@ -875,7 +892,7 @@ def fit(self, X, y, sample_weight=None):
875
892
# Fit
876
893
return super (AdaBoostRegressor , self ).fit (X , y , sample_weight )
877
894
878
- def _boost (self , iboost , X , y , sample_weight ):
895
+ def _boost (self , iboost , X , y , sample_weight , X_argsorted = None ):
879
896
"""Implement a single boost for regression
880
897
881
898
Perform a single boost according to the AdaBoost.R2 algorithm and
@@ -925,8 +942,8 @@ def _boost(self, iboost, X, y, sample_weight):
925
942
926
943
# Fit on the bootstrapped sample and obtain a prediction
927
944
# for all samples in the training set
928
- y_predict = estimator .fit (
929
- X [ bootstrap_idx ], y [ bootstrap_idx ]) .predict (X )
945
+ estimator .fit (X [ bootstrap_idx ], y [ bootstrap_idx ])
946
+ y_predict = estimator .predict (X )
930
947
931
948
error_vect = np .abs (y_predict - y )
932
949
error_max = error_vect .max ()
@@ -965,7 +982,6 @@ def _boost(self, iboost, X, y, sample_weight):
965
982
return sample_weight , estimator_weight , estimator_error
966
983
967
984
def _get_median_predict (self , X , limit = - 1 ):
968
-
969
985
if not self .estimators_ :
970
986
raise RuntimeError (
971
987
("{0} is not initialized. "
0 commit comments