@@ -76,22 +76,6 @@ def predict(self, X):
76
76
return y
77
77
78
78
79
- class ClassPrior2Predictor (object ):
80
- """A simple initial estimator that predicts the mean
81
- of the training targets.
82
- """
83
-
84
- prior = None
85
-
86
- def fit (self , X , y ):
87
- self .prior = np .log (y .sum () / float (y .shape [0 ] - y .sum ()))
88
-
89
- def predict (self , X ):
90
- y = np .empty ((X .shape [0 ],), dtype = np .float64 )
91
- y .fill (self .prior )
92
- return y
93
-
94
-
95
79
class ClassPriorPredictor (object ):
96
80
"""A simple initial estimator that predicts the mean
97
81
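of the training targets. NOTE(review): `fit` stores the log of a class ratio (`y.sum() / (n - y.sum())`), not a mean -- confirm and reword this summary.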
@@ -100,8 +84,7 @@ class ClassPriorPredictor(object):
100
84
prior = None
101
85
102
86
def fit (self , X , y ):
103
- pos_prior = y [y == - 1 ].shape [0 ] / float (y .shape [0 ])
104
- self .prior = 0.5 * np .log2 (pos_prior / (1.0 - pos_prior ))
87
+ self .prior = np .log (y .sum () / float (y .shape [0 ] - y .sum ()))
105
88
106
89
def predict (self , X ):
107
90
y = np .empty ((X .shape [0 ],), dtype = np .float64 )
@@ -170,8 +153,8 @@ def negative_gradient(self, y, pred):
170
153
171
154
def _update_terminal_region (self , node , X , y , residual , pred ):
172
155
"""LAD updates terminal regions to median estimates. """
173
- node .value = np .asanyarray (np .median (y .take (node .sample_mask , axis = 0 ) - \
174
- pred .take (node .sample_mask , axis = 0 )))
156
+ node .value = np .asanyarray (np .median (y .take (node .terminal_region , axis = 0 ) - \
157
+ pred .take (node .terminal_region , axis = 0 )))
175
158
176
159
177
160
## class HuberError(LossFunction):
@@ -189,14 +172,14 @@ def _update_terminal_region(self, node, X, y, residual, pred):
189
172
## def _update_terminal_region(self, node, X, y, residual, pred):
190
173
## """LAD updates terminal regions to median estimates. """
191
174
## ## FIXME copied from LAD, still TODO
192
- ## node.value = np.asanyarray(np.median(y.take(node.sample_mask , axis=0) - \
193
- ## pred.take(node.sample_mask , axis=0)))
175
+ ## node.value = np.asanyarray(np.median(y.take(node.terminal_region , axis=0) - \
176
+ ## pred.take(node.terminal_region , axis=0)))
194
177
195
178
196
- class BernoulliDeviance (LossFunction ):
179
+ class BinomialDeviance (LossFunction ):
197
180
198
181
def init_estimator (self ):
199
- return ClassPrior2Predictor ()
182
+ return ClassPriorPredictor ()
200
183
201
184
def __call__ (self , y , pred ):
202
185
"""Compute the deviance (= negative log-likelihood). """
@@ -207,45 +190,26 @@ def negative_gradient(self, y, pred):
207
190
208
191
def _update_terminal_region (self , node , X , y , residual , pred ):
209
192
"""Make a single Newton-Raphson step. """
210
- residual = residual .take (node .sample_mask , axis = 0 )
211
- y = y .take (node .sample_mask , axis = 0 )
212
-
213
- node .value = np .asanyarray (residual .sum () / \
214
- np .sum ((y - residual ) * (1.0 - y + residual )),
215
- dtype = np .float64 )
216
193
217
- # FIXME free mem - rename `sample_mask` since its actually an index arr
218
- del node .sample_mask
219
- node .sample_mask = None
220
-
221
-
222
- class BinomialDeviance (LossFunction ):
223
-
224
- def init_estimator (self ):
225
- return ClassPriorPredictor ()
226
-
227
- def __call__ (self , y , pred ):
228
- return np .log2 (1.0 + np .exp (- 2.0 * y * pred ))
194
+ residual = residual .take (node .terminal_region , axis = 0 )
195
+ y = y .take (node .terminal_region , axis = 0 )
229
196
230
- def negative_gradient ( self , y , pred ):
231
- return ( 2.0 * y ) / (1.0 + np . exp ( 2.0 * y * pred ))
197
+ numerator = residual . sum ()
198
+ denominator = np . sum (( y - residual ) * (1.0 - y + residual ))
232
199
233
- def _update_terminal_region (self , node , X , y , residual , pred ):
234
- """Make a single Newton-Raphson step. """
235
- targets = residual .take (node .sample_mask , axis = 0 )
236
- abs_targets = np .abs (targets )
237
- node .value = np .asanyarray (targets .sum () / np .sum (abs_targets * \
238
- (2.00000001 - abs_targets )))
200
+ if denominator == 0.0 :
201
+ node .value = np .array (0.0 , dtype = np .float64 )
202
+ else :
203
+ node .value = np .asanyarray (numerator / denominator , dtype = np .float64 )
239
204
240
205
# Free memory: `terminal_region` is actually an index array (not a mask); drop it here
241
- del node .sample_mask
242
- node .sample_mask = None
206
+ del node .terminal_region
207
+ node .terminal_region = None
243
208
244
209
245
210
LOSS_FUNCTIONS = {'ls' : LeastSquaresError ,
246
211
'lad' : LeastAbsoluteError ,
247
- 'deviance' : BinomialDeviance ,
248
- 'bernoulli' : BernoulliDeviance }
212
+ 'deviance' : BinomialDeviance }
249
213
250
214
251
215
class BaseGradientBoosting (BaseEstimator ):
@@ -357,14 +321,14 @@ def fit(self, X, y, monitor=None):
357
321
#print "Iteration %d - build_tree - in %fs" % (i, time() - t0)
358
322
359
323
360
- # assert tree.is_leaf == False
361
-
324
+ assert tree .is_leaf != True
325
+
362
326
loss .update_terminal_regions (tree , X , y , residual , y_pred )
363
327
#print "Iteration %d - update - in %fs" % (i, time() - t0)
364
328
self .trees .append (tree )
365
329
366
330
y_pred = self ._predict (X , old_pred = y_pred )
367
-
331
+
368
332
if monitor :
369
333
monitor (self , i )
370
334
0 commit comments