@@ -136,8 +136,8 @@ def _parallel_predict_proba(trees, X, n_classes, n_outputs):
136
136
137
137
def _parallel_predict_paths (trees , X ):
138
138
"""Private function used to compute a batch of prediction paths within a job."""
139
- return [tree .predict ( X , return_paths = True ) for tree in trees ]
140
-
139
+ return [tree .decision_paths ( X ) for tree in trees ]
140
+
141
141
142
142
def _parallel_predict_regression (trees , X ):
143
143
"""Private function used to compute a batch of predictions within a job."""
@@ -303,6 +303,40 @@ def _validate_y(self, y):
303
303
# Default implementation
304
304
return y
305
305
306
def decision_paths(self, X):
    """Return the decision paths followed by X in the trees of the forest.

    This method does not compute predictions; it only collects, for each
    fitted estimator, the result of that estimator's ``decision_paths(X)``.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    paths : list
        Flat list with one entry per estimator processed by the jobs; each
        entry is whatever that estimator's ``decision_paths(X)`` returns.
        NOTE(review): each entry is expected to be an array of shape
        [n_samples, max_depth + 1] of node ids, starting with the root
        node id and padded with -1 on the right when a path is shorter
        than max_depth + 1 -- confirm against the tree implementation.
    """
    # Check data: convert unless X already has the expected dtype and is 2-D
    if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
        X = array2d(X, dtype=DTYPE)

    # Assign chunk of trees to jobs (the per-job tree counts are not needed)
    n_jobs, _, starts = _partition_estimators(self)

    # Parallel loop: each job handles one contiguous slice of estimators
    path_list = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                         backend="threading")(
        delayed(_parallel_predict_paths)(
            self.estimators_[starts[i]:starts[i + 1]], X)
        for i in range(n_jobs))

    # Unpack the nested per-job lists into a single flat list and return
    return [paths for job_paths in path_list for paths in job_paths]
338
+
339
+
306
340
@property
307
341
def feature_importances_ (self ):
308
342
"""Return the feature importances (the higher, the more important the
@@ -404,7 +438,7 @@ def _validate_y(self, y):
404
438
405
439
return y
406
440
407
- def predict (self , X , return_paths = True ):
441
+ def predict (self , X ):
408
442
"""Predict class for X.
409
443
410
444
The predicted class of an input sample is computed as the majority
@@ -420,25 +454,6 @@ def predict(self, X, return_paths = True):
420
454
y : array of shape = [n_samples] or [n_samples, n_outputs]
421
455
The predicted classes.
422
456
"""
423
-
424
-
425
- if return_paths :
426
- # Check data
427
- if getattr (X , "dtype" , None ) != DTYPE or X .ndim != 2 :
428
- X = array2d (X , dtype = DTYPE )
429
-
430
- # Assign chunk of trees to jobs
431
- n_jobs , n_trees , starts = _partition_estimators (self )
432
-
433
- # Parallel loop
434
- path_list = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
435
- backend = "threading" )(
436
- delayed (_parallel_predict_paths )(
437
- self .estimators_ [starts [i ]:starts [i + 1 ]], X )
438
- for i in range (n_jobs ))
439
- #unpack the nested list and return
440
- return [lst for med_lst in path_list for lst in med_lst ]
441
-
442
457
n_samples = len (X )
443
458
proba = self .predict_proba (X )
444
459
@@ -567,7 +582,7 @@ def __init__(self,
567
582
random_state = random_state ,
568
583
verbose = verbose )
569
584
570
- def predict (self , X , return_paths = False ):
585
+ def predict (self , X ):
571
586
"""Predict regression target for X.
572
587
573
588
The predicted regression target of an input sample is computed as the
@@ -591,27 +606,16 @@ def predict(self, X, return_paths = False):
591
606
n_jobs , n_trees , starts = _partition_estimators (self )
592
607
593
608
# Parallel loop
594
- if return_paths :
595
- path_list = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
596
- backend = "threading" )(
597
- delayed (_parallel_predict_paths )(
598
- self .estimators_ [starts [i ]:starts [i + 1 ]], X )
599
- for i in range (n_jobs ))
600
- #unpack the nested list and return
601
- return [lst for med_lst in path_list for lst in med_lst ]
602
- else :
603
- all_y_hat = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
604
- backend = "threading" )(
605
- delayed (_parallel_predict_regression )(
606
- self .estimators_ [starts [i ]:starts [i + 1 ]], X )
607
- for i in range (n_jobs ))
608
- # Reduce
609
- y_hat = sum (all_y_hat ) / len (self .estimators_ )
610
-
611
- return y_hat
609
+ all_y_hat = Parallel (n_jobs = n_jobs , verbose = self .verbose ,
610
+ backend = "threading" )(
611
+ delayed (_parallel_predict_regression )(
612
+ self .estimators_ [starts [i ]:starts [i + 1 ]], X )
613
+ for i in range (n_jobs ))
612
614
613
-
614
-
615
+ # Reduce
616
+ y_hat = sum (all_y_hat ) / len (self .estimators_ )
617
+
618
+ return y_hat
615
619
616
620
def _set_oob_score (self , X , y ):
617
621
n_samples = y .shape [0 ]
0 commit comments