scikit-learn
diff --git a/‎sklearn/ensemble/forest.py
Lines changed: 47 additions & 43 deletions b/‎sklearn/ensemble/forest.py
Lines changed: 47 additions & 43 deletions
@@ -136,8 +136,8 @@ def _parallel_predict_proba(trees, X, n_classes, n_outputs):
 
 def _parallel_predict_paths(trees, X):
     """Private function used to compute a batch of prediction paths within a job."""
-    return [tree.predict(X, return_paths = True) for tree in trees]
-      
+    return [tree.decision_paths(X) for tree in trees]
+
 
 def _parallel_predict_regression(trees, X):
     """Private function used to compute a batch of predictions within a job."""
@@ -303,6 +303,40 @@ def _validate_y(self, y):
         # Default implementation
         return y
 
+    def decision_paths(self, X):
+        """Predict class or regression value for X and return decision paths leading to the prediction, from every tree.
+        
+
+        Parameters
+        ----------
+        X : array-like of shape = [n_samples, n_features]
+            The input samples.
+
+        Returns
+        -------
+        y : list of arrays with shape = [n_estimators, n_samples, max_depth + 1]
+            Decision paths for each each tree and for eachprediction.
+            Each path is an array of node ids, starting with the root node id.
+            If a path is shorter than max_depth + 1, it is padded with -1 on the right.    
+        """
+                    
+        # Check data
+        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
+            X = array2d(X, dtype=DTYPE)
+
+        # Assign chunk of trees to jobs
+        n_jobs, n_trees, starts = _partition_estimators(self)
+
+        # Parallel loop
+        path_list = Parallel(n_jobs=n_jobs, verbose=self.verbose,
+                                 backend="threading")(
+                delayed(_parallel_predict_paths)(
+                    self.estimators_[starts[i]:starts[i + 1]], X)
+                for i in range(n_jobs))
+        #unpack the nested list and return
+        return [lst for med_lst in path_list for lst in med_lst]
+
+        
     @property
     def feature_importances_(self):
         """Return the feature importances (the higher, the more important the
@@ -404,7 +438,7 @@ def _validate_y(self, y):
 
         return y
 
-    def predict(self, X, return_paths = True):
+    def predict(self, X):
         """Predict class for X.
 
         The predicted class of an input sample is computed as the majority
@@ -420,25 +454,6 @@ def predict(self, X, return_paths = True):
         y : array of shape = [n_samples] or [n_samples, n_outputs]
             The predicted classes.
         """
-        
-        
-        if return_paths:
-            # Check data
-            if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
-                X = array2d(X, dtype=DTYPE)
-
-            # Assign chunk of trees to jobs
-            n_jobs, n_trees, starts = _partition_estimators(self)
-
-            # Parallel loop
-            path_list = Parallel(n_jobs=n_jobs, verbose=self.verbose,
-                                     backend="threading")(
-                    delayed(_parallel_predict_paths)(
-                        self.estimators_[starts[i]:starts[i + 1]], X)
-                    for i in range(n_jobs))
-            #unpack the nested list and return
-            return [lst for med_lst in path_list for lst in med_lst]
-        
         n_samples = len(X)
         proba = self.predict_proba(X)
 
@@ -567,7 +582,7 @@ def __init__(self,
             random_state=random_state,
             verbose=verbose)
 
-    def predict(self, X, return_paths = False):
+    def predict(self, X):
         """Predict regression target for X.
 
         The predicted regression target of an input sample is computed as the
@@ -591,27 +606,16 @@ def predict(self, X, return_paths = False):
         n_jobs, n_trees, starts = _partition_estimators(self)
 
         # Parallel loop
-        if return_paths:
-            path_list = Parallel(n_jobs=n_jobs, verbose=self.verbose,
-                                     backend="threading")(
-                    delayed(_parallel_predict_paths)(
-                        self.estimators_[starts[i]:starts[i + 1]], X)
-                    for i in range(n_jobs))
-            #unpack the nested list and return
-            return [lst for med_lst in path_list for lst in med_lst]
-        else:
-            all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose,
-                                 backend="threading")(
-                delayed(_parallel_predict_regression)(
-                    self.estimators_[starts[i]:starts[i + 1]], X)
-                for i in range(n_jobs))
-                # Reduce    
-            y_hat = sum(all_y_hat) / len(self.estimators_)
-    
-            return y_hat
+        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose,
+                             backend="threading")(
+            delayed(_parallel_predict_regression)(
+                self.estimators_[starts[i]:starts[i + 1]], X)
+            for i in range(n_jobs))
 
-        
-        
+        # Reduce
+        y_hat = sum(all_y_hat) / len(self.estimators_)
+
+        return y_hat
 
     def _set_oob_score(self, X, y):
         n_samples = y.shape[0]