scikit-learn
diff --git a/‎sklearn/tree/_classes.py
Lines changed: 3 additions & 1 deletion b/‎sklearn/tree/_classes.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎sklearn/tree/_oblique_splitter.pxd
Lines changed: 14 additions & 0 deletions b/‎sklearn/tree/_oblique_splitter.pxd
Lines changed: 14 additions & 0 deletions
diff --git a/‎sklearn/tree/_oblique_splitter.pyx
Lines changed: 18 additions & 3 deletions b/‎sklearn/tree/_oblique_splitter.pyx
Lines changed: 18 additions & 3 deletions
diff --git a/‎sklearn/tree/_oblique_tree.pxd
Lines changed: 2 additions & 1 deletion b/‎sklearn/tree/_oblique_tree.pxd
Lines changed: 2 additions & 1 deletion
diff --git a/‎sklearn/tree/_oblique_tree.pyx
Lines changed: 6 additions & 5 deletions b/‎sklearn/tree/_oblique_tree.pyx
Lines changed: 6 additions & 5 deletions
diff --git a/‎sklearn/tree/_split_record.pxd
Lines changed: 0 additions & 25 deletions b/‎sklearn/tree/_split_record.pxd
Lines changed: 0 additions & 25 deletions
diff --git a/‎sklearn/tree/_splitter.pxd
Lines changed: 1 addition & 37 deletions b/‎sklearn/tree/_splitter.pxd
Lines changed: 1 addition & 37 deletions
diff --git a/‎sklearn/tree/_splitter.pyx
Lines changed: 6 additions & 0 deletions b/‎sklearn/tree/_splitter.pyx
Lines changed: 6 additions & 0 deletions
diff --git a/‎sklearn/tree/_tree.pxd
Lines changed: 2 additions & 2 deletions b/‎sklearn/tree/_tree.pxd
Lines changed: 2 additions & 2 deletions
diff --git a/‎sklearn/tree/_tree.pyx
Lines changed: 25 additions & 8 deletions b/‎sklearn/tree/_tree.pyx
Lines changed: 25 additions & 8 deletions
diff --git a/‎sklearn/tree/test_tree.py
Lines changed: 6 additions & 2 deletions b/‎sklearn/tree/test_tree.py
Lines changed: 6 additions & 2 deletions
@@ -438,6 +438,7 @@ def fit(self, X, y, sample_weight=None, check_input=True):
         # Use BestFirst if max_leaf_nodes given; use DepthFirst otherwise
         if max_leaf_nodes < 0:
             builder = DepthFirstTreeBuilder(
+                splitter,
                 min_samples_split,
                 min_samples_leaf,
                 min_weight_leaf,
@@ -446,6 +447,7 @@ def fit(self, X, y, sample_weight=None, check_input=True):
             )
         else:
             builder = BestFirstTreeBuilder(
+                splitter,
                 min_samples_split,
                 min_samples_leaf,
                 min_weight_leaf,
@@ -454,7 +456,7 @@ def fit(self, X, y, sample_weight=None, check_input=True):
                 self.min_impurity_decrease,
             )
 
-        builder.build(self.tree_, splitter, X, y, sample_weight)
+        builder.build(self.tree_, X, y, sample_weight)
 
         if self.n_outputs_ == 1 and is_classifier(self):
             self.n_classes_ = self.n_classes_[0]
 
@@ -24,6 +24,20 @@ from ._splitter cimport sort
 from ._split_record cimport SplitRecord
 from libcpp.vector cimport vector
 
+cdef struct ObliqueSplitRecord:
+    # Data to track a sample split made along a projection of features.
+    SIZE_t feature         # Which feature to split on.
+    SIZE_t pos             # Split samples array at the given position,
+                           # i.e. count of samples below threshold for feature.
+                           # pos is >= end if the node is a leaf.
+    double threshold       # Threshold to split at.
+    double improvement     # Impurity improvement given parent node.
+    double impurity_left   # Impurity of the left split.
+    double impurity_right  # Impurity of the right split.
+
+    vector[DTYPE_t]* proj_vec_weights   # pointer to the projection vector's weights
+    vector[SIZE_t]* proj_vec_indices    # pointer to the projection vector's feature indices
+    vector[SIZE_t]* proj_vec_indices    # indices of the features
+
 
 cdef class ObliqueSplitter(Splitter):
     # The splitter searches in the input space for a combination of features and a threshold
 
@@ -42,7 +42,7 @@ cdef DTYPE_t FEATURE_THRESHOLD = 1e-7
 cdef DTYPE_t EXTRACT_NNZ_SWITCH = 0.1
 
 
-cdef inline void _init_split(SplitRecord* self, SIZE_t start_pos) nogil:
+cdef inline void _init_split(ObliqueSplitRecord* self, SIZE_t start_pos) nogil:
     self.impurity_left = INFINITY
     self.impurity_right = INFINITY
     self.pos = start_pos
@@ -164,6 +164,10 @@ cdef class ObliqueSplitter(Splitter):
 
         pass
 
+    cdef int pointer_size(self) nogil:
+        """Return the byte size of an ObliqueSplitRecord (the struct, not a pointer)."""
+
+        return sizeof(ObliqueSplitRecord)
 
 cdef class BaseDenseObliqueSplitter(ObliqueSplitter):
 
@@ -243,6 +247,9 @@ cdef class BestObliqueSplitter(BaseDenseObliqueSplitter):
         Returns -1 in case of failure to allocate memory (and raise MemoryError)
         or 0 otherwise.
         """
+        # reinterpret the incoming split pointer as an ObliqueSplitRecord*
+        cdef ObliqueSplitRecord* oblique_split = <ObliqueSplitRecord*>(split)
+
         cdef SIZE_t* samples = self.samples
         cdef SIZE_t start = self.start
         cdef SIZE_t end = self.end
@@ -262,7 +269,7 @@ cdef class BestObliqueSplitter(BaseDenseObliqueSplitter):
 
         # keep track of split record for current node and the best split
         # found among the sampled projection vectors
-        cdef SplitRecord best, current
+        cdef ObliqueSplitRecord best, current
 
         cdef double current_proxy_improvement = -INFINITY
         cdef double best_proxy_improvement = -INFINITY
@@ -365,6 +372,14 @@ cdef class BestObliqueSplitter(BaseDenseObliqueSplitter):
                 impurity, best.impurity_left, best.impurity_right)
 
         # Return values
-        split[0] = best
+        deref(oblique_split).proj_vec_indices = best.proj_vec_indices
+        deref(oblique_split).proj_vec_weights = best.proj_vec_weights
+        deref(oblique_split).feature = best.feature
+        deref(oblique_split).pos = best.pos
+        deref(oblique_split).threshold = best.threshold
+        deref(oblique_split).improvement = best.improvement
+        deref(oblique_split).impurity_left = best.impurity_left
+        deref(oblique_split).impurity_right = best.impurity_right
+
         # n_constant_features[0] = n_total_constants
         return 0
@@ -21,11 +21,12 @@ from ._tree cimport UINT32_t         # Unsigned 32 bit integer
 from ._tree cimport Tree, Node, TreeBuilder
 
 from ._split_record cimport SplitRecord
+from ._oblique_splitter cimport ObliqueSplitRecord
 
 cdef class ObliqueTree(Tree):
     cdef vector[vector[DTYPE_t]] proj_vec_weights # (capacity, n_features) array of projection vectors
     cdef vector[vector[SIZE_t]] proj_vec_indices  # (capacity, n_features) array of projection vectors
 
-    cdef int _set_node_values(self, SplitRecord split_node, Node *node)  nogil except -1
+    cdef int _set_node_values(self, SplitRecord* split_node, Node *node)  nogil except -1
     cdef DTYPE_t _compute_feature(self, const DTYPE_t[:] X_ndarray, Node *node, SIZE_t node_id) nogil
     cdef int _resize_c(self, SIZE_t capacity=*) nogil except -1
@@ -211,18 +211,19 @@ cdef class ObliqueTree(Tree):
         self.capacity = capacity
         return 0
 
-    cdef int _set_node_values(self, SplitRecord split_node, Node *node) nogil except -1:
+    cdef int _set_node_values(self, SplitRecord* split_node, Node *node) nogil except -1:
         """Set node data.
         """
+        cdef ObliqueSplitRecord* oblique_split_node = <ObliqueSplitRecord*>(split_node)
         cdef SIZE_t node_id = self.node_count
 
-        node.feature = split_node.feature
-        node.threshold = split_node.threshold
+        node.feature = deref(oblique_split_node).feature
+        node.threshold = deref(oblique_split_node).threshold
 
         # oblique trees store the projection indices and weights
         # inside the tree itself
-        self.proj_vec_weights[node_id] = deref(split_node.proj_vec_weights)
-        self.proj_vec_indices[node_id] = deref(split_node.proj_vec_indices)
+        self.proj_vec_weights[node_id] = deref(deref(oblique_split_node).proj_vec_weights)
+        self.proj_vec_indices[node_id] = deref(deref(oblique_split_node).proj_vec_indices)
         return 1
 
     cdef DTYPE_t _compute_feature(self, const DTYPE_t[:] X_ndarray, Node *node, SIZE_t node_id) nogil:
 
@@ -83,42 +83,6 @@ cdef class Splitter:
 
     cdef double node_impurity(self) nogil
 
+    cdef int pointer_size(self) nogil
 
 cdef inline void sort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil
-cdef inline void swap(DTYPE_t* Xf, SIZE_t* samples, SIZE_t i, SIZE_t j) nogil
-cdef inline DTYPE_t median3(DTYPE_t* Xf, SIZE_t n) nogil
-cdef void introsort(DTYPE_t* Xf, SIZE_t *samples, SIZE_t n, int maxd) nogil
-cdef inline void sift_down(DTYPE_t* Xf, SIZE_t* samples,
-                           SIZE_t start, SIZE_t end) nogil
-cdef void heapsort(DTYPE_t* Xf, SIZE_t* samples, SIZE_t n) nogil
-cdef int compare_SIZE_t(const void* a, const void* b) nogil
-cdef inline void binary_search(INT32_t* sorted_array,
-                               INT32_t start, INT32_t end,
-                               SIZE_t value, SIZE_t* index,
-                               INT32_t* new_start) nogil
-cdef inline void extract_nnz_index_to_samples(INT32_t* X_indices,
-                                              DTYPE_t* X_data,
-                                              INT32_t indptr_start,
-                                              INT32_t indptr_end,
-                                              SIZE_t* samples,
-                                              SIZE_t start,
-                                              SIZE_t end,
-                                              SIZE_t* index_to_samples,
-                                              DTYPE_t* Xf,
-                                              SIZE_t* end_negative,
-                                              SIZE_t* start_positive) nogil
-cdef inline void extract_nnz_binary_search(INT32_t* X_indices,
-                                           DTYPE_t* X_data,
-                                           INT32_t indptr_start,
-                                           INT32_t indptr_end,
-                                           SIZE_t* samples,
-                                           SIZE_t start,
-                                           SIZE_t end,
-                                           SIZE_t* index_to_samples,
-                                           DTYPE_t* Xf,
-                                           SIZE_t* end_negative,
-                                           SIZE_t* start_positive,
-                                           SIZE_t* sorted_samples,
-                                           bint* is_samples_sorted) nogil
-cdef inline void sparse_swap(SIZE_t* index_to_samples, SIZE_t* samples,
-                             SIZE_t pos_1, SIZE_t pos_2) nogil
@@ -29,6 +29,7 @@ from ._utils cimport rand_int
 from ._utils cimport rand_uniform
 from ._utils cimport RAND_R_MAX
 from ._utils cimport safe_realloc
+from libc.stdlib cimport malloc
 
 cdef double INFINITY = np.inf
 
@@ -227,6 +228,11 @@ cdef class Splitter:
 
         return self.criterion.node_impurity()
 
+    cdef int pointer_size(self) nogil:
+        """Return the byte size of a SplitRecord (the struct, not a pointer)."""
+        
+        return sizeof(SplitRecord)
+
 
 cdef class BaseDenseSplitter(Splitter):
     cdef const DTYPE_t[:, :] X
 
@@ -58,11 +58,11 @@ cdef class Tree:
 
     # Methods
     cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf,
-                          SplitRecord split_node,
+                          SplitRecord* split_node,
                           double impurity,
                           SIZE_t n_node_samples,
                           double weighted_n_node_samples) nogil except -1
-    cdef int _set_node_values(self, SplitRecord split_node,
+    cdef int _set_node_values(self, SplitRecord* split_node,
                               Node *node)  nogil except -1
     cdef DTYPE_t _compute_feature(self, const DTYPE_t[:] X_ndarray,
                             Node *node, SIZE_t node_id) nogil
 
@@ -22,6 +22,8 @@ from libc.stdint cimport SIZE_MAX
 from libcpp.algorithm cimport pop_heap
 from libcpp.algorithm cimport push_heap
 from libcpp cimport bool
+from cython.operator cimport dereference as deref
+from libc.stdlib cimport malloc, free
 
 import struct
 
@@ -188,6 +190,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
         cdef SIZE_t node_id
 
         cdef SplitRecord split
+        cdef SplitRecord* split_ptr = <SplitRecord *>malloc(splitter.pointer_size())
 
         cdef double impurity = INFINITY
         cdef SIZE_t n_constant_features
@@ -238,15 +241,20 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
                 is_leaf = is_leaf or impurity <= EPSILON
 
                 if not is_leaf:
-                    splitter.node_split(impurity, &split, &n_constant_features)
+                    splitter.node_split(impurity, split_ptr, &n_constant_features)
+
+                    # take a local copy of the record so that its pos,
+                    # improvement, and impurity scores can be read below
+                    split = deref(split_ptr)
+
                     # If EPSILON=0 in the below comparison, float precision
                     # issues stop splitting, producing trees that are
                     # dissimilar to v0.18
                     is_leaf = (is_leaf or split.pos >= end or
                                (split.improvement + EPSILON <
                                 min_impurity_decrease))
 
-                node_id = tree._add_node(parent, is_left, is_leaf, split,
+                node_id = tree._add_node(parent, is_left, is_leaf, split_ptr,
                                          impurity, n_node_samples,
                                          weighted_n_node_samples)
 
@@ -287,7 +295,10 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
 
             if rc >= 0:
                 tree.max_depth = max_depth_seen
-                
+        
+        # release the split-record buffer allocated with malloc above
+        free(split_ptr)
+
         if rc == -1:
             raise MemoryError()
 
@@ -455,6 +466,8 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
                                     FrontierRecord* res) nogil except -1:
         """Adds node w/ partition ``[start, end)`` to the frontier. """
         cdef SplitRecord split
+        cdef SplitRecord* split_ptr = <SplitRecord *>malloc(splitter.pointer_size())
+        
         cdef SIZE_t node_id
         cdef SIZE_t n_node_samples
         cdef SIZE_t n_constant_features = 0
@@ -479,7 +492,11 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
                    )
 
         if not is_leaf:
-            splitter.node_split(impurity, &split, &n_constant_features)
+            splitter.node_split(impurity, split_ptr, &n_constant_features)
+            # take a local copy of the record so that its pos,
+            # improvement, and impurity scores can be read below
+            split = deref(split_ptr)
+
             # If EPSILON=0 in the below comparison, float precision issues stop
             # splitting early, producing trees that are dissimilar to v0.18
             is_leaf = (is_leaf or split.pos >= end or
@@ -489,7 +506,7 @@ cdef class BestFirstTreeBuilder(TreeBuilder):
                                  if parent != NULL
                                  else _TREE_UNDEFINED,
                                  is_left, is_leaf,
-                                 split, impurity, n_node_samples,
+                                 split_ptr, impurity, n_node_samples,
                                  weighted_n_node_samples)
         if node_id == SIZE_MAX:
             return -1
@@ -749,7 +766,7 @@ cdef class Tree:
         self.capacity = capacity
         return 0
 
-    cdef int _set_node_values(self, SplitRecord split_node,
+    cdef int _set_node_values(self, SplitRecord* split_node,
             Node *node) nogil except -1:
         """Set node data.
         """
@@ -769,7 +786,7 @@ cdef class Tree:
         return feature
 
     cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf,
-                          SplitRecord split_node, double impurity,
+                          SplitRecord* split_node, double impurity,
                           SIZE_t n_node_samples,
                           double weighted_n_node_samples) nogil except -1:
         """Add a node to the tree.
@@ -1812,7 +1829,7 @@ cdef _build_pruned_tree(
             split.threshold = node.threshold
 
             new_node_id = tree._add_node(
-                parent, is_left, is_leaf, split,
+                parent, is_left, is_leaf, &split,
                 node.impurity, node.n_node_samples,
                 node.weighted_n_node_samples)
 
 
@@ -13,7 +13,9 @@
 X, y = iris.data, iris.target
 
 # either axis-aligned
-clf = DecisionTreeClassifier(random_state=random_state)
+clf = DecisionTreeClassifier(random_state=random_state,
+    # max_leaf_nodes=5,
+)
 
 cv_scores = cross_val_score(clf, X, y, scoring='accuracy', cv=10)
 
@@ -26,7 +28,9 @@
 # or oblique
 n_features = X.shape[1]
 clf = ObliqueDecisionTreeClassifier(max_features=n_features,
-                                    random_state=random_state)
+                                    random_state=random_state,
+                                    # max_leaf_nodes=5,
+                                    )
 
 print('About to fit...')
 clf = clf.fit(X, y)