8000 [MRG + 1] MAINT Move heapify_up/heapify_down into PriorityHeap as cla… · scikit-learn/scikit-learn@f95e5b1 · GitHub
[go: up one dir, main page]

Skip to content

Commit f95e5b1

Browse files
nelson-liuraghavrv
authored andcommitted
[MRG + 1] MAINT Move heapify_up/heapify_down into PriorityHeap as class methods + COSMITs (#7034)
* RFC: move heap methods to class and remove trailing spaces * spurious comment to force recythonization of boosting * [ci skip] remove spurious comment * remove trailing whitespace on line * style: fix trailing whitespace in _criterion.pxd * add spurious comments to try to force recythonizing * remove changes for recythonization
1 parent 91ff942 commit f95e5b1

File tree

4 files changed

+42
-42
lines changed

4 files changed

+42
-42
lines changed

sklearn/tree/_criterion.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ cdef class Criterion:
4545
# weighted count of each label. For regression,
4646
# the sum of w*y. sum_total[k] is equal to
4747
# sum_{i=start}^{end-1} w[samples[i]]*y[samples[i], k],
48-
# where k is output index.
48+
# where k is output index.
4949
cdef double* sum_left # Same as above, but for the left side of the split
5050
cdef double* sum_right # same as above, but for the right side of the split
5151

sklearn/tree/_criterion.pyx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ cdef class ClassificationCriterion(Criterion):
266266
self.sum_left = <double*> calloc(n_elements, sizeof(double))
267267
self.sum_right = <double*> calloc(n_elements, sizeof(double))
268268

269-
if (self.sum_total == NULL or
269+
if (self.sum_total == NULL or
270270
self.sum_left == NULL or
271271
self.sum_right == NULL):
272272
raise MemoryError()
@@ -853,7 +853,7 @@ cdef class RegressionCriterion(Criterion):
853853

854854
self.weighted_n_left -= w
855855

856-
self.weighted_n_right = (self.weighted_n_node_samples -
856+
self.weighted_n_right = (self.weighted_n_node_samples -
857857
self.weighted_n_left)
858858
for k in range(self.n_outputs):
859859
sum_right[k] = sum_total[k] - sum_left[k]
@@ -964,7 +964,7 @@ cdef class MSE(RegressionCriterion):
964964

965965
for k in range(self.n_outputs):
966966
impurity_left[0] -= (sum_left[k] / self.weighted_n_left) ** 2.0
967-
impurity_right[0] -= (sum_right[k] / self.weighted_n_right) ** 2.0
967+
impurity_right[0] -= (sum_right[k] / self.weighted_n_right) ** 2.0
968968

969969
impurity_left[0] /= self.n_outputs
970970
impurity_right[0] /= self.n_outputs
@@ -1267,7 +1267,7 @@ cdef class MAE(RegressionCriterion):
12671267
cdef class FriedmanMSE(MSE):
12681268
"""Mean squared error impurity criterion with improvement score by Friedman
12691269
1270-
Uses the formula (35) in Friedmans original Gradient Boosting paper:
1270+
Uses the formula (35) in Friedman's original Gradient Boosting paper:
12711271
12721272
diff = mean_left - mean_right
12731273
improvement = n_left * n_right * diff^2 / (n_left + n_right)
@@ -1320,5 +1320,5 @@ cdef class FriedmanMSE(MSE):
13201320
diff = (self.weighted_n_right * total_sum_left -
13211321
self.weighted_n_left * total_sum_right) / self.n_outputs
13221322

1323-
return (diff * diff / (self.weighted_n_left * self.weighted_n_right *
1323+
return (diff * diff / (self.weighted_n_left * self.weighted_n_right *
13241324
self.weighted_n_node_samples))

sklearn/tree/_utils.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ cdef class PriorityHeap:
106106
cdef PriorityHeapRecord* heap_
107107

108108
cdef bint is_empty(self) nogil
109+
cdef void heapify_up(self, PriorityHeapRecord* heap, SIZE_t pos) nogil
110+
cdef void heapify_down(self, PriorityHeapRecord* heap, SIZE_t pos, SIZE_t heap_length) nogil
109111
cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos,
110112
SIZE_t depth, bint is_leaf, double improvement,
111113
double impurity, double impurity_left,

sklearn/tree/_utils.pyx

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -173,40 +173,6 @@ cdef class Stack:
173173
# PriorityHeap data structure
174174
# =============================================================================
175175

176-
cdef void heapify_up(PriorityHeapRecord* heap, SIZE_t pos) nogil:
177-
"""Restore heap invariant parent.improvement > child.improvement from
178-
``pos`` upwards. """
179-
if pos == 0:
180-
return
181-
182-
cdef SIZE_t parent_pos = (pos - 1) / 2
183-
184-
if heap[parent_pos].improvement < heap[pos].improvement:
185-
heap[parent_pos], heap[pos] = heap[pos], heap[parent_pos]
186-
heapify_up(heap, parent_pos)
187-
188-
189-
cdef void heapify_down(PriorityHeapRecord* heap, SIZE_t pos,
190-
SIZE_t heap_length) nogil:
191-
"""Restore heap invariant parent.improvement > children.improvement from
192-
``pos`` downwards. """
193-
cdef SIZE_t left_pos = 2 * (pos + 1) - 1
194-
cdef SIZE_t right_pos = 2 * (pos + 1)
195-
cdef SIZE_t largest = pos
196-
197-
if (left_pos < heap_length and
198-
heap[left_pos].improvement > heap[largest].improvement):
199-
largest = left_pos
200-
201-
if (right_pos < heap_length and
202-
heap[right_pos].improvement > heap[largest].improvement):
203-
largest = right_pos
204-
205-
if largest != pos:
206-
heap[pos], heap[largest] = heap[largest], heap[pos]
207-
heapify_down(heap, largest, heap_length)
208-
209-
210176
cdef class PriorityHeap:
211177
"""A priority queue implemented as a binary heap.
212178
@@ -240,6 +206,38 @@ cdef class PriorityHeap:
240206
cdef bint is_empty(self) nogil:
241207
return self.heap_ptr <= 0
242208

209+
cdef void heapify_up(self, PriorityHeapRecord* heap, SIZE_t pos) nogil:
210+
"""Restore heap invariant parent.improvement > child.improvement from
211+
``pos`` upwards. """
212+
if pos == 0:
213+
return
214+
215+
cdef SIZE_t parent_pos = (pos - 1) / 2
216+
217+
if heap[parent_pos].improvement < heap[pos].improvement:
218+
heap[parent_pos], heap[pos] = heap[pos], heap[parent_pos]
219+
self.heapify_up(heap, parent_pos)
220+
221+
cdef void heapify_down(self, PriorityHeapRecord* heap, SIZE_t pos,
222+
SIZE_t heap_length) nogil:
223+
"""Restore heap invariant parent.improvement > children.improvement from
224+
``pos`` downwards. """
225+
cdef SIZE_t left_pos = 2 * (pos + 1) - 1
226+
cdef SIZE_t right_pos = 2 * (pos + 1)
227+
cdef SIZE_t largest = pos
228+
229+
if (left_pos < heap_length and
230+
heap[left_pos].improvement > heap[largest].improvement):
231+
largest = left_pos
232+
233+
if (right_pos < heap_length and
234+
heap[right_pos].improvement > heap[largest].improvement):
235+
largest = right_pos
236+
237+
if largest != pos:
238+
heap[pos], heap[largest] = heap[largest], heap[pos]
239+
self.heapify_down(heap, largest, heap_length)
240+
243241
cdef int push(self, SIZE_t node_id, SIZE_t start, SIZE_t end, SIZE_t pos,
244242
SIZE_t depth, bint is_leaf, double improvement,
245243
double impurity, double impurity_left,
@@ -276,7 +274,7 @@ cdef class PriorityHeap:
276274
heap[heap_ptr].improvement = improvement
277275

278276
# Heapify up
279-
heapify_up(heap, heap_ptr)
277+
self.heapify_up(heap, heap_ptr)
280278

281279
# Increase element count
282280
self.heap_ptr = heap_ptr + 1
@@ -298,7 +296,7 @@ cdef class PriorityHeap:
298296

299297
# Restore heap invariant
300298
if heap_ptr > 1:
301-
heapify_down(heap, 0, heap_ptr - 1)
299+
self.heapify_down(heap, 0, heap_ptr - 1)
302300

303301
self.heap_ptr = heap_ptr - 1
304302

0 commit comments

Comments
 (0)
0