-
-
Notifications
You must be signed in to change notification settings - Fork 11.1k
BUG: handle introsort depth limit properly #7871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
the larger partition needs its own depth limit else it grows faster than logarithmic. Also increase the stack size to hold up to 64 instead of 50 pointer pairs else it might overflow on arrays larger than than 2^50 elements.
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,15 +15,36 @@ | |
*/ | ||
|
||
/* | ||
* Quick sort is usually the fastest, but the worst case scenario can | ||
* be slower than the merge and heap sorts. The merge sort requires | ||
* extra memory and so for large arrays may not be useful. | ||
* Quick sort is usually the fastest, but the worst case scenario is O(N^2) so | ||
* the code switches to the O(NlogN) worst case heapsort if not enough progress | ||
* is made on the large side of the two quicksort partitions. This improves the | ||
* worst case while still retaining the speed of quicksort for the common case. | ||
* This is variant known as introsort. | ||
* | ||
* The merge sort is *stable*, meaning that equal components | ||
* are unmoved from their entry versions, so it can be used to | ||
* implement lexigraphic sorting on multiple keys. | ||
* | ||
* The heap sort is included for completeness. | ||
* def introsort(lower, higher, recursion_limit=log2(higher - lower + 1) * 2): | ||
* # sort remainder with heapsort if we are not making enough progress | ||
* # we arbitrarily choose 2 * log(n) as the cutoff point | ||
* if recursion_limit < 0: | ||
* heapsort(lower, higher) | ||
* return | ||
* | ||
* if lower < higher: | ||
* pivot_pos = partition(lower, higher) | ||
* # recurse into smaller first and leave larger on stack | ||
* # this limits the required stack space | ||
* if (pivot_pos - lower > higher - pivot_pos): | ||
* quicksort(pivot_pos + 1, higher, recursion_limit - 1) | ||
* quicksort(lower, pivot_pos, recursion_limit - 1) | ||
* else: | ||
* quicksort(lower, pivot_pos, recursion_limit - 1) | ||
* quicksort(pivot_pos + 1, higher, recursion_limit - 1) | ||
* | ||
* | ||
* the below code implements this converted to an iteration and as an | ||
* additional minor optimization skips the recursion depth checking on the | ||
* smaller partition as it is always less than half of the remaining data and | ||
* will thus terminate fast enough | ||
*/ | ||
|
||
#define NPY_NO_DEPRECATED_API NPY_API_VERSION | ||
|
@@ -33,7 +54,11 @@ | |
#include <stdlib.h> | ||
|
||
#define NOT_USED NPY_UNUSED(unused) | ||
#define PYA_QS_STACK 100 | ||
/* | ||
* pushing largest partition has upper bound of log2(n) space | ||
* we store two pointers each time | ||
*/ | ||
#define PYA_QS_STACK (NPY_BITSOF_INTP * 2) | ||
#define SMALL_QUICKSORT 15 | ||
#define SMALL_MERGESORT 20 | ||
#define SMALL_STRING 16 | ||
|
@@ -69,10 +94,12 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) | |
@type@ *stack[PYA_QS_STACK]; | ||
@type@ **sptr = stack; | ||
@type@ *pm, *pi, *pj, *pk; | ||
npy_intp depth_limit = npy_get_msb(num) * 2; | ||
int depth[PYA_QS_STACK]; | ||
int * psdepth = depth; | ||
int cdepth = npy_get_msb(num) * 2; | ||
|
||
for (;;) { | ||
if (depth_limit-- < 0) { | ||
if (NPY_UNLIKELY(cdepth < 0)) { | ||
heapsort_@suff@(pl, pr - pl + 1, NULL); | ||
goto stack_pop; | ||
} | ||
|
@@ -107,6 +134,7 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) | |
*sptr++ = pi - 1; | ||
pl = pi + 1; | ||
} | ||
*psdepth++ = --cdepth; | ||
} | ||
|
||
/* insertion sort */ | ||
|
@@ -125,6 +153,7 @@ stack_pop: | |
} | ||
pr = *(--sptr); | ||
pl = *(--sptr); | ||
cdepth = *(--psdepth); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For instance
which gets rid of the goto. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but this switches everything to heapsort on the first bad partition, a bad partition does not mean the full array needs to be heapsorted only the current part There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No it doesn't, if EDIT: and heapsorts at most once. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that the logic is basically what you have now, just no There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hm right I misread the break condition. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, it has the advantage the need to increment depth on every push is made clear. I think it makes the logic a bit clearer because everything takes place in a few lines. Note that you do need the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel the opposite, I have to think about your code is doing while the goto is comes natural to me, but that might be from my close to the machine code perspective where a function return is a jump to the stack pop. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, the goto goes back instead of up. I don't think either is crystalline clear, having the stack the way we do breaks the obviousness of the doubly recursive pseudo code. Even pushing the largest partition is a bit subtle. |
||
} | ||
|
||
return 0; | ||
|
@@ -141,14 +170,16 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) | |
npy_intp *stack[PYA_QS_STACK]; | ||
npy_intp **sptr = stack; | ||
npy_intp *pm, *pi, *pj, *pk, vi; | ||
npy_intp depth_limit = npy_get_msb(num) * 2; | ||
int depth[PYA_QS_STACK]; | ||
int * psdepth = depth; | ||
int cdepth = npy_get_msb(num) * 2; | ||
|
||
for (;;) { | ||
if (NPY_UNLIKELY(cdepth < 0)) { | ||
aheapsort_@suff@(vv, pl, pr - pl + 1, NULL); | ||
goto stack_pop; | ||
} | ||
while ((pr - pl) > SMALL_QUICKSORT) { | ||
if (depth_limit-- < 0) { | ||
aheapsort_@suff@(vv, pl, pr - pl + 1, NULL); | ||
goto stack_pop; | ||
} | ||
/* quicksort partition */ | ||
pm = pl + ((pr - pl) >> 1); | ||
if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl); | ||
|
@@ -179,6 +210,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) | |
*sptr++ = pi - 1; | ||
pl = pi + 1; | ||
} | ||
*psdepth++ = --cdepth; | ||
} | ||
|
||
/* insertion sort */ | ||
|
@@ -198,6 +230,7 @@ stack_pop: | |
} | ||
pr = *(--sptr); | ||
pl = *(--sptr); | ||
cdepth = *(--psdepth); | ||
} | ||
|
||
return 0; | ||
|
@@ -229,14 +262,16 @@ quicksort_@suff@(void *start, npy_intp num, void *varr) | |
@type@ *pl = start; | ||
@type@ *pr = pl + (num - 1)*len; | ||
@type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; | ||
npy_intp depth_limit = npy_get_msb(num) * 2; | ||
int depth[PYA_QS_STACK]; | ||
int * psdepth = depth; | ||
int cdepth = npy_get_msb(num) * 2; | ||
|
||
if (vp == NULL) { | ||
return -NPY_ENOMEM; | ||
} | ||
|
||
for (;;) { | ||
if (depth_limit-- < 0) { | ||
if (NPY_UNLIKELY(cdepth < 0)) { | ||
heapsort_@suff@(pl, (pr - pl) / len + 1, varr); | ||
goto stack_pop; | ||
} | ||
|
@@ -271,6 +306,7 @@ quicksort_@suff@(void *start, npy_intp num, void *varr) | |
*sptr++ = pi - len; | ||
pl = pi + len; | ||
} | ||
*psdepth++ = --cdepth; | ||
} | ||
|
||
/* insertion sort */ | ||
|
@@ -291,6 +327,7 @@ stack_pop: | |
} | ||
pr = *(--sptr); | ||
pl = *(--sptr); | ||
cdepth = *(--psdepth); | ||
} | ||
|
||
< 28BE /td> | free(vp); | |
|
@@ -310,10 +347,12 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) | |
npy_intp *stack[PYA_QS_STACK]; | ||
npy_intp **sptr=stack; | ||
npy_intp *pm, *pi, *pj, *pk, vi; | ||
npy_intp depth_limit = npy_get_msb(num) * 2; | ||
int depth[PYA_QS_STACK]; | ||
int * psdepth = depth; | ||
int cdepth = npy_get_msb(num) * 2; | ||
|
||
for (;;) { | ||
if (depth_limit-- < 0) { | ||
if (NPY_UNLIKELY(cdepth < 0)) { | ||
aheapsort_@suff@(vv, pl, pr - pl + 1, varr); | ||
goto stack_pop; | ||
} | ||
|
@@ -348,6 +387,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) | |
*sptr++ = pi - 1; | ||
pl = pi + 1; | ||
} | ||
*psdepth++ = --cdepth; | ||
} | ||
|
||
/* insertion sort */ | ||
|
@@ -367,6 +407,7 @@ stack_pop: | |
} | ||
pr = *(--sptr); | ||
pl = *(--sptr); | ||
cdepth = *(--psdepth); | ||
} | ||
|
||
return 0; | ||
|
@@ -394,14 +435,16 @@ npy_quicksort(void *start, npy_intp num, void *varr) | |
char *stack[PYA_QS_STACK]; | ||
char **sptr = stack; | ||
char *pm, *pi, *pj, *pk; | ||
npy_intp depth_limit = npy_get_msb(num) * 2; | ||
int depth[PYA_QS_STACK]; | ||
int * psdepth = depth; | ||
int cdepth = npy_get_msb(num) * 2; | ||
|
||
if (vp == NULL) { | ||
return -NPY_ENOMEM; | ||
} | ||
|
||
for (;;) { | ||
if (depth_limit-- < 0) { | ||
if (NPY_UNLIKELY(cdepth < 0)) { | ||
npy_heapsort(pl, (pr - pl) / elsize + 1, varr); | ||
goto stack_pop; | ||
} | ||
|
@@ -446,6 +489,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) | |
*sptr++ = pi - elsize; | ||
pl = pi + elsize; | ||
} | ||
*psdepth++ = --cdepth; | ||
} | ||
|
||
/* insertion sort */ | ||
|
@@ -466,6 +510,7 @@ stack_pop: | |
} | ||
pr = *(--sptr); | ||
pl = *(--sptr); | ||
cdepth = *(--psdepth); | ||
} | ||
|
||
free(vp); | ||
|
@@ -486,10 +531,12 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) | |
npy_intp *stack[PYA_QS_STACK]; | ||
npy_intp **sptr = stack; | ||
npy_intp *pm, *pi, *pj, *pk, vi; | ||
npy_intp depth_limit = npy_get_msb(num) * 2; | ||
int depth[PYA_QS_STACK]; | ||
int * psdepth = depth; | ||
int cdepth = npy_get_msb(num) * 2; | ||
|
||
for (;;) { | ||
if (depth_limit-- < 0) { | ||
if (NPY_UNLIKELY(cdepth < 0)) { | ||
npy_aheapsort(vv, pl, pr - pl + 1, varr); | ||
goto stack_pop; | ||
} | ||
|
@@ -534,6 +581,7 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) | |
*sptr++ = pi - 1; | ||
pl = pi + 1; | ||
} | ||
*psdepth++ = --cdepth; | ||
} | ||
|
||
/* insertion sort */ | ||
|
@@ -553,6 +601,7 @@ stack_pop: | |
} | ||
pr = *(--sptr); | ||
pl = *(--sptr); | ||
cdepth = *(--psdepth); | ||
} | ||
|
||
return 0; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well, this is large by a factor of 2, but it certainly doesn't hurt.