diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst index ffab3ccd41b2..0de733ff432c 100644 --- a/doc/release/1.12.0-notes.rst +++ b/doc/release/1.12.0-notes.rst @@ -266,6 +266,12 @@ numpy.sctypes now includes bytes on Python3 too Previously, it included str (bytes) and unicode on Python2, but only str (unicode) on Python3. +quicksort has been changed to an introsort +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The quicksort kind of ``np.sort`` and ``np.argsort`` is now an introsort which +is regular quicksort but changing to a heapsort when not enough progress is +made. This retains the good quicksort performance while changing the worst case +runtime from ``O(N^2)`` to ``O(N*log(N))``. Deprecations ============ diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 7e02ff2c2146..d07c5c08b429 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -776,6 +776,12 @@ def sort(a, axis=-1, kind='quicksort', order=None): placements are sorted according to the non-nan part if it exists. Non-nan values are sorted as before. + .. versionadded:: 1.12.0 + + quicksort has been changed to an introsort which will switch + heapsort when it does not make enough progress. This makes its + worst case O(n*log(n)). + Examples -------- >>> a = np.array([[1,4],[3,1]]) diff --git a/numpy/core/src/npysort/quicksort.c.src b/numpy/core/src/npysort/quicksort.c.src index 91b5e67f5bc7..60c905a4e93c 100644 --- a/numpy/core/src/npysort/quicksort.c.src +++ b/numpy/core/src/npysort/quicksort.c.src @@ -69,8 +69,13 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) @type@ *stack[PYA_QS_STACK]; @type@ **sptr = stack; @type@ *pm, *pi, *pj, *pk; + npy_intp depth_limit = npy_get_msb(num) * 2; for (;;) { + if (depth_limit-- < 0) { + heapsort_@suff@(pl, pr - pl + 1, NULL); + goto stack_pop; + } while ((pr - pl) > SMALL_QUICKSORT) { /* quicksort partition */ pm = pl + ((pr - pl) >> 1); @@ -114,6 +119,7 @@ quicksort_@suff@(void *start, npy_intp num, void *NOT_USED) } *pj = vp; } +stack_pop: if (sptr == stack) { break; } @@ -135,9 +141,14 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) npy_intp *stack[PYA_QS_STACK]; npy_intp **sptr = stack; npy_intp *pm, *pi, *pj, *pk, vi; + npy_intp depth_limit = npy_get_msb(num) * 2; for (;;) { while ((pr - pl) > SMALL_QUICKSORT) { + if (depth_limit-- < 0) { + aheapsort_@suff@(vv, pl, pr - pl + 1, NULL); + goto stack_pop; + } /* quicksort partition */ pm = pl + ((pr - pl) >> 1); if (@TYPE@_LT(v[*pm],v[*pl])) INTP_SWAP(*pm, *pl); @@ -181,6 +192,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *NOT_USED) } *pj = vi; } +stack_pop: if (sptr == stack) { break; } @@ -217,12 +229,17 @@ quicksort_@suff@(void *start, npy_intp num, void *varr) @type@ *pl = start; @type@ *pr = pl + (num - 1)*len; @type@ *stack[PYA_QS_STACK], **sptr = stack, *pm, *pi, *pj, *pk; + npy_intp depth_limit = npy_get_msb(num) * 2; if (vp == NULL) { return -NPY_ENOMEM; } for (;;) { + if (depth_limit-- < 0) { + heapsort_@suff@(pl, (pr - pl) / len + 1, varr); + goto stack_pop; + } while ((size_t)(pr - pl) > SMALL_QUICKSORT*len) { /* quicksort partition */ pm = pl + (((pr - pl)/len) >> 1)*len; @@ -268,6 +285,7 @@ quicksort_@suff@(void *start, npy_intp num, void *varr) } @TYPE@_COPY(pj, vp, len); } +stack_pop: if (sptr == stack) { break; } @@ -292,8 +310,13 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) npy_intp *stack[PYA_QS_STACK]; npy_intp **sptr=stack; npy_intp *pm, *pi, *pj, *pk, vi; + npy_intp depth_limit = npy_get_msb(num) * 2; for (;;) { + if (depth_limit-- < 0) { + aheapsort_@suff@(vv, pl, pr - pl + 1, varr); + goto stack_pop; + } while ((pr - pl) > SMALL_QUICKSORT) { /* quicksort partition */ pm = pl + ((pr - pl) >> 1); @@ -338,6 +361,7 @@ aquicksort_@suff@(void *vv, npy_intp* tosort, npy_intp num, void *varr) } *pj = vi; } +stack_pop: if (sptr == stack) { break; } @@ -370,12 +394,17 @@ npy_quicksort(void *start, npy_intp num, void *varr) char *stack[PYA_QS_STACK]; char **sptr = stack; char *pm, *pi, *pj, *pk; + npy_intp depth_limit = npy_get_msb(num) * 2; if (vp == NULL) { return -NPY_ENOMEM; } for (;;) { + if (depth_limit-- < 0) { + npy_heapsort(pl, (pr - pl) / elsize + 1, varr); + goto stack_pop; + } while(pr - pl > SMALL_QUICKSORT*elsize) { /* quicksort partition */ pm = pl + (((pr - pl) / elsize) >> 1) * elsize; @@ -431,6 +460,7 @@ npy_quicksort(void *start, npy_intp num, void *varr) } GENERIC_COPY(pj, vp, elsize); } +stack_pop: if (sptr == stack) { break; } @@ -456,8 +486,13 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) npy_intp *stack[PYA_QS_STACK]; npy_intp **sptr = stack; npy_intp *pm, *pi, *pj, *pk, vi; + npy_intp depth_limit = npy_get_msb(num) * 2; for (;;) { + if (depth_limit-- < 0) { + npy_aheapsort(vv, pl, pr - pl + 1, varr); + goto stack_pop; + } while ((pr - pl) > SMALL_QUICKSORT) { /* quicksort partition */ pm = pl + ((pr - pl) >> 1); @@ -512,6 +547,7 @@ npy_aquicksort(void *vv, npy_intp* tosort, npy_intp num, void *varr) } *pj = vi; } +stack_pop: if (sptr == stack) { break; } diff --git a/numpy/core/src/npysort/selection.c.src b/numpy/core/src/npysort/selection.c.src index bae10e85e5a1..1e0934558a5c 100644 --- a/numpy/core/src/npysort/selection.c.src +++ b/numpy/core/src/npysort/selection.c.src @@ -292,7 +292,7 @@ int { npy_intp low = 0; npy_intp high = num - 1; - npy_intp depth_limit; + int depth_limit; if (npiv == NULL) pivots = NULL; @@ -338,15 +338,7 @@ int return 0; } - /* dumb integer msb, float npy_log2 too slow for small parititions */ - { - npy_uintp unum = num; - depth_limit = 0; - while (unum >>= 1) { - depth_limit++; - } - depth_limit *= 2; - } + depth_limit = npy_get_msb(num) * 2; /* guarantee three elements */ for (;low + 1 < high;) { diff --git a/numpy/core/src/private/npy_sort.h b/numpy/core/src/private/npy_sort.h index 511d71b0199e..8c6f056231c6 100644 --- a/numpy/core/src/private/npy_sort.h +++ b/numpy/core/src/private/npy_sort.h @@ -9,6 +9,14 @@ #define NPY_ENOMEM 1 #define NPY_ECOMP 2 +static NPY_INLINE int npy_get_msb(npy_uintp unum) +{ + int depth_limit = 0; + while (unum >>= 1) { + depth_limit++; + } + return depth_limit; +} int quicksort_bool(void *vec, npy_intp cnt, void *null); int heapsort_bool(void *vec, npy_intp cnt, void *null); diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index dd51077b9824..9446f0ac2b90 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -1349,6 +1349,21 @@ def test_sort(self): msg = 'test empty array sort with axis=None' assert_equal(np.sort(a, axis=None), a.ravel(), msg) + def test_sort_degraded(self): + # test degraded dataset would take minutes to run with normal qsort + d = np.arange(1000000) + do = d.copy() + x = d + # create a median of 3 killer where each median is the sorted second + # last element of the quicksort partition + while x.size > 3: + mid = x.size // 2 + x[mid], x[-2] = x[-2], x[mid] + x = x[:-2] + + assert_equal(np.sort(d), do) + assert_equal(d[np.argsort(d)], do) + def test_copy(self): def assert_fortran(arr): assert_(arr.flags.fortran)