8000 Merge pull request #12586 from hameerabbasi/radix-sort · numpy/numpy@3bdc915 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3bdc915

Browse files
authored
Merge pull request #12586 from hameerabbasi/radix-sort
ENH: Implement radix sort
2 parents 4def09a + 7f58678 commit 3bdc915

13 files changed

+327
-45
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ numpy/core/src/npysort/binsearch.c
142142
numpy/core/src/npysort/heapsort.c
143143
numpy/core/src/npysort/mergesort.c
144144
numpy/core/src/npysort/quicksort.c
145+
numpy/core/src/npysort/radixsort.c
145146
numpy/core/src/npysort/selection.c
146147
numpy/core/src/npysort/timsort.c
147148
numpy/core/src/npysort/sort.c

doc/release/1.17.0-notes.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ random data. The algorithm is stable and requires O(n/2) working space. For
121121
details of the algorithm, refer to
122122
`CPython listsort.txt <https://github.com/python/cpython/blob/3.7/Objects/listsort.txt>`_.
123123

124+
In addition, for very small dtypes, radix sort is used instead of timsort. In
125+
general, we attempt to use the fastest possible implementation.
126+
124127
``np.unpackbits`` now accepts a ``count`` parameter
125128
---------------------------------------------------
126129
``count`` allows subsetting the number of bits that will be unpacked up-front,
@@ -182,6 +185,14 @@ maintains `O(N log N)` run time complexity instead of deteriorating towards
182185
`O(N*N)` for prime lengths. Also, accuracy for real-valued FFTs with near-prime
183186
lengths has improved and is on par with complex-valued FFTs.
184187

188+
Performance improvements for integer sorts
189+
------------------------------------------
190+
191+
``sort``, ``argsort``, ``ndarray.sort`` and ``ndarray.argsort`` now use radix
192+
sort as the default stable sort for integers and booleans. This is faster than
193+
the old default, mergesort, in the vast majority of cases.
194+
195+
185196
Further improvements to ``ctypes`` support in ``np.ctypeslib``
186197
--------------------------------------------------------------
187198
A new `numpy.ctypeslib.as_ctypes_type` function has been added, which can be

numpy/core/_add_newdocs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2614,7 +2614,7 @@
26142614

26152615
add_newdoc('numpy.core.multiarray', 'ndarray', ('argsort',
26162616
"""
2617-
a.argsort(axis=-1, kind='quicksort', order=None)
2617+
a.argsort(axis=-1, kind=None, order=None)
26182618
26192619
Returns the indices that would sort this array.
26202620
@@ -3800,7 +3800,7 @@
38003800

38013801
add_newdoc('numpy.core.multiarray', 'ndarray', ('sort',
38023802
"""
3803-
a.sort(axis=-1, kind='quicksort', order=None)
3803+
a.sort(axis=-1, kind=None, order=None)
38043804
38053805
Sort an array in-place. Refer to `numpy.sort` for full documentation.
38063806

numpy/core/defchararray.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2124,7 +2124,7 @@ def __mod__(self, i):
21242124
def __rmod__(self, other):
21252125
return NotImplemented
21262126

2127-
def argsort(self, axis=-1, kind='quicksort', order=None):
2127+
def argsort(self, axis=-1, kind=None, order=None):
21282128
"""
21292129
Return the indices that sort the array lexicographically.
21302130

numpy/core/fromnumeric.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ def _sort_dispatcher(a, axis=None, kind=None, order=None):
824824

825825

826826
@array_function_dispatch(_sort_dispatcher)
827-
def sort(a, axis=-1, kind='quicksort', order=None):
827+
def sort(a, axis=-1, kind=None, order=None):
828828
"""
829829
Return a sorted copy of an array.
830830
@@ -837,8 +837,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
837837
sorting. The default is -1, which sorts along the last axis.
838838
kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
839839
Sorting algorithm. The default is 'quicksort'. Note that both 'stable'
840-
and 'mergesort' use timsort under the covers and, in general, the
841-
actual implementation will vary with data type. The 'mergesort' option
840+
and 'mergesort' use timsort or radix sort under the covers and, in general,
841+
the actual implementation will vary with data type. The 'mergesort' option
842842
is retained for backwards compatibility.
843843
844844
.. versionchanged:: 1.15.0.
@@ -914,7 +914,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
914914
915915
'stable' automatically choses the best stable sorting algorithm
916916
for the data type being sorted. It, along with 'mergesort' is
917-
currently mapped to timsort. API forward compatibility currently limits the
917+
currently mapped to timsort or radix sort depending on the
918+
data type. API forward compatibility currently limits the
918919
ability to select the implementation and it is hardwired for the different
919920
data types.
920921
@@ -925,7 +926,8 @@ def sort(a, axis=-1, kind='quicksort', order=None):
925926
mergesort. It is now used for stable sort while quicksort is still the
926927
default sort if none is chosen. For details of timsort, refer to
927928
`CPython listsort.txt <https://github.com/python/cpython/blob/3.7/Objects/listsort.txt>`_.
928-
929+
'mergesort' and 'stable' are mapped to radix sort for integer data types. Radix sort is an
930+
O(n) sort instead of O(n log n).
929931
930932
Examples
931933
--------
@@ -974,7 +976,7 @@ def _argsort_dispatcher(a, axis=None, kind=None, order=None):
974976

975977

976978
@array_function_dispatch(_argsort_dispatcher)
977-
def argsort(a, axis=-1, kind='quicksort', order=None):
979+
def argsort(a, axis=-1, kind=None, order=None):
978980
"""
979981
Returns the indices that would sort an array.
980982
@@ -997,8 +999,6 @@ def argsort(a, axis=-1, kind='quicksort', order=None):
997999
9981000
.. versionchanged:: 1.15.0.
9991001
The 'stable' option was added.
1000-
1001-
10021002
order : str or list of str, optional
10031003
When `a` is an array with fields defined, this argument specifies
10041004
which fields to compare first, second, etc. A single field can

numpy/core/setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,7 @@ def get_mathlib_info(*args):
710710
join('src', 'npysort', 'mergesort.c.src'),
711711
join('src', 'npysort', 'timsort.c.src'),
712712
join('src', 'npysort', 'heapsort.c.src'),
713+
join('src', 'npysort', 'radixsort.c.src'),
713714
join('src', 'common', 'npy_partition.h.src'),
714715
join('src', 'npysort', 'selection.c.src'),
715716
join('src', 'common', 'npy_binsearch.h.src'),

numpy/core/src/common/npy_sort.h.src

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,17 @@ int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null);
4444

4545
/**end repeat**/
4646

47+
/**begin repeat
48+
*
49+
* #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
50+
* longlong, ulonglong#
51+
*/
52+
53+
int radixsort_@suff@(void *vec, npy_intp cnt, void *null);
54+
int aradixsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null);
55+
56+
/**end repeat**/
57+
4758

4859

4960
/*

numpy/core/src/multiarray/arraytypes.c.src

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4417,6 +4417,7 @@ static PyArray_Descr @from@_Descr = {
44174417
* npy_half, npy_float, npy_double, npy_longdouble,
44184418
* npy_cfloat, npy_cdouble, npy_clongdouble,
44194419
* PyObject *, npy_datetime, npy_timedelta#
4420+
* #rsort = 1*5, 0*16#
44204421
* #NAME = Bool,
44214422
* Byte, UByte, Short, UShort, Int, UInt,
44224423
* Long, ULong, LongLong, ULongLong,
@@ -4473,12 +4474,20 @@ static PyArray_ArrFuncs _Py@NAME@_ArrFuncs = {
44734474
{
44744475
quicksort_@suff@,
44754476
heapsort_@suff@,
4476-
timsort_@suff@
4477+
#if @rsort@
4478+
radixsort_@suff@
4479+
#else
4480+
timsort_@suff@
4481+
#endif
44774482
},
44784483
{
44794484
aquicksort_@suff@,
44804485
aheapsort_@suff@,
4481-
atimsort_@suff@
4486+
#if @rsort@
4487+
aradixsort_@suff@
4488+
#else
4489+
atimsort_@suff@
4490+
#endif
44824491
},
44834492
#else
44844493
{

numpy/core/src/multiarray/conversion_utils.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,11 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
393393
char *str;
394394
PyObject *tmp = NULL;
395395

396+
if (obj == Py_None) {
397+
*sortkind = NPY_QUICKSORT;
398+
return NPY_SUCCEED;
399+
}
400+
396401
if (PyUnicode_Check(obj)) {
397402
obj = tmp = PyUnicode_AsASCIIString(obj);
398403
if (obj == NULL) {
@@ -401,6 +406,8 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
401406
}
402407

403408
*sortkind = NPY_QUICKSORT;
409+
410+
404411
str = PyBytes_AsString(obj);
405412
if (!str) {
406413
Py_XDECREF(tmp);
@@ -424,7 +431,7 @@ PyArray_SortkindConverter(PyObject *obj, NPY_SORTKIND *sortkind)
424431
* That maintains backwards compatibility while
425432
* allowing other types of stable sorts to be used.
426433
*/
427-
*sortkind = NPY_STABLESORT;
434+
*sortkind = NPY_MERGESORT;
428435
}
429436
else if (str[0] == 's' || str[0] == 'S') {
430437
/*

numpy/core/src/multiarray/item_selection.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,7 +1126,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
11261126
NPY_NO_EXPORT int
11271127
PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which)
11281128
{
1129-
PyArray_SortFunc *sort;
1129+
PyArray_SortFunc *sort = NULL;
11301130
int n = PyArray_NDIM(op);
11311131

11321132
if (check_and_adjust_axis(&axis, n) < 0) {
@@ -1143,6 +1143,7 @@ PyArray_Sort(PyArrayObject *op, int axis, NPY_SORTKIND which)
11431143
}
11441144

11451145
sort = PyArray_DESCR(op)->f->sort[which];
1146+
11461147
if (sort == NULL) {
11471148
if (PyArray_DESCR(op)->f->compare) {
11481149
switch (which) {
@@ -1284,16 +1285,11 @@ NPY_NO_EXPORT PyObject *
12841285
PyArray_ArgSort(PyArrayObject *op, int axis, NPY_SORTKIND which)
12851286
{
12861287
PyArrayObject *op2;
1287-
PyArray_ArgSortFunc *argsort;
1288+
PyArray_ArgSortFunc *argsort = NULL;
12881289
PyObject *ret;
12891290

1290-
if (which < 0 || which >= NPY_NSORTS) {
1291-
PyErr_SetString(PyExc_ValueError,
1292-
"not a valid sort kind");
1293-
return NULL;
1294-
}
1295-
12961291
argsort = PyArray_DESCR(op)->f->argsort[which];
1292+
12971293
if (argsort == NULL) {
12981294
if (PyArray_DESCR(op)->f->compare) {
12991295
switch (which) {

0 commit comments

Comments
 (0)
0