MAINT: rework release note, changes from review · numpy/numpy@3802b16 · GitHub
[go: up one dir, main page]

Skip to content

Commit 3802b16

Browse files
committed
MAINT: rework release note, changes from review
1 parent 6541cf5 commit 3802b16

File tree

6 files changed

+35
-33
lines changed

6 files changed

+35
-33
lines changed
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
``ufunc.at`` can be much faster
22
-------------------------------
3-
If called on ufuncs with appropriate indexed loops, ``ufunc.at`` can be up to
4-
60x faster. Generic ``ufunc.at`` can be up to 9x faster. The conditions for
5-
any speedup::
3+
Generic ``ufunc.at`` can be up to 9x faster. The conditions for this speedup:
64

75
- contiguous arguments
86
- no casting
97

10-
The conditions for the extra speedup::
11-
12-
- calling the ufuncs ``add``, ``subtract``, ``multiply``, ``divide`` (and
13-
``floor_divide``)
14-
- 1d arguments
8+
If called on ufuncs with appropriate indexed loops on 1d arguments with the above
9+
conditions, ``ufunc.at`` can be up to 60x faster (an additional 7x speedup).
10+
Appropriate indexed loops have been added to ``add``, ``subtract``,
11+
``multiply``, ``divide`` (and ``floor_divide``).
1512

1613
The internal logic is similar to the logic used for regular ufuncs, which also
1714
have a fast path for contiguous, non-casting, 1d arrays.
15+
16+
Thanks to the `D. E. Shaw group <https://deshaw.com/>`_ for sponsoring this
17+
work.

numpy/core/src/multiarray/argfunc.dispatch.c.src

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ simd_@func@_@sfx@(npyv_lanetype_@sfx@ *ip, npy_intp len)
194194
npyv_@bsfx@ nnan_ab = npyv_and_@bsfx@(nnan_a, nnan_b);
195195
npyv_@bsfx@ nnan_cd = npyv_and_@bsfx@(nnan_c, nnan_d);
196196
npy_uint64 nnan = npyv_tobits_@bsfx@(npyv_and_@bsfx@(nnan_ab, nnan_cd));
197-
if ((long long int)nnan != ((1LL << vstep) - 1)) {
197+
if ((unsigned long long int)nnan != ((1ULL << vstep) - 1)) {
198198
npy_uint64 nnan_4[4];
199199
nnan_4[0] = npyv_tobits_@bsfx@(nnan_a);
200200
nnan_4[1] = npyv_tobits_@bsfx@(nnan_b);
@@ -219,7 +219,7 @@ simd_@func@_@sfx@(npyv_lanetype_@sfx@ *ip, npy_intp len)
219219
#if @is_fp@
220220
npyv_@bsfx@ nnan_a = npyv_notnan_@sfx@(a);
221221
npy_uint64 nnan = npyv_tobits_@bsfx@(nnan_a);
222-
if ((long long int)nnan != ((1LL << vstep) - 1)) {
222+
if ((unsigned long long int)nnan != ((1ULL << vstep) - 1)) {
223223
for (int vi = 0; vi < vstep; ++vi) {
224224
if (!((nnan >> vi) & 1)) {
225225
return i + vi;

numpy/core/src/umath/loops.c.src

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ int
557557
@type@ *indexed;
558558
for(i = 0; i < n; i++, indx += isindex, value += isb) {
559559
indexed = (@type@ *)(ip1 + is1 * *(npy_intp *)indx);
560-
indexed[0] = indexed[0] @OP@ *(@type@ *)value;
560+
*indexed = *indexed @OP@ *(@type@ *)value;
561561
}
562562
return 0;
563563
}
@@ -1436,7 +1436,7 @@ NPY_NO_EXPORT int
14361436
@type@ *indexed;
14371437
for(i = 0; i < n; i++, indx += isindex, value += isb) {
14381438
indexed = (@type@ *)(ip1 + is1 * *(npy_intp *)indx);
1439-
indexed[0] = npy_floor_divide@c@(indexed[0], *(@type@ *)value);
1439+
*indexed = npy_floor_divide@c@(*indexed, *(@type@ *)value);
14401440
}
14411441
return 0;
14421442
}
@@ -1590,7 +1590,7 @@ LONGDOUBLE_@kind@_indexed(PyArrayMethod_Context *NPY_UNUSED(context),
15901590
npy_longdouble *indexed;
15911591
for(i = 0; i < n; i++, indx += isindex, value += isb) {
15921592
indexed = (npy_longdouble *)(ip1 + is1 * *(npy_intp *)indx);
1593-
indexed[0] = indexed[0] @OP@ *(npy_longdouble *)value;
1593+
*indexed = *indexed @OP@ *(npy_longdouble *)value;
15941594
}
15951595
return 0;
15961596
}
@@ -1714,7 +1714,7 @@ HALF_@kind@_indexed(void *NPY_UNUSED(context), char **args, npy_intp const *dime
17141714
for(i = 0; i < n; i++, indx += isindex, value += isb) {
17151715
indexed = (npy_half *)(ip1 + is1 * *(npy_intp *)indx);
17161716
const float v = npy_half_to_float(*(npy_half *)value);
1717-
indexed[0] = npy_float_to_half(npy_half_to_float(indexed[0]) @OP@ v);
1717+
*indexed = npy_float_to_half(npy_half_to_float(*indexed) @OP@ v);
17181718
}
17191719
return 0;
17201720
}
@@ -1869,8 +1869,8 @@ HALF_floor_divide_indexed(PyArrayMethod_Context *NPY_UNUSED(context),
18691869
for(i = 0; i < n; i++, indx += isindex, value += isb) {
18701870
indexed = (npy_half *)(ip1 + is1 * *(npy_intp *)indx);
18711871
float v = npy_half_to_float(*(npy_half *)value);
1872-
float div = npy_floor_dividef(npy_half_to_float(indexed[0]), v);
1873-
indexed[0] = npy_float_to_half(div);
1872+
float div = npy_floor_dividef(npy_half_to_float(*indexed), v);
1873+
*indexed = npy_float_to_half(div);
18741874
}
18751875
return 0;
18761876
}

numpy/core/src/umath/loops_arithm_fp.dispatch.c.src

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -551,15 +551,15 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@_indexed)
551551
(PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func))
552552
{
553553
char *ip1 = args[0];
554-
npy_intp *indx = (npy_intp *)args[1];
554+
char *indx = args[1];
555555
char *value = args[2];
556-
npy_intp is1 = steps[0], isindex = steps[1] / sizeof(npy_intp), isb = steps[2];
556+
npy_intp is1 = steps[0], isindex = steps[1], isb = steps[2];
557557
npy_intp n = dimensions[0];
558558
npy_intp i;
559559
@type@ *indexed;
560560
for(i = 0; i < n; i++, indx += isindex, value += isb) {
561-
indexed = (@type@ *)(ip1 + is1 * indx[0]);
562-
indexed[0] = indexed[0] @OP@ *(@type@ *)value;
561+
indexed = (@type@ *)(ip1 + is1 * *(npy_intp *)indx);
562+
*indexed = *indexed @OP@ *(@type@ *)value;
563563
}
564564
return 0;
565565
}

numpy/core/src/umath/loops_arithmetic.dispatch.c.src

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -400,15 +400,15 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(@TYPE@_divide_indexed)
400400
(PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func))
401401
{
402402
char *ip1 = args[0];
403-
npy_intp *indx = (npy_intp *)args[1];
403+
char *indx = args[1];
404404
char *value = args[2];
405-
npy_intp is1 = steps[0], isindex = steps[1] / sizeof(npy_intp), isb = steps[2];
405+
npy_intp is1 = steps[0], isindex = steps[1], isb = steps[2];
406406
npy_intp n = dimensions[0];
407407
npy_intp i;
408408
@type@ *indexed;
409409
for(i = 0; i < n; i++, indx += isindex, value += isb) {
410-
indexed = (@type@ *)(ip1 + is1 * indx[0]);
411-
indexed[0] = floor_div_@TYPE@(indexed[0], *(@type@ *)value);
410+
indexed = (@type@ *)(ip1 + is1 * *(npy_intp *)indx);
411+
*indexed = floor_div_@TYPE@(*indexed, *(@type@ *)value);
412412
}
413413
return 0;
414414
}
@@ -486,20 +486,20 @@ NPY_NO_EXPORT int NPY_CPU_DISPATCH_CURFX(@TYPE@_divide_indexed)
486486
(PyArrayMethod_Context *NPY_UNUSED(context), char * const*args, npy_intp const *dimensions, npy_intp const *steps, NpyAuxData *NPY_UNUSED(func))
487487
{
488488
char *ip1 = args[0];
489-
npy_intp *indx = (npy_intp *)args[1];
489+
char *indx = args[1];
490490
char *value = args[2];
491-
npy_intp is1 = steps[0], isindex = steps[1] / sizeof(npy_intp), isb = steps[2];
491+
npy_intp is1 = steps[0], isindex = steps[1], isb = steps[2];
492492
npy_intp n = dimensions[0];
493493
npy_intp i;
494494
@type@ *indexed;
495495
for(i = 0; i < n; i++, indx += isindex, value += isb) {
496-
indexed = (@type@ *)(ip1 + is1 * indx[0]);
496+
indexed = (@type@ *)(ip1 + is1 * *(npy_intp *)indx);
497497
@type@ in2 = *(@type@ *)value;
498498
if (NPY_UNLIKELY(in2 == 0)) {
499499
npy_set_floatstatus_divbyzero();
500-
indexed[0] = 0;
500+
*indexed = 0;
501501
} else {
502-
indexed[0] = indexed[0] / in2;
502+
*indexed = *indexed / in2;
503503
}
504504
}
505505
return 0;

numpy/core/src/umath/ufunc_object.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5906,10 +5906,6 @@ trivial_at_loop(PyArrayMethodObject *ufuncimpl, NPY_ARRAYMETHOD_FLAGS flags,
59065906
int buffersize=0, errormask = 0;
59075907
int res;
59085908
char *args[3];
5909-
const char * ufunc_name = ufunc_get_name_cstr((PyUFuncObject *)context->caller);
5910-
if (_get_bufsize_errmask(NULL, ufunc_name, &buffersize, &errormask) < 0) {
5911-
return -1;
5912-
}
59135909
npy_intp dimensions = iter->size;
59145910
npy_intp steps[3];
59155911
args[0] = (char *) iter->baseoffset;
@@ -5929,6 +5925,12 @@ trivial_at_loop(PyArrayMethodObject *ufuncimpl, NPY_ARRAYMETHOD_FLAGS flags,
59295925
}
59305926

59315927
if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
5928+
const char * ufunc_name =
5929+
ufunc_get_name_cstr((PyUFuncObject *)context->caller);
5930+
if (_get_bufsize_errmask(NULL, ufunc_name,
5931+
&buffersize, &errormask) < 0) {
5932+
return -1;
5933+
}
59325934
res = _check_ufunc_fperr(errormask, NULL, ufunc_name);
59335935
}
59345936
return res;

0 commit comments

Comments
 (0)
0