Merge pull request #5501 from jaimefrio/faster_fastputmask · numpy/numpy@2e016ac · GitHub
[go: up one dir, main page]

Skip to content

Commit 2e016ac

Browse files
committed
Merge pull request #5501 from jaimefrio/faster_fastputmask
ENH: speed up putmask avoiding % in inner loop
2 parents 4d4fb0d + fc8db73 commit 2e016ac

File tree

2 files changed

+31
-20
lines changed

2 files changed

+31
-20
lines changed

numpy/core/src/multiarray/arraytypes.c.src

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3680,21 +3680,23 @@ static void
36803680
@name@_fastputmask(@type@ *in, npy_bool *mask, npy_intp ni, @type@ *vals,
36813681
npy_intp nv)
36823682
{
3683-
npy_intp i;
3684-
@type@ s_val;
3683+
npy_intp i, j;
36853684

36863685
if (nv == 1) {
3687-
s_val = *vals;
3686+
@type@ s_val = *vals;
36883687
for (i = 0; i < ni; i++) {
36893688
if (mask[i]) {
36903689
in[i] = s_val;
36913690
}
36923691
}
36933692
}
36943693
else {
3695-
for (i = 0; i < ni; i++) {
3694+
for (i = 0, j = 0; i < ni; i++, j++) {
3695+
if (j >= nv) {
3696+
j = 0;
3697+
}
36963698
if (mask[i]) {
3697-
in[i] = vals[i%nv];
3699+
in[i] = vals[j];
36983700
}
36993701
}
37003702
}

numpy/core/src/multiarray/item_selection.c

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -427,10 +427,11 @@ NPY_NO_EXPORT PyObject *
427427
PyArray_PutMask(PyArrayObject *self, PyObject* values0, PyObject* mask0)
428428
{
429429
PyArray_FastPutmaskFunc *func;
430-
PyArrayObject *mask, *values;
430+
PyArrayObject *mask, *values;
431431
PyArray_Descr *dtype;
432-
npy_intp i, chunk, ni, max_item, nv, tmp;
432+
npy_intp i, j, chunk, ni, max_item, nv;
433433
char *src, *dest;
434+
npy_bool *mask_data;
434435
int copied = 0;
435436

436437
mask = NULL;
@@ -469,6 +470,7 @@ PyArray_PutMask(PyArrayObject *self, PyObject* values0, PyObject* mask0)
469470
"the same size");
470471
goto fail;
471472
}
473+
mask_data = PyArray_DATA(mask);
472474
dtype = PyArray_DESCR(self);
473475
Py_INCREF(dtype);
474476
values = (PyArrayObject *)PyArray_FromAny(values0, dtype,
@@ -483,14 +485,20 @@ PyArray_PutMask(PyArrayObject *self, PyObject* values0, PyObject* mask0)
483485
Py_INCREF(Py_None);
484486
return Py_None;
485487
}
488+
src = PyArray_DATA(values);
489+
486490
if (PyDataType_REFCHK(PyArray_DESCR(self))) {
487-
for (i = 0; i < ni; i++) {
488-
tmp = ((npy_bool *)(PyArray_DATA(mask)))[i];
489-
if (tmp) {
490-
src = PyArray_BYTES(values) + chunk * (i % nv);
491-
PyArray_Item_INCREF(src, PyArray_DESCR(self));
492-
PyArray_Item_XDECREF(dest+i*chunk, PyArray_DESCR(self));
493-
memmove(dest + i * chunk, src, chunk);
491+
for (i = 0, j = 0; i < ni; i++, j++) {
492+
if (j >= nv) {
493+
j = 0;
494+
}
495+
if (mask_data[i]) {
496+
char *src_ptr = src + j*chunk;
497+
char *dest_ptr = dest + i*chunk;
498+
499+
PyArray_Item_INCREF(src_ptr, PyArray_DESCR(self));
500+
PyArray_Item_XDECREF(dest_ptr, PyArray_DESCR(self));
501+
memmove(dest_ptr, src_ptr, chunk);
494502
}
495503
}
496504
}
@@ -499,16 +507,17 @@ PyArray_PutMask(PyArrayObject *self, PyObject* values0, PyObject* mask0)
499507
NPY_BEGIN_THREADS_DESCR(PyArray_DESCR(self));
500508
func = PyArray_DESCR(self)->f->fastputmask;
501509
if (func == NULL) {
502-
for (i = 0; i < ni; i++) {
503-
tmp = ((npy_bool *)(PyArray_DATA(mask)))[i];
504-
if (tmp) {
505-
src = PyArray_BYTES(values) + chunk*(i % nv);
506-
memmove(dest + i*chunk, src, chunk);
510+
for (i = 0, j = 0; i < ni; i++, j++) {
511+
if (j >= nv) {
512+
j = 0;
513+
}
514+
if (mask_data[i]) {
515+
memmove(dest + i*chunk, src + j*chunk, chunk);
507516
}
508517
}
509518
}
510519
else {
511-
func(dest, PyArray_DATA(mask), ni, PyArray_DATA(values), nv);
520+
func(dest, mask_data, ni, src, nv);
512521
}
513522
NPY_END_THREADS;
514523
}

0 commit comments

Comments
 (0)
0