10000 ENH: avoid temporary arrays in expressions (again) by juliantaylor · Pull Request #7997 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

ENH: avoid temporary arrays in expressions (again) #7997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions benchmarks/benchmarks/bench_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,26 @@ def time_tril_l10x10(self):
np.tril(self.l10x10)


class Temporaries(Benchmark):
    """Time chained arithmetic whose first step yields an unnamed intermediate.

    Each benchmark evaluates a two-step expression such as ``(a * 2) + b``
    on arrays of 50000 ("mid") and 1000000 ("large") elements.
    """

    def setup(self):
        # Operands are created once here so allocation is not part of the
        # timed expressions.
        self.amid = np.ones(50000)
        self.bmid = np.ones(50000)
        self.alarge = np.ones(1000000)
        self.blarge = np.ones(1000000)

    def time_mid(self):
        # The intermediate ``self.amid * 2`` is deliberately left unnamed;
        # the result is discarded because only the evaluation is timed.
        (self.amid * 2) + self.bmid

    def time_mid2(self):
        # Array-array step first, then an array-scalar step.
        (self.amid + self.bmid) - 2

    def time_large(self):
        # Same expression as time_mid on the 1000000-element operands.
        (self.alarge * 2) + self.blarge

    def time_large2(self):
        # Same expression as time_mid2 on the 1000000-element operands.
        (self.alarge + self.blarge) - 2


class MA(Benchmark):
def setup(self):
self.l100 = range(100)
Expand Down
10 changes: 10 additions & 0 deletions doc/release/1.13.0-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ This release supports Python 2.7 and 3.4 - 3.6.
Highlights
==========

* Operations like ``a + b + c`` will create fewer temporaries on some platforms


Dropped Support
===============
Expand Down Expand Up @@ -84,6 +86,14 @@ C API
New Features
============

Temporary elision
-----------------
On platforms providing the ``backtrace`` function, NumPy will now avoid
creating temporaries in expressions when possible.
For example, ``d = a + b + c`` is transformed to ``d = a + b; d += c``, which
can improve performance for large arrays, as less memory bandwidth is required
to perform the operation.

``axes`` argument for ``unique``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In an N-dimensional array, the user can now choose the axis along which to look
Expand Down
1 change: 1 addition & 0 deletions numpy/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ def generate_multiarray_templated_sources(ext, build_dir):
join('src', 'multiarray', 'shape.c'),
join('src', 'multiarray', 'scalarapi.c'),
join('src', 'multiarray', 'scalartypes.c.src'),
join('src', 'multiarray', 'temp_elide.c'),
join('src', 'multiarray', 'usertypes.c'),
join('src', 'multiarray', 'ucsnarrow.c'),
join('src', 'multiarray', 'vdot.c'),
Expand Down
4 changes: 3 additions & 1 deletion numpy/core/setup_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ def check_api_version(apiversion, codegen_dir):
OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh",
"rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow",
"copysign", "nextafter", "ftello", "fseeko",
"strtoll", "strtoull", "cbrt", "strtold_l", "fallocate"]
"strtoll", "strtoull", "cbrt", "strtold_l", "fallocate",
"backtrace"]


OPTIONAL_HEADERS = [
Expand All @@ -116,6 +117,7 @@ def check_api_version(apiversion, codegen_dir):
"emmintrin.h", # SSE2
"features.h", # for glibc version linux
"xlocale.h" # see GH#8367
"dlfcn.h", # dladdr
]

# optional gcc compiler builtins and their call arguments and optional a
Expand Down
83 changes: 82 additions & 1 deletion numpy/core/src/multiarray/number.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "npy_import.h"
#include "common.h"
#include "number.h"
#include "temp_elide.h"

/*************************************************************************
**************** Implement Number Protocol ****************************
Expand Down Expand Up @@ -352,32 +353,73 @@ PyArray_GenericInplaceUnaryFunction(PyArrayObject *m1, PyObject *op)
return PyObject_CallFunctionObjArgs(op, m1, m1, NULL);
}

/*
 * Forward declarations of the in-place operators.
 *
 * The out-of-place operators below (array_add, array_subtract, ...) pass
 * these functions by address to try_binary_elide(), so prototypes must be
 * visible before those call sites.
 */
static PyObject *
array_inplace_add(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_subtract(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_multiply(PyArrayObject *m1, PyObject *m2);
/* Only declared when NPY_PY3K is not defined, matching array_divide. */
#if !defined(NPY_PY3K)
static PyObject *
array_inplace_divide(PyArrayObject *m1, PyObject *m2);
#endif
static PyObject *
array_inplace_true_divide(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_floor_divide(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_bitwise_and(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_bitwise_or(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_bitwise_xor(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_left_shift(PyArrayObject *m1, PyObject *m2);
static PyObject *
array_inplace_right_shift(PyArrayObject *m1, PyObject *m2);

/*
 * Array addition: m1 + m2.
 *
 * Attempts temporary elision through the in-place add first and only
 * falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_add(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __radd__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__add__", "__radd__", 0, nb_add);

    /* NOTE(review): trailing 1 presumably marks the op as commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_add, &elided, 1)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.add);
    }
    return elided;
}

/*
 * Array subtraction: m1 - m2.
 *
 * Attempts temporary elision through the in-place subtract first and only
 * falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_subtract(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rsub__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__sub__", "__rsub__", 0, nb_subtract);

    /* NOTE(review): trailing 0 presumably marks the op as non-commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_subtract, &elided, 0)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.subtract);
    }
    return elided;
}

/*
 * Array multiplication: m1 * m2.
 *
 * Attempts temporary elision through the in-place multiply first and only
 * falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_multiply(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rmul__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__mul__", "__rmul__", 0, nb_multiply);

    /* NOTE(review): trailing 1 presumably marks the op as commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_multiply, &elided, 1)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.multiply);
    }
    return elided;
}

#if !defined(NPY_PY3K)
/*
 * Array division: m1 / m2 (compiled only when NPY_PY3K is not defined).
 *
 * Attempts temporary elision through the in-place divide first and only
 * falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_divide(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rdiv__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__div__", "__rdiv__", 0, nb_divide);

    /* NOTE(review): trailing 0 presumably marks the op as non-commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_divide, &elided, 0)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.divide);
    }
    return elided;
}
#endif
Expand Down Expand Up @@ -529,7 +571,7 @@ fast_scalar_power(PyArrayObject *a1, PyObject *o2, int inplace)
return NULL;
}

if (inplace) {
if (inplace || can_elide_temp_unary(a1)) {
return PyArray_GenericInplaceUnaryFunction(a1, fastop);
} else {
return PyArray_GenericUnaryFunction(a1, fastop);
Expand Down Expand Up @@ -588,53 +630,82 @@ array_power(PyArrayObject *a1, PyObject *o2, PyObject *NPY_UNUSED(modulo))
/*
 * Unary minus: -m1.
 *
 * When can_elide_temp_unary() accepts m1, the negative ufunc is applied
 * through the in-place helper; otherwise the regular unary helper is used.
 */
static PyObject *
array_negative(PyArrayObject *m1)
{
    return can_elide_temp_unary(m1)
        ? PyArray_GenericInplaceUnaryFunction(m1, n_ops.negative)
        : PyArray_GenericUnaryFunction(m1, n_ops.negative);
}

/*
 * Absolute value: abs(m1).
 *
 * When can_elide_temp_unary() accepts m1, the absolute ufunc is applied
 * through the in-place helper; otherwise the regular unary helper is used.
 */
static PyObject *
array_absolute(PyArrayObject *m1)
{
    return can_elide_temp_unary(m1)
        ? PyArray_GenericInplaceUnaryFunction(m1, n_ops.absolute)
        : PyArray_GenericUnaryFunction(m1, n_ops.absolute);
}

/*
 * Bitwise inversion: ~m1.
 *
 * When can_elide_temp_unary() accepts m1, the invert ufunc is applied
 * through the in-place helper; otherwise the regular unary helper is used.
 */
static PyObject *
array_invert(PyArrayObject *m1)
{
    return can_elide_temp_unary(m1)
        ? PyArray_GenericInplaceUnaryFunction(m1, n_ops.invert)
        : PyArray_GenericUnaryFunction(m1, n_ops.invert);
}

/*
 * Left shift: m1 << m2.
 *
 * Attempts temporary elision through the in-place left shift first and
 * only falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_left_shift(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rlshift__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__lshift__", "__rlshift__", 0, nb_lshift);

    /* NOTE(review): trailing 0 presumably marks the op as non-commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_left_shift, &elided, 0)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.left_shift);
    }
    return elided;
}

/*
 * Right shift: m1 >> m2.
 *
 * Attempts temporary elision through the in-place right shift first and
 * only falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_right_shift(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rrshift__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__rshift__", "__rrshift__", 0, nb_rshift);

    /* NOTE(review): trailing 0 presumably marks the op as non-commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_right_shift, &elided, 0)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.right_shift);
    }
    return elided;
}

/*
 * Bitwise and: m1 & m2.
 *
 * Attempts temporary elision through the in-place bitwise-and first and
 * only falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_bitwise_and(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rand__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__and__", "__rand__", 0, nb_and);

    /* NOTE(review): trailing 1 presumably marks the op as commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_bitwise_and, &elided, 1)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.bitwise_and);
    }
    return elided;
}

/*
 * Bitwise or: m1 | m2.
 *
 * Attempts temporary elision through the in-place bitwise-or first and
 * only falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_bitwise_or(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __ror__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__or__", "__ror__", 0, nb_or);

    /* NOTE(review): trailing 1 presumably marks the op as commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_bitwise_or, &elided, 1)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.bitwise_or);
    }
    return elided;
}

/*
 * Bitwise xor: m1 ^ m2.
 *
 * Attempts temporary elision through the in-place bitwise-xor first and
 * only falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_bitwise_xor(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rxor__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__xor__", "__rxor__", 0, nb_xor);

    /* NOTE(review): trailing 1 presumably marks the op as commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_bitwise_xor, &elided, 1)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.bitwise_xor);
    }
    return elided;
}

Expand Down Expand Up @@ -726,14 +797,24 @@ array_inplace_bitwise_xor(PyArrayObject *m1, PyObject *m2)
/*
 * Floor division: m1 // m2.
 *
 * Attempts temporary elision through the in-place floor divide first and
 * only falls back to the generic binary ufunc call when no elision happened.
 */
static PyObject *
array_floor_divide(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rfloordiv__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__floordiv__", "__rfloordiv__", 0, nb_floor_divide);

    /* NOTE(review): trailing 0 presumably marks the op as non-commutative. */
    if (!try_binary_elide(m1, m2, &array_inplace_floor_divide, &elided, 0)) {
        return PyArray_GenericBinaryFunction(m1, m2, n_ops.floor_divide);
    }
    return elided;
}

/*
 * True division: m1 / m2.
 *
 * Elision is only attempted when m1 is exactly an ndarray with a float or
 * complex dtype; every other case goes straight to the generic binary
 * ufunc call.
 * NOTE(review): the dtype restriction is presumably there because true
 * division of integer arrays produces a result the integer operand cannot
 * hold in place -- confirm.
 */
static PyObject *
array_true_divide(PyArrayObject *m1, PyObject *m2)
{
    PyObject *elided;

    /* NOTE(review): presumably returns early so m2's __rtruediv__ can run. */
    GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__truediv__", "__rtruediv__", 0, nb_true_divide);

    /* The exact-type check must run before the dtype macros touch m1. */
    if (PyArray_CheckExact(m1)) {
        if (PyArray_ISFLOAT(m1) || PyArray_ISCOMPLEX(m1)) {
            if (try_binary_elide(m1, m2, &array_inplace_true_divide, &elided, 0)) {
                return elided;
            }
        }
    }
    return PyArray_GenericBinaryFunction(m1, m2, n_ops.true_divide);
}

Expand Down
Loading
0