numpy
diff --git a/‎doc/release/1.16.0-notes.rst
Lines changed: 41 additions & 0 deletions b/‎doc/release/1.16.0-notes.rst
Lines changed: 41 additions & 0 deletions
diff --git a/‎doc/source/reference/c-api.generalized-ufuncs.rst
Lines changed: 40 additions & 22 deletions b/‎doc/source/reference/c-api.generalized-ufuncs.rst
Lines changed: 40 additions & 22 deletions
diff --git a/‎doc/source/reference/c-api.types-and-structures.rst
Lines changed: 43 additions & 4 deletions b/‎doc/source/reference/c-api.types-and-structures.rst
Lines changed: 43 additions & 4 deletions
diff --git a/‎numpy/core/code_generators/cversions.txt
Lines changed: 2 additions & 0 deletions b/‎numpy/core/code_generators/cversions.txt
Lines changed: 2 additions & 0 deletions
diff --git a/‎numpy/core/include/numpy/ufuncobject.h
Lines changed: 23 additions & 0 deletions b/‎numpy/core/include/numpy/ufuncobject.h
Lines changed: 23 additions & 0 deletions
diff --git a/‎numpy/core/setup.py
Lines changed: 2 additions & 2 deletions b/‎numpy/core/setup.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎numpy/core/setup_common.py
Lines changed: 2 additions & 1 deletion b/‎numpy/core/setup_common.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎numpy/core/src/multiarray/numpyos.c renamed to ‎numpy/core/src/common/numpyos.c
Lines changed: 28 additions & 0 deletions b/‎numpy/core/src/multiarray/numpyos.c renamed to ‎numpy/core/src/common/numpyos.c
Lines changed: 28 additions & 0 deletions
diff --git a/‎numpy/core/src/multiarray/numpyos.h renamed to ‎numpy/core/src/common/numpyos.h
Lines changed: 7 additions & 0 deletions b/‎numpy/core/src/multiarray/numpyos.h renamed to ‎numpy/core/src/common/numpyos.h
Lines changed: 7 additions & 0 deletions
diff --git a/‎numpy/core/src/multiarray/arraytypes.c.src
Lines changed: 2 additions & 28 deletions b/‎numpy/core/src/multiarray/arraytypes.c.src
Lines changed: 2 additions & 28 deletions
@@ -102,6 +102,8 @@ for unraveling. ``dims`` remains supported, but is now deprecated.
 C API changes
 =============
 
+The :c:data:`NPY_API_VERSION` was incremented to 0x0000000D since
+``core_dim_flags`` and ``core_dim_sizes`` were added to :c:type:`PyUFuncObject`.
 
 New Features
 ============
@@ -172,6 +174,45 @@ behavior will be appending.  This applied to: `LDFLAGS`, `F77FLAGS`,
 `F90FLAGS`, `FREEFLAGS`, `FOPT`, `FDEBUG`, and `FFLAGS`.  See gh-11525 for more
 details.
 
+Generalized ufunc signatures now allow fixed-size dimensions
+------------------------------------------------------------
+By using a numerical value in the signature of a generalized ufunc, one can
+indicate that the given function requires input or output to have dimensions
+with the given size. E.g., the signature of a function that converts a polar
+angle to a two-dimensional cartesian unit vector would be ``()->(2)``; that
+for one that converts two spherical angles to a three-dimensional unit vector
+would be ``(),()->(3)``; and that for the cross product of two
+three-dimensional vectors would be ``(3),(3)->(3)``.
+
+Note that to the elementary function these dimensions are not treated any
+differently from variable ones indicated with a name starting with a letter;
+the loop still is passed the corresponding size, but it can now count on that
+size being equal to the fixed one given in the signature.
+
+Generalized ufunc signatures now allow flexible dimensions
+----------------------------------------------------------
+
+Some functions, in particular numpy's implementation of ``@`` as ``matmul``,
+are very similar to generalized ufuncs in that they operate over core
+dimensions, but one could not present them as such because they were able to
+deal with inputs in which a dimension is missing. To support this, it is now
+allowed to postfix a dimension name with a question mark to indicate that the
+dimension does not necessarily have to be present.
+
+With this addition, the signature for ``matmul`` can be expressed as
+``(m?,n),(n,p?)->(m?,p?)``.  This indicates that if, e.g., the second operand
+has only one dimension, for the purposes of the elementary function it will be
+treated as if that input has core shape ``(n, 1)``, and the output has the
+corresponding core shape of ``(m, 1)``. The actual output array, however, has
+the flexible dimension removed, i.e., it will have shape ``(..., m)``.
+Similarly, if both arguments have only a single dimension, the inputs will be
+presented as having shapes ``(1, n)`` and ``(n, 1)`` to the elementary
+function, and the output as ``(1, 1)``, while the actual output array returned
+will have shape ``()``. In this way, the signature allows one to use a
+single elementary function for four related but different signatures,
+``(m,n),(n,p)->(m,p)``, ``(n),(n,p)->(p)``, ``(m,n),(n)->(m)`` and
+``(n),(n)->()``.
+
 ``np.clip`` and the ``clip`` method check for memory overlap
 ------------------------------------------------------------
 The ``out`` argument to these functions is now always tested for memory overlap
 
@@ -127,38 +127,56 @@ The formal syntax of signatures is as follows::
     <Output arguments>     ::= <Argument list>
     <Argument list>        ::= nil | <Argument> | <Argument> "," <Argument list>
     <Argument>             ::= "(" <Core dimension list> ")"
-    <Core dimension list>  ::= nil | <Core dimension name> |
-                               <Core dimension name> "," <Core dimension list>
-    <Core dimension name>  ::= valid Python variable name
-
+    <Core dimension list>  ::= nil | <Core dimension> |
+                               <Core dimension> "," <Core dimension list>
+    <Core dimension>       ::= <Dimension name> <Dimension modifier>
+    <Dimension name>       ::= valid Python variable name | valid integer
+    <Dimension modifier>   ::= nil | "?"
 
 Notes:
 
 #. All quotes are for clarity.
-#. Core dimensions that share the same name must have the exact same size.
+#. Unmodified core dimensions that share the same name must have the same size.
    Each dimension name typically corresponds to one level of looping in the
    elementary function's implementation.
 #. White spaces are ignored.
+#. An integer as a dimension name freezes that dimension to the value.
+#. If the name is suffixed with the "?" modifier, the dimension is a core
+   dimension only if it exists on all inputs and outputs that share it;
+   otherwise it is ignored (and replaced by a dimension of size 1 for the
+   elementary function).
 
 Here are some examples of signatures:
 
-+-------------+------------------------+-----------------------------------+
-| add         | ``(),()->()``          |                                   |
-+-------------+------------------------+-----------------------------------+
-| sum1d       | ``(i)->()``            |                                   |
-+-------------+------------------------+-----------------------------------+
-| inner1d     | ``(i),(i)->()``        |                                   |
-+-------------+------------------------+-----------------------------------+
-| matmat      | ``(m,n),(n,p)->(m,p)`` | matrix multiplication             |
-+-------------+------------------------+-----------------------------------+
-| vecmat      | ``(n),(n,p)->(p)``     | vector-matrix multiplication      |
-+-------------+------------------------+-----------------------------------+
-| matvec      | ``(m,n),(n)->(m)``     | matrix-vector multiplication      |
-+-------------+------------------------+-----------------------------------+
-| outer_inner | ``(i,t),(j,t)->(i,j)`` | inner over the last dimension,    |
-|             |                        | outer over the second to last,    |
-|             |                        | and loop/broadcast over the rest. |
-+-------------+------------------------+-----------------------------------+
++-------------+----------------------------+-----------------------------------+
+| name        | signature                  | common usage                      |
++=============+============================+===================================+
+| add         | ``(),()->()``              | binary ufunc                      |
++-------------+----------------------------+-----------------------------------+
+| sum1d       | ``(i)->()``                | reduction                         |
++-------------+----------------------------+-----------------------------------+
+| inner1d     | ``(i),(i)->()``            | vector-vector multiplication      |
++-------------+----------------------------+-----------------------------------+
+| matmat      | ``(m,n),(n,p)->(m,p)``     | matrix multiplication             |
++-------------+----------------------------+-----------------------------------+
+| vecmat      | ``(n),(n,p)->(p)``         | vector-matrix multiplication      |
++-------------+----------------------------+-----------------------------------+
+| matvec      | ``(m,n),(n)->(m)``         | matrix-vector multiplication      |
++-------------+----------------------------+-----------------------------------+
+| matmul      | ``(m?,n),(n,p?)->(m?,p?)`` | combination of the four above     |
++-------------+----------------------------+-----------------------------------+
+| outer_inner | ``(i,t),(j,t)->(i,j)``     | inner over the last dimension,    |
+|             |                            | outer over the second to last,    |
+|             |                            | and loop/broadcast over the rest. |
++-------------+----------------------------+-----------------------------------+
+| cross1d     | ``(3),(3)->(3)``           | cross product where the last      |
+|             |                            | dimension is frozen and must be 3 |
++-------------+----------------------------+-----------------------------------+
+
+.. _frozen:
+
+The last is an instance of freezing a core dimension and can be used to
+improve ufunc performance.
 
 C-API for implementing Elementary Functions
 -------------------------------------------
 
@@ -182,8 +182,18 @@ PyArrayDescr_Type
 
 .. c:type:: PyArray_Descr
 
-   The format of the :c:type:`PyArray_Descr` structure that lies at the
-   heart of the :c:data:`PyArrayDescr_Type` is
+   The :c:type:`PyArray_Descr` structure lies at the heart of the
+   :c:data:`PyArrayDescr_Type`. While it is described here for
+   completeness, it should be considered internal to NumPy and manipulated via
+   ``PyArrayDescr_*`` or ``PyDataType*`` functions and macros. The size of this
+   structure is subject to change across versions of NumPy. To ensure
+   compatibility:
+
+   - Never declare a non-pointer instance of the struct
+   - Never perform pointer arithmetic
+   - Never use ``sizeof(PyArray_Descr)``
+
+   It has the following structure:
 
    .. code-block:: c
 
@@ -685,7 +695,14 @@ PyUFunc_Type
    the information needed to call the underlying C-code loops that
    perform the actual work. While it is described here for completeness, it
    should be considered internal to NumPy and manipulated via ``PyUFunc_*``
-   functions. It has the following structure:
+   functions. The size of this structure is subject to change across versions
+   of NumPy. To ensure compatibility:
+
+   - Never declare a non-pointer instance of the struct
+   - Never perform pointer arithmetic
+   - Never use ``sizeof(PyUFuncObject)``
+
+   It has the following structure:
 
    .. code-block:: c
 
@@ -713,10 +730,13 @@ PyUFunc_Type
           char *core_signature;
           PyUFunc_TypeResolutionFunc *type_resolver;
           PyUFunc_LegacyInnerLoopSelectionFunc *legacy_inner_loop_selector;
-          void *reserved2;
           PyUFunc_MaskedInnerLoopSelectionFunc *masked_inner_loop_selector;
           npy_uint32 *op_flags;
           npy_uint32 iter_flags;
+          /* new in API version 0x0000000D */
+          npy_intp *core_dim_sizes;
+          npy_uint32 *core_dim_flags;
+
       } PyUFuncObject;
 
    .. c:macro:: PyUFuncObject.PyObject_HEAD
@@ -776,6 +796,10 @@ PyUFunc_Type
        specifies how many different 1-d loops (of the builtin data
        types) are available.
 
+   .. c:member:: int PyUFuncObject.reserved1
+
+       Unused.
+
    .. c:member:: char *PyUFuncObject.name
 
        A string name for the ufunc. This is used dynamically to build
@@ -870,6 +894,21 @@ PyUFunc_Type
 
        Override the default nditer flags for the ufunc.
 
+   Added in API version 0x0000000D
+
+   .. c:member:: npy_intp *PyUFuncObject.core_dim_sizes
+
+       For each distinct core dimension, the :ref:`frozen <frozen>` size
+       (-1 if not frozen). The value is only fixed by the signature when the
+       :c:data:`UFUNC_CORE_DIM_SIZE_INFERRED` flag is not set for that dimension.
+
+   .. c:member:: npy_uint32 *PyUFuncObject.core_dim_flags
+
+       For each distinct core dimension, a set of ``UFUNC_CORE_DIM*`` flags
+
+       - :c:data:`UFUNC_CORE_DIM_CAN_IGNORE` if the dim name ends in ``?``
+       - :c:data:`UFUNC_CORE_DIM_SIZE_INFERRED` if the dim size will be
+         determined from the operands and not from a :ref:`frozen <frozen>` signature
+
 PyArrayIter_Type
 ----------------
 
 
@@ -43,3 +43,5 @@
 # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase.
 0x0000000c = a1bc756c5782853ec2e3616cf66869d8
 
+# Version 13 (Numpy 1.16) Added fields core_dim_flags and core_dim_sizes to PyUFuncObject
+0x0000000d = a1bc756c5782853ec2e3616cf66869d8
@@ -209,9 +209,32 @@ typedef struct _tagPyUFuncObject {
          * set by nditer object.
          */
         npy_uint32 iter_flags;
+
+        /* New in NPY_API_VERSION 0x0000000D and above */
+
+        /*
+         * for each of the core_num_dim_ix distinct dimension names,
+         * the possible "frozen" size (-1 if not frozen).
+         */
+        npy_intp *core_dim_sizes;
+
+        /*
+         * for each distinct core dimension, a set of UFUNC_CORE_DIM* flags
+         */
+        npy_uint32 *core_dim_flags;
+
+
+
 } PyUFuncObject;
 
 #include "arrayobject.h"
+/*
+ * Per-core-dimension flags of a generalized ufunc, as kept in
+ * PyUFuncObject.core_dim_flags; 0x0001 reserved for possible use as
+ * CORE_ENABLED
+ */
+/*
+ * the core dimension's size will be determined by the operands
+ * (i.e., it is not frozen to a fixed value by the signature).
+ */
+#define UFUNC_CORE_DIM_SIZE_INFERRED 0x0002
+/* the core dimension may be absent (its name ends in "?" in the signature) */
+#define UFUNC_CORE_DIM_CAN_IGNORE 0x0004
+/* flags inferred during execution */
+#define UFUNC_CORE_DIM_MISSING 0x00040000
 
 #define UFUNC_ERR_IGNORE 0
 #define UFUNC_ERR_WARN   1
 
@@ -737,6 +737,7 @@ def get_mathlib_info(*args):
             join('src', 'common', 'ucsnarrow.h'),
             join('src', 'common', 'ufunc_override.h'),
             join('src', 'common', 'umathmodule.h'),
+            join('src', 'common', 'numpyos.h'),
             ]
 
     common_src = [
@@ -746,6 +747,7 @@ def get_mathlib_info(*args):
             join('src', 'common', 'templ_common.h.src'),
             join('src', 'common', 'ucsnarrow.c'),
             join('src', 'common', 'ufunc_override.c'),
+            join('src', 'common', 'numpyos.c'),
             ]
 
     blas_info = get_info('blas_opt', 0)
@@ -785,7 +787,6 @@ def get_mathlib_info(*args):
             join('src', 'multiarray', 'multiarraymodule.h'),
             join('src', 'multiarray', 'nditer_impl.h'),
             join('src', 'multiarray', 'number.h'),
-            join('src', 'multiarray', 'numpyos.h'),
             join('src', 'multiarray', 'refcount.h'),
             join('src', 'multiarray', 'scalartypes.h'),
             join('src', 'multiarray', 'sequence.h'),
@@ -851,7 +852,6 @@ def get_mathlib_info(*args):
             join('src', 'multiarray', 'nditer_constr.c'),
             join('src', 'multiarray', 'nditer_pywrap.c'),
             join('src', 'multiarray', 'number.c'),
-            join('src', 'multiarray', 'numpyos.c'),
             join('src', 'multiarray', 'refcount.c'),
             join('src', 'multiarray', 'sequence.c'),
             join('src', 'multiarray', 'shape.c'),
 
@@ -41,7 +41,8 @@
 # 0x0000000b - 1.13.x
 # 0x0000000c - 1.14.x
 # 0x0000000c - 1.15.x
-C_API_VERSION = 0x0000000c
+# 0x0000000d - 1.16.x
+C_API_VERSION = 0x0000000d
 
 class MismatchCAPIWarning(Warning):
     pass
 
@@ -769,3 +769,31 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value)
     }
     return r;
 }
+/*
+ * Convert a string to an npy_longlong in an arbitrary base.
+ *
+ * All three arguments are forwarded unchanged to a conversion routine
+ * selected at compile time: strtoll when HAVE_STRTOLL is defined,
+ * _strtoi64 when building with MSVC, and PyOS_strtol otherwise.
+ * The routine's result is returned as-is.
+ */
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOLL
+    return strtoll(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoi64(str, endptr, base);
+#else
+    /*
+     * ok on 64 bit posix
+     * NOTE(review): presumably this relies on long being as wide as
+     * long long on such platforms -- confirm for other targets.
+     */
+    return PyOS_strtol(str, endptr, base);
+#endif
+}
+/*
+ * Convert a string to an npy_ulonglong in an arbitrary base.
+ *
+ * All three arguments are forwarded unchanged to a conversion routine
+ * selected at compile time: strtoull when HAVE_STRTOULL is defined,
+ * _strtoui64 when building with MSVC, and PyOS_strtoul otherwise.
+ * The routine's result is returned as-is.
+ */
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base)
+{
+#if defined HAVE_STRTOULL
+    return strtoull(str, endptr, base);
+#elif defined _MSC_VER
+    return _strtoui64(str, endptr, base);
+#else
+    /*
+     * ok on 64 bit posix
+     * NOTE(review): presumably this relies on unsigned long being as wide
+     * as unsigned long long on such platforms -- confirm for other targets.
+     */
+    return PyOS_strtoul(str, endptr, base);
+#endif
+}
+
+
@@ -31,4 +31,11 @@ NumPyOS_ascii_ftoLf(FILE *fp, long double *value);
 NPY_NO_EXPORT int
 NumPyOS_ascii_isspace(int c);
 
+/* Convert a string to a signed integer (npy_longlong) in an arbitrary base */
+NPY_NO_EXPORT npy_longlong
+NumPyOS_strtoll(const char *str, char **endptr, int base);
+
+/* Convert a string to an unsigned integer (npy_ulonglong) in an arbitrary base */
+NPY_NO_EXPORT npy_ulonglong
+NumPyOS_strtoull(const char *str, char **endptr, int base);
 #endif
@@ -150,32 +150,6 @@ MyPyLong_AsUnsigned@Type@ (PyObject *obj)
 
 /**end repeat**/
 
-static npy_longlong
-npy_strtoll(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOLL
-    return strtoll(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoi64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtol(str, endptr, base);
-#endif
-}
-
-static npy_ulonglong
-npy_strtoull(const char *str, char **endptr, int base)
-{
-#if defined HAVE_STRTOULL
-    return strtoull(str, endptr, base);
-#elif defined _MSC_VER
-    return _strtoui64(str, endptr, base);
-#else
-    /* ok on 64 bit posix */
-    return PyOS_strtoul(str, endptr, base);
-#endif
-}
-
 /*
  *****************************************************************************
  **                         GETITEM AND SETITEM                             **
@@ -1796,8 +1770,8 @@ BOOL_scan(FILE *fp, npy_bool *ip, void *NPY_UNUSED(ignore),
  * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *         npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *         npy_datetime, npy_timedelta#
- * #func = (PyOS_strtol, PyOS_strtoul)*4, npy_strtoll, npy_strtoull,
- *         npy_strtoll*2#
+ * #func = (PyOS_strtol, PyOS_strtoul)*4, NumPyOS_strtoll, NumPyOS_strtoull,
+ *         NumPyOS_strtoll*2#
  * #btype = (npy_long, npy_ulong)*4, npy_longlong, npy_ulonglong,
  *          npy_longlong*2#
  */