ENH: Rewrite of array-coercion to support new dtypes #16200
Merged

Commits (57, changes from all commits)
b5dc1ed
WIP: Rework array coercion
seberg f5df08c
WIP: Further steps toward new coercion, start with making discovery p…
seberg 63bb417
Close to the first working setup
seberg 28c8b39
WIP: Some cleanup/changes?
seberg b204379
WIP: Make things work by using AdaptFlexibleDType (without obj) for now
seberg 5bd5847
Use new mechanism for np.asarray, and hopefully get void right, har
seberg 9e03d8d
First version mainly working
seberg efbe979
Further fixes, make max-dims reached more logical and enter obj arrays
seberg a552d2a
TST: Small test adjustments
seberg 2cfcf56
WIP: Seems pretty good, but needs cleaning up...
seberg 302813c
Smaller cleanups, better errors mainly?
seberg cec10fb
Fixup for scalar kind, and ensure OBJECT is special for assignment
seberg 1eaca02
Use PyArray_Pack in a few other places
seberg 3f5e4a2
Some micro-optimization tries (should probably be largely reverted)
seberg 1896813
Optimize away filling all dims with -1 at the start
seberg c7e7dd9
Other smallre changes, some optimization related.
seberg 60fa9b9
Small bug fixup and rebase on master
seberg e20dded
Fixups/comments for compiler warnings
seberg 4e0029d
update some comments, remove outdated old code path
seberg ad31a32
Small fixups/comment changes
seberg ca09045
BUG: Make static declaration safe (may be an issue on msvc mostly)
seberg 9ceeb97
Replace AdaptFlexibleDType with object and delete some datetime thing…
seberg 4a04e89
Add somewhat disgusting hacks for datetime support
seberg 08a4687
MAINT: Remove use of PyArray_GetParamsFromObject from PyArray_CopyObject
seberg a1ee25a
MAINT: Delete legacy dtype discovery
seberg 1405a30
Allow returning NULL for dtype when there is no object to discover from
seberg a7c5a59
BUG: Smaller fixes in object-array parametric discovery
seberg 75a728f
BUG: remove incorrect assert
seberg b09217c
BUG: When filling an array from the cache, store original for objects
seberg b28b2a1
BUG: Fix discovery for empty lists
seberg 7a343c6
BUG: Add missing DECREF
seberg 7d1489a
Fixups: Some smaller fixups and comments to ensure we have tests
seberg 946edc8
BUG: Add missing error check
seberg 002fa2f
BUG: Reorder dimension fix/check and promotion
seberg 29f1515
BUG: Add missing cache free...
seberg ba0a6d0
BUG: Fixup for PyArray_Pack
seberg b3544a1
BUG: Fix use after free in PyArray_CopyObject
seberg bcd3320
BUG: Need to set the base field apparently and swap promotion
seberg 454d785
MAINT: Use flag to indicate that dtype discovery is not necessary
seberg 68cd028
MAINT: Fixups (some based on new tests), almost finished
seberg 1035c3f
MAINT: Use macros/functions instead of direct slot access
seberg e30cbfb
MAINT: Delete PyArray_AssignFromSequence
seberg 56c63d8
MAINT: Undo change of how 0-D array-likes are handled as scalars
seberg 605588c
MAINT: Undo some header changes...
seberg 4eb9cfd
MAINT: Try to clean up headers a bit
seberg 4ac514f
TST: Add test for too-deep non-object deprecation
seberg 8a7f0e6
MAINt: Add assert for an unreachable exception path
seberg 7012ef7
TST: Adapt coercion-tests to the new situation
seberg 3ccf696
DOC: Add release notes for array-coercion changes
seberg 6ff4d48
MAINT: Remove weakref from mapping (for now) and rename
seberg e3f091e
Update numpy/core/src/multiarray/array_coercion.c
seberg 4fe0ad2
MAINT: Put a hack in place to allow datetime64 -> string assignment w…
seberg d39953c
Update doc/release/upcoming_changes/16200.compatibility.rst
seberg b36750b
TST: datetime64 test_scalar_coercion does not fail anymore
seberg 0f78129
Update doc/release/upcoming_changes/16200.compatibility.rst
mattip aee13e0
DOC,STY: Use bitshift intsead of powers of two and fix comments
seberg 22ee971
TST: Add test for astype to stringlength tests
seberg
doc/release/upcoming_changes/16200.compatibility.rst (new file)

@@ -0,0 +1,64 @@
NumPy Scalars are cast when assigned to arrays
----------------------------------------------

When creating or assigning to arrays, in all relevant cases NumPy
scalars will now be cast identically to NumPy arrays. In particular
this changes the behaviour in some cases which previously raised an
error::

    np.array([np.float64(np.nan)], dtype=np.int64)

will succeed at this time (this may change) and return an undefined result
(usually the smallest possible integer). This also affects assignments::

    arr[0] = np.float64(np.nan)

Note that this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
and that the behaviour is unchanged for ``np.nan`` itself, which is a Python
float.
To avoid backward compatibility issues, assignment from a ``datetime64``
scalar to strings of too short length remains supported at this time.
This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``
succeeds now, when it failed before. In the long term this may be
deprecated, or the unsafe cast may be allowed generally to make assignment
of arrays and scalars behave consistently.

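As a short, hedged sketch of the above (not documented API; the integer
results are undefined and may differ by platform)::

    import numpy as np

    # NumPy scalars now take the same (unsafe) cast path as arrays,
    # so neither line raises; the integer produced is undefined.
    arr = np.array([np.float64(np.nan)], dtype=np.int64)
    arr[0] = np.float64(np.nan)

    # Unchanged: np.nan is a Python float, so this still raises ValueError.
    # np.array([np.nan], dtype=np.int64)

    # datetime64 scalar -> too-short string now succeeds (it failed before):
    np.asarray(np.datetime64("2020-10-10"), dtype="S5")
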
Array coercion changes when strings and other types are mixed
--------------------------------------------------------------

When strings and other types are mixed, such as::

    np.array(["string", np.float64(3.)], dtype="S")

the results will change, which may lead to string dtypes with longer strings
in some cases. In particular, if ``dtype="S"`` is not provided, any numerical
value will lead to a string result long enough to hold all possible numerical
values (e.g. "S32" for floats). Note that you should always provide
``dtype="S"`` when converting non-strings to strings.

If ``dtype="S"`` is provided, the results will be largely identical to before,
but NumPy scalars (not a Python float like ``1.0``) will still enforce
a uniform string length::

    np.array([np.float64(3.)], dtype="S")  # gives "S32"
    np.array([3.0], dtype="S")             # gives "S3"

while previously the first version gave the same result as the second.

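For concreteness, a minimal check of the widths quoted above (the "S32" and
"S3" values are taken directly from this note)::

    import numpy as np

    # A NumPy float64 scalar enforces the maximum width for its type:
    np.array([np.float64(3.)], dtype="S").dtype   # dtype('S32')
    # A Python float only needs the length of its string form, "3.0":
    np.array([3.0], dtype="S").dtype              # dtype('S3')
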
Array coercion restructure
--------------------------

Array coercion has been restructured. In general, this should not affect
users. In extremely rare corner cases where array-likes are nested::

    np.array([array_like1])

things will now be more consistent with::

    np.array([np.array(array_like1)])

which could potentially change output subtly for badly defined array-likes.
We are not aware of any such case where the results were not clearly
incorrect previously.
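
As an illustration, ``MyArrayLike`` below is a hypothetical, well-behaved
array-like; for such objects nothing changes, and both spellings now go
through the same path::

    import numpy as np

    class MyArrayLike:
        # Hypothetical array-like used only for illustration.
        def __array__(self, dtype=None):
            return np.array([1.0, 2.0], dtype=dtype)

    a = MyArrayLike()
    # Nested array-likes are handled consistently, so both lines
    # produce the same shape (1, 2) float64 array:
    np.array([a])
    np.array([np.array(a)])
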
numpy/core/src/multiarray/abstractdtypes.c (new file)

@@ -0,0 +1,168 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "structmember.h"

#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#include "numpy/ndarraytypes.h"
#include "numpy/arrayobject.h"

#include "abstractdtypes.h"
#include "array_coercion.h"
#include "common.h"


static PyArray_Descr *
discover_descriptor_from_pyint(
        PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj)
{
    assert(PyLong_Check(obj));
    /*
     * We check whether long is good enough. If not, check longlong and
     * unsigned longlong before falling back to `object`.
     */
    long long value = PyLong_AsLongLong(obj);
    if (error_converting(value)) {
        PyErr_Clear();
    }
    else {
        if (NPY_MIN_LONG <= value && value <= NPY_MAX_LONG) {
            return PyArray_DescrFromType(NPY_LONG);
        }
        return PyArray_DescrFromType(NPY_LONGLONG);
    }

    unsigned long long uvalue = PyLong_AsUnsignedLongLong(obj);
    if (uvalue == (unsigned long long)-1 && PyErr_Occurred()) {
        PyErr_Clear();
    }
    else {
        return PyArray_DescrFromType(NPY_ULONGLONG);
    }

    return PyArray_DescrFromType(NPY_OBJECT);
}

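A sketch of how this discovery chain surfaces at the Python level (assuming
a platform where the C `long` is 64 bits; Windows, with a 32-bit `long`,
reports differently for the first line):

    import numpy as np

    np.array(1).dtype      # NPY_LONG, e.g. int64 on most 64-bit Unix
    np.array(2**63).dtype  # too large for int64 -> uint64 (NPY_ULONGLONG)
    np.array(2**64).dtype  # too large for uint64 -> object fallback
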
static PyArray_Descr *
discover_descriptor_from_pyfloat(
        PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj)
{
    assert(PyFloat_CheckExact(obj));
    return PyArray_DescrFromType(NPY_DOUBLE);
}


static PyArray_Descr *
discover_descriptor_from_pycomplex(
        PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj)
{
    assert(PyComplex_CheckExact(obj));
    return PyArray_DescrFromType(NPY_COMPLEX128);
}

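In contrast to ints, the float and complex discovery above never inspects
the value; a one-line check at the Python level:

    import numpy as np

    np.array(3.0).dtype   # always dtype('float64')    (NPY_DOUBLE)
    np.array(1.0j).dtype  # always dtype('complex128') (NPY_COMPLEX128)
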
NPY_NO_EXPORT int
initialize_and_map_pytypes_to_dtypes()
{
    PyArrayAbstractObjDTypeMeta_Type.tp_base = &PyArrayDTypeMeta_Type;
    if (PyType_Ready(&PyArrayAbstractObjDTypeMeta_Type) < 0) {
        return -1;
    }
    ((PyTypeObject *)&PyArray_PyIntAbstractDType)->tp_base = &PyArrayDTypeMeta_Type;
    PyArray_PyIntAbstractDType.scalar_type = &PyLong_Type;
    if (PyType_Ready((PyTypeObject *)&PyArray_PyIntAbstractDType) < 0) {
        return -1;
    }
    ((PyTypeObject *)&PyArray_PyFloatAbstractDType)->tp_base = &PyArrayDTypeMeta_Type;
    PyArray_PyFloatAbstractDType.scalar_type = &PyFloat_Type;
    if (PyType_Ready((PyTypeObject *)&PyArray_PyFloatAbstractDType) < 0) {
        return -1;
    }
    ((PyTypeObject *)&PyArray_PyComplexAbstractDType)->tp_base = &PyArrayDTypeMeta_Type;
    PyArray_PyComplexAbstractDType.scalar_type = &PyComplex_Type;
    if (PyType_Ready((PyTypeObject *)&PyArray_PyComplexAbstractDType) < 0) {
        return -1;
    }

    /* Register the new DTypes for discovery */
    if (_PyArray_MapPyTypeToDType(
            &PyArray_PyIntAbstractDType, &PyLong_Type, NPY_FALSE) < 0) {
        return -1;
    }
    if (_PyArray_MapPyTypeToDType(
            &PyArray_PyFloatAbstractDType, &PyFloat_Type, NPY_FALSE) < 0) {
        return -1;
    }
    if (_PyArray_MapPyTypeToDType(
            &PyArray_PyComplexAbstractDType, &PyComplex_Type, NPY_FALSE) < 0) {
        return -1;
    }

    /*
     * Map str, bytes, and bool, for which we do not need abstract versions,
     * to the NumPy DTypes. This is done here using the `is_known_scalar_type`
     * function.
     * TODO: The `is_known_scalar_type` function is considered preliminary,
     *       the same could be achieved e.g. with additional abstract DTypes.
     */
    PyArray_DTypeMeta *dtype;
    dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_UNICODE));
    if (_PyArray_MapPyTypeToDType(dtype, &PyUnicode_Type, NPY_FALSE) < 0) {
        return -1;
    }

    dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_STRING));
    if (_PyArray_MapPyTypeToDType(dtype, &PyBytes_Type, NPY_FALSE) < 0) {
        return -1;
    }
    dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_BOOL));
    if (_PyArray_MapPyTypeToDType(dtype, &PyBool_Type, NPY_FALSE) < 0) {
        return -1;
    }

    return 0;
}

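The str/bytes/bool mappings registered above pair each Python type with a
concrete NumPy DType; for the flexible string dtypes, the item size is then
discovered per value. A quick sketch:

    import numpy as np

    np.array(True).dtype     # dtype('bool')
    np.array("abc").dtype    # dtype('<U3')  (size discovered from the value)
    np.array(b"abcd").dtype  # dtype('S4')
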
/* Note: This is currently largely not used, but will be required eventually. */
NPY_NO_EXPORT PyTypeObject PyArrayAbstractObjDTypeMeta_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "numpy._AbstractObjDTypeMeta",
    .tp_basicsize = sizeof(PyArray_DTypeMeta),
    .tp_flags = Py_TPFLAGS_DEFAULT,
    .tp_doc = "Helper MetaClass for value based casting AbstractDTypes.",
};

NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyIntAbstractDType = {{{
        PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0)
        .tp_basicsize = sizeof(PyArray_DTypeMeta),
        .tp_name = "numpy._PyIntBaseAbstractDType",
    },},
    .abstract = 1,
    .discover_descr_from_pyobject = discover_descriptor_from_pyint,
    .kind = 'i',
};

NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyFloatAbstractDType = {{{
        PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0)
        .tp_basicsize = sizeof(PyArray_DTypeMeta),
        .tp_name = "numpy._PyFloatBaseAbstractDType",
    },},
    .abstract = 1,
    .discover_descr_from_pyobject = discover_descriptor_from_pyfloat,
    .kind = 'f',
};

NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyComplexAbstractDType = {{{
        PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0)
        .tp_basicsize = sizeof(PyArray_DTypeMeta),
        .tp_name = "numpy._PyComplexBaseAbstractDType",
    },},
    .abstract = 1,
    .discover_descr_from_pyobject = discover_descriptor_from_pycomplex,
    .kind = 'c',
};

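These abstract DTypes are the hook for value-based promotion of Python
scalars: at the time of this PR (pre-NEP 50), a Python int participates in
promotion by value, while a dtype participates by kind. A sketch:

    import numpy as np

    np.result_type(np.int8, 1)         # dtype('int8'): the value 1 fits
    np.result_type(np.int8, np.int64)  # dtype('int64'): dtypes promote by kind
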
numpy/core/src/multiarray/abstractdtypes.h (new file)

@@ -0,0 +1,19 @@
#ifndef _NPY_ABSTRACTDTYPES_H
#define _NPY_ABSTRACTDTYPES_H

#include "dtypemeta.h"

/*
 * These are mainly needed for value based promotion in ufuncs. It
 * may be necessary to make them (partially) public, to allow user-defined
 * dtypes to perform value based casting.
 */
NPY_NO_EXPORT extern PyTypeObject PyArrayAbstractObjDTypeMeta_Type;
NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyIntAbstractDType;
NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyFloatAbstractDType;
NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyComplexAbstractDType;

NPY_NO_EXPORT int
initialize_and_map_pytypes_to_dtypes();

#endif  /* _NPY_ABSTRACTDTYPES_H */
How hard would it be to make any conversion of `nan` or `inf` to any int raise? I understand there might be benchmark concerns, which could be checked with an appropriate benchmark.

Well, the question is what we want: do we want NumPy scalars (which are almost like 0-D arrays) to behave like arrays, or do we want them to behave specially? If we do not want special casing, my code is probably nicer.

That would mean adding a range check to the actual float casting code. I did not check how much speed that costs; for large arrays probably none (memory speeds should be much slower).

I was thinking that in all cases (arrays, 0-d arrays, and scalars) we do not want to convert `nan` and `inf` to ints by default. If someone actually wants this (for arrays and 0-d arrays), they could do it via view -> copy.

Well, I don't much like 0-D arrays being special. And scalars and 0-D arrays are almost the same (because 0-D arrays tend to be converted to scalars). But yes, you could define element setting as being special...

In general that does not work, though for most of our dtypes it does: `__int__`, `__float__`, `__complex__`, `__str__` are fine, and that works mostly. It currently creates bugs, because `complex128` thinks it can reasonably use `__complex__`, which it must not for a `float128`. It also means that 0-D objects tend to behave specially, because instead of aligning how the two work, we define them as two different operations. And that can be confusing (`str(array-like)` is well defined after all; we currently solve that by refusing to call `str(obj)` for any sequence). Also, within `float128`/`complex128` we (incompletely) hack around these by checking for 0-D arrays.

So yes, I can make single element assignment special (in most cases; I have my doubts that real consistency can be achieved). But we can only do that for some of our own dtypes, where Python has well defined protocols.

So either I need to flag these dtypes to behave differently (let them always just use `__float__`, `__int__`, etc., except for times and float128/complex128), or do some other dance, which feels like it makes dtype creation unnecessarily complicated. `DType.setitem` would have to call/signal back into NumPy in some cases.

Just for the sake of it: as for the current behaviour, I cannot say it is illogical, but I wonder whether it is the best solution to cement item assignment as being special, and whether it is really easier on users.

But again, yes, I can put these behaviours back in place; they just cannot generalize to arbitrary user-defined dtypes. And maybe we have to discuss it on Wednesday or the mailing list...

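To make the two paths in this discussion concrete, a sketch of the behaviour at the time of this thread (the Python protocol refuses NaN, while the unsafe casting path does not):

    import numpy as np

    # Protocol path: float.__int__ raises for NaN.
    try:
        int(np.float64(np.nan))
    except ValueError:
        pass  # "cannot convert float NaN to integer"

    # Casting path: an unsafe cast silently yields an undefined integer.
    np.array([np.nan]).astype(np.int64)
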
In my opinion, all of the examples of assignment should fail. Any reasonable user-defined dtype that converts from float to int should also fail when the conversion makes no sense. Hopefully we will not get the blame if that happens.

I do not disagree, but in that case casting should probably fail in general (or at least warn), and not just in the 0-D case, right?

So it is IMO not an argument for adding an extra path for `setitem`, except possibly for performance reasons. The question is whether we want to special-case these 0-D item assignments because the behaviour is better, or just because it is somewhat faster than doing proper casting?

From the sidelines: agreed that casting should fail in those cases (or, more generally, that setting should behave identically to casting the value to the dtype of the array being set).

So for me right now, this feels like adding a fast path, since I don't like the difference between the two paths (casting vs. item assignment). But of course we can argue that if the long-term goal is to align casting with current item assignment behaviour, we can add those fast paths right now. (It must be a fast/additional path, since Python protocols are not a generic solution.)

I don't mind adding the fast path now; I feel it is one more thing to think about, but it does not need to be exposed to user-defined dtypes right away. They are unlikely to make use of it, except maybe a rational dtype using "as integer ratio". (I am not 100% sure there cannot be reasons to diverge from the Python scalar protocols.)

I am unsure item assignment + casting is actually worth fast paths, but maybe that does not matter. They currently lead to weird things (e.g. a DNA dtype, which is much like a string and a sequence, could never be assigned to a NumPy string dtype!). However, those issues are resolvable now in either case.

Is there an open issue or PR to make any conversion from `inf` or `nan` to `int` raise an error or at least warn?

#16624