diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst new file mode 100644 index 000000000000..d0fd51265f6e --- /dev/null +++ b/doc/release/upcoming_changes/16200.compatibility.rst @@ -0,0 +1,64 @@ +NumPy Scalars are cast when assigned to arrays +---------------------------------------------- + +When creating or assigning to arrays, in all relevant cases NumPy +scalars will now be cast identically to NumPy arrays. In particular +this changes the behaviour in some cases which previously raised an +error:: + + np.array([np.float64(np.nan)], dtype=np.int64) + +will succeed at this time (this may change) and return an undefined result +(usually the smallest possible integer). This also affects assignments:: + + arr[0] = np.float64(np.nan) + +Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)`` +and that the behaviour is unchanged for ``np.nan`` itself which is a Python +float. +To avoid backward compatibility issues, at this time assignment from +``datetime64`` scalar to strings of too short length remains supported. +This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")`` +succeeds now, when it failed before. In the long term this may be +deprecated or the unsafe cast may be allowed generally to make assignment +of arrays and scalars behave consistently. + + +Array coercion changes when Strings and other types are mixed +------------------------------------------------------------- + +When strings and other types are mixed, such as:: + + np.array(["string", np.float64(3.)], dtype="S") + +The results will change, which may lead to string dtypes with longer strings +in some cases. In particular, if ``dtype="S"`` is not provided any numerical +value will lead to a string result long enough to hold all possible numerical +values. (e.g. "S32" for floats). Note that you should always provide +``dtype="S"`` when converting non-strings to strings. 
+ +If ``dtype="S"`` is provided the results will be largely identical to before, +but NumPy scalars (not a Python float like ``1.0``), will still enforce +a uniform string length:: + + np.array([np.float64(3.)], dtype="S") # gives "S32" + np.array([3.0], dtype="S") # gives "S3" + +while previously the first version gave the same result as the second. + + +Array coercion restructure +-------------------------- + +Array coercion has been restructured. In general, this should not affect +users. In extremely rare corner cases where array-likes are nested:: + + np.array([array_like1]) + +things will now be more consistent with:: + + np.array([np.array(array_like1)]) + +which could potentially change output subtly for badly defined array-likes. +We are not aware of any such case where the results were not clearly +incorrect previously. diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index d88772bdcb96..856db041039e 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -21,9 +21,11 @@ # The files under src/ that are scanned for API functions API_FILES = [join('multiarray', 'alloc.c'), + join('multiarray', 'abstractdtypes.c'), join('multiarray', 'arrayfunction_override.c'), join('multiarray', 'array_assign_array.c'), join('multiarray', 'array_assign_scalar.c'), + join('multiarray', 'array_coercion.c'), join('multiarray', 'arrayobject.c'), join('multiarray', 'arraytypes.c.src'), join('multiarray', 'buffer.c'), diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 275bb336bb20..bbcf468c1f35 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -1547,11 +1547,15 @@ PyArray_GETITEM(const PyArrayObject *arr, const char *itemptr) (void *)itemptr, (PyArrayObject *)arr); } +/* + * SETITEM should only be used if it is known that the value is a scalar + * and of a type understood by the arrays dtype. 
+ * Use `PyArray_Pack` if the value may be of a different dtype. + */ static NPY_INLINE int PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v) { - return ((PyArrayObject_fields *)arr)->descr->f->setitem( - v, itemptr, arr); + return ((PyArrayObject_fields *)arr)->descr->f->setitem(v, itemptr, arr); } #else @@ -1820,10 +1824,25 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size, /* TODO: Make this definition public in the API, as soon as its settled */ NPY_NO_EXPORT extern PyTypeObject PyArrayDTypeMeta_Type; + typedef struct PyArray_DTypeMeta_tag PyArray_DTypeMeta; + + typedef PyArray_Descr *(discover_descr_from_pyobject_function)( + PyArray_DTypeMeta *cls, PyObject *obj); + + /* + * Before making this public, we should decide whether it should pass + * the type, or allow looking at the object. A possible use-case: + * `np.array(np.array([0]), dtype=np.ndarray)` + * Could consider arrays that are not `dtype=ndarray` "scalars". + */ + typedef int (is_known_scalar_type_function)( + PyArray_DTypeMeta *cls, PyTypeObject *obj); + + typedef PyArray_Descr *(default_descr_function)(PyArray_DTypeMeta *cls); + /* * While NumPy DTypes would not need to be heap types the plan is to - * make DTypes available in Python at which point we will probably want - * them to be. + * make DTypes available in Python at which point they will be heap types. * Since we also wish to add fields to the DType class, this looks like * a typical instance definition, but with PyHeapTypeObject instead of * only the PyObject_HEAD. @@ -1831,7 +1850,7 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size, * it is a fairly complex construct which may be better to allow * refactoring of. */ - typedef struct _PyArray_DTypeMeta { + struct PyArray_DTypeMeta_tag { PyHeapTypeObject super; /* @@ -1870,9 +1889,12 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size, * NOTE: We could make a copy to detect changes to `f`. 
*/ PyArray_ArrFuncs *f; - } PyArray_DTypeMeta; - #define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr)) + /* DType methods, these could be moved into its own struct */ + discover_descr_from_pyobject_function *discover_descr_from_pyobject; + is_known_scalar_type_function *is_known_scalar_type; + default_descr_function *default_descr; + }; #endif /* NPY_INTERNAL_BUILD */ diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 5498601794b8..8e00e43920ad 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -774,9 +774,11 @@ def get_mathlib_info(*args): ####################################################################### multiarray_deps = [ + join('src', 'multiarray', 'abstractdtypes.h'), join('src', 'multiarray', 'arrayobject.h'), join('src', 'multiarray', 'arraytypes.h'), join('src', 'multiarray', 'arrayfunction_override.h'), + join('src', 'multiarray', 'array_coercion.h'), join('src', 'multiarray', 'npy_buffer.h'), join('src', 'multiarray', 'calculation.h'), join('src', 'multiarray', 'common.h'), @@ -825,9 +827,11 @@ def get_mathlib_info(*args): ] + npysort_sources + npymath_sources multiarray_src = [ + join('src', 'multiarray', 'abstractdtypes.c'), join('src', 'multiarray', 'alloc.c'), join('src', 'multiarray', 'arrayobject.c'), join('src', 'multiarray', 'arraytypes.c.src'), + join('src', 'multiarray', 'array_coercion.c'), join('src', 'multiarray', 'array_assign_scalar.c'), join('src', 'multiarray', 'array_assign_array.c'), join('src', 'multiarray', 'arrayfunction_override.c'), diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index 20f7a132ce91..4e7ade5edacc 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -38,6 +38,10 @@ create_datetime_dtype_with_unit(int type_num, NPY_DATETIMEUNIT unit); NPY_NO_EXPORT PyArray_DatetimeMetaData * get_datetime_metadata_from_dtype(PyArray_Descr *dtype); +NPY_NO_EXPORT int +find_string_array_datetime64_type(PyArrayObject 
*arr, + PyArray_DatetimeMetaData *meta); + /* * Both type1 and type2 must be either NPY_DATETIME or NPY_TIMEDELTA. * Applies the type promotion rules between the two types, returning diff --git a/numpy/core/src/multiarray/abstractdtypes.c b/numpy/core/src/multiarray/abstractdtypes.c new file mode 100644 index 000000000000..02c0eac53d7b --- /dev/null +++ b/numpy/core/src/multiarray/abstractdtypes.c @@ -0,0 +1,168 @@ +#define PY_SSIZE_T_CLEAN +#include +#include "structmember.h" + + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/ndarraytypes.h" +#include "numpy/arrayobject.h" + +#include "abstractdtypes.h" +#include "array_coercion.h" +#include "common.h" + + +static PyArray_Descr * +discover_descriptor_from_pyint( + PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj) +{ + assert(PyLong_Check(obj)); + /* + * We check whether long is good enough. If not, check longlong and + * unsigned long before falling back to `object`. + */ + long long value = PyLong_AsLongLong(obj); + if (error_converting(value)) { + PyErr_Clear(); + } + else { + if (NPY_MIN_LONG <= value && value <= NPY_MAX_LONG) { + return PyArray_DescrFromType(NPY_LONG); + } + return PyArray_DescrFromType(NPY_LONGLONG); + } + + unsigned long long uvalue = PyLong_AsUnsignedLongLong(obj); + if (uvalue == (unsigned long long)-1 && PyErr_Occurred()){ + PyErr_Clear(); + } + else { + return PyArray_DescrFromType(NPY_ULONGLONG); + } + + return PyArray_DescrFromType(NPY_OBJECT); +} + + +static PyArray_Descr* +discover_descriptor_from_pyfloat( + PyArray_DTypeMeta* NPY_UNUSED(cls), PyObject *obj) +{ + assert(PyFloat_CheckExact(obj)); + return PyArray_DescrFromType(NPY_DOUBLE); +} + + +static PyArray_Descr* +discover_descriptor_from_pycomplex( + PyArray_DTypeMeta* NPY_UNUSED(cls), PyObject *obj) +{ + assert(PyComplex_CheckExact(obj)); + return PyArray_DescrFromType(NPY_COMPLEX128); +} + + +NPY_NO_EXPORT int +initialize_and_map_pytypes_to_dtypes() +{ + 
PyArrayAbstractObjDTypeMeta_Type.tp_base = &PyArrayDTypeMeta_Type; + if (PyType_Ready(&PyArrayAbstractObjDTypeMeta_Type) < 0) { + return -1; + } + ((PyTypeObject *)&PyArray_PyIntAbstractDType)->tp_base = &PyArrayDTypeMeta_Type; + PyArray_PyIntAbstractDType.scalar_type = &PyLong_Type; + if (PyType_Ready((PyTypeObject *)&PyArray_PyIntAbstractDType) < 0) { + return -1; + } + ((PyTypeObject *)&PyArray_PyFloatAbstractDType)->tp_base = &PyArrayDTypeMeta_Type; + PyArray_PyFloatAbstractDType.scalar_type = &PyFloat_Type; + if (PyType_Ready((PyTypeObject *)&PyArray_PyFloatAbstractDType) < 0) { + return -1; + } + ((PyTypeObject *)&PyArray_PyComplexAbstractDType)->tp_base = &PyArrayDTypeMeta_Type; + PyArray_PyComplexAbstractDType.scalar_type = &PyComplex_Type; + if (PyType_Ready((PyTypeObject *)&PyArray_PyComplexAbstractDType) < 0) { + return -1; + } + + /* Register the new DTypes for discovery */ + if (_PyArray_MapPyTypeToDType( + &PyArray_PyIntAbstractDType, &PyLong_Type, NPY_FALSE) < 0) { + return -1; + } + if (_PyArray_MapPyTypeToDType( + &PyArray_PyFloatAbstractDType, &PyFloat_Type, NPY_FALSE) < 0) { + return -1; + } + if (_PyArray_MapPyTypeToDType( + &PyArray_PyComplexAbstractDType, &PyComplex_Type, NPY_FALSE) < 0) { + return -1; + } + + /* + * Map str, bytes, and bool, for which we do not need abstract versions + * to the NumPy DTypes. This is done here using the `is_known_scalar_type` + * function. + * TODO: The `is_known_scalar_type` function is considered preliminary, + * the same could be achieved e.g. with additional abstract DTypes. 
+ */ + PyArray_DTypeMeta *dtype; + dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_UNICODE)); + if (_PyArray_MapPyTypeToDType(dtype, &PyUnicode_Type, NPY_FALSE) < 0) { + return -1; + } + + dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_STRING)); + if (_PyArray_MapPyTypeToDType(dtype, &PyBytes_Type, NPY_FALSE) < 0) { + return -1; + } + dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_BOOL)); + if (_PyArray_MapPyTypeToDType(dtype, &PyBool_Type, NPY_FALSE) < 0) { + return -1; + } + + return 0; +} + + + +/* Note: This is currently largely not used, but will be required eventually. */ +NPY_NO_EXPORT PyTypeObject PyArrayAbstractObjDTypeMeta_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "numpy._AbstractObjDTypeMeta", + .tp_basicsize = sizeof(PyArray_DTypeMeta), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = "Helper MetaClass for value based casting AbstractDTypes.", +}; + +NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyIntAbstractDType = {{{ + PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0) + .tp_basicsize = sizeof(PyArray_DTypeMeta), + .tp_name = "numpy._PyIntBaseAbstractDType", + },}, + .abstract = 1, + .discover_descr_from_pyobject = discover_descriptor_from_pyint, + .kind = 'i', +}; + +NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyFloatAbstractDType = {{{ + PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0) + .tp_basicsize = sizeof(PyArray_DTypeMeta), + .tp_name = "numpy._PyFloatBaseAbstractDType", + },}, + .abstract = 1, + .discover_descr_from_pyobject = discover_descriptor_from_pyfloat, + .kind = 'f', +}; + +NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyComplexAbstractDType = {{{ + PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0) + .tp_basicsize = sizeof(PyArray_DTypeMeta), + .tp_name = "numpy._PyComplexBaseAbstractDType", + },}, + .abstract = 1, + .discover_descr_from_pyobject = discover_descriptor_from_pycomplex, + .kind = 'c', +}; + diff --git a/numpy/core/src/multiarray/abstractdtypes.h b/numpy/core/src/multiarray/abstractdtypes.h new file mode 
100644 index 000000000000..50239acf2b3e --- /dev/null +++ b/numpy/core/src/multiarray/abstractdtypes.h @@ -0,0 +1,19 @@ +#ifndef _NPY_ABSTRACTDTYPES_H +#define _NPY_ABSTRACTDTYPES_H + +#include "dtypemeta.h" + +/* + * These are mainly needed for value based promotion in ufuncs. It + * may be necessary to make them (partially) public, to allow user-defined + * dtypes to perform value based casting. + */ +NPY_NO_EXPORT extern PyTypeObject PyArrayAbstractObjDTypeMeta_Type; +NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyIntAbstractDType; +NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyFloatAbstractDType; +NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyComplexAbstractDType; + +NPY_NO_EXPORT int +initialize_and_map_pytypes_to_dtypes(); + +#endif /*_NPY_ABSTRACTDTYPES_H */ diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c new file mode 100644 index 000000000000..8fe996ed2078 --- /dev/null +++ b/numpy/core/src/multiarray/array_coercion.c @@ -0,0 +1,1430 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _UMATHMODULE +#define _MULTIARRAYMODULE + +#include "Python.h" + +#include "numpy/npy_3kcompat.h" + +#include "lowlevel_strided_loops.h" +#include "numpy/arrayobject.h" + +#include "descriptor.h" +#include "convert_datatype.h" +#include "dtypemeta.h" + +#include "array_coercion.h" +#include "ctors.h" +#include "common.h" +#include "_datetime.h" +#include "npy_import.h" + + +/* + * This file defines helpers for some of the ctors.c functions which + * create an array from Python sequences and types. + * When creating an array with ``np.array(...)`` we have to do two main things: + * + * 1. Find the exact shape of the resulting array + * 2. Find the correct dtype of the resulting array. + * + * In most cases these two things are can be done in a single processing step. + * There are in principle three different calls that should be distinguished: + * + * 1. The user calls ``np.array(..., dtype=np.dtype(" DType. 
+ * TODO: This mapping means that it is currently impossible to delete a + * pair of pytype <-> DType. To resolve this, it is necessary to + * weakly reference the pytype. As long as the pytype is alive, we + * want to be able to use `np.array([pytype()])`. + * It should be possible to retrofit this without too much trouble + * (all type objects support weak references). + */ +PyObject *_global_pytype_to_type_dict = NULL; + + +/* Enum to track or signal some things during dtype and shape discovery */ +enum _dtype_discovery_flags { + FOUND_RAGGED_ARRAY = 1 << 0, + GAVE_SUBCLASS_WARNING = 1 << 1, + PROMOTION_FAILED = 1 << 2, + DISCOVER_STRINGS_AS_SEQUENCES = 1 << 3, + DISCOVER_TUPLES_AS_ELEMENTS = 1 << 4, + MAX_DIMS_WAS_REACHED = 1 << 5, + DESCRIPTOR_WAS_SET = 1 << 6, +}; + + +/** + * Adds known sequence types to the global type dictionary, note that when + * a DType is passed in, this lookup may be ignored. + * + * @return -1 on error 0 on success + */ +static int +_prime_global_pytype_to_type_dict() +{ + int res; + + /* Add the basic Python sequence types */ + res = PyDict_SetItem(_global_pytype_to_type_dict, + (PyObject *)&PyList_Type, Py_None); + if (res < 0) { + return -1; + } + res = PyDict_SetItem(_global_pytype_to_type_dict, + (PyObject *)&PyTuple_Type, Py_None); + if (res < 0) { + return -1; + } + /* NumPy Arrays are not handled as scalars */ + res = PyDict_SetItem(_global_pytype_to_type_dict, + (PyObject *)&PyArray_Type, Py_None); + if (res < 0) { + return -1; + } + return 0; +} + + +/** + * Add a new mapping from a python type to the DType class. + * + * This assumes that the DType class is guaranteed to hold on the + * python type (this assumption is guaranteed). + * This functionality supercedes ``_typenum_fromtypeobj``. + * + * @param DType DType to map the python type to + * @param pytype Python type to map from + * @param userdef Whether or not it is user defined. We ensure that user + * defined scalars subclass from our scalars (for now). 
+ */ +NPY_NO_EXPORT int +_PyArray_MapPyTypeToDType( + PyArray_DTypeMeta *DType, PyTypeObject *pytype, npy_bool userdef) +{ + PyObject *Dtype_obj = (PyObject *)DType; + + if (userdef) { + /* + * It seems we did not strictly enforce this in the legacy dtype + * API, but assume that it is always true. Further, this could be + * relaxed in the future. In particular we should have a new + * superclass of ``np.generic`` in order to note enforce the array + * scalar behaviour. + */ + if (!PyObject_IsSubclass((PyObject *)pytype, (PyObject *)&PyGenericArrType_Type)) { + PyErr_Format(PyExc_RuntimeError, + "currently it is only possible to register a DType " + "for scalars deriving from `np.generic`, got '%S'.", + (PyObject *)pytype); + return -1; + } + } + + /* Create the global dictionary if it does not exist */ + if (NPY_UNLIKELY(_global_pytype_to_type_dict == NULL)) { + _global_pytype_to_type_dict = PyDict_New(); + if (_global_pytype_to_type_dict == NULL) { + return -1; + } + if (_prime_global_pytype_to_type_dict() < 0) { + return -1; + } + } + + int res = PyDict_Contains(_global_pytype_to_type_dict, (PyObject *)pytype); + if (res < 0) { + return -1; + } + else if (res) { + PyErr_SetString(PyExc_RuntimeError, + "Can only map one python type to DType."); + return -1; + } + + return PyDict_SetItem(_global_pytype_to_type_dict, + (PyObject *)pytype, Dtype_obj); +} + + +/** + * Lookup the DType for a registered known python scalar type. + * + * @param pytype Python Type to look up + * @return DType, None if it a known non-scalar, or NULL if an unknown object. 
+ */ +static NPY_INLINE PyArray_DTypeMeta * +discover_dtype_from_pytype(PyTypeObject *pytype) +{ + PyObject *DType; + + if (pytype == &PyArray_Type) { + Py_INCREF(Py_None); + return (PyArray_DTypeMeta *)Py_None; + } + + DType = PyDict_GetItem(_global_pytype_to_type_dict, (PyObject *)pytype); + if (DType == NULL) { + /* the python type is not known */ + return NULL; + } + + Py_INCREF(DType); + if (DType == Py_None) { + return (PyArray_DTypeMeta *)Py_None; + } + assert(PyObject_TypeCheck(DType, (PyTypeObject *)&PyArrayDTypeMeta_Type)); + return (PyArray_DTypeMeta *)DType; +} + + +/** + * Find the correct DType class for the given python type. If flags is NULL + * this is not used to discover a dtype, but only for conversion to an + * existing dtype. In that case the Python (not NumPy) scalar subclass + * checks are skipped. + * + * @param obj The python object, mainly type(pyobj) is used, the object + * is passed to reuse existing code at this time only. + * @param flags Flags used to know if warnings were already given. If + * flags is NULL, this is not + * @param fixed_DType if not NULL, will be checked first for whether or not + * it can/wants to handle the (possible) scalar value. + * @return New reference to either a DType class, Py_None, or NULL on error. + */ +static NPY_INLINE PyArray_DTypeMeta * +discover_dtype_from_pyobject( + PyObject *obj, enum _dtype_discovery_flags *flags, + PyArray_DTypeMeta *fixed_DType) +{ + if (fixed_DType != NULL) { + /* + * Let the given DType handle the discovery. This is when the + * scalar-type matches exactly, or the DType signals that it can + * handle the scalar-type. (Even if it cannot handle here it may be + * asked to attempt to do so later, if no other matching DType exists.) 
+ */ + if ((Py_TYPE(obj) == fixed_DType->scalar_type) || + (fixed_DType->is_known_scalar_type != NULL && + fixed_DType->is_known_scalar_type(fixed_DType, Py_TYPE(obj)))) { + Py_INCREF(fixed_DType); + return fixed_DType; + } + } + + PyArray_DTypeMeta *DType = discover_dtype_from_pytype(Py_TYPE(obj)); + if (DType != NULL) { + return DType; + } + /* + * At this point we have not found a clear mapping, but mainly for + * backward compatibility we have to make some further attempts at + * interpreting the input as a known scalar type. + */ + PyArray_Descr *legacy_descr; + if (PyArray_IsScalar(obj, Generic)) { + legacy_descr = PyArray_DescrFromScalar(obj); + if (legacy_descr == NULL) { + return NULL; + } + } + else if (flags == NULL) { + Py_INCREF(Py_None); + return (PyArray_DTypeMeta *)Py_None; + } + else if (PyBytes_Check(obj)) { + legacy_descr = PyArray_DescrFromType(NPY_BYTE); + } + else if (PyUnicode_Check(obj)) { + legacy_descr = PyArray_DescrFromType(NPY_UNICODE); + } + else { + legacy_descr = _array_find_python_scalar_type(obj); + } + + if (legacy_descr != NULL) { + DType = NPY_DTYPE(legacy_descr); + Py_INCREF(DType); + Py_DECREF(legacy_descr); + /* TODO: Enable warning about subclass handling */ + if (0 && !((*flags) & GAVE_SUBCLASS_WARNING)) { + if (DEPRECATE_FUTUREWARNING( + "in the future NumPy will not automatically find the " + "dtype for subclasses of scalars known to NumPy (i.e. " + "python types). Use the appropriate `dtype=...` to create " + "this array. This will use the `object` dtype or raise " + "an error in the future.") < 0) { + return NULL; + } + *flags |= GAVE_SUBCLASS_WARNING; + } + return DType; + } + Py_INCREF(Py_None); + return (PyArray_DTypeMeta *)Py_None; +} + + +/* + * This function should probably become public API eventually. At this + * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`. + * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement + * this logic. 
+ */ +static NPY_INLINE PyArray_Descr * +cast_descriptor_to_fixed_dtype( + PyArray_Descr *descr, PyArray_DTypeMeta *fixed_DType) +{ + if (fixed_DType == NULL) { + /* Nothing to do, we only need to promote the new dtype */ + Py_INCREF(descr); + return descr; + } + + if (!fixed_DType->parametric) { + /* + * Don't actually do anything, the default is always the result + * of any cast. + */ + return fixed_DType->default_descr(fixed_DType); + } + if (PyObject_TypeCheck((PyObject *)descr, (PyTypeObject *)fixed_DType)) { + Py_INCREF(descr); + return descr; + } + /* + * TODO: When this is implemented for all dtypes, the special cases + * can be removed... + */ + if (fixed_DType->legacy && fixed_DType->parametric && + NPY_DTYPE(descr)->legacy) { + PyArray_Descr *flex_dtype = PyArray_DescrFromType(fixed_DType->type_num); + return PyArray_AdaptFlexibleDType(descr, flex_dtype); + } + + PyErr_SetString(PyExc_NotImplementedError, + "Must use casting to find the correct dtype, this is " + "not yet implemented! " + "(It should not be possible to hit this code currently!)"); + return NULL; +} + + +/** + * Discover the correct descriptor from a known DType class and scalar. + * If the fixed DType can discover a dtype instance/descr all is fine, + * if it cannot and DType is used instead, a cast will have to be tried. + * + * @param fixed_DType A user provided fixed DType, can be NULL + * @param DType A discovered DType (by discover_dtype_from_pyobject); + * this can be identical to `fixed_DType`, if it obj is a + * known scalar. Can be `NULL` indicating no known type. + * @param obj The Python scalar object. At the time of calling this function + * it must be known that `obj` should represent a scalar. + */ +static NPY_INLINE PyArray_Descr * +find_scalar_descriptor( + PyArray_DTypeMeta *fixed_DType, PyArray_DTypeMeta *DType, + PyObject *obj) +{ + PyArray_Descr *descr; + + if (DType == NULL && fixed_DType == NULL) { + /* No known DType and no fixed one means we go to object. 
*/ + return PyArray_DescrFromType(NPY_OBJECT); + } + else if (DType == NULL) { + /* + * If no DType is known/found, give the fixed give one a second + * chance. This allows for example string, to call `str(obj)` to + * figure out the length for arbitrary objects. + */ + descr = fixed_DType->discover_descr_from_pyobject(fixed_DType, obj); + } + else { + descr = DType->discover_descr_from_pyobject(DType, obj); + } + if (descr == NULL) { + return NULL; + } + if (fixed_DType == NULL) { + return descr; + } + + Py_SETREF(descr, cast_descriptor_to_fixed_dtype(descr, fixed_DType)); + return descr; +} + + +/** + * Assign a single element in an array from a python value. + * + * The dtypes SETITEM should only be trusted to generally do the right + * thing if something is known to be a scalar *and* is of a python type known + * to the DType (which should include all basic Python math types), but in + * general a cast may be necessary. + * This function handles the cast, which is for example hit when assigning + * a float128 to complex128. + * + * At this time, this function does not support arrays (historically we + * mainly supported arrays through `__float__()`, etc.). Such support should + * possibly be added (although when called from `PyArray_AssignFromCache` + * the input cannot be an array). + * Note that this is also problematic for some array-likes, such as + * `astropy.units.Quantity` and `np.ma.masked`. These are used to us calling + * `__float__`/`__int__` for 0-D instances in many cases. + * Eventually, we may want to define this as wrong: They must use DTypes + * instead of (only) subclasses. Until then, here as well as in + * `PyArray_AssignFromCache` (which already does this), we need to special + * case 0-D array-likes to behave like arbitrary (unknown!) Python objects. + * + * @param descr + * @param item + * @param value + * @return 0 on success -1 on failure. + */ +/* + * TODO: This function should possibly be public API. 
+ */ +NPY_NO_EXPORT int +PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value) +{ + PyArrayObject_fields arr_fields = { + .flags = NPY_ARRAY_WRITEABLE, /* assume array is not behaved. */ + }; + Py_SET_TYPE(&arr_fields, &PyArray_Type); + Py_REFCNT(&arr_fields) = 1; + + if (NPY_UNLIKELY(descr->type_num == NPY_OBJECT)) { + /* + * We always have store objects directly, casting will lose some + * type information. Any other dtype discards the type information. + * TODO: For a Categorical[object] this path may be necessary? + */ + arr_fields.descr = descr; + return descr->f->setitem(value, item, &arr_fields); + } + + /* discover_dtype_from_pyobject includes a check for is_known_scalar_type */ + PyArray_DTypeMeta *DType = discover_dtype_from_pyobject( + value, NULL, NPY_DTYPE(descr)); + if (DType == NULL) { + return -1; + } + if (DType == NPY_DTYPE(descr) || DType == (PyArray_DTypeMeta *)Py_None) { + /* We can set the element directly (or at least will try to) */ + Py_XDECREF(DType); + arr_fields.descr = descr; + return descr->f->setitem(value, item, &arr_fields); + } + PyArray_Descr *tmp_descr; + tmp_descr = DType->discover_descr_from_pyobject(DType, value); + Py_DECREF(DType); + if (tmp_descr == NULL) { + return -1; + } + + char *data = PyObject_Malloc(tmp_descr->elsize); + if (data == NULL) { + PyErr_NoMemory(); + Py_DECREF(tmp_descr); + return -1; + } + if (PyDataType_FLAGCHK(tmp_descr, NPY_NEEDS_INIT)) { + memset(data, 0, tmp_descr->elsize); + } + arr_fields.descr = tmp_descr; + if (tmp_descr->f->setitem(value, data, &arr_fields) < 0) { + PyObject_Free(data); + Py_DECREF(tmp_descr); + return -1; + } + if (PyDataType_REFCHK(tmp_descr)) { + /* We could probably use move-references above */ + PyArray_Item_INCREF(data, tmp_descr); + } + + int res = 0; + int needs_api = 0; + PyArray_StridedUnaryOp *stransfer; + NpyAuxData *transferdata; + if (PyArray_GetDTypeTransferFunction( + 0, 0, 0, tmp_descr, descr, 0, &stransfer, &transferdata, + &needs_api) == NPY_FAIL) { 
+ res = -1; + goto finish; + } + stransfer(item, 0, data, 0, 1, tmp_descr->elsize, transferdata); + NPY_AUXDATA_FREE(transferdata); + + if (needs_api && PyErr_Occurred()) { + res = -1; + } + + finish: + if (PyDataType_REFCHK(tmp_descr)) { + /* We could probably use move-references above */ + PyArray_Item_XDECREF(data, tmp_descr); + } + PyObject_Free(data); + Py_DECREF(tmp_descr); + return res; +} + + +static int +update_shape(int curr_ndim, int *max_ndim, + npy_intp out_shape[NPY_MAXDIMS], int new_ndim, + const npy_intp new_shape[NPY_MAXDIMS], npy_bool sequence, + enum _dtype_discovery_flags *flags) +{ + int success = 0; /* unsuccessful if array is ragged */ + const npy_bool max_dims_reached = *flags & MAX_DIMS_WAS_REACHED; + + if (curr_ndim + new_ndim > *max_ndim) { + success = -1; + /* Only update/check as many dims as possible, max_ndim is unchanged */ + new_ndim = *max_ndim - curr_ndim; + } + else if (!sequence && (*max_ndim != curr_ndim + new_ndim)) { + /* + * Sequences do not update max_ndim, otherwise shrink and check. + * This is depth first, so if it is already set, `out_shape` is filled. 
+ */ + *max_ndim = curr_ndim + new_ndim; + /* If a shape was already set, this is also ragged */ + if (max_dims_reached) { + success = -1; + } + } + for (int i = 0; i < new_ndim; i++) { + npy_intp curr_dim = out_shape[curr_ndim + i]; + npy_intp new_dim = new_shape[i]; + + if (!max_dims_reached) { + out_shape[curr_ndim + i] = new_dim; + } + else if (new_dim != curr_dim) { + /* The array is ragged, and this dimension is unusable already */ + success = -1; + if (!sequence) { + /* Remove dimensions that we cannot use: */ + *max_ndim -= new_ndim + i; + } + else { + assert(i == 0); + /* max_ndim is usually not updated for sequences, so set now: */ + *max_ndim = curr_ndim; + } + break; + } + } + if (!sequence) { + *flags |= MAX_DIMS_WAS_REACHED; + } + return success; +} + + +#define COERCION_CACHE_CACHE_SIZE 5 +static int _coercion_cache_num = 0; +static coercion_cache_obj *_coercion_cache_cache[COERCION_CACHE_CACHE_SIZE]; + +/* + * Steals a reference to the object. + */ +static NPY_INLINE int +npy_new_coercion_cache( + PyObject *converted_obj, PyObject *arr_or_sequence, npy_bool sequence, + coercion_cache_obj ***next_ptr, int ndim) +{ + coercion_cache_obj *cache; + if (_coercion_cache_num > 0) { + _coercion_cache_num--; + cache = _coercion_cache_cache[_coercion_cache_num]; + } + else { + cache = PyObject_MALLOC(sizeof(coercion_cache_obj)); + } + if (cache == NULL) { + PyErr_NoMemory(); + return -1; + } + cache->converted_obj = converted_obj; + cache->arr_or_sequence = arr_or_sequence; + cache->sequence = sequence; + cache->depth = ndim; + cache->next = NULL; + **next_ptr = cache; + *next_ptr = &(cache->next); + return 0; +} + +/** + * Unlink coercion cache item. 
+ * + * @param current + * @return next coercion cache object (or NULL) + */ +NPY_NO_EXPORT NPY_INLINE coercion_cache_obj * +npy_unlink_coercion_cache(coercion_cache_obj *current) +{ + coercion_cache_obj *next = current->next; + Py_DECREF(current->arr_or_sequence); + if (_coercion_cache_num < COERCION_CACHE_CACHE_SIZE) { + _coercion_cache_cache[_coercion_cache_num] = current; + _coercion_cache_num++; + } + else { + PyObject_FREE(current); + } + return next; +} + +NPY_NO_EXPORT NPY_INLINE void +npy_free_coercion_cache(coercion_cache_obj *next) { + /* We only need to check from the last used cache pos */ + while (next != NULL) { + next = npy_unlink_coercion_cache(next); + } +} + +#undef COERCION_CACHE_CACHE_SIZE + +/** + * Do the promotion step and possible casting. This function should + * never be called if a descriptor was requested. In that case the output + * dtype is not of importance, so we must not risk promotion errors. + * + * @param out_descr The current descriptor. + * @param descr The newly found descriptor to promote with + * @param flags dtype discover flags to signal failed promotion. + * @return -1 on error, 0 on success. + */ +static NPY_INLINE int +handle_promotion(PyArray_Descr **out_descr, PyArray_Descr *descr, + enum _dtype_discovery_flags *flags) +{ + assert(!(*flags & DESCRIPTOR_WAS_SET)); + + if (*out_descr == NULL) { + Py_INCREF(descr); + *out_descr = descr; + return 0; + } + PyArray_Descr *new_descr = PyArray_PromoteTypes(descr, *out_descr); + if (new_descr == NULL) { + PyErr_Clear(); + *flags |= PROMOTION_FAILED; + /* Continue with object, since we may need the dimensionality */ + new_descr = PyArray_DescrFromType(NPY_OBJECT); + } + Py_SETREF(*out_descr, new_descr); + return 0; +} + + +/** + * Handle a leave node (known scalar) during dtype and shape discovery. + * + * @param obj The python object or nested sequence to convert + * @param max_dims The maximum number of dimensions. 
+ * @param curr_dims The current number of dimensions (depth in the recursion) + * @param out_shape The discovered output shape, will be filled + * @param coercion_cache The coercion cache object to use. + * @param DType the DType class that should be used, or NULL, if not provided. + * @param flags used signal that this is a ragged array, used internally and + * can be expanded if necessary. + */ +static NPY_INLINE int +handle_scalar( + PyObject *obj, int curr_dims, int *max_dims, + PyArray_Descr **out_descr, npy_intp *out_shape, + PyArray_DTypeMeta *fixed_DType, + enum _dtype_discovery_flags *flags, PyArray_DTypeMeta *DType) +{ + PyArray_Descr *descr; + + if (update_shape(curr_dims, max_dims, out_shape, + 0, NULL, NPY_FALSE, flags) < 0) { + *flags |= FOUND_RAGGED_ARRAY; + return *max_dims; + } + if (*flags & DESCRIPTOR_WAS_SET) { + /* no need to do any promotion */ + return *max_dims; + } + /* This is a scalar, so find the descriptor */ + descr = find_scalar_descriptor(fixed_DType, DType, obj); + if (descr == NULL) { + return -1; + } + if (handle_promotion(out_descr, descr, flags) < 0) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + return *max_dims; +} + + +/** + * Return the correct descriptor given an array object and a DType class. + * + * This is identical to casting the arrays descriptor/dtype to the new + * DType class + * + * @param arr The array object. + * @param DType The DType class to cast to (or NULL for convenience) + * @param out_descr The output descriptor will set. The result can be NULL + * when the array is of object dtype and has no elements. + * + * @return -1 on failure, 0 on success. 
+ */ +static int +find_descriptor_from_array( + PyArrayObject *arr, PyArray_DTypeMeta *DType, PyArray_Descr **out_descr) +{ + enum _dtype_discovery_flags flags = 0; + *out_descr = NULL; + + if (NPY_UNLIKELY(DType != NULL && DType->parametric && + PyArray_ISOBJECT(arr))) { + /* + * We have one special case, if (and only if) the input array is of + * object DType and the dtype is not fixed already but parametric. + * Then, we allow inspection of all elements, treating them as + * elements. We do this recursively, so nested 0-D arrays can work, + * but nested higher dimensional arrays will lead to an error. + */ + assert(DType->type_num != NPY_OBJECT); /* not parametric */ + + PyArrayIterObject *iter; + iter = (PyArrayIterObject *)PyArray_IterNew((PyObject *)arr); + if (iter == NULL) { + return -1; + } + while (iter->index < iter->size) { + PyArray_DTypeMeta *item_DType; + /* + * Note: If the array contains typed objects we may need to use + * the dtype to use casting for finding the correct instance. + */ + PyObject *elem = PyArray_GETITEM(arr, iter->dataptr); + if (elem == NULL) { + Py_DECREF(iter); + return -1; + } + item_DType = discover_dtype_from_pyobject(elem, &flags, DType); + if (item_DType == NULL) { + Py_DECREF(iter); + Py_DECREF(elem); + return -1; + } + if (item_DType == (PyArray_DTypeMeta *)Py_None) { + Py_SETREF(item_DType, NULL); + } + int flat_max_dims = 0; + if (handle_scalar(elem, 0, &flat_max_dims, out_descr, + NULL, DType, &flags, item_DType) < 0) { + Py_DECREF(iter); + Py_DECREF(elem); + Py_XDECREF(item_DType); + return -1; + } + Py_XDECREF(item_DType); + Py_DECREF(elem); + PyArray_ITER_NEXT(iter); + } + Py_DECREF(iter); + } + else if (DType != NULL && NPY_UNLIKELY(DType->type_num == NPY_DATETIME) && + PyArray_ISSTRING(arr)) { + /* + * TODO: This branch should be deprecated IMO, the workaround is + * to cast to the object to a string array. Although a specific + * function (if there is even any need) would be better. 
+ * This is value based casting! + * Unless of course we actually want to support this kind of thing + * in general (not just for object dtype)... + */ + PyArray_DatetimeMetaData meta; + meta.base = NPY_FR_GENERIC; + meta.num = 1; + + if (find_string_array_datetime64_type(arr, &meta) < 0) { + return -1; + } + else { + *out_descr = create_datetime_dtype(NPY_DATETIME, &meta); + if (*out_descr == NULL) { + return -1; + } + } + } + else { + /* + * If this is not an object array figure out the dtype cast, + * or simply use the returned DType. + */ + *out_descr = cast_descriptor_to_fixed_dtype( + PyArray_DESCR(arr), DType); + if (*out_descr == NULL) { + return -1; + } + } + return 0; +} + +/** + * Given a dtype or DType object, find the correct descriptor to cast the + * array to. + * + * This function is identical to normal casting using only the dtype, however, + * it supports inspecting the elements when the array has object dtype + * (and the given datatype describes a parametric DType class). + * + * @param arr + * @param dtype A dtype instance or class. + * @return A concrete dtype instance or NULL + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptDescriptorToArray(PyArrayObject *arr, PyObject *dtype) +{ + /* If the requested dtype is flexible, adapt it */ + PyArray_Descr *new_dtype; + PyArray_DTypeMeta *new_DType; + int res; + + res = PyArray_ExtractDTypeAndDescriptor((PyObject *)dtype, + &new_dtype, &new_DType); + if (res < 0) { + return NULL; + } + if (new_dtype == NULL) { + res = find_descriptor_from_array(arr, new_DType, &new_dtype); + if (res < 0) { + Py_DECREF(new_DType); + return NULL; + } + if (new_dtype == NULL) { + /* This is an object array but contained no elements, use default */ + new_dtype = new_DType->default_descr(new_DType); + } + } + Py_DECREF(new_DType); + return new_dtype; +} + + +/** + * Recursion helper for `PyArray_DiscoverDTypeAndShape`. See its + * documentation for additional details. 
+ * + * @param obj The current (possibly nested) object + * @param curr_dims The current depth, i.e. initially 0 and increasing. + * @param max_dims Maximum number of dimensions, modified during discovery. + * @param out_descr dtype instance (or NULL) to promoted and update. + * @param out_shape The current shape (updated) + * @param coercion_cache_tail_ptr The tail of the linked list of coercion + * cache objects, which hold on to converted sequences and arrays. + * This is a pointer to the `->next` slot of the previous cache so + * that we can append a new cache object (and update this pointer). + * (Initially it is a pointer to the user-provided head pointer). + * @param fixed_DType User provided fixed DType class + * @param flags Discovery flags (reporting and behaviour flags, see def.) + * @return The updated number of maximum dimensions (i.e. scalars will set + * this to the current dimensions). + */ +NPY_NO_EXPORT int +PyArray_DiscoverDTypeAndShape_Recursive( + PyObject *obj, int curr_dims, int max_dims, PyArray_Descr**out_descr, + npy_intp out_shape[NPY_MAXDIMS], + coercion_cache_obj ***coercion_cache_tail_ptr, + PyArray_DTypeMeta *fixed_DType, enum _dtype_discovery_flags *flags) +{ + PyArrayObject *arr = NULL; + PyObject *seq; + + /* + * The first step is to find the DType class if it was not provided, + * alternatively we have to find out that this is not a scalar at all + * (which could fail and lead us to `object` dtype). + */ + PyArray_DTypeMeta *DType = NULL; + + if (NPY_UNLIKELY(*flags & DISCOVER_STRINGS_AS_SEQUENCES)) { + /* + * We currently support that bytes/strings are considered sequences, + * if the dtype is np.dtype('c'), this should be deprecated probably, + * but requires hacks right now. 
+ */ + if (PyBytes_Check(obj) && PyBytes_Size(obj) != 1) { + goto force_sequence_due_to_char_dtype; + } + else if (PyUnicode_Check(obj) && PyUnicode_GetLength(obj) != 1) { + goto force_sequence_due_to_char_dtype; + } + } + + /* If this is a known scalar, find the corresponding DType class */ + DType = discover_dtype_from_pyobject(obj, flags, fixed_DType); + if (DType == NULL) { + return -1; + } + else if (DType == (PyArray_DTypeMeta *)Py_None) { + Py_DECREF(Py_None); + } + else { + max_dims = handle_scalar( + obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType, + flags, DType); + Py_DECREF(DType); + return max_dims; + } + + /* + * At this point we expect to find either a sequence, or an array-like. + * Although it is still possible that this fails and we have to use + * `object`. + */ + if (PyArray_Check(obj)) { + arr = (PyArrayObject *)obj; + Py_INCREF(arr); + } + else { + PyArray_Descr *requested_descr = NULL; + if (*flags & DESCRIPTOR_WAS_SET) { + /* __array__ may be passed the requested descriptor if provided */ + requested_descr = *out_descr; + } + arr = (PyArrayObject *)_array_from_array_like(obj, + requested_descr, 0, NULL); + if (arr == NULL) { + return -1; + } + else if (arr == (PyArrayObject *)Py_NotImplemented) { + Py_DECREF(arr); + arr = NULL; + } + } + if (arr != NULL) { + /* + * This is an array object which will be added to the cache, keeps + * the reference to the array alive (takes ownership). + */ + if (npy_new_coercion_cache(obj, (PyObject *)arr, + 0, coercion_cache_tail_ptr, curr_dims) < 0) { + return -1; + } + + if (curr_dims == 0) { + /* + * Special case for reverse broadcasting, ignore max_dims if this + * is a single array-like object; needed for PyArray_CopyObject. 
+ */ + memcpy(out_shape, PyArray_SHAPE(arr), + PyArray_NDIM(arr) * sizeof(npy_intp)); + max_dims = PyArray_NDIM(arr); + } + else if (update_shape(curr_dims, &max_dims, out_shape, + PyArray_NDIM(arr), PyArray_SHAPE(arr), NPY_FALSE, flags) < 0) { + *flags |= FOUND_RAGGED_ARRAY; + return max_dims; + } + + if (*flags & DESCRIPTOR_WAS_SET) { + return max_dims; + } + /* + * For arrays we may not just need to cast the dtype to the user + * provided fixed_DType. If this is an object array, the elements + * may need to be inspected individually. + * Note, this finds the descriptor of the array first and only then + * promotes here (different associativity). + */ + PyArray_Descr *cast_descr; + if (find_descriptor_from_array(arr, fixed_DType, &cast_descr) < 0) { + return -1; + } + if (cast_descr == NULL) { + /* object array with no elements, no need to promote/adjust. */ + return max_dims; + } + if (handle_promotion(out_descr, cast_descr, flags) < 0) { + Py_DECREF(cast_descr); + return -1; + } + Py_DECREF(cast_descr); + return max_dims; + } + + /* + * The last step is to assume the input should be handled as a sequence + * and to handle it recursively. That is, unless we have hit the + * dimension limit. 
+ */ + npy_bool is_sequence = (PySequence_Check(obj) && PySequence_Size(obj) >= 0); + if (NPY_UNLIKELY(*flags & DISCOVER_TUPLES_AS_ELEMENTS) && + PyTuple_Check(obj)) { + is_sequence = NPY_FALSE; + } + if (curr_dims == max_dims || !is_sequence) { + /* Clear any PySequence_Size error which would corrupts further calls */ + PyErr_Clear(); + max_dims = handle_scalar( + obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType, + flags, NULL); + if (is_sequence) { + /* Flag as ragged or too deep array */ + *flags |= FOUND_RAGGED_ARRAY; + } + return max_dims; + } + /* If we stop supporting bytes/str subclasses, more may be required here: */ + assert(!PyBytes_Check(obj) && !PyUnicode_Check(obj)); + + force_sequence_due_to_char_dtype: + + /* Ensure we have a sequence (required for PyPy) */ + seq = PySequence_Fast(obj, "Could not convert object to sequence"); + if (seq == NULL) { + /* + * Specifically do not fail on things that look like a dictionary, + * instead treat them as scalar. + */ + if (PyErr_ExceptionMatches(PyExc_KeyError)) { + PyErr_Clear(); + max_dims = handle_scalar( + obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType, + flags, NULL); + return max_dims; + } + return -1; + } + /* The cache takes ownership of the sequence here. 
*/ + if (npy_new_coercion_cache(obj, seq, 1, coercion_cache_tail_ptr, curr_dims) < 0) { + return -1; + } + + npy_intp size = PySequence_Fast_GET_SIZE(seq); + PyObject **objects = PySequence_Fast_ITEMS(seq); + + if (update_shape(curr_dims, &max_dims, + out_shape, 1, &size, NPY_TRUE, flags) < 0) { + /* But do update, if there this is a ragged case */ + *flags |= FOUND_RAGGED_ARRAY; + return max_dims; + } + if (size == 0) { + /* If the sequence is empty, this must be the last dimension */ + *flags |= MAX_DIMS_WAS_REACHED; + return curr_dims + 1; + } + + /* Recursive call for each sequence item */ + for (Py_ssize_t i = 0; i < size; i++) { + max_dims = PyArray_DiscoverDTypeAndShape_Recursive( + objects[i], curr_dims + 1, max_dims, + out_descr, out_shape, coercion_cache_tail_ptr, fixed_DType, + flags); + + if (max_dims < 0) { + return -1; + } + } + return max_dims; +} + + +/** + * Finds the DType and shape of an arbitrary nested sequence. This is the + * general purpose function to find the parameters of the array (but not + * the array itself) as returned by `np.array()` + * + * Note: Before considering to make part of this public, we should consider + * whether things such as `out_descr != NULL` should be supported in + * a public API. + * + * @param obj Scalar or nested sequences. + * @param max_dims Maximum number of dimensions (after this scalars are forced) + * @param out_shape Will be filled with the output shape (more than the actual + * shape may be written). + * @param coercion_cache NULL initialized reference to a cache pointer. + * May be set to the first coercion_cache, and has to be freed using + * npy_free_coercion_cache. + * This should be stored in a thread-safe manner (i.e. function static) + * and is designed to be consumed by `PyArray_AssignFromCache`. + * If not consumed, must be freed using `npy_free_coercion_cache`. + * @param fixed_DType A user provided fixed DType class. + * @param requested_descr A user provided fixed descriptor. 
This is always + * returned as the discovered descriptor, but currently only used + * for the ``__array__`` protocol. + * @param out_descr Set to the discovered output descriptor. This may be + * non NULL but only when fixed_DType/requested_descr are not given. + * If non NULL, it is the first dtype being promoted and used if there + * are no elements. + * The result may be unchanged (remain NULL) when converting a + * sequence with no elements. In this case it is callers responsibility + * to choose a default. + * @return dimensions of the discovered object or -1 on error. + * WARNING: If (and only if) the output is a single array, the ndim + * returned _can_ exceed the maximum allowed number of dimensions. + * It might be nice to deprecate this? But it allows things such as + * `arr1d[...] = np.array([[1,2,3,4]])` + */ +NPY_NO_EXPORT int +PyArray_DiscoverDTypeAndShape( + PyObject *obj, int max_dims, + npy_intp out_shape[NPY_MAXDIMS], + coercion_cache_obj **coercion_cache, + PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr, + PyArray_Descr **out_descr) +{ + coercion_cache_obj **coercion_cache_head = coercion_cache; + *coercion_cache = NULL; + enum _dtype_discovery_flags flags = 0; + + /* + * Support a passed in descriptor (but only if nothing was specified). + */ + assert(*out_descr == NULL || fixed_DType == NULL); + /* Validate input of requested descriptor and DType */ + if (fixed_DType != NULL) { + assert(PyObject_TypeCheck( + (PyObject *)fixed_DType, (PyTypeObject *)&PyArrayDTypeMeta_Type)); + } + + if (requested_descr != NULL) { + assert(fixed_DType == NPY_DTYPE(requested_descr)); + /* The output descriptor must be the input. */ + Py_INCREF(requested_descr); + *out_descr = requested_descr; + flags |= DESCRIPTOR_WAS_SET; + } + + /* + * Call the recursive function, the setup for this may need expanding + * to handle caching better. 
+ */ + + /* Legacy discovery flags */ + if (requested_descr != NULL) { + if (requested_descr->type_num == NPY_STRING && + requested_descr->type == 'c') { + /* Character dtype variation of string (should be deprecated...) */ + flags |= DISCOVER_STRINGS_AS_SEQUENCES; + } + else if (requested_descr->type_num == NPY_VOID && + (requested_descr->names || requested_descr->subarray)) { + /* Void is a chimera, in that it may or may not be structured... */ + flags |= DISCOVER_TUPLES_AS_ELEMENTS; + } + } + + int ndim = PyArray_DiscoverDTypeAndShape_Recursive( + obj, 0, max_dims, out_descr, out_shape, &coercion_cache, + fixed_DType, &flags); + if (ndim < 0) { + goto fail; + } + + if (NPY_UNLIKELY(flags & FOUND_RAGGED_ARRAY)) { + /* + * If max-dims was reached and the dimensions reduced, this is ragged. + * Otherwise, we merely reached the maximum dimensions, which is + * slightly different. This happens for example for `[1, [2, 3]]` + * where the maximum dimensions is 1, but then a sequence found. + * + * In this case we need to inform the user and clean out the cache + * since it may be too deep. + */ + + /* Handle reaching the maximum depth differently: */ + int too_deep = ndim == max_dims; + + if (fixed_DType == NULL) { + /* This is discovered as object, but deprecated */ + static PyObject *visibleDeprecationWarning = NULL; + npy_cache_import( + "numpy", "VisibleDeprecationWarning", + &visibleDeprecationWarning); + if (visibleDeprecationWarning == NULL) { + goto fail; + } + if (!too_deep) { + /* NumPy 1.19, 2019-11-01 */ + if (PyErr_WarnEx(visibleDeprecationWarning, + "Creating an ndarray from ragged nested sequences (which " + "is a list-or-tuple of lists-or-tuples-or ndarrays with " + "different lengths or shapes) is deprecated. 
If you " + "meant to do this, you must specify 'dtype=object' " + "when creating the ndarray.", 1) < 0) { + goto fail; + } + } + else { + /* NumPy 1.20, 2020-05-08 */ + /* Note, max_dims should normally always be NPY_MAXDIMS here */ + if (PyErr_WarnFormat(visibleDeprecationWarning, 1, + "Creating an ndarray from nested sequences exceeding " + "the maximum number of dimensions of %d is deprecated. " + "If you mean to do this, you must specify " + "'dtype=object' when creating the ndarray.", + max_dims) < 0) { + goto fail; + } + } + /* Ensure that ragged arrays always return object dtype */ + Py_XSETREF(*out_descr, PyArray_DescrFromType(NPY_OBJECT)); + } + else if (fixed_DType->type_num != NPY_OBJECT) { + /* Only object DType supports ragged cases unify error */ + if (!too_deep) { + PyObject *shape = PyArray_IntTupleFromIntp(ndim, out_shape); + PyErr_Format(PyExc_ValueError, + "setting an array element with a sequence. The " + "requested array has an inhomogeneous shape after " + "%d dimensions. The detected shape was " + "%R + inhomogeneous part.", + ndim, shape); + Py_DECREF(shape); + goto fail; + } + else { + PyErr_Format(PyExc_ValueError, + "setting an array element with a sequence. The " + "requested array would exceed the maximum number of " + "dimension of %d.", + max_dims); + goto fail; + } + } + + /* + * If the array is ragged, the cache may be too deep, so clean it. + * The cache is left at the same depth as the array though. 
+ */ + coercion_cache_obj **next_ptr = coercion_cache_head; + coercion_cache_obj *current = *coercion_cache_head; /* item to check */ + while (current != NULL) { + if (current->depth > ndim) { + /* delete "next" cache item and advanced it (unlike later) */ + current = npy_unlink_coercion_cache(current); + continue; + } + /* advance both prev and next, and set prev->next to new item */ + *next_ptr = current; + next_ptr = &(current->next); + current = current->next; + } + *next_ptr = NULL; + } + /* We could check here for max-ndims being reached as well */ + + if (requested_descr != NULL) { + /* descriptor was provided, we did not accidentally change it */ + assert(*out_descr == requested_descr); + } + else if (NPY_UNLIKELY(*out_descr == NULL)) { + /* + * When the object contained no elements (sequence of length zero), + * the no descriptor may have been found. When a DType was requested + * we use it to define the output dtype. + * Otherwise, out_descr will remain NULL and the caller has to set + * the correct default. + */ + if (fixed_DType != NULL) { + if (fixed_DType->default_descr == NULL) { + Py_INCREF(fixed_DType->singleton); + *out_descr = fixed_DType->singleton; + } + else { + *out_descr = fixed_DType->default_descr(fixed_DType); + if (*out_descr == NULL) { + goto fail; + } + } + } + } + return ndim; + + fail: + npy_free_coercion_cache(*coercion_cache_head); + *coercion_cache_head = NULL; + Py_XSETREF(*out_descr, NULL); + return -1; +} + + + +/** + * Check the descriptor is a legacy "flexible" DType instance, this is + * an instance which is (normally) not attached to an array, such as a string + * of length 0 or a datetime with no unit. + * These should be largely deprecated, and represent only the DType class + * for most `dtype` parameters. + * + * TODO: This function should eventually recieve a deprecation warning and + * be removed. 
+ * + * @param descr + * @return 1 if this is not a concrete dtype instance 0 otherwise + */ +static int +descr_is_legacy_parametric_instance(PyArray_Descr *descr) +{ + if (PyDataType_ISUNSIZED(descr)) { + return 1; + } + /* Flexible descr with generic time unit (which can be adapted) */ + if (PyDataType_ISDATETIME(descr)) { + PyArray_DatetimeMetaData *meta; + meta = get_datetime_metadata_from_dtype(descr); + if (meta->base == NPY_FR_GENERIC) { + return 1; + } + } + return 0; +} + + +/** + * Given either a DType instance or class, (or legacy flexible instance), + * ands sets output dtype instance and DType class. Both results may be + * NULL, but if `out_descr` is set `out_DType` will always be the + * corresponding class. + * + * @param dtype + * @param out_descr + * @param out_DType + * @return 0 on success -1 on failure + */ +NPY_NO_EXPORT int +PyArray_ExtractDTypeAndDescriptor(PyObject *dtype, + PyArray_Descr **out_descr, PyArray_DTypeMeta **out_DType) +{ + *out_DType = NULL; + *out_descr = NULL; + + if (dtype != NULL) { + if (PyObject_TypeCheck(dtype, (PyTypeObject *)&PyArrayDTypeMeta_Type)) { + assert(dtype != (PyObject * )&PyArrayDescr_Type); /* not np.dtype */ + *out_DType = (PyArray_DTypeMeta *)dtype; + Py_INCREF(*out_DType); + } + else if (PyObject_TypeCheck((PyObject *)Py_TYPE(dtype), + (PyTypeObject *)&PyArrayDTypeMeta_Type)) { + *out_DType = NPY_DTYPE(dtype); + Py_INCREF(*out_DType); + if (!descr_is_legacy_parametric_instance((PyArray_Descr *)dtype)) { + *out_descr = (PyArray_Descr *)dtype; + Py_INCREF(*out_descr); + } + } + else { + PyErr_SetString(PyExc_TypeError, + "dtype parameter must be a DType instance or class."); + return -1; + } + } + return 0; +} + + +/* + * Python API function to expose the dtype+shape discovery functionality + * directly. 
+ */ +NPY_NO_EXPORT PyObject * +_discover_array_parameters(PyObject *NPY_UNUSED(self), + PyObject *args, PyObject *kwargs) +{ + static char *kwlist[] = {"obj", "dtype", NULL}; + + PyObject *obj; + PyObject *dtype = NULL; + PyArray_Descr *fixed_descriptor = NULL; + PyArray_DTypeMeta *fixed_DType = NULL; + npy_intp shape[NPY_MAXDIMS]; + + if (!PyArg_ParseTupleAndKeywords( + args, kwargs, "O|O:_discover_array_parameters", kwlist, + &obj, &dtype)) { + return NULL; + } + + if (PyArray_ExtractDTypeAndDescriptor(dtype, + &fixed_descriptor, &fixed_DType) < 0) { + return NULL; + } + + coercion_cache_obj *coercion_cache = NULL; + PyObject *out_dtype = NULL; + int ndim = PyArray_DiscoverDTypeAndShape( + obj, NPY_MAXDIMS, shape, + &coercion_cache, + fixed_DType, fixed_descriptor, (PyArray_Descr **)&out_dtype); + Py_XDECREF(fixed_DType); + Py_XDECREF(fixed_descriptor); + if (ndim < 0) { + return NULL; + } + npy_free_coercion_cache(coercion_cache); + if (out_dtype == NULL) { + /* Empty sequence, report this as None. */ + out_dtype = Py_None; + Py_INCREF(Py_None); + } + + PyObject *shape_tuple = PyArray_IntTupleFromIntp(ndim, shape); + if (shape_tuple == NULL) { + return NULL; + } + + PyObject *res = PyTuple_Pack(2, (PyObject *)out_dtype, shape_tuple); + Py_DECREF(out_dtype); + Py_DECREF(shape_tuple); + return res; +} diff --git a/numpy/core/src/multiarray/array_coercion.h b/numpy/core/src/multiarray/array_coercion.h new file mode 100644 index 000000000000..90ce0355a11c --- /dev/null +++ b/numpy/core/src/multiarray/array_coercion.h @@ -0,0 +1,58 @@ +#ifndef _NPY_ARRAY_COERCION_H +#define _NPY_ARRAY_COERCION_H + + +/* + * We do not want to coerce arrays many times unless absolutely necessary. + * The same goes for sequences, so everything we have seen, we will have + * to store somehow. This is a linked list of these objects. 
+ */ +typedef struct coercion_cache_obj { + PyObject *converted_obj; + PyObject *arr_or_sequence; + struct coercion_cache_obj *next; + npy_bool sequence; + int depth; /* the dimension at which this object was found. */ +} coercion_cache_obj; + + +NPY_NO_EXPORT int +_PyArray_MapPyTypeToDType( + PyArray_DTypeMeta *DType, PyTypeObject *pytype, npy_bool userdef); + +NPY_NO_EXPORT int +PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value); + +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptDescriptorToArray(PyArrayObject *arr, PyObject *dtype); + +NPY_NO_EXPORT int +PyArray_DiscoverDTypeAndShape( + PyObject *obj, int max_dims, + npy_intp out_shape[NPY_MAXDIMS], + coercion_cache_obj **coercion_cache, + PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr, + PyArray_Descr **out_descr); + +NPY_NO_EXPORT int +PyArray_ExtractDTypeAndDescriptor(PyObject *dtype, + PyArray_Descr **out_descr, PyArray_DTypeMeta **out_DType); + +NPY_NO_EXPORT PyObject * +_discover_array_parameters(PyObject *NPY_UNUSED(self), + PyObject *args, PyObject *kwargs); + + +/* Would make sense to inline the freeing functions everywhere */ +/* Frees the coercion cache object recursively. 
*/ +NPY_NO_EXPORT void +npy_free_coercion_cache(coercion_cache_obj *first); + +/* unlink a single item and return the next */ +NPY_NO_EXPORT coercion_cache_obj * +npy_unlink_coercion_cache(coercion_cache_obj *current); + +NPY_NO_EXPORT int +PyArray_AssignFromCache(PyArrayObject *self, coercion_cache_obj *cache); + +#endif /* _NPY_ARRAY_COERCION_H */ diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index dedaf38eb6d1..95c6506744ab 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -43,6 +43,7 @@ maintainer email: oliphant.travis@ieee.org #include "arrayobject.h" #include "conversion_utils.h" #include "ctors.h" +#include "dtypemeta.h" #include "methods.h" #include "descriptor.h" #include "iterators.h" @@ -57,6 +58,7 @@ maintainer email: oliphant.travis@ieee.org #include "strfuncs.h" #include "binop_override.h" +#include "array_coercion.h" /*NUMPY_API Compute the size of an array (in number of items) @@ -235,136 +237,96 @@ PyArray_SetBaseObject(PyArrayObject *arr, PyObject *obj) } +/** + * Assign an arbitrary object a NumPy array. This is largely basically + * identical to PyArray_FromAny, but assigns directly to the output array. + * + * @param dest Array to be written to + * @param src_object Object to be assigned, array-coercion rules apply. + * @return 0 on success -1 on failures. + */ /*NUMPY_API*/ NPY_NO_EXPORT int PyArray_CopyObject(PyArrayObject *dest, PyObject *src_object) { int ret = 0; - PyArrayObject *src; + PyArrayObject *view; PyArray_Descr *dtype = NULL; - int ndim = 0; + int ndim; npy_intp dims[NPY_MAXDIMS]; + coercion_cache_obj *cache = NULL; - Py_INCREF(src_object); /* - * Special code to mimic Numeric behavior for - * character arrays. + * We have to set the maximum number of dimensions here to support + * sequences within object arrays. 
*/ - if (PyArray_DESCR(dest)->type == NPY_CHARLTR && - PyArray_NDIM(dest) > 0 && - PyString_Check(src_object)) { - npy_intp n_new, n_old; - char *new_string; - PyObject *tmp; + ndim = PyArray_DiscoverDTypeAndShape(src_object, + PyArray_NDIM(dest), dims, &cache, + NPY_DTYPE(PyArray_DESCR(dest)), PyArray_DESCR(dest), &dtype); + if (ndim < 0) { + return -1; + } - n_new = PyArray_DIMS(dest)[PyArray_NDIM(dest)-1]; - n_old = PyString_Size(src_object); - if (n_new > n_old) { - new_string = malloc(n_new); - if (new_string == NULL) { - Py_DECREF(src_object); - PyErr_NoMemory(); - return -1; - } - memcpy(new_string, PyString_AS_STRING(src_object), n_old); - memset(new_string + n_old, ' ', n_new - n_old); - tmp = PyString_FromStringAndSize(new_string, n_new); - free(new_string); - Py_DECREF(src_object); - src_object = tmp; - } + if (cache != NULL && !(cache->sequence)) { + /* The input is an array or array object, so assign directly */ + assert(cache->converted_obj == src_object); + view = (PyArrayObject *)cache->arr_or_sequence; + Py_DECREF(dtype); + ret = PyArray_AssignArray(dest, view, NULL, NPY_UNSAFE_CASTING); + npy_free_coercion_cache(cache); + return ret; } /* - * Get either an array object we can copy from, or its parameters - * if there isn't a convenient array available. + * We may need to broadcast, due to shape mismatches, in this case + * create a temporary array first, and assign that after filling + * it from the sequences/scalar. */ - if (PyArray_GetArrayParamsFromObject_int(src_object, - PyArray_DESCR(dest), 0, &dtype, &ndim, dims, &src) < 0) { - Py_DECREF(src_object); - return -1; + if (ndim != PyArray_NDIM(dest) || + !PyArray_CompareLists(PyArray_DIMS(dest), dims, ndim)) { + /* + * Broadcasting may be necessary, so assign to a view first. + * This branch could lead to a shape mismatch error later. 
+ */ + assert (ndim <= PyArray_NDIM(dest)); /* would error during discovery */ + view = (PyArrayObject *) PyArray_NewFromDescr( + &PyArray_Type, dtype, ndim, dims, NULL, NULL, + PyArray_FLAGS(dest) & NPY_ARRAY_F_CONTIGUOUS, NULL); + if (view == NULL) { + npy_free_coercion_cache(cache); + return -1; + } + } + else { + Py_DECREF(dtype); + view = dest; } - /* If it's not an array, either assign from a sequence or as a scalar */ - if (src == NULL) { - /* If the input is scalar */ - if (ndim == 0) { - /* If there's one dest element and src is a Python scalar */ - if (PyArray_IsScalar(src_object, Generic)) { - char *value; - int retcode; - - value = scalar_value(src_object, dtype); - if (value == NULL) { - Py_DECREF(dtype); - Py_DECREF(src_object); - return -1; - } - - /* TODO: switch to SAME_KIND casting */ - retcode = PyArray_AssignRawScalar(dest, dtype, value, - NULL, NPY_UNSAFE_CASTING); - Py_DECREF(dtype); - Py_DECREF(src_object); - return retcode; - } - /* Otherwise use the dtype's setitem function */ - else { - if (PyArray_SIZE(dest) == 1) { - Py_DECREF(dtype); - Py_DECREF(src_object); - ret = PyArray_SETITEM(dest, PyArray_DATA(dest), src_object); - return ret; - } - else { - src = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, - dtype, 0, NULL, NULL, - NULL, 0, NULL); - if (src == NULL) { - Py_DECREF(src_object); - return -1; - } - if (PyArray_SETITEM(src, PyArray_DATA(src), src_object) < 0) { - Py_DECREF(src_object); - Py_DECREF(src); - return -1; - } - } - } + /* Assign the values to `view` (whichever array that is) */ + if (cache == NULL) { + /* single (non-array) item, assign immediately */ + if (PyArray_Pack( + PyArray_DESCR(view), PyArray_DATA(view), src_object) < 0) { + goto fail; } - else { - /* - * If there are more than enough dims, use AssignFromSequence - * because it can handle this style of broadcasting. 
- */ - if (ndim >= PyArray_NDIM(dest)) { - int res; - Py_DECREF(dtype); - res = PyArray_AssignFromSequence(dest, src_object); - Py_DECREF(src_object); - return res; - } - /* Otherwise convert to an array and do an array-based copy */ - src = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, - dtype, ndim, dims, NULL, NULL, - PyArray_ISFORTRAN(dest), NULL); - if (src == NULL) { - Py_DECREF(src_object); - return -1; - } - if (PyArray_AssignFromSequence(src, src_object) < 0) { - Py_DECREF(src); - Py_DECREF(src_object); - return -1; - } + } + else { + if (PyArray_AssignFromCache(view, cache) < 0) { + goto fail; } } - - /* If it's an array, do a move (handling possible overlapping data) */ - ret = PyArray_MoveInto(dest, src); - Py_DECREF(src); - Py_DECREF(src_object); + if (view == dest) { + return 0; + } + ret = PyArray_AssignArray(dest, view, NULL, NPY_UNSAFE_CASTING); + Py_DECREF(view); return ret; + + fail: + if (view != dest) { + Py_DECREF(view); + } + return -1; } diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 55ae7377907a..2abc79167cb9 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -9,6 +9,7 @@ #include "npy_pycompat.h" #include "common.h" +#include "abstractdtypes.h" #include "usertypes.h" #include "common.h" @@ -16,6 +17,7 @@ #include "get_attr_string.h" #include "mem_overlap.h" +#include "array_coercion.h" /* * The casting to use for implicit assignment operations resulting from @@ -44,88 +46,19 @@ _array_find_python_scalar_type(PyObject *op) else if (PyComplex_Check(op)) { return PyArray_DescrFromType(NPY_CDOUBLE); } - else if (PyInt_Check(op)) { - /* bools are a subclass of int */ - if (PyBool_Check(op)) { - return PyArray_DescrFromType(NPY_BOOL); - } - else { - return PyArray_DescrFromType(NPY_LONG); - } - } else if (PyLong_Check(op)) { - /* check to see if integer can fit into a longlong or ulonglong - and return that --- otherwise return object */ - if 
((PyLong_AsLongLong(op) == -1) && PyErr_Occurred()) { - PyErr_Clear(); - } - else { - return PyArray_DescrFromType(NPY_LONGLONG); - } - - if ((PyLong_AsUnsignedLongLong(op) == (unsigned long long) -1) - && PyErr_Occurred()){ - PyErr_Clear(); - } - else { - return PyArray_DescrFromType(NPY_ULONGLONG); - } - - return PyArray_DescrFromType(NPY_OBJECT); + return PyArray_PyIntAbstractDType.discover_descr_from_pyobject( + &PyArray_PyIntAbstractDType, op); } return NULL; } -/* - * These constants are used to signal that the recursive dtype determination in - * PyArray_DTypeFromObject encountered a string type, and that the recursive - * search must be restarted so that string representation lengths can be - * computed for all scalar types. - */ -#define RETRY_WITH_STRING 1 -#define RETRY_WITH_UNICODE 2 - -/* - * Recursively examines the object to determine an appropriate dtype - * to use for converting to an ndarray. - * - * 'obj' is the object to be converted to an ndarray. - * - * 'maxdims' is the maximum recursion depth. - * - * 'out_dtype' should be either NULL or a minimal starting dtype when - * the function is called. It is updated with the results of type - * promotion. This dtype does not get updated when processing NA objects. - * This is reset to NULL on failure. - * - * Returns 0 on success, -1 on failure. - */ - NPY_NO_EXPORT int -PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype) -{ - int res; - - res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_dtype, 0); - if (res == RETRY_WITH_STRING) { - res = PyArray_DTypeFromObjectHelper(obj, maxdims, - out_dtype, NPY_STRING); - if (res == RETRY_WITH_UNICODE) { - res = PyArray_DTypeFromObjectHelper(obj, maxdims, - out_dtype, NPY_UNICODE); - } - } - else if (res == RETRY_WITH_UNICODE) { - res = PyArray_DTypeFromObjectHelper(obj, maxdims, - out_dtype, NPY_UNICODE); - } - return res; -} /* * Get a suitable string dtype by calling `__str__`. 
 * For `np.bytes_`, this assumes an ASCII encoding. */ -static PyArray_Descr * +NPY_NO_EXPORT PyArray_Descr * PyArray_DTypeFromObjectStringDiscovery( PyObject *obj, PyArray_Descr *last_dtype, int string_type) { @@ -159,8 +92,8 @@ PyArray_DTypeFromObjectStringDiscovery( return NULL; } if (last_dtype != NULL && - last_dtype->type_num == string_type && - last_dtype->elsize >= itemsize) { + last_dtype->type_num == string_type && + last_dtype->elsize >= itemsize) { Py_INCREF(last_dtype); return last_dtype; } @@ -172,348 +105,28 @@ PyArray_DTypeFromObjectStringDiscovery( return dtype; } + +/* + * This function is now identical to the new PyArray_DiscoverDTypeAndShape + * but only returns the dtype. It should in most cases be slowly phased + * out. (Which may need some refactoring to PyArray_FromAny to make it simpler) + */ NPY_NO_EXPORT int -PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, - PyArray_Descr **out_dtype, int string_type) +PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype) { - int i, size; - PyArray_Descr *dtype = NULL; - PyObject *ip; - Py_buffer buffer_view; - /* types for sequence handling */ - PyObject ** objects; - PyObject * seq; - PyTypeObject * common_type; - - /* Check if it's an ndarray */ - if (PyArray_Check(obj)) { - dtype = PyArray_DESCR((PyArrayObject *)obj); - Py_INCREF(dtype); - goto promote_types; - } - - /* See if it's a python None */ - if (obj == Py_None) { - dtype = PyArray_DescrFromType(NPY_OBJECT); - if (dtype == NULL) { - goto fail; - } - goto promote_types; - } - /* Check if it's a NumPy scalar */ - else if (PyArray_IsScalar(obj, Generic)) { - if (!string_type) { - dtype = PyArray_DescrFromScalar(obj); - if (dtype == NULL) { - goto fail; - } - } - else { - dtype = PyArray_DTypeFromObjectStringDiscovery( - obj, *out_dtype, string_type); - if (dtype == NULL) { - goto fail; - } - - /* nothing to do, dtype is already correct */ - if (dtype == *out_dtype){ - Py_DECREF(dtype); - return 0; - } - } - 
goto promote_types; - } - - /* Check if it's a Python scalar */ - dtype = _array_find_python_scalar_type(obj); - if (dtype != NULL) { - if (string_type) { - /* dtype is not used in this (string discovery) branch */ - Py_DECREF(dtype); - dtype = PyArray_DTypeFromObjectStringDiscovery( - obj, *out_dtype, string_type); - if (dtype == NULL) { - goto fail; - } - - /* nothing to do, dtype is already correct */ - if (dtype == *out_dtype){ - Py_DECREF(dtype); - return 0; - } - } - goto promote_types; - } - - /* Check if it's an ASCII string */ - if (PyBytes_Check(obj)) { - int itemsize = PyString_GET_SIZE(obj); - - /* If it's already a big enough string, don't bother type promoting */ - if (*out_dtype != NULL && - (*out_dtype)->type_num == NPY_STRING && - (*out_dtype)->elsize >= itemsize) { - return 0; - } - dtype = PyArray_DescrNewFromType(NPY_STRING); - if (dtype == NULL) { - goto fail; - } - dtype->elsize = itemsize; - goto promote_types; - } - - /* Check if it's a Unicode string */ - if (PyUnicode_Check(obj)) { - int itemsize = PyUnicode_GetLength(obj); - if (itemsize < 0) { - goto fail; - } - itemsize *= 4; + coercion_cache_obj *cache = NULL; + npy_intp shape[NPY_MAXDIMS]; + int ndim; - /* - * If it's already a big enough unicode object, - * don't bother type promoting - */ - if (*out_dtype != NULL && - (*out_dtype)->type_num == NPY_UNICODE && - (*out_dtype)->elsize >= itemsize) { - return 0; - } - dtype = PyArray_DescrNewFromType(NPY_UNICODE); - if (dtype == NULL) { - goto fail; - } - dtype->elsize = itemsize; - goto promote_types; - } - - /* PEP 3118 buffer interface */ - if (PyObject_CheckBuffer(obj) == 1) { - memset(&buffer_view, 0, sizeof(Py_buffer)); - if (PyObject_GetBuffer(obj, &buffer_view, - PyBUF_FORMAT|PyBUF_STRIDES) == 0 || - PyObject_GetBuffer(obj, &buffer_view, - PyBUF_FORMAT|PyBUF_SIMPLE) == 0) { - - PyErr_Clear(); - dtype = _descriptor_from_pep3118_format(buffer_view.format); - PyBuffer_Release(&buffer_view); - if (dtype) { - goto promote_types; - } - 
} - else if (PyObject_GetBuffer(obj, &buffer_view, PyBUF_STRIDES) == 0 || - PyObject_GetBuffer(obj, &buffer_view, PyBUF_SIMPLE) == 0) { - - PyErr_Clear(); - dtype = PyArray_DescrNewFromType(NPY_VOID); - dtype->elsize = buffer_view.itemsize; - PyBuffer_Release(&buffer_view); - goto promote_types; - } - else { - PyErr_Clear(); - } - } - - /* The array interface */ - ip = PyArray_LookupSpecial_OnInstance(obj, "__array_interface__"); - if (ip != NULL) { - if (PyDict_Check(ip)) { - PyObject *typestr; - PyObject *tmp = NULL; - typestr = _PyDict_GetItemStringWithError(ip, "typestr"); - if (typestr == NULL && PyErr_Occurred()) { - goto fail; - } - /* Allow unicode type strings */ - if (typestr && PyUnicode_Check(typestr)) { - tmp = PyUnicode_AsASCIIString(typestr); - typestr = tmp; - } - if (typestr && PyBytes_Check(typestr)) { - dtype =_array_typedescr_fromstr(PyBytes_AS_STRING(typestr)); - if (tmp == typestr) { - Py_DECREF(tmp); - } - Py_DECREF(ip); - if (dtype == NULL) { - goto fail; - } - goto promote_types; - } - } - Py_DECREF(ip); - } - else if (PyErr_Occurred()) { - PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */ - } - - - /* The array struct interface */ - ip = PyArray_LookupSpecial_OnInstance(obj, "__array_struct__"); - if (ip != NULL) { - PyArrayInterface *inter; - char buf[40]; - - if (NpyCapsule_Check(ip)) { - inter = (PyArrayInterface *)NpyCapsule_AsVoidPtr(ip); - if (inter->two == 2) { - PyOS_snprintf(buf, sizeof(buf), - "|%c%d", inter->typekind, inter->itemsize); - dtype = _array_typedescr_fromstr(buf); - Py_DECREF(ip); - if (dtype == NULL) { - goto fail; - } - goto promote_types; - } - } - Py_DECREF(ip); - } - else if (PyErr_Occurred()) { - PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? 
*/ - } - - /* The __array__ attribute */ - ip = PyArray_LookupSpecial_OnInstance(obj, "__array__"); - if (ip != NULL) { - Py_DECREF(ip); - ip = PyObject_CallMethod(obj, "__array__", NULL); - if(ip && PyArray_Check(ip)) { - dtype = PyArray_DESCR((PyArrayObject *)ip); - Py_INCREF(dtype); - Py_DECREF(ip); - goto promote_types; - } - Py_XDECREF(ip); - if (PyErr_Occurred()) { - goto fail; - } - } - else if (PyErr_Occurred()) { - PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */ - } - - /* - * If we reached the maximum recursion depth without hitting one - * of the above cases, and obj isn't a sequence-like object, the output - * dtype should be either OBJECT or a user-defined type. - * - * Note that some libraries define sequence-like classes but want them to - * be treated as objects, and they expect numpy to treat it as an object if - * __len__ is not defined. - */ - if (maxdims == 0 || !PySequence_Check(obj) || PySequence_Size(obj) < 0) { - /* clear any PySequence_Size error which corrupts further calls */ - PyErr_Clear(); - - if (*out_dtype == NULL || (*out_dtype)->type_num != NPY_OBJECT) { - Py_XDECREF(*out_dtype); - *out_dtype = PyArray_DescrFromType(NPY_OBJECT); - if (*out_dtype == NULL) { - return -1; - } - } - return 0; - } - - /* - * The C-API recommends calling PySequence_Fast before any of the other - * PySequence_Fast* functions. This is required for PyPy - */ - seq = PySequence_Fast(obj, "Could not convert object to sequence"); - if (seq == NULL) { - goto fail; - } - - /* Recursive case, first check the sequence contains only one type */ - size = PySequence_Fast_GET_SIZE(seq); - /* objects is borrowed, do not release seq */ - objects = PySequence_Fast_ITEMS(seq); - common_type = size > 0 ? 
Py_TYPE(objects[0]) : NULL; - for (i = 1; i < size; ++i) { - if (Py_TYPE(objects[i]) != common_type) { - common_type = NULL; - break; - } - } - - /* all types are the same and scalar, one recursive call is enough */ - if (common_type != NULL && !string_type && - (common_type == &PyFloat_Type || -/* TODO: we could add longs if we add a range check */ - common_type == &PyBool_Type || - common_type == &PyComplex_Type)) { - size = 1; - } - - /* Recursive call for each sequence item */ - for (i = 0; i < size; ++i) { - int res = PyArray_DTypeFromObjectHelper(objects[i], maxdims - 1, - out_dtype, string_type); - if (res < 0) { - Py_DECREF(seq); - goto fail; - } - else if (res > 0) { - Py_DECREF(seq); - return res; - } + ndim = PyArray_DiscoverDTypeAndShape( + obj, maxdims, shape, &cache, NULL, NULL, out_dtype); + if (ndim < 0) { + return -1; } - - Py_DECREF(seq); - + npy_free_coercion_cache(cache); return 0; - - -promote_types: - /* Set 'out_dtype' if it's NULL */ - if (*out_dtype == NULL) { - if (!string_type && dtype->type_num == NPY_STRING) { - Py_DECREF(dtype); - return RETRY_WITH_STRING; - } - if (!string_type && dtype->type_num == NPY_UNICODE) { - Py_DECREF(dtype); - return RETRY_WITH_UNICODE; - } - *out_dtype = dtype; - return 0; - } - /* Do type promotion with 'out_dtype' */ - else { - PyArray_Descr *res_dtype = PyArray_PromoteTypes(dtype, *out_dtype); - Py_DECREF(dtype); - if (res_dtype == NULL) { - goto fail; - } - if (!string_type && - res_dtype->type_num == NPY_UNICODE && - (*out_dtype)->type_num != NPY_UNICODE) { - Py_DECREF(res_dtype); - return RETRY_WITH_UNICODE; - } - if (!string_type && - res_dtype->type_num == NPY_STRING && - (*out_dtype)->type_num != NPY_STRING) { - Py_DECREF(res_dtype); - return RETRY_WITH_STRING; - } - Py_DECREF(*out_dtype); - *out_dtype = res_dtype; - return 0; - } - -fail: - Py_XDECREF(*out_dtype); - *out_dtype = NULL; - return -1; } -#undef RETRY_WITH_STRING -#undef RETRY_WITH_UNICODE /* new reference */ NPY_NO_EXPORT PyArray_Descr 
* diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index 4ba25c079d42..793cefaf8367 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -20,6 +20,11 @@ #define NPY_BEGIN_THREADS_NDITER(iter) #endif + +NPY_NO_EXPORT PyArray_Descr * +PyArray_DTypeFromObjectStringDiscovery( + PyObject *obj, PyArray_Descr *last_dtype, int string_type); + /* * Recursively examines the object to determine an appropriate dtype * to use for converting to an ndarray. diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 7bd0886774ee..94cd1e5fa193 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -12,8 +12,10 @@ #include "npy_pycompat.h" #include "numpy/npy_math.h" +#include "array_coercion.h" #include "common.h" #include "ctors.h" +#include "dtypemeta.h" #include "scalartypes.h" #include "mapping.h" @@ -47,11 +49,11 @@ PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int is_f_order) { PyObject *out; - /* If the requested dtype is flexible, adapt it */ - dtype = PyArray_AdaptFlexibleDType((PyObject *)arr, PyArray_DESCR(arr), dtype); + Py_SETREF(dtype, PyArray_AdaptDescriptorToArray(arr, (PyObject *)dtype)); if (dtype == NULL) { return NULL; } + out = PyArray_NewFromDescr(Py_TYPE(arr), dtype, PyArray_NDIM(arr), PyArray_DIMS(arr), @@ -128,24 +130,22 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) } /* + * Legacy function to find the correct dtype when casting from any built-in + * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic + * units. + * * This function returns a dtype based on flex_dtype and the values in - * data_dtype and data_obj. It also calls Py_DECREF on the flex_dtype. If the + * data_dtype. It also calls Py_DECREF on the flex_dtype. If the * flex_dtype is not flexible, it returns it as-is. 
* * Usually, if data_obj is not an array, dtype should be the result * given by the PyArray_GetArrayParamsFromObject function. * - * The data_obj may be NULL if just a dtype is known for the source. - * * If *flex_dtype is NULL, returns immediately, without setting an * exception, leaving any previous error handling intact. - * - * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, - * and NPY_DATETIME with generic units. */ NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, - PyArray_Descr *flex_dtype) +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype) { PyArray_DatetimeMetaData *meta; PyArray_Descr *retval = NULL; @@ -227,73 +227,6 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, break; case NPY_OBJECT: size = 64; - if ((flex_type_num == NPY_STRING || - flex_type_num == NPY_UNICODE) && - data_obj != NULL) { - PyObject *list; - - if (PyArray_CheckScalar(data_obj)) { - list = PyArray_ToList((PyArrayObject *)data_obj); - if (list != NULL) { - PyObject *s = PyObject_Str(list); - if (s == NULL) { - Py_DECREF(list); - Py_DECREF(retval); - return NULL; - } - else { - size = PyObject_Length(s); - Py_DECREF(s); - } - Py_DECREF(list); - } - } - else if (PyArray_Check(data_obj)) { - /* - * Convert data array to list of objects since - * GetArrayParamsFromObject won't iterate over - * array. 
- */ - PyArray_Descr *dtype = NULL; - PyArrayObject *arr = NULL; - int result; - int ndim = 0; - npy_intp dims[NPY_MAXDIMS]; - list = PyArray_ToList((PyArrayObject *)data_obj); - result = PyArray_GetArrayParamsFromObject_int( - list, - retval, - 0, &dtype, - &ndim, dims, &arr); - Py_DECREF(list); - Py_XDECREF(arr); - if (result < 0) { - Py_XDECREF(dtype); - Py_DECREF(retval); - return NULL; - } - if (result == 0 && dtype != NULL) { - if (flex_type_num == NPY_UNICODE) { - size = dtype->elsize / 4; - } - else { - size = dtype->elsize; - } - } - Py_XDECREF(dtype); - } - else if (PyArray_IsPythonScalar(data_obj)) { - PyObject *s = PyObject_Str(data_obj); - if (s == NULL) { - Py_DECREF(retval); - return NULL; - } - else { - size = PyObject_Length(s); - Py_DECREF(s); - } - } - } break; case NPY_STRING: case NPY_VOID: @@ -353,12 +286,6 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, retval = create_datetime_dtype(flex_type_num, meta); Py_DECREF(flex_dtype); } - else if (data_obj != NULL) { - /* Detect the unit from the input's data */ - retval = find_object_datetime_type(data_obj, - flex_type_num); - Py_DECREF(flex_dtype); - } } } else { @@ -1292,7 +1219,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) PyArray_Descr *temp = PyArray_DescrNew(type1); PyDataType_MAKEUNSIZED(temp); - temp = PyArray_AdaptFlexibleDType(NULL, type2, temp); + temp = PyArray_AdaptFlexibleDType(type2, temp); if (temp == NULL) { return NULL; } @@ -1333,7 +1260,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) PyArray_Descr *ret = NULL; PyArray_Descr *temp = PyArray_DescrNew(type1); PyDataType_MAKEUNSIZED(temp); - temp = PyArray_AdaptFlexibleDType(NULL, type2, temp); + temp = PyArray_AdaptFlexibleDType(type2, temp); if (temp == NULL) { return NULL; } @@ -1384,7 +1311,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) PyArray_Descr *ret = NULL; PyArray_Descr *temp = PyArray_DescrNew(type2); 
PyDataType_MAKEUNSIZED(temp); - temp = PyArray_AdaptFlexibleDType(NULL, type1, temp); + temp = PyArray_AdaptFlexibleDType(type1, temp); if (temp == NULL) { return NULL; } @@ -1404,7 +1331,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) PyArray_Descr *ret = NULL; PyArray_Descr *temp = PyArray_DescrNew(type2); PyDataType_MAKEUNSIZED(temp); - temp = PyArray_AdaptFlexibleDType(NULL, type1, temp); + temp = PyArray_AdaptFlexibleDType(type1, temp); if (temp == NULL) { return NULL; } @@ -2154,7 +2081,6 @@ PyArray_ObjectType(PyObject *op, int minimum_type) return NPY_NOTYPE; } } - if (PyArray_DTypeFromObject(op, NPY_MAXDIMS, &dtype) < 0) { return NPY_NOTYPE; } @@ -2162,6 +2088,19 @@ PyArray_ObjectType(PyObject *op, int minimum_type) if (dtype == NULL) { ret = NPY_DEFAULT_TYPE; } + else if (!NPY_DTYPE(dtype)->legacy) { + /* + * TODO: If we keep all type number style API working, by defining + * type numbers always. We may be able to allow this again. + */ + PyErr_Format(PyExc_TypeError, + "This function currently only supports native NumPy dtypes " + "and old-style user dtypes, but the dtype was %S.\n" + "(The function may need to be updated to support arbitrary" + "user dtypes.)", + dtype); + ret = NPY_NOTYPE; + } else { ret = dtype->type_num; } diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index 4a7d851874e9..9b7f39db2e33 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -47,7 +47,6 @@ npy_set_invalid_cast_error( * and NPY_DATETIME with generic units. 
*/ NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype, - PyArray_Descr *flex_dtype); +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); #endif diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index ccebe9da6c3a..0c4ffe141916 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -30,6 +30,7 @@ #include #include "get_attr_string.h" +#include "array_coercion.h" /* * Reading from a file or a string. @@ -52,9 +53,6 @@ typedef int (*next_element)(void **, void *, PyArray_Descr *, void *); typedef int (*skip_separator)(void **, const char *, void *); -static PyObject * -_array_from_array_like(PyObject *op, - PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context); static npy_bool string_is_fully_read(char const* start, char const* end) { @@ -455,420 +453,169 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems, } } + /* - * adapted from Numarray, - * a: destination array - * s: source object, array or sequence - * dim: current recursion dimension, must be 0 on first call - * dst: must be NULL on first call - * it is a view on the destination array viewing the place where to put the - * data of the current recursion + * Recursive helper to assign using a coercion cache. This function + * must consume the cache depth first, just as the cache was originally + * produced. 
*/ -static int -setArrayFromSequence(PyArrayObject *a, PyObject *s, - int dim, PyArrayObject * dst) +NPY_NO_EXPORT int +PyArray_AssignFromCache_Recursive( + PyArrayObject *self, const int ndim, coercion_cache_obj **cache) { - Py_ssize_t i, slen; - int res = -1; - - /* first recursion, view equal destination */ - if (dst == NULL) - dst = a; + /* Consume first cache element by extracting information and freeing it */ + PyObject *original_obj = (*cache)->converted_obj; + PyObject *obj = (*cache)->arr_or_sequence; + Py_INCREF(obj); + npy_bool sequence = (*cache)->sequence; + int depth = (*cache)->depth; + *cache = npy_unlink_coercion_cache(*cache); /* - * This code is to ensure that the sequence access below will - * return a lower-dimensional sequence. + * The maximum depth is special (specifically for objects), but usually + * unrolled in the sequence branch below. */ - - /* INCREF on entry DECREF on exit */ - Py_INCREF(s); - - PyObject *seq = NULL; - - if (PyArray_Check(s)) { - if (!(PyArray_CheckExact(s))) { + if (NPY_UNLIKELY(depth == ndim)) { + /* + * We have reached the maximum depth. We should simply assign to the + * element in principle. There is one exception. If this is a 0-D + * array being stored into a 0-D array (but we do not reach here then). + */ + if (PyArray_ISOBJECT(self)) { + assert(ndim != 0); /* guaranteed by PyArray_AssignFromCache */ + assert(PyArray_NDIM(self) == 0); + Py_DECREF(obj); + return PyArray_Pack(PyArray_DESCR(self), PyArray_BYTES(self), + original_obj); + } + if (sequence) { /* - * make sure a base-class array is used so that the dimensionality - * reduction assumption is correct. + * Sanity check which may be removed, the error is raised already + * in `PyArray_DiscoverDTypeAndShape`. 
 */ - /* This will DECREF(s) if replaced */ - s = PyArray_EnsureArray(s); - if (s == NULL) { - goto fail; - } - } - - /* dst points to correct array subsection */ - if (PyArray_CopyInto(dst, (PyArrayObject *)s) < 0) { + assert(0); + PyErr_SetString(PyExc_RuntimeError, + "setting an array element with a sequence"); goto fail; } - - Py_DECREF(s); - return 0; - } - - if (dim > PyArray_NDIM(a)) { - PyErr_Format(PyExc_ValueError, - "setArrayFromSequence: sequence/array dimensions mismatch."); - goto fail; + else if (original_obj != obj || !PyArray_CheckExact(obj)) { + /* + * If the leaf node is an array-like, but not a numpy array, + * we pretend it is an arbitrary scalar. This means that in + * most cases (where the dtype is int or float), we will end + * up using float(array-like), or int(array-like). That does + * not support general casting, but helps Quantity and masked + * arrays, because it allows them to raise an error when + * `__float__()` or `__int__()` is called. + */ + Py_DECREF(obj); + return PyArray_SETITEM(self, PyArray_BYTES(self), original_obj); + } } - /* Try __array__ before using s as a sequence */ - PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL); - if (tmp == NULL) { - goto fail; - } - else if (tmp == Py_NotImplemented) { - Py_DECREF(tmp); + /* The element is either a sequence, or an array */ + if (!sequence) { + /* Straight forward array assignment */ + assert(PyArray_Check(obj)); + if (PyArray_CopyInto(self, (PyArrayObject *)obj) < 0) { + goto fail; + } } else { - int r = PyArray_CopyInto(dst, (PyArrayObject *)tmp); - Py_DECREF(tmp); - if (r < 0) { + assert(depth != ndim); + npy_intp length = PySequence_Length(obj); + if (length != PyArray_DIMS(self)[0]) { + PyErr_SetString(PyExc_RuntimeError, + "Inconsistent object during array creation? 
" + "Content of sequences changed (length inconsistent)."); goto fail; } - Py_DECREF(s); - return 0; - } - - seq = PySequence_Fast(s, "Could not convert object to sequence"); - if (seq == NULL) { - goto fail; - } - slen = PySequence_Fast_GET_SIZE(seq); - /* - * Either the dimensions match, or the sequence has length 1 and can - * be broadcast to the destination. - */ - if (slen != PyArray_DIMS(a)[dim] && slen != 1) { - PyErr_Format(PyExc_ValueError, - "cannot copy sequence with size %zd to array axis " - "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]); - goto fail; - } + for (npy_intp i = 0; i < length; i++) { + PyObject *value = PySequence_Fast_GET_ITEM(obj, i); - /* Broadcast the one element from the sequence to all the outputs */ - if (slen == 1) { - PyObject *o = PySequence_Fast_GET_ITEM(seq, 0); - npy_intp alen = PyArray_DIM(a, dim); - - for (i = 0; i < alen; i++) { - if ((PyArray_NDIM(a) - dim) > 1) { - PyArrayObject * tmp = - (PyArrayObject *)array_item_asarray(dst, i); - if (tmp == NULL) { + if (*cache == NULL || (*cache)->converted_obj != value || + (*cache)->depth != depth + 1) { + if (ndim != depth + 1) { + PyErr_SetString(PyExc_RuntimeError, + "Inconsistent object during array creation? 
" + "Content of sequences changed (now too shallow)."); goto fail; } - - res = setArrayFromSequence(a, o, dim+1, tmp); - Py_DECREF(tmp); - } - else { - char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]); - res = PyArray_SETITEM(dst, b, o); - } - if (res < 0) { - goto fail; - } - } - } - /* Copy element by element */ - else { - for (i = 0; i < slen; i++) { - PyObject * o = PySequence_Fast_GET_ITEM(seq, i); - if ((PyArray_NDIM(a) - dim) > 1) { - PyArrayObject * tmp = - (PyArrayObject *)array_item_asarray(dst, i); - if (tmp == NULL) { + /* Straight forward assignment of elements */ + char *item; + item = (PyArray_BYTES(self) + i * PyArray_STRIDES(self)[0]); + if (PyArray_Pack(PyArray_DESCR(self), item, value) < 0) { goto fail; } - - res = setArrayFromSequence(a, o, dim+1, tmp); - Py_DECREF(tmp); - } - else { - char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]); - res = PyArray_SETITEM(dst, b, o); - } - if (res < 0) { - goto fail; - } - } - } - - Py_DECREF(seq); - Py_DECREF(s); - return 0; - - fail: - Py_XDECREF(seq); - Py_DECREF(s); - return res; -} - -NPY_NO_EXPORT int -PyArray_AssignFromSequence(PyArrayObject *self, PyObject *v) -{ - if (!PySequence_Check(v)) { - PyErr_SetString(PyExc_ValueError, - "assignment from non-sequence"); - return -1; - } - if (PyArray_NDIM(self) == 0) { - PyErr_SetString(PyExc_ValueError, - "assignment to 0-d array"); - return -1; - } - return setArrayFromSequence(self, v, 0, NULL); -} - -/* - * The rest of this code is to build the right kind of array - * from a python object. 
- */ - -static int -discover_itemsize(PyObject *s, int nd, int *itemsize, int string_type) -{ - int r; - npy_intp n, i; - - if (PyArray_Check(s)) { - *itemsize = PyArray_MAX(*itemsize, PyArray_ITEMSIZE((PyArrayObject *)s)); - return 0; - } - - if ((nd == 0) || PyString_Check(s) || - PyMemoryView_Check(s) || PyUnicode_Check(s)) { - /* If an object has no length, leave it be */ - if (string_type && s != NULL && - !PyString_Check(s) && !PyUnicode_Check(s)) { - PyObject *s_string = NULL; - if (string_type == NPY_STRING) { - s_string = PyObject_Str(s); } else { - s_string = PyObject_Str(s); - } - if (s_string) { - n = PyObject_Length(s_string); - Py_DECREF(s_string); - } - else { - n = -1; + PyArrayObject *view; + view = (PyArrayObject *)array_item_asarray(self, i); + if (view < 0) { + goto fail; + } + if (PyArray_AssignFromCache_Recursive(view, ndim, cache) < 0) { + Py_DECREF(view); + goto fail; + } + Py_DECREF(view); } } - else { - n = PyObject_Length(s); - } - if (n == -1) { - PyErr_Clear(); - } - else { - *itemsize = PyArray_MAX(*itemsize, n); - } - return 0; } - - n = PySequence_Length(s); - for (i = 0; i < n; i++) { - PyObject *e = PySequence_GetItem(s,i); - - if (e == NULL) { - return -1; - } - - r = discover_itemsize(e, nd - 1, itemsize, string_type); - Py_DECREF(e); - if (r == -1) { - return -1; - } - } - + Py_DECREF(obj); return 0; -} - - -typedef enum { - DISCOVERED_OK = 0, - DISCOVERED_RAGGED = 1, - DISCOVERED_OBJECT = 2 -} discovered_t; - -static void -_discover_dimensions_array(PyArrayObject *arr, int *maxndim, npy_intp *d) { - if (PyArray_NDIM(arr) < *maxndim) { - *maxndim = PyArray_NDIM(arr); - } - for (int i = 0; i < *maxndim; i++) { - d[i] = PyArray_DIM(arr, i); - } + fail: + Py_DECREF(obj); + return -1; } -/* - * Take an arbitrary object and discover how many dimensions it - * has, filling in the dimensions as we go. +/** + * Fills an item based on a coercion cache object. It consumes the cache + * object while doing so. 
+ * + * @param self Array to fill. + * @param cache coercion_cache_object, will be consumed. The cache must not + * contain a single array (must start with a sequence). The array case + * should be handled by `PyArray_FromArray()` before. + * @return 0 on success -1 on failure. */ -static int -discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it, - int stop_at_string, int stop_at_tuple, - discovered_t *out_is_object) -{ - PyObject *e; - npy_intp n, i; - PyObject * seq; - - if (*maxndim == 0) { - return 0; - } - - /* obj is an Array */ - if (PyArray_Check(obj)) { - _discover_dimensions_array((PyArrayObject *)obj, maxndim, d); - return 0; - } - - /* obj is a Scalar */ - if (PyArray_IsScalar(obj, Generic)) { - *maxndim = 0; - return 0; - } - - /* obj is not a Sequence */ - if (!PySequence_Check(obj) || - PySequence_Length(obj) < 0) { - *maxndim = 0; - PyErr_Clear(); - return 0; - } - - /* obj is a String */ - if (PyString_Check(obj) || - PyUnicode_Check(obj)) { - if (stop_at_string) { - *maxndim = 0; - } - else { - d[0] = PySequence_Length(obj); - *maxndim = 1; - } - return 0; - } - - /* obj is a Tuple, but tuples aren't expanded */ - if (stop_at_tuple && PyTuple_Check(obj)) { - *maxndim = 0; - return 0; - } - +NPY_NO_EXPORT int +PyArray_AssignFromCache(PyArrayObject *self, coercion_cache_obj *cache) { + int ndim = PyArray_NDIM(self); /* - * In the future, the result of `_array_from_array_like` should possibly - * be cached. This may require passing the correct dtype/writable - * information already in the dimension discovery step (if they are - * distinct steps). + * Do not support ndim == 0 now with an array in the cache. + * The ndim == 0 is special because np.array(np.array(0), dtype=object) + * should unpack the inner array. + * Since the single-array case is special, it is handled previously + * in either case. 
*/ - e = _array_from_array_like(obj, NULL, NPY_FALSE, NULL); - if (e == Py_NotImplemented) { - Py_DECREF(e); - } - else if (e != NULL) { - _discover_dimensions_array((PyArrayObject *)e, maxndim, d); - Py_DECREF(e); - return 0; - } - else if (PyErr_Occurred()) { - /* TODO[gh-14801]: propagate crashes during attribute access? */ - PyErr_Clear(); - } - - seq = PySequence_Fast(obj, "Could not convert object to sequence"); - if (seq == NULL) { - /* - * PySequence_Check detects whether an old type object is a - * sequence by the presence of the __getitem__ attribute, and - * for new type objects that aren't dictionaries by the - * presence of the __len__ attribute as well. In either case it - * is possible to have an object that tests as a sequence but - * doesn't behave as a sequence and consequently, the - * PySequence_GetItem call can fail. When that happens and the - * object looks like a dictionary, we truncate the dimensions - * and set the object creation flag, otherwise we pass the - * error back up the call chain. - */ - if (PyErr_ExceptionMatches(PyExc_KeyError)) { - PyErr_Clear(); - *maxndim = 0; - *out_is_object = DISCOVERED_OBJECT; - return 0; - } - else { - return -1; - } - } - n = PySequence_Fast_GET_SIZE(seq); + assert(cache->sequence); + assert(ndim != 0); /* guaranteed if cache contains a sequence */ - d[0] = n; - - /* 1-dimensional sequence */ - if (n == 0 || *maxndim == 1) { - *maxndim = 1; - Py_DECREF(seq); - return 0; + if (PyArray_AssignFromCache_Recursive(self, ndim, &cache) < 0) { + /* free the remaining cache. 
*/ + npy_free_coercion_cache(cache); + return -1; } - else { - int all_elems_maxndim = *maxndim - 1; - npy_intp *all_elems_d = d + 1; - int all_dimensions_match = 1; - - /* Get the dimensions of the first item as a baseline */ - PyObject *first = PySequence_Fast_GET_ITEM(seq, 0); - if (discover_dimensions( - first, &all_elems_maxndim, all_elems_d, check_it, - stop_at_string, stop_at_tuple, out_is_object) < 0) { - Py_DECREF(seq); - return -1; - } - - /* Compare the dimensions of all the remaining items */ - for (i = 1; i < n; ++i) { - int j; - int elem_maxndim = *maxndim - 1; - npy_intp elem_d[NPY_MAXDIMS]; - - PyObject *elem = PySequence_Fast_GET_ITEM(seq, i); - if (discover_dimensions( - elem, &elem_maxndim, elem_d, check_it, - stop_at_string, stop_at_tuple, out_is_object) < 0) { - Py_DECREF(seq); - return -1; - } - /* Find the number of left-dimensions which match, j */ - for (j = 0; j < elem_maxndim && j < all_elems_maxndim; ++j) { - if (elem_d[j] != all_elems_d[j]) { - break; - } - } - if (j != elem_maxndim || j != all_elems_maxndim) { - all_dimensions_match = 0; - } - all_elems_maxndim = j; - } - *maxndim = all_elems_maxndim + 1; - if (!all_dimensions_match) { - /* typically results in an array containing variable-length lists */ - *out_is_object = DISCOVERED_RAGGED; - } + /* + * Sanity check, this is the initial call, and when it returns, the + * cache has to be fully consumed, otherwise something is wrong. + * NOTE: May be nicer to put into a recursion helper. + */ + if (cache != NULL) { + PyErr_SetString(PyExc_RuntimeError, + "Inconsistent object during array creation? " + "Content of sequences changed (cache not consumed)."); + return -1; } - - Py_DECREF(seq); - return 0; } + static void raise_memory_error(int nd, npy_intp const *dims, PyArray_Descr *descr) { @@ -1518,7 +1265,7 @@ _array_from_buffer_3118(PyObject *memoryview) * or NULL with an error set. (A new reference to Py_NotImplemented * is returned.) 
*/ -static PyObject * +NPY_NO_EXPORT PyObject * _array_from_array_like(PyObject *op, PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context) { PyObject* tmp; @@ -1598,313 +1345,25 @@ _array_from_array_like(PyObject *op, } -/* - * Retrieves the array parameters for viewing/converting an arbitrary - * PyObject* to a NumPy array. This allows the "innate type and shape" - * of Python list-of-lists to be discovered without - * actually converting to an array. - * - * In some cases, such as structured arrays and the __array__ interface, - * a data type needs to be used to make sense of the object. When - * this is needed, provide a Descr for 'requested_dtype', otherwise - * provide NULL. This reference is not stolen. Also, if the requested - * dtype doesn't modify the interpretation of the input, out_dtype will - * still get the "innate" dtype of the object, not the dtype passed - * in 'requested_dtype'. - * - * If writing to the value in 'op' is desired, set the boolean - * 'writeable' to 1. This raises an error when 'op' is a scalar, list - * of lists, or other non-writeable 'op'. - * - * Result: When success (0 return value) is returned, either out_arr - * is filled with a non-NULL PyArrayObject and - * the rest of the parameters are untouched, or out_arr is - * filled with NULL, and the rest of the parameters are - * filled. - * - * Typical usage: - * - * PyArrayObject *arr = NULL; - * PyArray_Descr *dtype = NULL; - * int ndim = 0; - * npy_intp dims[NPY_MAXDIMS]; - * - * if (PyArray_GetArrayParamsFromObject(op, NULL, 1, &dtype, - * &ndim, dims, &arr, NULL) < 0) { - * return NULL; - * } - * if (arr == NULL) { - * ... validate/change dtype, validate flags, ndim, etc ... - * // Could make custom strides here too - * arr = PyArray_NewFromDescr(&PyArray_Type, dtype, ndim, - * dims, NULL, - * is_f_order ? 
NPY_ARRAY_F_CONTIGUOUS : 0, - * NULL); - * if (arr == NULL) { - * return NULL; - * } - * if (PyArray_CopyObject(arr, op) < 0) { - * Py_DECREF(arr); - * return NULL; - * } - * } - * else { - * ... in this case the other parameters weren't filled, just - * validate and possibly copy arr itself ... - * } - * ... use arr ... - */ +/*NUMPY_API*/ NPY_NO_EXPORT int -PyArray_GetArrayParamsFromObject_int(PyObject *op, - PyArray_Descr *requested_dtype, - npy_bool writeable, - PyArray_Descr **out_dtype, - int *out_ndim, npy_intp *out_dims, - PyArrayObject **out_arr) +PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op), + PyArray_Descr *NPY_UNUSED(requested_dtype), + npy_bool NPY_UNUSED(writeable), + PyArray_Descr **NPY_UNUSED(out_dtype), + int *NPY_UNUSED(out_ndim), npy_intp *NPY_UNUSED(out_dims), + PyArrayObject **NPY_UNUSED(out_arr), PyObject *NPY_UNUSED(context)) { - PyObject *tmp; - - /* If op is an array */ - if (PyArray_Check(op)) { - if (writeable - && PyArray_FailUnlessWriteable((PyArrayObject *)op, "array") < 0) { - return -1; - } - Py_INCREF(op); - *out_arr = (PyArrayObject *)op; - return 0; - } - - /* If op is a NumPy scalar */ - if (PyArray_IsScalar(op, Generic)) { - if (writeable) { - PyErr_SetString(PyExc_RuntimeError, - "cannot write to scalar"); - return -1; - } - *out_dtype = PyArray_DescrFromScalar(op); - if (*out_dtype == NULL) { - return -1; - } - *out_ndim = 0; - *out_arr = NULL; - return 0; - } - - /* If op is a Python scalar */ - *out_dtype = _array_find_python_scalar_type(op); - if (*out_dtype != NULL) { - if (writeable) { - PyErr_SetString(PyExc_RuntimeError, - "cannot write to scalar"); - Py_DECREF(*out_dtype); - return -1; - } - *out_ndim = 0; - *out_arr = NULL; - return 0; - } - - /* If op is an array-like */ - tmp = _array_from_array_like(op, requested_dtype, writeable, NULL); - if (tmp == NULL) { - return -1; - } - else if (tmp != Py_NotImplemented) { - *out_arr = (PyArrayObject*) tmp; - return 0; - } - else { - 
Py_DECREF(Py_NotImplemented); - } - - /* Try to treat op as a list of lists */ - if (!writeable && PySequence_Check(op)) { - int check_it, stop_at_string, stop_at_tuple; - int type_num, type; - - /* - * Determine the type, using the requested data type if - * it will affect how the array is retrieved - */ - if (requested_dtype != NULL && ( - requested_dtype->type_num == NPY_STRING || - requested_dtype->type_num == NPY_UNICODE || - (requested_dtype->type_num == NPY_VOID && - (requested_dtype->names || requested_dtype->subarray)) || - requested_dtype->type == NPY_CHARLTR || - requested_dtype->type_num == NPY_OBJECT)) { - Py_INCREF(requested_dtype); - *out_dtype = requested_dtype; - } - else { - *out_dtype = NULL; - if (PyArray_DTypeFromObject(op, NPY_MAXDIMS, out_dtype) < 0) { - if (PyErr_ExceptionMatches(PyExc_MemoryError)) { - return -1; - } - /* Return NPY_OBJECT for most exceptions */ - else { - PyErr_Clear(); - *out_dtype = PyArray_DescrFromType(NPY_OBJECT); - if (*out_dtype == NULL) { - return -1; - } - } - } - if (*out_dtype == NULL) { - *out_dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE); - if (*out_dtype == NULL) { - return -1; - } - } - } - - type_num = (*out_dtype)->type_num; - type = (*out_dtype)->type; - - check_it = (type != NPY_CHARLTR); - stop_at_string = (type_num != NPY_STRING) || - (type == NPY_STRINGLTR); - stop_at_tuple = (type_num == NPY_VOID && - ((*out_dtype)->names || (*out_dtype)->subarray)); - - *out_ndim = NPY_MAXDIMS; - discovered_t is_object = DISCOVERED_OK; - if (discover_dimensions( - op, out_ndim, out_dims, check_it, - stop_at_string, stop_at_tuple, &is_object) < 0) { - Py_DECREF(*out_dtype); - if (PyErr_Occurred()) { - return -1; - } - *out_dtype = PyArray_DescrFromType(NPY_OBJECT); - if (*out_dtype == NULL) { - return -1; - } - *out_ndim = 0; - *out_arr = NULL; - return 0; - } - /* If object arrays are forced */ - if (is_object != DISCOVERED_OK) { - static PyObject *visibleDeprecationWarning = NULL; - npy_cache_import( - "numpy", 
"VisibleDeprecationWarning", - &visibleDeprecationWarning); - if (visibleDeprecationWarning == NULL) { - return -1; - } - if (is_object == DISCOVERED_RAGGED && requested_dtype == NULL) { - /* NumPy 1.19, 2019-11-01 */ - if (PyErr_WarnEx(visibleDeprecationWarning, "Creating an " - "ndarray from ragged nested sequences (which is a " - "list-or-tuple of lists-or-tuples-or ndarrays with " - "different lengths or shapes) is deprecated. If you " - "meant to do this, you must specify 'dtype=object' " - "when creating the ndarray", 1) < 0) - { - return -1; - } - } - /* either DISCOVERED_OBJECT or there is a requested_dtype */ - Py_DECREF(*out_dtype); - *out_dtype = PyArray_DescrFromType(NPY_OBJECT); - if (*out_dtype == NULL) { - return -1; - } - } - - if ((*out_dtype)->type == NPY_CHARLTR && (*out_ndim) > 0 && - out_dims[(*out_ndim) - 1] == 1) { - (*out_ndim) -= 1; - } - - /* If the type is flexible, determine its size */ - if (PyDataType_ISUNSIZED(*out_dtype) && - PyTypeNum_ISEXTENDED((*out_dtype)->type_num)) { - int itemsize = 0; - int string_type = 0; - if ((*out_dtype)->type_num == NPY_STRING || - (*out_dtype)->type_num == NPY_UNICODE) { - string_type = (*out_dtype)->type_num; - } - if (discover_itemsize(op, *out_ndim, &itemsize, string_type) < 0) { - Py_DECREF(*out_dtype); - if (PyErr_Occurred() && - PyErr_GivenExceptionMatches(PyErr_Occurred(), - PyExc_MemoryError)) { - return -1; - } - /* Say it's an OBJECT scalar if there's an error */ - PyErr_Clear(); - *out_dtype = PyArray_DescrFromType(NPY_OBJECT); - *out_ndim = 0; - *out_arr = NULL; - return 0; - } - if ((*out_dtype)->type_num == NPY_UNICODE) { - itemsize *= 4; - } - - if (itemsize != (*out_dtype)->elsize) { - PyArray_DESCR_REPLACE(*out_dtype); - (*out_dtype)->elsize = itemsize; - } - } - - *out_arr = NULL; - return 0; - } - - /* Anything can be viewed as an object, unless it needs to be writeable */ - if (!writeable) { - *out_dtype = PyArray_DescrFromType(NPY_OBJECT); - if (*out_dtype == NULL) { - return -1; - 
} - *out_ndim = 0; - *out_arr = NULL; - return 0; - } - + /* Deprecated in NumPy 1.19, removed in NumPy 1.20. */ PyErr_SetString(PyExc_RuntimeError, - "object cannot be viewed as a writeable numpy array"); + "PyArray_GetArrayParamsFromObject() C-API function is removed " + "`PyArray_FromAny()` should be used at this time. New C-API " + "may be exposed in the future (please do request this if it " + "would help you)."); return -1; } -/*NUMPY_API*/ -NPY_NO_EXPORT int -PyArray_GetArrayParamsFromObject(PyObject *op, - PyArray_Descr *requested_dtype, - npy_bool writeable, - PyArray_Descr **out_dtype, - int *out_ndim, npy_intp *out_dims, - PyArrayObject **out_arr, PyObject *context) -{ - /* NumPy 1.19, 2020-01-24 */ - if (DEPRECATE( - "PyArray_GetArrayParamsFromObject() C-API function is deprecated " - "and expected to be removed rapidly. If you are using it (i.e. see " - "this warning/error), please notify the NumPy developers. " - "As of now it is expected that any use case is served similarly " - "well by `PyArray_FromAny()` and this function is unused outside " - "of NumPy itself.") < 0) { - return -1; - } - - if (context != NULL) { - PyErr_SetString(PyExc_RuntimeError, "'context' must be NULL"); - return -1; - } - - return PyArray_GetArrayParamsFromObject_int(op, - requested_dtype, writeable, out_dtype, out_ndim, out_dims, - out_arr); -} - - /*NUMPY_API * Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags * Steals a reference to newtype --- which can be NULL @@ -1919,6 +1378,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, */ PyArrayObject *arr = NULL, *ret; PyArray_Descr *dtype = NULL; + coercion_cache_obj *cache = NULL; int ndim = 0; npy_intp dims[NPY_MAXDIMS]; @@ -1927,124 +1387,104 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, return NULL; } - /* Get either the array or its parameters if it isn't an array */ - if (PyArray_GetArrayParamsFromObject_int(op, - newtype, 0, &dtype, &ndim, dims, 
&arr) < 0) { + PyArray_Descr *fixed_descriptor; + PyArray_DTypeMeta *fixed_DType; + if (PyArray_ExtractDTypeAndDescriptor((PyObject *)newtype, + &fixed_descriptor, &fixed_DType) < 0) { Py_XDECREF(newtype); return NULL; } + Py_XDECREF(newtype); - /* If the requested dtype is flexible, adapt it */ - if (newtype != NULL) { - newtype = PyArray_AdaptFlexibleDType((arr == NULL) ? op : (PyObject *)arr, - (dtype == NULL) ? PyArray_DESCR(arr) : dtype, - newtype); - if (newtype == NULL) { - return NULL; - } + ndim = PyArray_DiscoverDTypeAndShape(op, + NPY_MAXDIMS, dims, &cache, fixed_DType, fixed_descriptor, &dtype); + + Py_XDECREF(fixed_descriptor); + Py_XDECREF(fixed_DType); + if (ndim < 0) { + return NULL; + } + if (dtype == NULL) { + dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE); } - /* If we got dimensions and dtype instead of an array */ - if (arr == NULL) { - if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) || - (flags & NPY_ARRAY_UPDATEIFCOPY)) { - Py_DECREF(dtype); - Py_XDECREF(newtype); - PyErr_SetString(PyExc_TypeError, - "WRITEBACKIFCOPY used for non-array input."); - return NULL; - } - else if (min_depth != 0 && ndim < min_depth) { - Py_DECREF(dtype); - Py_XDECREF(newtype); - PyErr_SetString(PyExc_ValueError, - "object of too small depth for desired array"); - ret = NULL; - } - else if (max_depth != 0 && ndim > max_depth) { - Py_DECREF(dtype); - Py_XDECREF(newtype); - PyErr_SetString(PyExc_ValueError, - "object too deep for desired array"); - ret = NULL; - } - else if (ndim == 0 && PyArray_IsScalar(op, Generic)) { - ret = (PyArrayObject *)PyArray_FromScalar(op, newtype); - Py_DECREF(dtype); - } - else { - if (newtype == NULL) { - newtype = dtype; - } - else { - /* - * TODO: would be nice to do this too, but it's - * a behavior change. 
It's also a bit tricky - * for downcasting to small integer and float - * types, and might be better to modify - * PyArray_AssignFromSequence and descr->f->setitem - * to have a 'casting' parameter and - * to check each value with scalar rules like - * in PyArray_MinScalarType. - */ - /* - if (!(flags&NPY_ARRAY_FORCECAST) && ndim > 0 && - !PyArray_CanCastTo(dtype, newtype)) { - Py_DECREF(dtype); - Py_XDECREF(newtype); - PyErr_SetString(PyExc_TypeError, - "object cannot be safely cast to array " - "of required type"); - return NULL; - } - */ - Py_DECREF(dtype); - } + if (min_depth != 0 && ndim < min_depth) { + PyErr_SetString(PyExc_ValueError, + "object of too small depth for desired array"); + Py_DECREF(dtype); + npy_free_coercion_cache(cache); + return NULL; + } + if (max_depth != 0 && ndim > max_depth) { + PyErr_SetString(PyExc_ValueError, + "object too deep for desired array"); + Py_DECREF(dtype); + npy_free_coercion_cache(cache); + return NULL; + } - /* Create an array and copy the data */ - ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, newtype, - ndim, dims, - NULL, NULL, - flags&NPY_ARRAY_F_CONTIGUOUS, NULL); - if (ret == NULL) { - return NULL; - } + /* Got the correct parameters, but the cache may already hold the result */ + if (cache != NULL && !(cache->sequence)) { + /* + * There is only a single array-like and it was converted, it + * may still have the incorrect type, but that is handled below. + */ + assert(cache->converted_obj == op); + arr = (PyArrayObject *)(cache->arr_or_sequence); + /* we may need to cast or assert flags (e.g. 
copy) */ + PyObject *res = PyArray_FromArray(arr, dtype, flags); + npy_unlink_coercion_cache(cache); + return res; + } + else if (cache == NULL && PyArray_IsScalar(op, Void) && + !(((PyVoidScalarObject *)op)->flags & NPY_ARRAY_OWNDATA) && + PyArray_EquivTypes(((PyVoidScalarObject *)op)->descr, dtype)) { + /* + * Special case, we return a *view* into void scalars, mainly to + * allow "reversed" assignment: + * arr[indx]["field"] = val # instead of arr["field"][indx] = val + */ + assert(ndim == 0); - if (ndim > 0) { - if (PyArray_AssignFromSequence(ret, op) < 0) { - Py_DECREF(ret); - ret = NULL; - } - } - else { - if (PyArray_SETITEM(ret, PyArray_DATA(ret), op) < 0) { - Py_DECREF(ret); - ret = NULL; - } - } - } + return PyArray_NewFromDescrAndBase( + &PyArray_Type, dtype, + 0, NULL, NULL, + ((PyVoidScalarObject *)op)->obval, + ((PyVoidScalarObject *)op)->flags, + NULL, op); } - else { - if (min_depth != 0 && PyArray_NDIM(arr) < min_depth) { - PyErr_SetString(PyExc_ValueError, - "object of too small depth for desired array"); - Py_DECREF(arr); - Py_XDECREF(newtype); - ret = NULL; - } - else if (max_depth != 0 && PyArray_NDIM(arr) > max_depth) { - PyErr_SetString(PyExc_ValueError, - "object too deep for desired array"); - Py_DECREF(arr); - Py_XDECREF(newtype); - ret = NULL; - } - else { - ret = (PyArrayObject *)PyArray_FromArray(arr, newtype, flags); - Py_DECREF(arr); - } + + /* There was no array (or array-like) passed in directly. */ + if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) || + (flags & NPY_ARRAY_UPDATEIFCOPY)) { + PyErr_SetString(PyExc_TypeError, + "WRITEBACKIFCOPY used for non-array input."); + Py_DECREF(dtype); + return NULL; } + /* Create a new array and copy the data */ + ret = (PyArrayObject *)PyArray_NewFromDescr( + &PyArray_Type, dtype, ndim, dims, NULL, NULL, + flags&NPY_ARRAY_F_CONTIGUOUS, NULL); + if (ret == NULL) { + return NULL; + } + if (cache == NULL) { + /* This is a single item. Set it directly. 
*/ + assert(ndim == 0); + if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) { + Py_DECREF(ret); + return NULL; + } + return (PyObject *)ret; + } + assert(ndim != 0); + assert(op == cache->converted_obj); + if (PyArray_AssignFromCache(ret, cache) < 0) { + Py_DECREF(ret); + return NULL; + } return (PyObject *)ret; } diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h index 9e63cd7d2eb9..8db1412c71c9 100644 --- a/numpy/core/src/multiarray/ctors.h +++ b/numpy/core/src/multiarray/ctors.h @@ -30,13 +30,9 @@ PyArray_New( PyTypeObject *, int nd, npy_intp const *, int, npy_intp const*, void *, int, int, PyObject *); -NPY_NO_EXPORT int -PyArray_GetArrayParamsFromObject_int(PyObject *op, - PyArray_Descr *requested_dtype, - npy_bool writeable, - PyArray_Descr **out_dtype, - int *out_ndim, npy_intp *out_dims, - PyArrayObject **out_arr); +NPY_NO_EXPORT PyObject * +_array_from_array_like(PyObject *op, + PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context); NPY_NO_EXPORT PyObject * PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, @@ -98,9 +94,6 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems, NPY_NO_EXPORT void byte_swap_vector(void *p, npy_intp n, int size); -NPY_NO_EXPORT int -PyArray_AssignFromSequence(PyArrayObject *self, PyObject *v); - /* * Calls arr_of_subclass.__array_wrap__(towrap), in order to make 'towrap' * have the same ndarray subclass as 'arr_of_subclass'. diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index cfe80189897b..8f3948c23c53 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -3429,7 +3429,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step, * * Returns 0 on success, -1 on failure. 
*/ -static int +NPY_NO_EXPORT int find_string_array_datetime64_type(PyArrayObject *arr, PyArray_DatetimeMetaData *meta) { @@ -3552,44 +3552,9 @@ find_string_array_datetime64_type(PyArrayObject *arr, * Returns 0 on success, -1 on failure. */ static int -recursive_find_object_datetime64_type(PyObject *obj, - PyArray_DatetimeMetaData *meta) +find_object_datetime64_meta(PyObject *obj, PyArray_DatetimeMetaData *meta) { - /* Array -> use its metadata */ - if (PyArray_Check(obj)) { - PyArrayObject *arr = (PyArrayObject *)obj; - PyArray_Descr *arr_dtype = PyArray_DESCR(arr); - - if (arr_dtype->type_num == NPY_STRING || - arr_dtype->type_num == NPY_UNICODE) { - return find_string_array_datetime64_type(arr, meta); - } - /* If the array has metadata, use it */ - else if (arr_dtype->type_num == NPY_DATETIME || - arr_dtype->type_num == NPY_TIMEDELTA) { - PyArray_DatetimeMetaData *tmp_meta; - - /* Get the metadata from the type */ - tmp_meta = get_datetime_metadata_from_dtype(arr_dtype); - if (tmp_meta == NULL) { - return -1; - } - - /* Combine it with 'meta' */ - if (compute_datetime_metadata_greatest_common_divisor(meta, - tmp_meta, meta, 0, 0) < 0) { - return -1; - } - - return 0; - } - /* If it's not an object array, stop looking */ - else if (arr_dtype->type_num != NPY_OBJECT) { - return 0; - } - } - /* Datetime scalar -> use its metadata */ - else if (PyArray_IsScalar(obj, Datetime)) { + if (PyArray_IsScalar(obj, Datetime)) { PyDatetimeScalarObject *dts = (PyDatetimeScalarObject *)obj; /* Combine it with 'meta' */ @@ -3661,34 +3626,6 @@ recursive_find_object_datetime64_type(PyObject *obj, return 0; } - - /* Now check if what we have left is a sequence for recursion */ - if (PySequence_Check(obj)) { - Py_ssize_t i, len = PySequence_Size(obj); - if (len < 0 && PyErr_Occurred()) { - return -1; - } - - for (i = 0; i < len; ++i) { - int ret; - PyObject *f = PySequence_GetItem(obj, i); - if (f == NULL) { - return -1; - } - if (Npy_EnterRecursiveCall(" in 
recursive_find_object_datetime64_type") != 0) { - Py_DECREF(f); - return -1; - } - ret = recursive_find_object_datetime64_type(f, meta); - Py_LeaveRecursiveCall(); - Py_DECREF(f); - if (ret < 0) { - return ret; - } - } - - return 0; - } /* Otherwise ignore it */ else { return 0; @@ -3722,70 +3659,10 @@ delta_checker(PyArray_DatetimeMetaData *meta) * Returns 0 on success, -1 on failure. */ static int -recursive_find_object_timedelta64_type(PyObject *obj, - PyArray_DatetimeMetaData *meta) +find_object_timedelta64_meta(PyObject *obj, PyArray_DatetimeMetaData *meta) { - /* Array -> use its metadata */ - if (PyArray_Check(obj)) { - PyArrayObject *arr = (PyArrayObject *)obj; - PyArray_Descr *arr_dtype = PyArray_DESCR(arr); - - /* If the array has metadata, use it */ - if (arr_dtype->type_num == NPY_DATETIME || - arr_dtype->type_num == NPY_TIMEDELTA) { - PyArray_DatetimeMetaData *tmp_meta; - - /* Get the metadata from the type */ - tmp_meta = get_datetime_metadata_from_dtype(arr_dtype); - if (tmp_meta == NULL) { - return -1; - } - - /* Combine it with 'meta' */ - if (compute_datetime_metadata_greatest_common_divisor(meta, - tmp_meta, meta, 0, 0) < 0) { - return -1; - } - - return 0; - } - /* If it's not an object array, stop looking */ - else if (arr_dtype->type_num != NPY_OBJECT) { - return 0; - } - else { - if (PyArray_NDIM(arr) == 0) { - /* - * special handling of 0 dimensional NumPy object - * arrays, which may be indexed to retrieve their - * single object using [()], but not by using - * __getitem__(integer) approaches - */ - PyObject *item, *args; - - args = PyTuple_New(0); - if (args == NULL) { - return 0; - } - item = PyObject_GetItem(obj, args); - Py_DECREF(args); - if (item == NULL) { - return 0; - } - /* - * NOTE: may need other type checks here in the future - * for expanded 0 D datetime array conversions? 
- */ - if (PyDelta_Check(item)) { - Py_DECREF(item); - return delta_checker(meta); - } - Py_DECREF(item); - } - } - } /* Datetime scalar -> use its metadata */ - else if (PyArray_IsScalar(obj, Timedelta)) { + if (PyArray_IsScalar(obj, Timedelta)) { PyTimedeltaScalarObject *dts = (PyTimedeltaScalarObject *)obj; /* Combine it with 'meta' */ @@ -3805,34 +3682,6 @@ recursive_find_object_timedelta64_type(PyObject *obj, else if (PyDelta_Check(obj)) { return delta_checker(meta); } - - /* Now check if what we have left is a sequence for recursion */ - if (PySequence_Check(obj)) { - Py_ssize_t i, len = PySequence_Size(obj); - if (len < 0 && PyErr_Occurred()) { - return -1; - } - - for (i = 0; i < len; ++i) { - int ret; - PyObject *f = PySequence_GetItem(obj, i); - if (f == NULL) { - return -1; - } - if (Npy_EnterRecursiveCall(" in recursive_find_object_timedelta64_type") != 0) { - Py_DECREF(f); - return -1; - } - ret = recursive_find_object_timedelta64_type(f, meta); - Py_LeaveRecursiveCall(); - Py_DECREF(f); - if (ret < 0) { - return ret; - } - } - - return 0; - } /* Otherwise ignore it */ else { return 0; @@ -3853,7 +3702,7 @@ find_object_datetime_type(PyObject *obj, int type_num) meta.num = 1; if (type_num == NPY_DATETIME) { - if (recursive_find_object_datetime64_type(obj, &meta) < 0) { + if (find_object_datetime64_meta(obj, &meta) < 0) { return NULL; } else { @@ -3861,7 +3710,7 @@ find_object_datetime_type(PyObject *obj, int type_num) } } else if (type_num == NPY_TIMEDELTA) { - if (recursive_find_object_timedelta64_type(obj, &meta) < 0) { + if (find_object_timedelta64_meta(obj, &meta) < 0) { return NULL; } else { diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 4e37b9628ac1..a7c33d88fa06 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1801,9 +1801,10 @@ static void arraydescr_dealloc(PyArray_Descr *self) { if (self->fields == Py_None) { - fprintf(stderr, "*** Reference 
count error detected: \n" \ - "an attempt was made to deallocate %d (%c) ***\n", + fprintf(stderr, "*** Reference count error detected: " + "an attempt was made to deallocate the dtype %d (%c) ***\n", self->type_num, self->type); + assert(0); Py_INCREF(self); Py_INCREF(self); return; diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index a26426d41549..3a58b58498ce 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -1099,7 +1099,7 @@ get_datetime_to_unicode_transfer_function(int aligned, /* Get an ASCII string data type, adapted to match the UNICODE one */ str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(NULL, dst_dtype, str_dtype); + str_dtype = PyArray_AdaptFlexibleDType(dst_dtype, str_dtype); if (str_dtype == NULL) { return NPY_FAIL; } @@ -1222,7 +1222,7 @@ get_unicode_to_datetime_transfer_function(int aligned, /* Get an ASCII string data type, adapted to match the UNICODE one */ str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(NULL, src_dtype, str_dtype); + str_dtype = PyArray_AdaptFlexibleDType(src_dtype, str_dtype); if (str_dtype == NULL) { return NPY_FAIL; } diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index 9982cd6761ce..3026e68e9af0 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -8,9 +8,13 @@ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE #include +#include #include "npy_pycompat.h" +#include "common.h" #include "dtypemeta.h" +#include "_datetime.h" +#include "array_coercion.h" static void @@ -104,6 +108,179 @@ legacy_dtype_default_new(PyArray_DTypeMeta *self, return (PyObject *)self->singleton; } + +static PyArray_Descr * +nonparametric_discover_descr_from_pyobject( + PyArray_DTypeMeta *cls, PyObject *obj) +{ + /* If the object is of the correct scalar 
type return our singleton */ + assert(!cls->parametric); + Py_INCREF(cls->singleton); + return cls->singleton; +} + + +static PyArray_Descr * +string_discover_descr_from_pyobject( + PyArray_DTypeMeta *cls, PyObject *obj) +{ + npy_intp itemsize = -1; + if (PyBytes_Check(obj)) { + itemsize = PyBytes_Size(obj); + } + else if (PyUnicode_Check(obj)) { + itemsize = PyUnicode_GetLength(obj); + } + if (itemsize != -1) { + if (cls->type_num == NPY_UNICODE) { + itemsize *= 4; + } + if (itemsize > NPY_MAX_INT) { + PyErr_SetString(PyExc_TypeError, + "string to large to store inside array."); + } + PyArray_Descr *res = PyArray_DescrNewFromType(cls->type_num); + res->elsize = (int)itemsize; + return res; + } + return PyArray_DTypeFromObjectStringDiscovery(obj, NULL, cls->type_num); +} + + +static PyArray_Descr * +void_discover_descr_from_pyobject( + PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj) +{ + if (PyArray_IsScalar(obj, Void)) { + PyVoidScalarObject *void_obj = (PyVoidScalarObject *)obj; + Py_INCREF(void_obj->descr); + return void_obj->descr; + } + if (PyBytes_Check(obj)) { + PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_VOID); + Py_ssize_t itemsize = (int)PyBytes_Size(obj); + if (itemsize > NPY_MAX_INT) { + PyErr_SetString(PyExc_TypeError, + "byte-like to large to store inside array."); + } + descr->elsize = itemsize; + return descr; + } + PyErr_Format(PyExc_TypeError, + "A bytes-like object is required, not '%s'", Py_TYPE(obj)->tp_name); + return NULL; +} + + +static PyArray_Descr * +discover_datetime_and_timedelta_from_pyobject( + PyArray_DTypeMeta *cls, PyObject *obj) { + if (PyArray_IsScalar(obj, Datetime) || + PyArray_IsScalar(obj, Timedelta)) { + PyArray_DatetimeMetaData *meta; + PyArray_Descr *descr = PyArray_DescrFromScalar(obj); + meta = get_datetime_metadata_from_dtype(descr); + if (meta == NULL) { + return NULL; + } + PyArray_Descr *new_descr = create_datetime_dtype(cls->type_num, meta); + Py_DECREF(descr); + return new_descr; + } + else { + return 
find_object_datetime_type(obj, cls->type_num); + } +} + + +static PyArray_Descr * +flexible_default_descr(PyArray_DTypeMeta *cls) +{ + PyArray_Descr *res = PyArray_DescrNewFromType(cls->type_num); + if (res == NULL) { + return NULL; + } + res->elsize = 1; + if (cls->type_num == NPY_UNICODE) { + res->elsize *= 4; + } + return res; +} + + +static int +python_builtins_are_known_scalar_types( + PyArray_DTypeMeta *NPY_UNUSED(cls), PyTypeObject *pytype) +{ + /* + * Always accept the common Python types, this ensures that we do not + * convert pyfloat->float64->integers. Subclasses are hopefully rejected + * as being discovered. + * This is necessary only for python scalar classes which we discover + * as valid DTypes. + */ + if (pytype == &PyFloat_Type) { + return 1; + } + if (pytype == &PyLong_Type) { + return 1; + } + if (pytype == &PyBool_Type) { + return 1; + } + if (pytype == &PyComplex_Type) { + return 1; + } + if (pytype == &PyUnicode_Type) { + return 1; + } + if (pytype == &PyBytes_Type) { + return 1; + } + return 0; +} + + +static int +datetime_known_scalar_types( + PyArray_DTypeMeta *cls, PyTypeObject *pytype) +{ + if (python_builtins_are_known_scalar_types(cls, pytype)) { + return 1; + } + /* + * To be able to identify the descriptor from e.g. any string, datetime + * must take charge. Otherwise we would attempt casting which does not + * truly support this. Only object arrays are special cased in this way. + */ + return (PyType_IsSubtype(pytype, &PyString_Type) || + PyType_IsSubtype(pytype, &PyUnicode_Type)); +} + + +static int +string_known_scalar_types( + PyArray_DTypeMeta *cls, PyTypeObject *pytype) { + if (python_builtins_are_known_scalar_types(cls, pytype)) { + return 1; + } + if (PyType_IsSubtype(pytype, &PyDatetimeArrType_Type)) { + /* + * TODO: This should likely be deprecated or otherwise resolved. + * Deprecation has to occur in `String->setitem` unfortunately. 
* Datetimes currently do not cast to shorter strings,
97152d1ada8c..e0909a7eb4b2 100644 --- a/numpy/core/src/multiarray/dtypemeta.h +++ b/numpy/core/src/multiarray/dtypemeta.h @@ -1,6 +1,8 @@ #ifndef _NPY_DTYPEMETA_H #define _NPY_DTYPEMETA_H +#define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr)) + NPY_NO_EXPORT int dtypemeta_wrap_legacy_descriptor(PyArray_Descr *dtypem); diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 45c019f493e5..8052e24e424b 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -26,7 +26,7 @@ #include "npy_binsearch.h" #include "alloc.h" #include "arraytypes.h" - +#include "array_coercion.h" static NPY_GCC_OPT_3 NPY_INLINE int @@ -2629,5 +2629,5 @@ PyArray_MultiIndexSetItem(PyArrayObject *self, const npy_intp *multi_index, data += ind * strides[idim]; } - return PyArray_SETITEM(self, data, obj); + return PyArray_Pack(PyArray_DESCR(self), data, obj); } diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c index c71b7b770c07..ac5b90400fe5 100644 --- a/numpy/core/src/multiarray/iterators.c +++ b/numpy/core/src/multiarray/iterators.c @@ -15,6 +15,7 @@ #include "iterators.h" #include "ctors.h" #include "common.h" +#include "array_coercion.h" #define NEWAXIS_INDEX -1 #define ELLIPSIS_INDEX -2 @@ -824,7 +825,7 @@ iter_ass_subscript(PyArrayIterObject *self, PyObject *ind, PyObject *val) if (PyBool_Check(ind)) { retval = 0; if (PyObject_IsTrue(ind)) { - retval = PyArray_SETITEM(self->ao, self->dataptr, val); + retval = PyArray_Pack(PyArray_DESCR(self->ao), self->dataptr, val); } goto finish; } @@ -841,7 +842,7 @@ iter_ass_subscript(PyArrayIterObject *self, PyObject *ind, PyObject *val) goto finish; } PyArray_ITER_GOTO1D(self, start); - retval = type->f->setitem(val, self->dataptr, self->ao); + retval = PyArray_Pack(PyArray_DESCR(self->ao), self->dataptr, val); PyArray_ITER_RESET(self); if (retval < 0) { PyErr_SetString(PyExc_ValueError, diff --git 
a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index f73cb48d9357..c27e0c391ea9 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -22,6 +22,7 @@ #include "item_selection.h" #include "mem_overlap.h" #include "array_assign.h" +#include "array_coercion.h" #define HAS_INTEGER 1 @@ -1754,7 +1755,7 @@ array_assign_item(PyArrayObject *self, Py_ssize_t i, PyObject *op) if (get_item_pointer(self, &item, indices, 1) < 0) { return -1; } - if (PyArray_SETITEM(self, item, op) < 0) { + if (PyArray_Pack(PyArray_DESCR(self), item, op) < 0) { return -1; } } @@ -1832,7 +1833,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op) if (get_item_pointer(self, &item, indices, index_num) < 0) { return -1; } - if (PyArray_SETITEM(self, item, op) < 0) { + if (PyArray_Pack(PyArray_DESCR(self), item, op) < 0) { return -1; } /* integers do not store objects in indices */ diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index d81650ecdc16..a2db8042f004 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -14,6 +14,7 @@ #include "npy_pycompat.h" #include "npy_import.h" #include "ufunc_override.h" +#include "array_coercion.h" #include "common.h" #include "templ_common.h" /* for npy_mul_with_overflow_intp */ #include "ctors.h" @@ -809,6 +810,12 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds) return NULL; } + /* If it is not a concrete dtype instance find the best one for the array */ + Py_SETREF(dtype, PyArray_AdaptDescriptorToArray(self, (PyObject *)dtype)); + if (dtype == NULL) { + return NULL; + } + /* * If the memory layout matches and, data types are equivalent, * and it's not a subtype if subok is False, then we @@ -831,13 +838,6 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds) else if (PyArray_CanCastArrayTo(self, dtype, casting)) { PyArrayObject *ret; - /* If the requested 
dtype is flexible, adapt it */ - dtype = PyArray_AdaptFlexibleDType((PyObject *)self, - PyArray_DESCR(self), dtype); - if (dtype == NULL) { - return NULL; - } - /* This steals the reference to dtype, so no DECREF of dtype */ ret = (PyArrayObject *)PyArray_NewLikeArray( self, order, dtype, subok); diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 4190c53bda7f..9a34685f4254 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -35,6 +35,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; /* Internal APIs */ #include "alloc.h" +#include "abstractdtypes.h" +#include "array_coercion.h" #include "arrayfunction_override.h" #include "arraytypes.h" #include "arrayobject.h" @@ -823,6 +825,9 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2) PyObject* ret = NULL; typenum = PyArray_ObjectType(op1, 0); + if (typenum == NPY_NOTYPE && PyErr_Occurred()) { + return NULL; + } typenum = PyArray_ObjectType(op2, typenum); typec = PyArray_DescrFromType(typenum); if (typec == NULL) { @@ -912,6 +917,9 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out) NPY_BEGIN_THREADS_DEF; typenum = PyArray_ObjectType(op1, 0); + if (typenum == NPY_NOTYPE && PyErr_Occurred()) { + return NULL; + } typenum = PyArray_ObjectType(op2, typenum); typec = PyArray_DescrFromType(typenum); if (typec == NULL) { @@ -3975,6 +3983,7 @@ normalize_axis_index(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds) return PyInt_FromLong(axis); } + static struct PyMethodDef array_module_methods[] = { {"_get_implementing_args", (PyCFunction)array__get_implementing_args, @@ -4151,6 +4160,8 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"set_legacy_print_mode", (PyCFunction)set_legacy_print_mode, METH_VARARGS, NULL}, + {"_discover_array_parameters", (PyCFunction)_discover_array_parameters, + METH_VARARGS | METH_KEYWORDS, NULL}, /* from 
umath */ {"frompyfunc", (PyCFunction) ufunc_frompyfunc, @@ -4620,6 +4631,9 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { if (set_typeinfo(d) != 0) { goto err; } + if (initialize_and_map_pytypes_to_dtypes() < 0) { + goto err; + } if (initumath(m) != 0) { goto err; } diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c index f2dbc9f03d2d..7da17eafe31b 100644 --- a/numpy/core/src/multiarray/nditer_constr.c +++ b/numpy/core/src/multiarray/nditer_constr.c @@ -16,6 +16,7 @@ #include "nditer_impl.h" #include "arrayobject.h" +#include "array_coercion.h" #include "templ_common.h" #include "array_assign.h" @@ -1101,17 +1102,11 @@ npyiter_prepare_one_operand(PyArrayObject **op, */ if (op_request_dtype != NULL) { /* We just have a borrowed reference to op_request_dtype */ - Py_INCREF(op_request_dtype); - /* If the requested dtype is flexible, adapt it */ - op_request_dtype = PyArray_AdaptFlexibleDType((PyObject *)(*op), PyArray_DESCR(*op), - op_request_dtype); - if (op_request_dtype == NULL) { + Py_SETREF(*op_dtype, PyArray_AdaptDescriptorToArray( + *op, (PyObject *)op_request_dtype)); + if (*op_dtype == NULL) { return 0; } - - /* Store the requested dtype */ - Py_DECREF(*op_dtype); - *op_dtype = op_request_dtype; } /* Check if the operand is in the byte order requested */ diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py index b8d4b5cdf7e1..30019b253023 100644 --- a/numpy/core/tests/test_array_coercion.py +++ b/numpy/core/tests/test_array_coercion.py @@ -141,12 +141,6 @@ class TestStringDiscovery: [object(), 1.2, 10**43, None, "string"], ids=["object", "1.2", "10**43", "None", "string"]) def test_basic_stringlength(self, obj): - if not isinstance(obj, (str, int)): - pytest.xfail( - "The Single object (first assert) uses a different branch " - "and thus gives a different result (either wrong or longer" - "string than normally discovered).") - length = len(str(obj)) expected = 
np.dtype(f"S{length}") @@ -156,8 +150,9 @@ def test_basic_stringlength(self, obj): # A nested array is also discovered correctly arr = np.array(obj, dtype="O") assert np.array(arr, dtype="S").dtype == expected + # Check that .astype() behaves identical + assert arr.astype("S").dtype == expected - @pytest.mark.xfail(reason="Only single array unpacking is supported") @pytest.mark.parametrize("obj", [object(), 1.2, 10**43, None, "string"], ids=["object", "1.2", "10**43", "None", "string"]) @@ -167,7 +162,6 @@ def test_nested_arrays_stringlength(self, obj): arr = np.array(obj, dtype="O") assert np.array([arr, arr], dtype="S").dtype == expected - @pytest.mark.xfail(reason="Only single array unpacking is supported") @pytest.mark.parametrize("arraylike", arraylikes()) def test_unpack_first_level(self, arraylike): # We unpack exactly one level of array likes @@ -223,21 +217,22 @@ def test_scalar(self, scalar): assert arr.shape == () assert arr.dtype == scalar.dtype - if type(scalar) is np.bytes_: - pytest.xfail("Nested bytes use len(str(scalar)) currently.") - arr = np.array([[scalar, scalar]]) assert arr.shape == (1, 2) assert arr.dtype == scalar.dtype # Additionally to string this test also runs into a corner case # with datetime promotion (the difference is the promotion order). 
- @pytest.mark.xfail(reason="Coercion to string is not symmetric") def test_scalar_promotion(self): for sc1, sc2 in product(scalar_instances(), scalar_instances()): sc1, sc2 = sc1.values[0], sc2.values[0] # test all combinations: - arr = np.array([sc1, sc2]) + try: + arr = np.array([sc1, sc2]) + except (TypeError, ValueError): + # The promotion between two times can fail + # XFAIL (ValueError): Some object casts are currently undefined + continue assert arr.shape == (2,) try: dt1, dt2 = sc1.dtype, sc2.dtype @@ -255,11 +250,10 @@ def test_scalar_coercion(self, scalar): # Ensure we have a full-precision number if available scalar = type(scalar)((scalar * 2)**0.5) - if is_parametric_dtype(scalar.dtype) or type(scalar) is rational: - # datetime with unit will be named "datetime64[unit]" + if type(scalar) is rational: # Rational generally fails due to a missing cast. In the future # object casts should automatically be defined based on `setitem`. - pytest.xfail("0-D object array to a unit-less datetime cast fails") + pytest.xfail("Rational to object cast is undefined currently.") # Use casting from object: arr = np.array(scalar, dtype=object).astype(scalar.dtype) @@ -279,9 +273,7 @@ def test_scalar_coercion(self, scalar): @pytest.mark.xfail(IS_PYPY, reason="`int(np.complex128(3))` fails on PyPy") @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning") - # After change, can enable times here, and below and it will work, - # Right now times are too complex, so map out some details below. 
- @pytest.mark.parametrize("cast_to", scalar_instances(times=False)) + @pytest.mark.parametrize("cast_to", scalar_instances()) def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to): """ Test that in most cases: @@ -293,10 +285,7 @@ def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to): """ dtype = cast_to.dtype # use to parametrize only the target dtype - # XFAIL: Some extended precision tests fail, because assigning to - # complex256 will use float(float128). Rational fails currently. - for scalar in scalar_instances( - times=False, extended_precision=False, user_dtype=False): + for scalar in scalar_instances(times=False): scalar = scalar.values[0] if dtype.type == np.void: @@ -306,7 +295,7 @@ def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to): # this, but has different rules than the cast. with pytest.raises(TypeError): np.array(scalar).astype(dtype) - # XFAIL: np.array(scalar, dtype=dtype) + np.array(scalar, dtype=dtype) np.array([scalar], dtype=dtype) continue @@ -342,9 +331,6 @@ class TestTimeScalars: param(np.timedelta64(123, "s"), id="timedelta64[s]"), param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"), param(np.datetime64(1, "D"), id="datetime64[D]")],) - @pytest.mark.xfail( - reason="This uses int(scalar) or float(scalar) to assign, which " - "fails. 
However, casting currently does not fail.") def test_coercion_basic(self, dtype, scalar): arr = np.array(scalar, dtype=dtype) cast = np.array(scalar).astype(dtype) @@ -369,25 +355,48 @@ def test_coercion_timedelta_convert_to_number(self, dtype, scalar): assert_array_equal(arr, cast) assert_array_equal(cast, cast) + @pytest.mark.parametrize("dtype", ["S6", "U6"]) @pytest.mark.parametrize(["val", "unit"], [param(123, "s", id="[s]"), param(123, "D", id="[D]")]) - @pytest.mark.parametrize("scalar_type", [np.datetime64, np.timedelta64]) - @pytest.mark.xfail(reason="Error not raised for assignment") - def test_coercion_assignment_times(self, scalar_type, val, unit): - scalar = scalar_type(val, unit) + def test_coercion_assignment_datetime(self, val, unit, dtype): + # String from datetime64 assignment is currently special cased to + # never use casting. This is because casting will error in this + # case, and traditionally in most cases the behaviour is maintained + # like this. (`np.array(scalar, dtype="U6")` would have failed before) + # TODO: This discrepancy _should_ be resolved, either by relaxing the + # cast, or by deprecating the first part. 
+ scalar = np.datetime64(val, unit) + dtype = np.dtype(dtype) + cut_string = dtype.type(str(scalar)[:6]) + + arr = np.array(scalar, dtype=dtype) + assert arr[()] == cut_string + ass = np.ones((), dtype=dtype) + ass[()] = scalar + assert ass[()] == cut_string - # The error type is not ideal, fails because string is too short: - with pytest.raises(RuntimeError): - np.array(scalar, dtype="S6") - with pytest.raises(RuntimeError): - cast = np.array(scalar).astype("S6") - ass = np.ones((), dtype="S6") with pytest.raises(RuntimeError): - ass[()] = scalar + # However, unlike the above assignment using `str(scalar)[:6]` + # due to being handled by the string DType and not by casting + # the explicit cast fails: + np.array(scalar).astype(dtype) + @pytest.mark.parametrize(["val", "unit"], + [param(123, "s", id="[s]"), param(123, "D", id="[D]")]) + def test_coercion_assignment_timedelta(self, val, unit): + scalar = np.timedelta64(val, unit) + + # Unlike datetime64, timedelta allows the unsafe cast: + np.array(scalar, dtype="S6") + cast = np.array(scalar).astype("S6") + ass = np.ones((), dtype="S6") + ass[()] = scalar + expected = scalar.astype("S")[:6] + assert cast[()] == expected + assert ass[()] == expected + + class TestNested: - @pytest.mark.xfail(reason="No deprecation warning given.") def test_nested_simple(self): initial = [1.2] nested = initial @@ -417,11 +426,6 @@ def test_pathological_self_containing(self): arr = np.array([l, [None], l], dtype=object) assert arr.shape == (3, 1) - @pytest.mark.xfail( - reason="For arrays and memoryview, this used to not complain " - "and assign to a too small array instead. 
For other " - "array-likes the error is different because fewer (only " - "MAXDIM-1) dimensions are found, failing the last test.") @pytest.mark.parametrize("arraylike", arraylikes()) def test_nested_arraylikes(self, arraylike): # We try storing an array like into an array, but the array-like @@ -432,10 +436,6 @@ def test_nested_arraylikes(self, arraylike): # assigned to it (which does work for object or if `float(arraylike)` # works). initial = arraylike(np.ones((1, 1))) - #if not isinstance(initial, (np.ndarray, memoryview)): - # pytest.xfail( - # "When coercing to object, these cases currently discover " - # "fewer dimensions than ndarray failing the second part.") nested = initial for i in range(np.MAXDIMS - 1): @@ -463,11 +463,6 @@ def test_uneven_depth_ragged(self, arraylike): assert out[0] is arr assert type(out[1]) is list - if not isinstance(arr, (np.ndarray, memoryview)): - pytest.xfail( - "does not raise ValueError below, because it discovers " - "the dimension as (2,) and not (2, 2, 2)") - # Array is ragged in the third dimension: with pytest.raises(ValueError): # This is a broadcast error during assignment, because @@ -500,7 +495,7 @@ def __len__(self): obj.append(mylist([1, 2])) - with pytest.raises(ValueError): # changes to RuntimeError + with pytest.raises(RuntimeError): np.array(obj) # Note: We do not test a shrinking list. These do very evil things @@ -517,8 +512,8 @@ def __len__(self): obj.append([2, 3]) obj.append(mylist([1, 2])) - #with pytest.raises(RuntimeError): # Will error in the future - np.array(obj) + with pytest.raises(RuntimeError): + np.array(obj) def test_replace_0d_array(self): # List to coerce, `mylist` will mutate the first element @@ -534,8 +529,8 @@ def __getitem__(self): # Runs into a corner case in the new code, the `array(2)` is cached # so replacing it invalidates the cache. 
obj.append([np.array(2), baditem()]) - # with pytest.raises(RuntimeError): # Will error in the future - np.array(obj) + with pytest.raises(RuntimeError): + np.array(obj) class TestArrayLikes: diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index fef1e24d8f5e..59a3954fd6d8 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -2329,9 +2329,21 @@ def test_corecursive_input(self): obj_arr = np.array([None]) obj_arr[0] = a - # gh-11154: This shouldn't cause a C stack overflow - assert_raises(RecursionError, obj_arr.astype, 'M8') - assert_raises(RecursionError, obj_arr.astype, 'm8') + # At some point this caused a stack overflow (gh-11154). Now raises + # ValueError since the nested list cannot be converted to a datetime. + assert_raises(ValueError, obj_arr.astype, 'M8') + assert_raises(ValueError, obj_arr.astype, 'm8') + + @pytest.mark.parametrize("shape", [(), (1,)]) + def test_discovery_from_object_array(self, shape): + arr = np.array("2020-10-10", dtype=object).reshape(shape) + res = np.array("2020-10-10", dtype="M8").reshape(shape) + assert res.dtype == np.dtype("M8[D]") + assert_equal(arr.astype("M8"), res) + arr[...] = np.bytes_("2020-10-10") # try a numpy string type + assert_equal(arr.astype("M8"), res) + arr = arr.astype("S") + assert_equal(arr.astype("S").astype("M8"), res) @pytest.mark.parametrize("time_unit", [ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as", diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 239d20c9d130..68502addaded 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -537,6 +537,22 @@ def test_deprecate_ragged_arrays(): np.array(arg) +class TestTooDeepDeprecation(_VisibleDeprecationTestCase): + # NumPy 1.20, 2020-05-08 + # This is a bit similar to the above ragged array deprecation case. 
+ message = re.escape("Creating an ndarray from nested sequences exceeding") + def test_deprecation(self): + nested = [1] + for i in range(np.MAXDIMS - 1): + nested = [nested] + self.assert_not_deprecated(np.array, args=(nested,)) + self.assert_not_deprecated(np.array, + args=(nested,), kwargs=dict(dtype=object)) + + self.assert_deprecated(np.array, args=([nested],)) + + class TestToString(_DeprecationTestCase): # 2020-03-06 1.19.0 message = re.escape("tostring() is deprecated. Use tobytes() instead.") diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py index f6e263774ae6..1069cbe8dd3a 100644 --- a/numpy/core/tests/test_indexing.py +++ b/numpy/core/tests/test_indexing.py @@ -538,6 +538,15 @@ def test_indexing_array_negative_strides(self): arr[slices] = 10 assert_array_equal(arr, 10.) + def test_character_assignment(self): + # This is an example of a function going through CopyObject which + # used to have an untested special path for scalars + # (the character special dtype case, should be deprecated probably) + arr = np.zeros((1, 5), dtype="c") + arr[0] = np.str_("asdfg") # must assign as a sequence + assert_array_equal(arr[0], np.array("asdfg", dtype="c")) + assert arr[0, 1] == b"s" # make sure not all were set to "a" for both + class TestFieldIndexing: def test_scalar_return_type(self): # Field access on an array should return an array, even if it diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 01169f938836..fb3a5f50b2de 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -1012,6 +1012,8 @@ def __len__(self): with assert_raises(ValueError): a[:] = C() # Segfault! 
+ np.array(C()) == list(C()) + def test_failed_len_sequence(self): # gh-7393 class A: diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index cf18a5d9354e..0b921fcb7f83 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -2450,7 +2450,8 @@ def test_bad_array_interface(self): class T: __array_interface__ = {} - np.array([T()]) + with assert_raises(ValueError): + np.array([T()]) def test_2d__array__shape(self): class T(object): diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 76a92f5ca50e..27f14a5e72a4 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -215,6 +215,17 @@ def test_creation_maskcreation(self): y = array([1, 2, 3], mask=x._mask, copy=True) assert_(not np.may_share_memory(x.mask, y.mask)) + def test_masked_singleton_array_creation_warns(self): + # The first works, but should not (ideally), there may be no way + # to solve this, however, as long as `np.ma.masked` is an ndarray. + np.array(np.ma.masked) + with pytest.warns(UserWarning): + # Tries to create a float array, using `float(np.ma.masked)`. + # We may want to define this is invalid behaviour in the future! + # (requiring np.ma.masked to be a known NumPy scalar probably + # with a DType.) + np.array([3., np.ma.masked]) + def test_creation_with_list_of_maskedarrays(self): # Tests creating a masked array from a list of masked arrays. x = array(np.arange(5), mask=[1, 0, 0, 0, 0])