DEP: Deprecate promotion to strings · numpy/numpy@c39cbb0 · GitHub
[go: up one dir, main page]

Skip to content

Commit c39cbb0

Browse files
committed
DEP: Deprecate promotion to strings
1 parent 94d3302 commit c39cbb0

10 files changed

+205
-68
lines changed

numpy/core/src/multiarray/common.c

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "usertypes.h"
1313

1414
#include "common.h"
15+
#include "convert_datatype.h"
1516
#include "npy_buffer.h"
1617

1718
#include "get_attr_string.h"
@@ -81,6 +82,12 @@ _array_find_python_scalar_type(PyObject *op)
8182
* PyArray_DTypeFromObject encountered a string type, and that the recursive
8283
* search must be restarted so that string representation lengths can be
8384
* computed for all scalar types.
85+
*
86+
* DEPRECATED NumPy 1.19, 2020-03
87+
* The use of RETRY_WITH_STRING and RETRY_WITH_UNICODE is only necessary
88+
* because promotion of numbers to strings was valid. By deprecating promotion
89+
* this path is effectively unnecessary. Users have to provide dtype="U"
90+
* instead.
8491
*/
8592
#define RETRY_WITH_STRING 1
8693
#define RETRY_WITH_UNICODE 2
@@ -104,19 +111,33 @@ _array_find_python_scalar_type(PyObject *op)
104111
PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
105112
{
106113
int res;
114+
npy_bool string_promotion = NPY_FALSE;
107115

108-
res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_dtype, 0);
116+
res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_dtype,
117+
0, &string_promotion);
109118
if (res == RETRY_WITH_STRING) {
110-
res = PyArray_DTypeFromObjectHelper(obj, maxdims,
111-
out_dtype, NPY_STRING);
119+
res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_dtype,
120+
NPY_STRING, &string_promotion);
112121
if (res == RETRY_WITH_UNICODE) {
113-
res = PyArray_DTypeFromObjectHelper(obj, maxdims,
114-
out_dtype, NPY_UNICODE);
122+
res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_dtype,
123+
NPY_UNICODE, &string_promotion);
115124
}
116125
}
117126
else if (res == RETRY_WITH_UNICODE) {
118-
res = PyArray_DTypeFromObjectHelper(obj, maxdims,
119-
out_dtype, NPY_UNICODE);
127+
res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_dtype,
128+
NPY_UNICODE, &string_promotion);
129+
}
130+
if (string_promotion) {
131+
/* Deprecated NumPy 1.19, 2020-04 */
132+
if (DEPRECATE(
133+
"Creating an array from a mix of strings and numbers "
134+
"is deprecated and will require specifying the datatype. "
135+
"Use `np.array(..., dtype=object)` if you wish an object array "
136+
"and `np.array(..., dtype='U')` or `np.array(..., dtype='S')` "
137+
"if you wish string/bytes array respectively.") < 0){
138+
Py_SETREF(*out_dtype, NULL);
139+
return -1;
140+
}
120141
}
121142
return res;
122143
}
@@ -174,7 +195,8 @@ PyArray_DTypeFromObjectStringDiscovery(
174195

175196
NPY_NO_EXPORT int
176197
PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
177-
PyArray_Descr **out_dtype, int string_type)
198+
PyArray_Descr **out_dtype, int string_type,
199+
npy_bool *string_promotion)
178200
{
179201
int i, size;
180202
PyArray_Descr *dtype = NULL;
@@ -209,14 +231,17 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
209231
}
210232
}
211233
else {
234+
if (!PyArray_IsScalar(obj, Flexible)) {
235+
*string_promotion = NPY_TRUE;
236+
}
212237
dtype = PyArray_DTypeFromObjectStringDiscovery(
213238
obj, *out_dtype, string_type);
214239
if (dtype == NULL) {
215240
goto fail;
216241
}
217242

218243
/* nothing to do, dtype is already correct */
219-
if (dtype == *out_dtype){
244+
if (dtype == *out_dtype) {
220245
Py_DECREF(dtype);
221246
return 0;
222247
}
@@ -232,6 +257,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
232257
Py_DECREF(dtype);
233258
dtype = PyArray_DTypeFromObjectStringDiscovery(
234259
obj, *out_dtype, string_type);
260+
*string_promotion = NPY_TRUE;
235261
if (dtype == NULL) {
236262
goto fail;
237263
}
@@ -453,8 +479,8 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
453479

454480
/* Recursive call for each sequence item */
455481
for (i = 0; i < size; ++i) {
456-
int res = PyArray_DTypeFromObjectHelper(objects[i], maxdims - 1,
457-
out_dtype, string_type);
482+
int res = PyArray_DTypeFromObjectHelper(objects[i],
483+
maxdims - 1, out_dtype, string_type, string_promotion);
458484
if (res < 0) {
459485
Py_DECREF(seq);
460486
goto fail;
@@ -486,7 +512,9 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
486512
}
487513
/* Do type promotion with 'out_dtype' */
488514
else {
489-
PyArray_Descr *res_dtype = PyArray_PromoteTypes(dtype, *out_dtype);
515+
/* If string type is already set, this will not warn again. */
516+
PyArray_Descr *res_dtype = PyArray_PromoteTypes_int(
517+
dtype, *out_dtype, string_promotion);
490518
Py_DECREF(dtype);
491519
if (res_dtype == NULL) {
492520
goto fail;

numpy/core/src/multiarray/common.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims,
3939

4040
NPY_NO_EXPORT int
4141
PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
42-
PyArray_Descr **out_dtype, int string_status);
42+
PyArray_Descr **out_dtype, int string_status,
43+
npy_bool *string_promotion);
4344

4445
/*
4546
* Returns NULL without setting an exception if no scalar is matched, a

numpy/core/src/multiarray/convert_datatype.c

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,40 @@ ensure_dtype_nbo(PyArray_Descr *type)
10741074
*/
10751075
NPY_NO_EXPORT PyArray_Descr *
10761076
PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
1077+
{
1078+
npy_bool string_promotion = NPY_FALSE;
1079+
PyArray_Descr *res;
1080+
res = PyArray_PromoteTypes_int(type1, type2, &string_promotion);
1081+
if (string_promotion) {
1082+
/* Deprecated NumPy 1.19, 2020-04 */
1083+
if (DEPRECATE(
1084+
"Promotion of numbers and bools to strings is deprecated. "
1085+
"This will return an error in the future, manually use object "
1086+
"or the string datatype in the future.\n"
1087+
"For example you may need to cast using `arr.astype('U')` "
1088+
"before concatenating an array integers to an array of "
1089+
"strings.") < 0) {
1090+
Py_DECREF(res);
1091+
return NULL;
1092+
}
1093+
}
1094+
return res;
1095+
}
1096+
1097+
1098+
/**
1099+
* Same as PyArray_PromoteTypes, but returns whether a deprecated string
1100+
* promotion occurred instead of giving a warning directly.
1101+
*
1102+
* @param type1
1103+
* @param type2
1104+
* @param string_promotion boolean flag returning True if a deprecated string
1105+
* promotion occurred.
1106+
* @returns New descriptor or NULL on error.
1107+
*/
1108+
NPY_NO_EXPORT PyArray_Descr *
1109+
PyArray_PromoteTypes_int(PyArray_Descr *type1, PyArray_Descr *type2,
1110+
npy_bool *string_promotion)
10771111
{
10781112
int type_num1, type_num2, ret_type_num;
10791113

@@ -1192,6 +1226,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
11921226
switch (type_num1) {
11931227
/* BOOL can convert to anything except datetime/void */
11941228
case NPY_BOOL:
1229+
*string_promotion = NPY_TRUE;
11951230
if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) {
11961231
int char_size = 1;
11971232
if (type_num2 == NPY_UNICODE) {
@@ -1234,8 +1269,10 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
12341269
return d;
12351270
}
12361271
}
1237-
/* Allow NUMBER -> STRING */
1272+
/* Allow NUMBER (or bool) -> STRING */
12381273
else if (PyTypeNum_ISNUMBER(type_num2)) {
1274+
/* Deprecated NumPy 1.19, 2020-04 */
1275+
*string_promotion = NPY_TRUE;
12391276
PyArray_Descr *ret = NULL;
12401277
PyArray_Descr *temp = PyArray_DescrNew(type1);
12411278
PyDataType_MAKEUNSIZED(temp);
@@ -1276,8 +1313,10 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
12761313
return d;
12771314
}
12781315
}
1279-
/* Allow NUMBER -> UNICODE */
1316+
/* Allow NUMBER (or bool) -> UNICODE */
12801317
else if (PyTypeNum_ISNUMBER(type_num2)) {
1318+
/* Deprecated NumPy 1.19, 2020-04 */
1319+
*string_promotion = NPY_TRUE;
12811320
PyArray_Descr *ret = NULL;
12821321
PyArray_Descr *temp = PyArray_DescrNew(type1);
12831322
PyDataType_MAKEUNSIZED(temp);
@@ -1307,6 +1346,8 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
13071346
/* BOOL can convert to almost anything */
13081347
case NPY_BOOL:
13091348
if (type_num2 == NPY_STRING || type_num2 == NPY_UNICODE) {
1349+
/* Deprecated NumPy 1.19, 2020-04 */
1350+
*string_promotion = NPY_TRUE;
13101351
int char_size = 1;
13111352
if (type_num2 == NPY_UNICODE) {
13121353
char_size = 4;
@@ -1327,8 +1368,10 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
13271368
}
13281369
break;
13291370
case NPY_STRING:
1330-
/* Allow NUMBER -> STRING */
1371+
/* Allow NUMBER (or bool) -> STRING */
13311372
if (PyTypeNum_ISNUMBER(type_num1)) {
1373+
/* Deprecated NumPy 1.19, 2020-03 */
1374+
*string_promotion = NPY_TRUE;
13321375
PyArray_Descr *ret = NULL;
13331376
PyArray_Descr *temp = PyArray_DescrNew(type2);
13341377
PyDataType_MAKEUNSIZED(temp);
@@ -1347,8 +1390,10 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
13471390
}
13481391
break;
13491392
case NPY_UNICODE:
1350-
/* Allow NUMBER -> UNICODE */
1393+
/* Allow NUMBER (or bool) -> UNICODE */
13511394
if (PyTypeNum_ISNUMBER(type_num1)) {
1395+
/* Deprecated NumPy 1.19, 2020-04 */
1396+
*string_promotion = NPY_TRUE;
13521397
PyArray_Descr *ret = NULL;
13531398
PyArray_Descr *temp = PyArray_DescrNew(type2);
13541399
PyDataType_MAKEUNSIZED(temp);

numpy/core/src/multiarray/convert_datatype.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,8 @@ NPY_NO_EXPORT PyArray_Descr *
4242
PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
4343
PyArray_Descr *flex_dtype);
4444

45+
NPY_NO_EXPORT PyArray_Descr *
46+
PyArray_PromoteTypes_int(PyArray_Descr *type1, PyArray_Descr *type2,
47+
npy_bool *string_promotion);
48+
4549
#endif

numpy/core/src/multiarray/ctors.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,6 +1748,11 @@ PyArray_GetArrayParamsFromObject_int(PyObject *op,
17481748
Py_INCREF(requested_dtype);
17491749
*out_dtype = requested_dtype;
17501750
}
1751+
else if (requested_dtype != NULL &&
1752+
PyDataType_ISDATETIME(requested_dtype)) {\
1753+
/* Convert to an object array for AdaptFlexibleDType */
1754+
*out_dtype = PyArray_DescrFromType(NPY_OBJECT);
1755+
}
17511756
else {
17521757
*out_dtype = NULL;
17531758
if (PyArray_DTypeFromObject(op, NPY_MAXDIMS, out_dtype) < 0) {

numpy/core/src/umath/ufunc_type_resolution.c

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,17 +124,22 @@ raise_no_loop_found_error(
124124
if (dtypes_tup == NULL) {
125125
return -1;
126126
}
127-
for (i = 0; i < ufunc->nargs; ++i) {
128-
Py_INCREF(dtypes[i]);
129-
PyTuple_SET_ITEM(dtypes_tup, i, (PyObject *)dtypes[i]);
130-
}
131127

132128
/* produce an error object */
133129
exc_value = PyTuple_Pack(2, ufunc, dtypes_tup);
134130
Py_DECREF(dtypes_tup);
135-
if (exc_value == NULL){
131+
if (exc_value == NULL) {
136132
return -1;
137133
}
134+
for (i = 0; i < ufunc->nargs; ++i) {
135+
PyObject *tmp = Py_None;
136+
if (dtypes[i] != NULL) {
137+
tmp = (PyObject *)dtypes[i];
138+
}
139+
Py_INCREF(tmp);
140+
PyTuple_SET_ITEM(dtypes_tup, i, tmp);
141+
}
142+
138143
PyErr_SetObject(exc_type, exc_value);
139144
Py_DECREF(exc_value);
140145

@@ -358,13 +363,30 @@ PyUFunc_SimpleBinaryComparisonTypeResolver(PyUFuncObject *ufunc,
358363
}
359364

360365
if (type_tup == NULL) {
361-
/* Input types are the result type */
362-
out_dtypes[0] = PyArray_ResultType(2, operands, 0, NULL);
363-
if (out_dtypes[0] == NULL) {
364-
return -1;
366+
/*
367+
* Input types are the result type, however, disallow flexible
368+
* (especially strings) since their ResultType is deprecated.
369+
* NOTE: User dtypes could define this, but this path is expected
370+
* to be a legacy fallback only in the future.
371+
* Deprecated NumPy 1.19, 2020-03 in ResultType, once gone, this
372+
* could be simplified or chain the error given by ResultType.
373+
*/
374+
if (!PyArray_ISFLEXIBLE(operands[0]) &&
375+
!PyArray_ISFLEXIBLE(operands[1])) {
376+
out_dtypes[0] = PyArray_ResultType(2, operands, 0, NULL);
377+
if (out_dtypes[0] == NULL) {
378+
return -1;
379+
}
380+
out_dtypes[1] = out_dtypes[0];
381+
Py_INCREF(out_dtypes[1]);
382+
}
383+
else {
384+
/* Not doing anything will lead to a loop not found error. */
385+
out_dtypes[0] = PyArray_DESCR(operands[0]);
386+
Py_INCREF(out_dtypes[0]);
387+
out_dtypes[1] = PyArray_DESCR(operands[0]);
388+
Py_INCREF(out_dtypes[1]);
365389
}
366-
out_dtypes[1] = out_dtypes[0];
367-
Py_INCREF(out_dtypes[1]);
368390
}
369391
else {
370392
PyObject *item;
@@ -517,6 +539,31 @@ PyUFunc_SimpleUniformOperationTypeResolver(
517539
out_dtypes[0] = ensure_dtype_nbo(PyArray_DESCR(operands[0]));
518540
}
519541
else {
542+
int iop;
543+
npy_bool has_flexible = 0;
544+
npy_bool has_object = 0;
545+
for (iop = 0; iop < ufunc->nin; iop++) {
546+
if (PyArray_ISOBJECT(operands[iop])) {
547+
has_object = 1;
548+
}
549+
if (PyArray_ISFLEXIBLE(operands[iop])) {
550+
has_flexible = 1;
551+
}
552+
}
553+
if (has_flexible && !has_object) {
554+
/*
555+
* DEPRECATED NumPy 1.19, 2020-03 the following check is needed
556+
* to avoid the warning within ResultType. Effectively these
557+
* types could never promote for ufuncs, so disallow them.
558+
* We have to do this before PyArray_ResultType is called,
559+
* since it would give a spurious DeprecationWarning.
560+
*/
561+
PyErr_Format(PyExc_TypeError,
562+
"No loop matching the specified signature and "
563+
"casting was found for ufunc %s. Strings and other "
564+
"flexible datatypes are unsupported.", ufunc_name);
565+
return -1;
566+
}
520567
out_dtypes[0] = PyArray_ResultType(ufunc->nin, operands, 0, NULL);
521568
}
522569
if (out_dtypes[0] == NULL) {

0 commit comments

Comments
 (0)
0