8000 ENH: convert-dtype: Abstract the flexible dtype mechanism into a func… · numpy/numpy@9fa48ed · GitHub
[go: up one dir, main page]

Skip to content

Commit 9fa48ed

Browse files
author
Mark Wiebe
committed
ENH: convert-dtype: Abstract the flexible dtype mechanism into a function
Since datetime adds a new flexible dtype (datetime with generic units), updating all the places where flexible dtypes are adjusted was error-prone. Thus, this adjustment logic has been moved to a single place. At the same time, I've added string-size estimates for the various number types, so they no longer produce size-one strings by default.
1 parent e834cd4 commit 9fa48ed

10 files changed

+571
-259
lines changed

numpy/core/arrayprint.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
245245
'complexfloat' : ComplexFormat(data, precision,
246246
suppress_small),
247247
'longcomplexfloat' : LongComplexFormat(precision),
248-
'datetime' : DatetimeFormat(True, None, -1),
248+
'datetime' : DatetimeFormat(),
249249
'timedelta' : TimedeltaFormat(data),
250250
'numpystr' : repr,
251251
'str' : str}
@@ -698,16 +698,17 @@ def __call__(self, x):
698698
return r + i
699699

700700
class DatetimeFormat(object):
701-
def __init__(self, uselocaltime=True, overrideunit=None, tzoffset=-1):
702-
self.local = uselocaltime
701+
def __init__(self, overrideunit=None,
702+
timezone='local', casting='same_kind'):
703+
self.timezone = timezone
703704
self.unit = overrideunit
704-
self.tzoffset = -1
705+
self.casting = casting
705706

706707
def __call__(self, x):
707708
return "'%s'" % datetime_as_string(x,
708-
local=self.local,
709709
unit=self.unit,
710-
tzoffset=self.tzoffset)
710+
timezone=self.timezone,
711+
casting=self.casting)
711712

712713
class TimedeltaFormat(object):
713714
def __init__(self, data):

numpy/core/src/multiarray/common.c

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ _array_typedescr_fromstr(char *str)
309309
switch (typechar) {
310310
case 'b':
311311
if (size == sizeof(Bool)) {
312-
type_num = PyArray_BOOL;
312+
type_num = NPY_BOOL;
313313
}
314314
else {
315315
PyErr_SetString(PyExc_ValueError, msg);
@@ -318,22 +318,22 @@ _array_typedescr_fromstr(char *str)
318318
break;
319319
case 'u':
320320
if (size == sizeof(uintp)) {
321-
type_num = PyArray_UINTP;
321+
type_num = NPY_UINTP;
322322
}
323323
else if (size == sizeof(char)) {
324-
type_num = PyArray_UBYTE;
324+
type_num = NPY_UBYTE;
325325
}
326326
else if (size == sizeof(short)) {
327-
type_num = PyArray_USHORT;
327+
type_num = NPY_USHORT;
328328
}
329329
else if (size == sizeof(ulong)) {
330-
type_num = PyArray_ULONG;
330+
type_num = NPY_ULONG;
331331
}
332332
else if (size == sizeof(int)) {
333-
type_num = PyArray_UINT;
333+
type_num = NPY_UINT;
334334
}
335335
else if (size == sizeof(ulonglong)) {
336-
type_num = PyArray_ULONGLONG;
336+
type_num = NPY_ULONGLONG;
337337
}
338338
else {
339339
PyErr_SetString(PyExc_ValueError, msg);
@@ -342,22 +342,22 @@ _array_typedescr_fromstr(char *str)
342342
break;
343343
case 'i':
344344
if (size == sizeof(intp)) {
345-
type_num = PyArray_INTP;
345+
type_num = NPY_INTP;
346346
}
347347
else if (size == sizeof(char)) {
348-
type_num = PyArray_BYTE;
348+
type_num = NPY_BYTE;
349349
}
350350
else if (size == sizeof(short)) {
351-
type_num = PyArray_SHORT;
351+
type_num = NPY_SHORT;
352352
}
353353
else if (size == sizeof(long)) {
354-
type_num = PyArray_LONG;
354+
type_num = NPY_LONG;
355355
}
356356
else if (size == sizeof(int)) {
357-
type_num = PyArray_INT;
357+
type_num = NPY_INT;
358358
}
359359
else if (size == sizeof(longlong)) {
360-
type_num = PyArray_LONGLONG;
360+
type_num = NPY_LONGLONG;
361361
}
362362
else {
363363
PyErr_SetString(PyExc_ValueError, msg);
@@ -366,13 +366,13 @@ _array_typedescr_fromstr(char *str)
366366
break;
367367
case 'f':
368368
if (size == sizeof(float)) {
369-
type_num = PyArray_FLOAT;
369+
type_num = NPY_FLOAT;
370370
}
371371
else if (size == sizeof(double)) {
372-
type_num = PyArray_DOUBLE;
372+
type_num = NPY_DOUBLE;
373373
}
374374
else if (size == sizeof(longdouble)) {
375-
type_num = PyArray_LONGDOUBLE;
375+
type_num = NPY_LONGDOUBLE;
376376
}
377377
else {
378378
PyErr_SetString(PyExc_ValueError, msg);
@@ -381,13 +381,13 @@ _array_typedescr_fromstr(char *str)
381381
break;
382382
case 'c':
383383
if (size == sizeof(float)*2) {
384-
type_num = PyArray_CFLOAT;
384+
type_num = NPY_CFLOAT;
385385
}
386386
else if (size == sizeof(double)*2) {
387-
type_num = PyArray_CDOUBLE;
387+
type_num = NPY_CDOUBLE;
388388
}
389389
else if (size == sizeof(longdouble)*2) {
390-
type_num = PyArray_CLONGDOUBLE;
390+
type_num = NPY_CLONGDOUBLE;
391391
}
392392
else {
393393
PyErr_SetString(PyExc_ValueError, msg);
@@ -396,22 +396,22 @@ _array_typedescr_fromstr(char *str)
396396
break;
397397
case 'O':
398398
if (size == sizeof(PyObject *)) {
399-
type_num = PyArray_OBJECT;
399+
type_num = NPY_OBJECT;
400400
}
401401
else {
402402
PyErr_SetString(PyExc_ValueError, msg);
403403
return NULL;
404404
}
405405
break;
406-
case PyArray_STRINGLTR:
407-
type_num = PyArray_STRING;
406+
case NPY_STRINGLTR:
407+
type_num = NPY_STRING;
408408
break;
409-
case PyArray_UNICODELTR:
410-
type_num = PyArray_UNICODE;
409+
case NPY_UNICODELTR:
410+
type_num = NPY_UNICODE;
411411
size <<= 2;
412412
break;
413413
case 'V':
414-
type_num = PyArray_VOID;
414+
type_num = NPY_VOID;
415415
break;
416416
default:
417417
PyErr_SetString(PyExc_ValueError, msg);

numpy/core/src/multiarray/convert_datatype.c

Lines changed: 132 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "convert_datatype.h"
1919
#include "_datetime.h"
20+
#include "datetime_strings.h"
2021

2122
/*NUMPY_API
2223
* For backward compatibility
@@ -31,30 +32,11 @@ NPY_NO_EXPORT PyObject *
3132
PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int fortran)
3233
{
3334
PyObject *out;
34-
PyArray_Descr *arr_dtype;
3535

36-
arr_dtype = PyArray_DESCR(arr);
37-
38-
if (dtype->elsize == 0) {
39-
PyArray_DESCR_REPLACE(dtype);
40-
if (dtype == NULL) {
41-
return NULL;
42-
}
43-
44-
if (arr_dtype->type_num == dtype->type_num) {
45-
dtype->elsize = arr_dtype->elsize;
46-
}
47-
else if (arr_dtype->type_num == NPY_STRING &&
48-
dtype->type_num == NPY_UNICODE) {
49-
dtype->elsize = arr_dtype->elsize * 4;
50-
}
51-
else if (arr_dtype->type_num == NPY_UNICODE &&
52-
dtype->type_num == NPY_STRING) {
53-
dtype->elsize = arr_dtype->elsize / 4;
54-
}
55-
else if (dtype->type_num == NPY_VOID) {
56-
dtype->elsize = arr_dtype->elsize;
57-
}
36+
/* If the requested dtype is flexible, adapt it */
37+
PyArray_AdaptFlexibleType((PyObject *)arr, PyArray_DESCR(arr), &dtype);
38+
if (dtype == NULL) {
39+
return NULL;
5840
}
5941

6042
out = PyArray_NewFromDescr(Py_TYPE(arr), dtype,
@@ -136,6 +118,133 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
136118
return NULL;
137119
}
138120

121+
/*
122+
* This function calls Py_DECREF on flex_dtype, and replaces it with
123+
* a new dtype that has been adapted based on the values in data_dtype
124+
* and data_obj. If the flex_dtype is not flexible, it leaves it as is.
125+
*
126+
* The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID,
127+
* and NPY_DATETIME with generic units.
128+
*/
129+
NPY_NO_EXPORT void
130+
PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
131+
PyArray_Descr **flex_dtype)
132+
{
133+
PyArray_DatetimeMetaData *meta;
134+
135+
/* Flexible types with expandable size */
136+
if ((*flex_dtype)->elsize == 0) {
137+
/* First replace the flex dtype */
138+
PyArray_DESCR_REPLACE(*flex_dtype);
139+
if (*flex_dtype == NULL) {
140+
return;
141+
}
142+
143+
if (data_dtype->type_num == (*flex_dtype)->type_num ||
144+
(*flex_dtype)->type_num == NPY_VOID) {
145+
(*flex_dtype)->elsize = data_dtype->elsize;
146+
}
147+
else {
148+
npy_intp size = 8;
149+
150+
/* Get a string-size estimate of the input */
151+
switch (data_dtype->type_num) {
152+
case NPY_BOOL:
153+
size = 5;
154+
break;
155+
case NPY_UBYTE:
156+
size = 3;
157+
break;
158+
case NPY_BYTE:
159+
size = 4;
160+
break;
161+
case NPY_USHORT:
162+
size = 5;
163+
break;
164+
case NPY_SHORT:
165+
size = 6;
166+
break;
167+
case NPY_UINT:
168+
size = 10;
169+
break;
170+
case NPY_INT:
171+
size = 6;
172+
break;
173+
case NPY_ULONG:
174+
size = 20;
175+
break;
176+
case NPY_LONG:
177+
size = 21;
178+
break;
179+
case NPY_ULONGLONG:
180+
size = 20;
181+
break;
182+
case NPY_LONGLONG:
183+
size = 21;
184+
break;
185+
case NPY_HALF:
186+
case NPY_FLOAT:
187+
case NPY_DOUBLE:
188+
case NPY_LONGDOUBLE:
189+
size = 32;
190+
break;
191+
case NPY_CFLOAT:
192+
case NPY_CDOUBLE:
193+
case NPY_CLONGDOUBLE:
194+
size = 64;
195+
break;
196+
case NPY_OBJECT:
197+
size = 64;
198+
break;
199+
case NPY_STRING:
200+
case NPY_VOID:
201+
size = data_dtype->elsize;
202+
break;
203+
case NPY_UNICODE:
204+
size = data_dtype->elsize / 4;
205+
break;
206+
case NPY_DATETIME:
207+
meta = get_datetime_metadata_from_dtype(data_dtype);
208+
if (meta == NULL) {
209+
Py_DECREF(*flex_dtype);
210+
*flex_dtype = NULL;
211+
return;
212+
}
213+
size = get_datetime_iso_8601_strlen(0, meta->base);
214+
break;
215+
case NPY_TIMEDELTA:
216+
size = 21;
217+
break;
218+
}
219+
220+
if ((*flex_dtype)->type_num == NPY_STRING) {
221+
(*flex_dtype)->elsize = size;
222+
}
223+
else if ((*flex_dtype)->type_num == NPY_UNICODE) {
224+
(*flex_dtype)->elsize = size * 4;
225+
}
226+
}
227+
}
228+
/* Flexible type with generic time unit that adapts */
229+
else if ((*flex_dtype)->type_num == NPY_DATETIME ||
230+
(*flex_dtype)->type_num == NPY_TIMEDELTA) {
231+
meta = get_datetime_metadata_from_dtype(*flex_dtype);
232+
if (meta == NULL) {
233+
Py_DECREF(*flex_dtype);
234+
*flex_dtype = NULL;
235+
return;
236+
}
237+
238+
if (meta->base == NPY_FR_GENERIC) {
239+
/* Detect the unit from the input's data */
240+
PyArray_Descr *dtype = find_object_datetime_type(data_obj,
241+
(*flex_dtype)->type_num);
242+
Py_DECREF(*flex_dtype);
243+
*flex_dtype = dtype;
244+
}
245+
}
246+
}
247+
139248
/*
140249
* Must be broadcastable.
141250
* This code is very similar to PyArray_CopyInto/PyArray_MoveInto

numpy/core/src/multiarray/convert_datatype.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,16 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn);
1313
NPY_NO_EXPORT int
1414
PyArray_ValidType(int type);
1515

16+
/*
17+
* This function calls Py_DECREF on flex_dtype, and replaces it with
18+
* a new dtype that has been adapted based on the values in data_dtype
19+
* and data_obj. If the flex_dtype is not flexible, it leaves it as is.
20+
*
21+
* The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID,
22+
* and NPY_DATETIME with generic units.
23+
*/
24+
NPY_NO_EXPORT void
25+
PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
26+
PyArray_Descr **flex_dtype);
27+
1628
#endif

0 commit comments

Comments
 (0)
0