Datetime dev 2 by mwiebe · Pull Request #93 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

Datetime dev 2 #93

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jun 22, 2011
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
ENH: convert-dtype: Abstract the flexible dtype mechanism into a function

Since the datetime adds a new flexible dtype (datetime with generic units),
updating all the places where flexible dtypes are adjusted was error-prone.
Thus, this has been moved to a single place.

At the same time, I've added sizes for the various number types, so
they don't produce size-one strings by default anymore
  • Loading branch information
Mark Wiebe committed Jun 18, 2011
commit 9fa48edb19a731dcc87e7feb497a9e728c15e4d5
13 changes: 7 additions & 6 deletions numpy/core/arrayprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def _array2string(a, max_line_width, precision, suppress_small, separator=' ',
'complexfloat' : ComplexFormat(data, precision,
suppress_small),
'longcomplexfloat' : LongComplexFormat(precision),
'datetime' : DatetimeFormat(True, None, -1),
'datetime' : DatetimeFormat(),
'timedelta' : TimedeltaFormat(data),
'numpystr' : repr,
'str' : str}
Expand Down Expand Up @@ -698,16 +698,17 @@ def __call__(self, x):
return r + i

class DatetimeFormat(object):
def __init__(self, uselocaltime=True, overrideunit=None, tzoffset=-1):
self.local = uselocaltime
def __init__(self, overrideunit=None,
timezone='local', casting='same_kind'):
self.timezone = timezone
self.unit = overrideunit
self.tzoffset = -1
self.casting = casting

def __call__(self, x):
return "'%s'" % datetime_as_string(x,
local=self.local,
unit=self.unit,
tzoffset=self.tzoffset)
timezone=self.timezone,
casting=self.casting)

class TimedeltaFormat(object):
def __init__(self, data):
Expand Down
50 changes: 25 additions & 25 deletions numpy/core/src/multiarray/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ _array_typedescr_fromstr(char *str)
switch (typechar) {
case 'b':
if (size == sizeof(Bool)) {
type_num = PyArray_BOOL;
type_num = NPY_BOOL;
}
else {
PyErr_SetString(PyExc_ValueError, msg);
Expand All @@ -318,22 +318,22 @@ _array_typedescr_fromstr(char *str)
break;
case 'u':
if (size == sizeof(uintp)) {
type_num = PyArray_UINTP;
type_num = NPY_UINTP;
}
else if (size == sizeof(char)) {
type_num = PyArray_UBYTE;
type_num = NPY_UBYTE;
}
else if (size == sizeof(short)) {
type_num = PyArray_USHORT;
type_num = NPY_USHORT;
}
else if (size == sizeof(ulong)) {
type_num = PyArray_ULONG;
type_num = NPY_ULONG;
}
else if (size == sizeof(int)) {
type_num = PyArray_UINT;
type_num = NPY_UINT;
}
else if (size == sizeof(ulonglong)) {
type_num = PyArray_ULONGLONG;
type_num = NPY_ULONGLONG;
}
else {
PyErr_SetString(PyExc_ValueError, msg);
Expand All @@ -342,22 +342,22 @@ _array_typedescr_fromstr(char *str)
break;
case 'i':
if (size == sizeof(intp)) {
type_num = PyArray_INTP;
type_num = NPY_INTP;
}
else if (size == sizeof(char)) {
type_num = PyArray_BYTE;
type_num = NPY_BYTE;
}
else if (size == sizeof(short)) {
type_num = PyArray_SHORT;
type_num = NPY_SHORT;
}
else if (size == sizeof(long)) {
type_num = PyArray_LONG;
type_num = NPY_LONG;
}
else if (size == sizeof(int)) {
type_num = PyArray_INT;
type_num = NPY_INT;
}
else if (size == sizeof(longlong)) {
type_num = PyArray_LONGLONG;
type_num = NPY_LONGLONG;
}
else {
PyErr_SetString(PyExc_ValueError, msg);
Expand All @@ -366,13 +366,13 @@ _array_typedescr_fromstr(char *str)
break;
case 'f':
if (size == sizeof(float)) {
type_num = PyArray_FLOAT;
type_num = NPY_FLOAT;
}
else if (size == sizeof(double)) {
type_num = PyArray_DOUBLE;
type_num = NPY_DOUBLE;
}
else if (size == sizeof(longdouble)) {
type_num = PyArray_LONGDOUBLE;
type_num = NPY_LONGDOUBLE;
}
else {
PyErr_SetString(PyExc_ValueError, msg);
Expand All @@ -381,13 +381,13 @@ _array_typedescr_fromstr(char *str)
break;
case 'c':
if (size == sizeof(float)*2) {
type_num = PyArray_CFLOAT;
type_num = NPY_CFLOAT;
}
else if (size == sizeof(double)*2) {
type_num = PyArray_CDOUBLE;
type_num = NPY_CDOUBLE;
}
else if (size == sizeof(longdouble)*2) {
type_num = PyArray_CLONGDOUBLE;
type_num = NPY_CLONGDOUBLE;
}
else {
PyErr_SetString(PyExc_ValueError, msg);
Expand All @@ -396,22 +396,22 @@ _array_typedescr_fromstr(char *str)
break;
case 'O':
if (size == sizeof(PyObject *)) {
type_num = PyArray_OBJECT;
type_num = NPY_OBJECT;
}
else {
PyErr_SetString(PyExc_ValueError, msg);
return NULL;
}
break;
case PyArray_STRINGLTR:
type_num = PyArray_STRING;
case NPY_STRINGLTR:
type_num = NPY_STRING;
break;
case PyArray_UNICODELTR:
type_num = PyArray_UNICODE;
case NPY_UNICODELTR:
type_num = NPY_UNICODE;
size <<= 2;
break;
case 'V':
type_num = PyArray_VOID;
type_num = NPY_VOID;
break;
default:
PyErr_SetString(PyExc_ValueError, msg);
Expand Down
155 changes: 132 additions & 23 deletions numpy/core/src/multiarray/convert_datatype.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "convert_datatype.h"
#include "_datetime.h"
#include "datetime_strings.h"

/*NUMPY_API
* For backward compatibility
Expand All @@ -31,30 +32,11 @@ NPY_NO_EXPORT PyObject *
PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int fortran)
{
PyObject *out;
PyArray_Descr *arr_dtype;

arr_dtype = PyArray_DESCR(arr);

if (dtype->elsize == 0) {
PyArray_DESCR_REPLACE(dtype);
if (dtype == NULL) {
return NULL;
}

if (arr_dtype->type_num == dtype->type_num) {
dtype->elsize = arr_dtype->elsize;
}
else if (arr_dtype->type_num == NPY_STRING &&
dtype->type_num == NPY_UNICODE) {
dtype->elsize = arr_dtype->elsize * 4;
}
else if (arr_dtype->type_num == NPY_UNICODE &&
dtype->type_num == NPY_STRING) {
dtype->elsize = arr_dtype->elsize / 4;
}
else if (dtype->type_num == NPY_VOID) {
dtype->elsize = arr_dtype->elsize;
}
/* If the requested dtype is flexible, adapt it */
PyArray_AdaptFlexibleType((PyObject *)arr, PyArray_DESCR(arr), &dtype);
if (dtype == NULL) {
return NULL;
}

out = PyArray_NewFromDescr(Py_TYPE(arr), dtype,
Expand Down Expand Up @@ -136,6 +118,133 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
return NULL;
}

/*
 * Adapts a flexible dtype to the data it is going to hold.
 *
 * This function calls Py_DECREF on *flex_dtype, and replaces it with
 * a new dtype that has been adapted based on the values in data_dtype
 * and data_obj. If *flex_dtype is not flexible, it is left as is.
 *
 * The current flexible dtypes include NPY_STRING, NPY_UNICODE and NPY_VOID
 * (recognized by an elsize of 0), and NPY_DATETIME / NPY_TIMEDELTA with
 * generic units.
 *
 * data_obj   - the object the data comes from; only consulted to detect
 *              the unit for a generic-unit datetime/timedelta.
 * data_dtype - the dtype of the input data.
 * flex_dtype - in/out: the requested dtype. On failure, *flex_dtype is
 *              set to NULL, so callers must check it after the call.
 */
NPY_NO_EXPORT void
PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
                            PyArray_Descr **flex_dtype)
{
    PyArray_DatetimeMetaData *meta;

    /* Flexible types with expandable size */
    if ((*flex_dtype)->elsize == 0) {
        /* First replace the flex dtype */
        PyArray_DESCR_REPLACE(*flex_dtype);
        if (*flex_dtype == NULL) {
            return;
        }

        if (data_dtype->type_num == (*flex_dtype)->type_num ||
                        (*flex_dtype)->type_num == NPY_VOID) {
            /* Same kind of type, or raw void: take the input's size */
            (*flex_dtype)->elsize = data_dtype->elsize;
        }
        else {
            /* Fallback for any input type not listed in the switch */
            npy_intp size = 8;

            /*
             * Get a string-size estimate (in characters) of the input.
             *
             * These are hand-picked estimates. Per the review discussion
             * on this change, they are one more than necessary (room for
             * the NULL character) in many of the cases; rounding them to
             * multiples of 8 was suggested as a possible refinement to
             * get nicely aligned data more often.
             */
            switch (data_dtype->type_num) {
                case NPY_BOOL:
                    size = 5;
                    break;
                case NPY_UBYTE:
                    size = 3;
                    break;
                case NPY_BYTE:
                    size = 4;
                    break;
                case NPY_USHORT:
                    size = 5;
                    break;
                case NPY_SHORT:
                    size = 6;
                    break;
                case NPY_UINT:
                    size = 10;
                    break;
                case NPY_INT:
                    /*
                     * One more than NPY_UINT for the sign, matching the
                     * other signed/unsigned pairs; a 32-bit minimum such
                     * as "-2147483648" needs 11 characters.
                     */
                    size = 11;
                    break;
                case NPY_ULONG:
                    size = 20;
                    break;
                case NPY_LONG:
                    size = 21;
                    break;
                case NPY_ULONGLONG:
                    size = 20;
                    break;
                case NPY_LONGLONG:
                    size = 21;
                    break;
                case NPY_HALF:
                case NPY_FLOAT:
                case NPY_DOUBLE:
                case NPY_LONGDOUBLE:
                    size = 32;
                    break;
                case NPY_CFLOAT:
                case NPY_CDOUBLE:
                case NPY_CLONGDOUBLE:
                    size = 64;
                    break;
                case NPY_OBJECT:
                    size = 64;
                    break;
                case NPY_STRING:
                case NPY_VOID:
                    size = data_dtype->elsize;
                    break;
                case NPY_UNICODE:
                    /* elsize is in bytes, at 4 bytes per character */
                    size = data_dtype->elsize / 4;
                    break;
                case NPY_DATETIME:
                    meta = get_datetime_metadata_from_dtype(data_dtype);
                    if (meta == NULL) {
                        Py_DECREF(*flex_dtype);
                        *flex_dtype = NULL;
                        return;
                    }
                    size = get_datetime_iso_8601_strlen(0, meta->base);
                    break;
                case NPY_TIMEDELTA:
                    size = 21;
                    break;
                default:
                    /* Keep the fallback estimate */
                    break;
            }

            if ((*flex_dtype)->type_num == NPY_STRING) {
                (*flex_dtype)->elsize = size;
            }
            else if ((*flex_dtype)->type_num == NPY_UNICODE) {
                /* Convert the character count to bytes */
                (*flex_dtype)->elsize = size * 4;
            }
        }
    }
    /* Flexible type with generic time unit that adapts */
    else if ((*flex_dtype)->type_num == NPY_DATETIME ||
                    (*flex_dtype)->type_num == NPY_TIMEDELTA) {
        meta = get_datetime_metadata_from_dtype(*flex_dtype);
        if (meta == NULL) {
            Py_DECREF(*flex_dtype);
            *flex_dtype = NULL;
            return;
        }

        if (meta->base == NPY_FR_GENERIC) {
            /*
             * Detect the unit from the input's data. Whatever the
             * detection returns (including NULL) replaces *flex_dtype.
             */
            PyArray_Descr *dtype = find_object_datetime_type(data_obj,
                                                (*flex_dtype)->type_num);
            Py_DECREF(*flex_dtype);
            *flex_dtype = dtype;
        }
    }
}

/*
* Must be broadcastable.
* This code is very similar to PyArray_CopyInto/PyArray_MoveInto
Expand Down
12 changes: 12 additions & 0 deletions numpy/core/src/multiarray/convert_datatype.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,16 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn);
NPY_NO_EXPORT int
PyArray_ValidType(int type);

/*
* This function calls Py_DECREF on *flex_dtype, and replaces it with
* a new dtype that has been adapted based on the values in data_dtype
* and data_obj. If *flex_dtype is not flexible, it is left as is.
* On failure, *flex_dtype is set to NULL, so callers must check it
* after the call.
*
* The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID,
* and NPY_DATETIME / NPY_TIMEDELTA with generic units.
*/
NPY_NO_EXPORT void
PyArray_AdaptFlexibleType(PyObject *data_obj, PyArray_Descr *data_dtype,
PyArray_Descr **flex_dtype);

#endif
Loading
0