BUG: datetime64 hash. · walshb/numpy@fa643d6 · GitHub
[go: up one dir, main page]

Skip to content

Commit fa643d6

Browse files
committed
BUG: datetime64 hash.
numpy#3836
1 parent 70fde29 commit fa643d6

File tree

6 files changed

+284
-77
lines changed

6 files changed

+284
-77
lines changed

numpy/_core/include/numpy/ndarraytypes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ typedef struct {
841841
npy_int32 month, day, hour, min, sec, us, ps, as;
842842
} npy_datetimestruct;
843843

844-
/* This is not used internally. */
844+
/* This structure contains an exploded view of a timedelta value */
845845
typedef struct {
846846
npy_int64 day;
847847
npy_int32 sec, us, ps, as;

numpy/_core/src/multiarray/_datetime.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,4 +328,10 @@ find_object_datetime_type(PyObject *obj, int type_num);
328328
NPY_NO_EXPORT int
329329
PyArray_InitializeDatetimeCasts(void);
330330

331+
NPY_NO_EXPORT npy_hash_t
332+
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt);
333+
334+
NPY_NO_EXPORT npy_hash_t
335+
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td);
336+
331337
#endif /* NUMPY_CORE_SRC_MULTIARRAY__DATETIME_H_ */

numpy/_core/src/multiarray/datetime.c

Lines changed: 186 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -2818,85 +2818,228 @@ convert_datetime_to_pyobject(npy_datetime dt, PyArray_DatetimeMetaData *meta)
28182818
}
28192819

28202820
/*
2821-
* Converts a timedelta into a PyObject *.
2821+
* We require that if d is a PyDateTime, then
2822+
* hash(numpy.datetime64(d)) == hash(d).
2823+
* Where possible, convert dt to a PyDateTime and hash it.
28222824
*
2823-
* Not-a-time is returned as the string "NaT".
2824-
* For microseconds or coarser, returns a datetime.timedelta.
2825-
* For units finer than microseconds, returns an integer.
2825+
* NOTE: "equals" across PyDate, PyDateTime and np.datetime64 is not transitive:
2826+
* datetime.datetime(1970, 1, 1) == np.datetime64(0, 'us')
2827+
* np.datetime64(0, 'us') == np.datetime64(0, 'D')
2828+
* datetime.datetime(1970, 1, 1) != np.datetime64(0, 'D') # date, not datetime!
2829+
*
2830+
* But:
2831+
* datetime.date(1970, 1, 1) == np.datetime64(0, 'D')
2832+
*
2833+
* For hash(datetime64(0, 'D')) we could return either PyDate.hash or PyDateTime.hash.
2834+
* We choose PyDateTime.hash to match datetime64(0, 'us')
28262835
*/
2827-
NPY_NO_EXPORT PyObject *
2828-
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
2836+
NPY_NO_EXPORT npy_hash_t
2837+
datetime_hash(PyArray_DatetimeMetaData *meta, npy_datetime dt)
28292838
{
2830-
npy_timedelta value;
2831-
int days = 0, seconds = 0, useconds = 0;
2839+
PyObject *obj;
2840+
npy_hash_t res;
2841+
npy_datetimestruct dts;
28322842

2833-
/*
2834-
* Convert NaT (not-a-time) into None.
2835-
*/
2836-
if (td == NPY_DATETIME_NAT) {
2837-
Py_RETURN_NONE;
2843+
if (dt == NPY_DATETIME_NAT || meta->base == NPY_FR_GENERIC) {
2844+
return -2;
28382845
}
28392846

2840-
/*
2841-
* If the type's precision is greater than microseconds, is
2842-
* Y/M/B (nonlinear units), or is generic units, return an int
2843-
*/
2844-
if (meta->base > NPY_FR_us ||
2845-
meta->base == NPY_FR_Y ||
2846-
meta->base == NPY_FR_M ||
2847-
meta->base == NPY_FR_GENERIC) {
2848-
return PyLong_FromLongLong(td);
2847+
if (NpyDatetime_ConvertDatetime64ToDatetimeStruct(meta, dt, &dts) < 0) {
2848+
return -1;
2849+
}
2850+
2851+
if (dts.year < 1 || dts.year > 9999
2852+
|| dts.ps != 0 || dts.as != 0) {
2853+
/* NpyDatetime_ConvertDatetime64ToDatetimeStruct does memset,
2854+
* so this is safe from loose struct packing. */
2855+
obj = PyBytes_FromStringAndSize((const char *)&dts, sizeof(dts));
2856+
} else {
2857+
obj = PyDateTime_FromDateAndTime(dts.year, dts.month, dts.day,
2858+
dts.hour, dts.min, dts.sec, dts.us);
28492859
}
28502860

2851-
value = td;
2861+
if (obj == NULL) {
2862+
return -1;
2863+
}
2864+
2865+
res = PyObject_Hash(obj);
2866+
2867+
Py_DECREF(obj);
2868+
2869+
return res;
2870+
}
2871+
2872+
static int
2873+
convert_timedelta_to_timedeltastruct(PyArray_DatetimeMetaData *meta,
2874+
npy_timedelta td,
2875+
npy_timedeltastruct *out)
2876+
{
2877+
memset(out, 0, sizeof(npy_timedeltastruct));
28522878

28532879
/* Apply the unit multiplier (TODO: overflow treatment...) */
2854-
value *= meta->num;
2880+
td *= meta->num;
28552881

28562882
/* Convert to days/seconds/useconds */
28572883
switch (meta->base) {
28582884
case NPY_FR_W:
2859-
days = value * 7;
2885+
out->day = td * 7;
28602886
break;
28612887
case NPY_FR_D:
2862-
days = value;
2888+
out->day = td;
28632889
break;
28642890
case NPY_FR_h:
2865-
days = extract_unit_64(&value, 24ULL);
2866-
seconds = value*60*60;
2891+
out->day = extract_unit_64(&td, 24LL);
2892+
out->sec = (npy_int32)(td * 60*60);
28672893
break;
28682894
case NPY_FR_m:
2869-
days = extract_unit_64(&value, 60ULL*24);
2870-
seconds = value*60;
2895+
out->day = extract_unit_64(&td, 60LL*24);
2896+
out->sec = (npy_int32)(td * 60);
28712897
break;
28722898
case NPY_FR_s:
2873-
days = extract_unit_64(&value, 60ULL*60*24);
2874-
seconds = value;
2899+
out->day = extract_unit_64(&td, 60LL*60*24);
2900+
out->sec = (npy_int32)td;
28752901
break;
28762902
case NPY_FR_ms:
2877-
days = extract_unit_64(&value, 1000ULL*60*60*24);
2878-
seconds = extract_unit_64(&value, 1000ULL);
2879-
useconds = value*1000;
2903+
out->day = extract_unit_64(&td, 1000LL*60*60*24);
2904+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL);
2905+
out->us = (npy_int32)(td * 1000LL);
28802906
break;
28812907
case NPY_FR_us:
2882-
days = extract_unit_64(&value, 1000ULL*1000*60*60*24);
2883-
seconds = extract_unit_64(&value, 1000ULL*1000);
2884-
useconds = value;
2908+
out->day = extract_unit_64(&td, 1000LL*1000*60*60*24);
2909+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2910+
out->us = (npy_int32)td;
28852911
break;
2886-
default:
2887-
// unreachable, handled by the `if` above
2888-
assert(NPY_FALSE);
2912+
case NPY_FR_ns:
2913+
out->day = extract_unit_64(&td, 1000LL*1000*1000*60*60*24);
2914+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
2915+
out->us = (npy_int32)extract_unit_64(&td, 1000LL);
2916+
out->ps = (npy_int32)(td * 1000LL);
2917+
break;
2918+
case NPY_FR_ps:
2919+
out->day = extract_unit_64(&td, 1000LL*1000*1000*1000*60*60*24);
2920+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
2921+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2922+
out->ps = (npy_int32)td;
28892923
break;
2924+
case NPY_FR_fs:
2925+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000);
2926+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000);
2927+
out->ps = (npy_int32)extract_unit_64(&td, 1000LL);
2928+
out->as = (npy_int32)(td * 1000LL);
2929+
break;
2930+
case NPY_FR_as:
2931+
out->sec = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000*1000*1000);
2932+
out->us = (npy_int32)extract_unit_64(&td, 1000LL*1000*1000*1000);
2933+
out->ps = (npy_int32)extract_unit_64(&td, 1000LL*1000);
2934+
out->as = (npy_int32)td;
2935+
break;
2936+
default:
2937+
PyErr_SetString(PyExc_RuntimeError,
2938+
"NumPy timedelta metadata is corrupted with invalid "
2939+
"base unit");
2940+
return -1;
28902941
}
2942+
2943+
return 0;
2944+
}
2945+
2946+
/*
2947+
* Converts a timedelta into a PyObject *.
2948+
*
2949+
* Not-a-time (NaT) is returned as None.
2950+
* For microseconds or coarser, returns a datetime.timedelta.
2951+
* For units finer than microseconds, returns an integer.
2952+
*/
2953+
NPY_NO_EXPORT PyObject *
2954+
convert_timedelta_to_pyobject(npy_timedelta td, PyArray_DatetimeMetaData *meta)
2955+
{
2956+
npy_timedeltastruct tds;
2957+
2958+
/*
2959+
* Convert NaT (not-a-time) into None.
2960+
*/
2961+
if (td == NPY_DATETIME_NAT) {
2962+
Py_RETURN_NONE;
2963+
}
2964+
2965+
/*
2966+
* If the type's precision is greater than microseconds, is
2967+
* Y/M/B (nonlinear units), or is generic units, return an int
2968+
*/
2969+
if (meta->base > NPY_FR_us ||
2970+
meta->base == NPY_FR_Y ||
2971+
meta->base == NPY_FR_M ||
2972+
meta->base == NPY_FR_GENERIC) {
2973+
return PyLong_FromLongLong(td);
2974+
}
2975+
2976+
if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
2977+
return NULL;
2978+
}
2979+
28912980
/*
28922981
* If it would overflow the datetime.timedelta days, return a raw int
28932982
*/
2894-
if (days < -999999999 || days > 999999999) {
2983+
if (tds.day < -999999999 || tds.day > 999999999) {
28952984
return PyLong_FromLongLong(td);
28962985
}
28972986
else {
2898-
return PyDelta_FromDSU(days, seconds, useconds);
2987+
return PyDelta_FromDSU(tds.day, tds.sec, tds.us);
2988+
}
2989+
}
2990+
2991+
/*
2992+
* We require that if d is a PyDelta, then
2993+
* hash(numpy.timedelta64(d)) == hash(d).
2994+
* Where possible, convert td to a PyDelta and hash it.
2995+
*/
2996+
NPY_NO_EXPORT npy_hash_t
2997+
timedelta_hash(PyArray_DatetimeMetaData *meta, npy_timedelta td)
2998+
{
2999+
PyObject *obj;
3000+
npy_hash_t res;
3001+
npy_timedeltastruct tds;
3002+
3003+
if (td == NPY_DATETIME_NAT) {
3004+
return -2;
28993005
}
3006+
3007+
if (meta->base == NPY_FR_GENERIC) {
3008+
/* generic compares equal to *every* other base, so no single hash works. */
3009+
PyErr_SetString(PyExc_ValueError, "Can't hash generic timedelta64");
3010+
return -1;
3011+
}
3012+
3013+
/* Y and M can be converted to each other but not to other units */
3014+
3015+
if (meta->base == NPY_FR_Y) {
3016+
obj = PyLong_FromLong(td * 12);
3017+
} else if (meta->base == NPY_FR_M) {
3018+
obj = PyLong_FromLong(td);
3019+
} else {
3020+
if (convert_timedelta_to_timedeltastruct(meta, td, &tds) < 0) {
3021+
return -1;
3022+
}
3023+
3024+
if (tds.day < -999999999 || tds.day > 999999999
3025+
|| tds.ps != 0 || tds.as != 0) {
3026+
/* convert_timedelta_to_timedeltastruct does memset,
3027+
* so this is safe from loose struct packing. */
3028+
obj = PyBytes_FromStringAndSize((const char *)&tds, sizeof(tds));
3029+
} else {
3030+
obj = PyDelta_FromDSU(tds.day, tds.sec, tds.us);
3031+
}
3032+
}
3033+
3034+
if (obj == NULL) {
3035+
return -1;
3036+
}
3037+
3038+
res = PyObject_Hash(obj);
3039+
3040+
Py_DECREF(obj);
3041+
3042+
return res;
29003043
}
29013044

29023045
/*

numpy/_core/src/multiarray/scalartypes.c.src

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3904,45 +3904,16 @@ static inline npy_hash_t
39043904
* #lname = datetime, timedelta#
39053905
* #name = Datetime, Timedelta#
39063906
*/
3907-
#if NPY_SIZEOF_HASH_T==NPY_SIZEOF_DATETIME
39083907
static npy_hash_t
39093908
@lname@_arrtype_hash(PyObject *obj)
39103909
{
3911-
npy_hash_t x = (npy_hash_t)(PyArrayScalar_VAL(obj, @name@));
3912-
if (x == -1) {
3913-
x = -2;
3914-
}
3915-
return x;
3910+
PyArray_Descr *dtype = PyArray_DescrFromScalar(obj);
3911+
PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(dtype);
3912+
return @lname@_hash(meta, PyArrayScalar_VAL(obj, @name@));
39163913
}
3917-
#elif NPY_SIZEOF_LONGLONG==NPY_SIZEOF_DATETIME
3918-
static npy_hash_t
3919-
@lname@_arrtype_hash(PyObject *obj)
3920-
{
3921-
npy_hash_t y;
3922-
npy_longlong x = (PyArrayScalar_VAL(obj, @name@));
3923-
3924-
if ((x <= LONG_MAX)) {
3925-
y = (npy_hash_t) x;
3926-
}
3927-
else {
3928-
union Mask {
3929-
long hashvals[2];
3930-
npy_longlong v;
3931-
} both;
3932-
3933-
both.v = x;
3934-
y = both.hashvals[0] + (1000003)*both.hashvals[1];
3935-
}
3936-
if (y == -1) {
3937-
y = -2;
3938-
}
3939-
return y;
3940-
}
3941-
#endif
39423914
/**end repeat**/
39433915

39443916

3945-
39463917
/* Wrong thing to do for longdouble, but....*/
39473918

39483919
/**begin repeat

0 commit comments

Comments
 (0)
0