ENH: cache dtype.__hash__ · numpy/numpy@cca2c1a · GitHub
[go: up one dir, main page]

Skip to content

Commit cca2c1a

Browse files
committed
ENH: cache dtype.__hash__
Computing the hash of a dtype can be slow for complex (e.g. structured) dtypes. Hashing dtypes can be useful in some applications, such as when doing type-based dispatching, and speed can be critical in those cases. This enhancement caches the once-computed hash value in the dtype structure, so as to save time on further lookups. The cached value is invalidated in the rare cases where the dtype is mutated. Benchmark numbers: python3.4 -m timeit -s "import numpy as np; t=np.dtype('uint64')" "hash(t)" * before patch: 1000000 loops, best of 3: 0.498 usec per loop * after patch: 10000000 loops, best of 3: 0.0616 usec per loop python3.4 -m timeit -s "import numpy as np; t=np.dtype([(s, 'f') for s in 'abcdefghij'])" "hash(t)" * before patch: 100000 loops, best of 3: 4.43 usec per loop * after patch: 10000000 loops, best of 3: 0.0603 usec per loop Closes #5339.
1 parent c3cd4bf commit cca2c1a

File tree

8 files changed

+50
-18
lines changed

8 files changed

+50
-18
lines changed

doc/release/1.10.0-notes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ C API
6161
The changes to *swapaxes* also apply to the *PyArray_SwapAxes* C function,
6262
which now returns a view in all cases.
6363

64+
The dtype structure (PyArray_Descr) has a new member at the end to cache
65+
its hash value. This shouldn't affect any well-written applications.
66+
6467
recarray field return types
6568
~~~~~~~~~~~~~~~~~~~~~~~~~~~
6669
Previously the returned types for recarray fields accessed by attribute and by

numpy/core/include/numpy/ndarraytypes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,10 @@ typedef struct _PyArray_Descr {
619619
* for NumPy 1.7.0.
620620
*/
621621
NpyAuxData *c_metadata;
622+
/* Cached hash value (-1 if not yet computed).
623+
* This was added for NumPy 1.10.0.
624+
*/
625+
npy_hash_t hash;
622626
} PyArray_Descr;
623627

624628
typedef struct _arr_descr {

numpy/core/include/numpy/npy_3kcompat.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -486,19 +486,6 @@ NpyCapsule_Check(PyObject *ptr)
486486

487487
#endif
488488

489-
/*
490-
* Hash value compatibility.
491-
* As of Python 3.2 hash values are of type Py_hash_t.
492-
* Previous versions use C long.
493-
*/
494-
#if PY_VERSION_HEX < 0x03020000
495-
typedef long npy_hash_t;
496-
#define NPY_SIZEOF_HASH_T NPY_SIZEOF_LONG
497-
#else
498-
typedef Py_hash_t npy_hash_t;
499-
#define NPY_SIZEOF_HASH_T NPY_SIZEOF_INTP
500-
#endif
501-
502489
#ifdef __cplusplus
503490
}
504491
#endif

numpy/core/include/numpy/npy_common.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,19 @@ typedef long npy_long;
316316
typedef float npy_float;
317317
typedef double npy_double;
318318

319+
/*
320+
* Hash value compatibility.
321+
* As of Python 3.2 hash values are of type Py_hash_t.
322+
* Previous versions use C long.
323+
*/
324+
#if PY_VERSION_HEX < 0x03020000
325+
typedef long npy_hash_t;
326+
#define NPY_SIZEOF_HASH_T NPY_SIZEOF_LONG
327+
#else
328+
typedef Py_hash_t npy_hash_t;
329+
#define NPY_SIZEOF_HASH_T NPY_SIZEOF_INTP
330+
#endif
331+
319332
/*
320333
* Disabling C99 complex usage: a lot of C code in numpy/scipy rely on being
321334
* able to do .real/.imag. Will have to convert code first.

numpy/core/src/multiarray/arraytypes.c.src

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4031,6 +4031,8 @@ static PyArray_Descr @from@_Descr = {
40314031
NULL,
40324032
/* c_metadata */
40334033
NULL,
4034+
/* hash */
4035+
-1,
40344036
};
40354037

40364038
/**end repeat**/
@@ -4172,6 +4174,8 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = {
41724174
NULL,
41734175
/* c_metadata */
41744176
NULL,
4177+
/* hash */
4178+
-1,
41754179
};
41764180

41774181
/**end repeat**/

numpy/core/src/multiarray/descriptor.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,6 +1591,7 @@ PyArray_DescrNew(PyArray_Descr *base)
15911591
}
15921592
Py_XINCREF(newdescr->typeobj);
15931593
Py_XINCREF(newdescr->metadata);
1594+
newdescr->hash = -1;
15941595

15951596
return newdescr;
15961597
}
@@ -1994,6 +1995,8 @@ arraydescr_names_set(PyArray_Descr *self, PyObject *val)
19941995
return -1;
19951996
}
19961997
}
1998+
/* Invalidate cached hash value */
1999+
self->hash = -1;
19972000
/* Update dictionary keys in fields */
19982001
new_names = PySequence_Tuple(val);
19992002
new_fields = PyDict_New();
@@ -2443,6 +2446,8 @@ arraydescr_setstate(PyArray_Descr *self, PyObject *args)
24432446
version);
24442447
return NULL;
24452448
}
2449+
/* Invalidate cached hash value */
2450+
self->hash = -1;
24462451

24472452
if (version == 1 || version == 0) {
24482453
if (fields != Py_None) {

numpy/core/src/multiarray/hashdescr.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,6 @@ PyArray_DescrHash(PyObject* odescr)
301301
{
302302
PyArray_Descr *descr;
303303
int st;
304-
npy_hash_t hash;
305304

306305
if (!PyArray_DescrCheck(odescr)) {
307306
PyErr_SetString(PyExc_ValueError,
@@ -310,10 +309,12 @@ PyArray_DescrHash(PyObject* odescr)
310309
}
311310
descr = (PyArray_Descr*)odescr;
312311

313-
st = _PyArray_DescrHashImp(descr, &hash);
314-
if (st) {
315-
return -1;
312+
if (descr->hash == -1) {
313+
st = _PyArray_DescrHashImp(descr, &descr->hash);
314+
if (st) {
315+
return -1;
316+
}
316317
}
317318

318-
return hash;
319+
return descr->hash;
319320
}

numpy/core/tests/test_dtype.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,21 @@ def test_different_titles(self):
125125
'titles': ['RRed pixel', 'Blue pixel']})
126126
assert_dtype_not_equal(a, b)
127127

128+
def test_mutate(self):
129+
# Mutating a dtype should reset the cached hash value
130+
a = np.dtype([('yo', np.int)])
131+
b = np.dtype([('yo', np.int)])
132+
c = np.dtype([('ye', np.int)])
133+
assert_dtype_equal(a, b)
134+
assert_dtype_not_equal(a, c)
135+
a.names = ['ye']
136+
assert_dtype_equal(a, c)
137+
assert_dtype_not_equal(a, b)
138+
state = b.__reduce__()[2]
139+
a.__setstate__(state)
140+
assert_dtype_equal(a, b)
141+
assert_dtype_not_equal(a, c)
142+
128143
def test_not_lists(self):
129144
"""Test if an appropriate exception is raised when passing bad values to
130145
the dtype constructor.

0 commit comments

Comments
 (0)
0