gh-122854: Add Py_HashBuffer() function by vstinner · Pull Request #122855 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions Doc/c-api/hash.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,25 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`.

.. versionadded:: 3.13


.. c:function:: Py_hash_t Py_HashBuffer(const void *ptr, Py_ssize_t len)

Compute and return the hash value of a buffer of *len* bytes
starting at address *ptr*. The hash is guaranteed to match that of
:class:`bytes`, :class:`memoryview`, and other built-in objects
that implement the :ref:`buffer protocol <bufferobjects>`.

Use this function to implement hashing for immutable objects whose
:c:member:`~PyTypeObject.tp_richcompare` function compares to another
object's buffer.

*len* must be greater than or equal to ``0``.

This function always succeeds.

.. versionadded:: 3.14


.. c:function:: Py_hash_t PyObject_GenericHash(PyObject *obj)

Generic hashing function that is meant to be put into a type
Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,9 @@ New Features
similar to ``sep.join(iterable)`` in Python.
(Contributed by Victor Stinner in :gh:`121645`.)

* Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
(Contributed by Antoine Pitrou and Victor Stinner in :gh:`122854`.)


Porting to Python 3.14
----------------------
Expand Down
2 changes: 2 additions & 0 deletions Include/cpython/pyhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,5 @@ PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);

PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr);
PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *);

PyAPI_FUNC(Py_hash_t) Py_HashBuffer(const void *ptr, Py_ssize_t len);
3 changes: 0 additions & 3 deletions Include/internal/pycore_pyhash.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ _Py_HashPointerRaw(const void *ptr)
return (Py_hash_t)x;
}

// Export for '_datetime' shared extension
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);

/* Hash secret
*
* memory layout on 64 bit systems
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/test_capi/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ def python_hash_pointer(x):
VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1)
self.assertEqual(hash_pointer(VOID_P_MAX), -2)

def test_hash_buffer(self):
hash_buffer = _testcapi.hash_buffer

def check(data):
self.assertEqual(hash_buffer(data), hash(data))

check(b'')
check(b'abc')
check(b'x' * 1024)


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add :c:func:`Py_HashBuffer` to compute and return the hash value of a buffer.
Patch by Antoine Pitrou and Victor Stinner.
2 changes: 1 addition & 1 deletion Modules/_datetimemodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -3842,7 +3842,7 @@ datetime_date_replace_impl(PyDateTime_Date *self, int year, int month,
static Py_hash_t
generic_hash(unsigned char *data, int len)
{
return _Py_HashBytes(data, len);
return Py_HashBuffer(data, len);
}


Expand Down
3 changes: 1 addition & 2 deletions Modules/_hashopenssl.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include <stdbool.h>
#include "Python.h"
#include "pycore_hashtable.h"
#include "pycore_pyhash.h" // _Py_HashBytes()
#include "pycore_strhex.h" // _Py_strhex()
#include "hashlib.h"

Expand Down Expand Up @@ -186,7 +185,7 @@ static const py_hashentry_t py_hashes[] = {

static Py_uhash_t
py_hashentry_t_hash_name(const void *key) {
return _Py_HashBytes(key, strlen((const char *)key));
return Py_HashBuffer(key, strlen((const char *)key));
}

static int
Expand Down
2 changes: 1 addition & 1 deletion Modules/_sre/sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -2944,7 +2944,7 @@ pattern_hash(PatternObject *self)
return -1;
}

hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
hash2 = Py_HashBuffer(self->code, sizeof(self->code[0]) * self->codesize);
hash ^= hash2;

hash ^= self->flags;
Expand Down
29 changes: 25 additions & 4 deletions Modules/_testcapi/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
}


/* Convert a Py_hash_t value into a Python int via PyLong_FromLongLong().
   Shared by the hash test wrappers in this file. */
static PyObject *
long_from_hash(Py_hash_t hash)
{
    /* Build-time check: widening the hash to long long must not lose bits. */
    Py_BUILD_ASSERT(sizeof(Py_hash_t) <= sizeof(long long));
    long long widened = hash;
    return PyLong_FromLongLong(widened);
}


static PyObject *
hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
{
Expand All @@ -54,8 +62,21 @@ hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg)
}

Py_hash_t hash = Py_HashPointer(ptr);
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
return PyLong_FromLongLong(hash);
return long_from_hash(hash);
}


/* Test wrapper for Py_HashBuffer(): hash_buffer(data) -> int.
 *
 * The argument tuple is parsed with the "y#" format, which fills in a
 * pointer and a length; both are passed unchanged to Py_HashBuffer().
 * Returns NULL when argument parsing fails; otherwise returns the hash
 * converted to a Python object by long_from_hash().
 */
static PyObject *
hash_buffer(PyObject *Py_UNUSED(module), PyObject *args)
{
    /* The bytes are only read here, and "y#" is documented as producing a
       const char *, so keep the pointer const-qualified. */
    const char *ptr;
    Py_ssize_t len;
    if (!PyArg_ParseTuple(args, "y#", &ptr, &len)) {
        return NULL;
    }

    Py_hash_t hash = Py_HashBuffer(ptr, len);
    return long_from_hash(hash);
}


Expand All @@ -64,14 +85,14 @@ object_generichash(PyObject *Py_UNUSED(module), PyObject *arg)
{
NULLABLE(arg);
Py_hash_t hash = PyObject_GenericHash(arg);
Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash));
return PyLong_FromLongLong(hash);
return long_from_hash(hash);
}


/* Method table for the hash C API tests: each entry maps a Python-visible
   name to the C wrapper of the same name.  hash_buffer is registered as
   METH_VARARGS because it unpacks its argument tuple with
   PyArg_ParseTuple(); the {NULL} entry is the last one in the table. */
static PyMethodDef test_methods[] = {
{"hash_getfuncdef", hash_getfuncdef, METH_NOARGS},
{"hash_pointer", hash_pointer, METH_O},
{"hash_buffer", hash_buffer, METH_VARARGS},
{"object_generichash", object_generichash, METH_O},
{NULL},
};
Expand Down
3 changes: 1 addition & 2 deletions Modules/_xxtestfuzz/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#endif

#include <Python.h>
#include "pycore_pyhash.h" // _Py_HashBytes()
#include <stdlib.h>
#include <inttypes.h>

Expand Down Expand Up @@ -45,7 +44,7 @@ static int fuzz_builtin_int(const char* data, size_t size) {
/* Pick a random valid base. (When the fuzzed function takes extra
parameters, it's somewhat normal to hash the input to generate those
parameters. We want to exercise all code paths, so we do so here.) */
int base = _Py_HashBytes(data, size) % 37;
int base = Py_HashBuffer(data, size) % 37;
if (base == 1) {
// 1 is the only number between 0 and 36 that is not a valid base.
base = 0;
Expand Down
2 changes: 1 addition & 1 deletion Objects/bytesobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1598,7 +1598,7 @@ _Py_COMP_DIAG_PUSH
_Py_COMP_DIAG_IGNORE_DEPR_DECLS
if (a->ob_shash == -1) {
/* Can't fail */
a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
a->ob_shash = Py_HashBuffer(a->ob_sval, Py_SIZE(a));
}
return a->ob_shash;
_Py_COMP_DIAG_POP
Expand Down
4 changes: 2 additions & 2 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2561,12 +2561,12 @@ hash_const(const void *key)
if (PySlice_Check(op)) {
PySliceObject *s = (PySliceObject *)op;
PyObject *data[3] = { s->start, s->stop, s->step };
return _Py_HashBytes(&data, sizeof(data));
return Py_HashBuffer(&data, sizeof(data));
}
else if (PyTuple_CheckExact(op)) {
Py_ssize_t size = PyTuple_GET_SIZE(op);
PyObject **data = _PyTuple_ITEMS(op);
return _Py_HashBytes(data, sizeof(PyObject *) * size);
return Py_HashBuffer(data, sizeof(PyObject *) * size);
}
Py_hash_t h = PyObject_Hash(op);
if (h == -1) {
Expand Down
2 changes: 1 addition & 1 deletion Objects/memoryobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3087,7 +3087,7 @@ memory_hash(PyObject *_self)
}

/* Can't fail */
self->hash = _Py_HashBytes(mem, view->len);
self->hash = Py_HashBuffer(mem, view->len);

if (mem != view->buf)
PyMem_Free(mem);
Expand Down
2 changes: 1 addition & 1 deletion Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -11688,7 +11688,7 @@ unicode_hash(PyObject *self)
if (hash != -1) {
return hash;
}
x = _Py_HashBytes(PyUnicode_DATA(self),
x = Py_HashBuffer(PyUnicode_DATA(self),
PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));

FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x);
Expand Down
2 changes: 1 addition & 1 deletion Python/import.c
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep)
static Py_uhash_t
hashtable_hash_str(const void *key)
{
return _Py_HashBytes(key, strlen((const char *)key));
return Py_HashBuffer(key, strlen((const char *)key));
}

static int
Expand Down
15 changes: 9 additions & 6 deletions Python/pyhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ extern PyHash_FuncDef PyHash_Func;
static PyHash_FuncDef PyHash_Func;
#endif

/* Count _Py_HashBytes() calls */
/* Count Py_HashBuffer() calls */
#ifdef Py_HASH_STATS
#define Py_HASH_STATS_MAX 32
static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
Expand Down Expand Up @@ -146,9 +146,8 @@ PyObject_GenericHash(PyObject *obj)
}

Py_hash_t
_Py_HashBytes(const void *src, Py_ssize_t len)
Py_HashBuffer(const void *ptr, Py_ssize_t len)
{
Py_hash_t x;
/*
We make the hash of the empty string be 0, rather than using
(prefix ^ suffix), since this slightly obfuscates the hash secret
Expand All @@ -161,11 +160,12 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
#endif

Py_hash_t x;
#if Py_HASH_CUTOFF > 0
if (len < Py_HASH_CUTOFF) {
/* Optimize hashing of very small strings with inline DJBX33A. */
Py_uhash_t hash;
const unsigned char *p = src;
const unsigned char *p = ptr;
hash = 5381; /* DJBX33A starts with 5381 */

switch(len) {
Expand All @@ -186,10 +186,13 @@ _Py_HashBytes(const void *src, Py_ssize_t len)
}
else
#endif /* Py_HASH_CUTOFF */
x = PyHash_Func.hash(src, len);
{
x = PyHash_Func.hash(ptr, len);
}

if (x == -1)
if (x == -1) {
return -2;
}
return x;
}

Expand Down
0