From 832fb72b07c771abd8332462af0511def316901e Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 28 Nov 2023 00:26:13 +0100
Subject: [PATCH 1/2] gh-111545: Benchmark PyHash_Double()

---
 Modules/_testinternalcapi.c | 219 ++++++++++++++++++++++++++++++++++++
 bench.py                    |   7 ++
 profile_task.py             |   7 ++
 run.sh                      |   7 ++
 4 files changed, 240 insertions(+)
 create mode 100644 bench.py
 create mode 100644 profile_task.py
 create mode 100755 run.sh

diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 4607a3faf17f74..b0a280465dae9b 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1625,6 +1625,222 @@ get_type_module_name(PyObject *self, PyObject *type)
 }
 
 
+Py_NO_INLINE static Py_hash_t
+hash_api_A(PyObject *inst, double v)
+{   /* Variant A: return the hash of v; a NaN is hashed from the inst pointer. */
+    int e, sign;        /* e: binary exponent of v; sign: +1 or -1 */
+    double m;           /* mantissa still to be folded into the hash */
+    Py_uhash_t x, y;    /* x: running hash; y: current 28-bit chunk */
+
+    if (!Py_IS_FINITE(v)) {
+        if (Py_IS_INFINITY(v))
+            return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
+        else
+            return _Py_HashPointer(inst);  /* NaN */
+    }
+
+    m = frexp(v, &e);  /* v == m * 2**e */
+
+    sign = 1;
+    if (m < 0) {
+        sign = -1;
+        m = -m;  /* work on the magnitude; the sign is re-applied at the end */
+    }
+
+    /* Consume the mantissa 28 bits per iteration (this should work well for
+       binary and hexadecimal floating point); x stays below _PyHASH_MODULUS. */
+    x = 0;
+    while (m) {  /* until no mantissa bits remain */
+        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
+        m *= 268435456.0;  /* 2**28 */
+        e -= 28;  /* compensate for the scaling of m */
+        y = (Py_uhash_t)m;  /* pull out integer part */
+        m -= y;
+        x += y;
+        if (x >= _PyHASH_MODULUS)
+            x -= _PyHASH_MODULUS;
+    }
+
+    /* adjust for the exponent;  first reduce it into [0, _PyHASH_BITS) */
+    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
+    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
+
+    x = x * sign;  /* unsigned arithmetic: wraps around when sign == -1 */
+    if (x == (Py_uhash_t)-1)
+        x = (Py_uhash_t)-2;  /* never return -1; presumably the error value */
+    return (Py_hash_t)x;
+}
+
+Py_NO_INLINE static int
+hash_api_B(double v, Py_hash_t *result)
+{   /* Variant B: store the hash of v in *result; return 0 for NaN, else 1. */
+    int e, sign;        /* e: binary exponent of v; sign: +1 or -1 */
+    double m;           /* mantissa still to be folded into the hash */
+    Py_uhash_t x, y;    /* x: running hash; y: current 28-bit chunk */
+
+    if (!Py_IS_FINITE(v)) {
+        if (Py_IS_INFINITY(v)) {
+            *result = (v > 0 ? _PyHASH_INF : -_PyHASH_INF);
+            return 1;
+        }
+        else {
+            assert(Py_IS_NAN(v));
+            *result = _PyHASH_NAN;  /* placeholder hash; caller sees return 0 */
+            return 0;
+        }
+    }
+
+    m = frexp(v, &e);  /* v == m * 2**e */
+
+    sign = 1;
+    if (m < 0) {
+        sign = -1;
+        m = -m;  /* work on the magnitude; the sign is re-applied at the end */
+    }
+
+    /* Consume the mantissa 28 bits per iteration (this should work well for
+       binary and hexadecimal floating point); x stays below _PyHASH_MODULUS. */
+    x = 0;
+    while (m) {  /* until no mantissa bits remain */
+        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
+        m *= 268435456.0;  /* 2**28 */
+        e -= 28;  /* compensate for the scaling of m */
+        y = (Py_uhash_t)m;  /* pull out integer part */
+        m -= y;
+        x += y;
+        if (x >= _PyHASH_MODULUS)
+            x -= _PyHASH_MODULUS;
+    }
+
+    /* adjust for the exponent;  first reduce it into [0, _PyHASH_BITS) */
+    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
+    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
+
+    x = x * sign;  /* unsigned arithmetic: wraps around when sign == -1 */
+    if (x == (Py_uhash_t)-1)
+        x = (Py_uhash_t)-2;  /* never store -1; presumably the error value */
+    *result = (Py_hash_t)x;
+    return 1;
+}
+
+Py_NO_INLINE static Py_hash_t
+hash_api_C(double v)
+{   /* Variant C: return the hash of v; a NaN yields _PyHASH_NAN. */
+    int e, sign;        /* e: binary exponent of v; sign: +1 or -1 */
+    double m;           /* mantissa still to be folded into the hash */
+    Py_uhash_t x, y;    /* x: running hash; y: current 28-bit chunk */
+
+    if (!Py_IS_FINITE(v)) {
+        if (Py_IS_INFINITY(v)) {
+            return (v > 0 ? _PyHASH_INF : -_PyHASH_INF);
+        }
+        else {
+            assert(Py_IS_NAN(v));
+            return _PyHASH_NAN;  /* caller cannot tell NaN from a real hash */
+        }
+    }
+
+    m = frexp(v, &e);  /* v == m * 2**e */
+
+    sign = 1;
+    if (m < 0) {
+        sign = -1;
+        m = -m;  /* work on the magnitude; the sign is re-applied at the end */
+    }
+
+    /* Consume the mantissa 28 bits per iteration (this should work well for
+       binary and hexadecimal floating point); x stays below _PyHASH_MODULUS. */
+    x = 0;
+    while (m) {  /* until no mantissa bits remain */
+        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
+        m *= 268435456.0;  /* 2**28 */
+        e -= 28;  /* compensate for the scaling of m */
+        y = (Py_uhash_t)m;  /* pull out integer part */
+        m -= y;
+        x += y;
+        if (x >= _PyHASH_MODULUS)
+            x -= _PyHASH_MODULUS;
+    }
+
+    /* adjust for the exponent;  first reduce it into [0, _PyHASH_BITS) */
+    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
+    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
+
+    x = x * sign;  /* unsigned arithmetic: wraps around when sign == -1 */
+    if (x == (Py_uhash_t)-1)
+        x = (Py_uhash_t)-2;  /* never return -1; presumably the error value */
+    return (Py_hash_t)x;
+}
+
+
+static PyObject *
+bench_api_A(PyObject *Py_UNUSED(module), PyObject *args)
+{   /* Return, as a float, the seconds spent in `loops` hash_api_A() calls. */
+    Py_ssize_t loops;
+    double d;
+    if (!PyArg_ParseTuple(args, "nd", &loops, &d)) {  /* (loops, d) */
+        return NULL;
+    }
+    PyObject *obj = Py_None;  /* object argument handed to hash_api_A() */
+
+    _PyTime_t t1 = _PyTime_GetPerfCounter();
+    for (Py_ssize_t i=0; i < loops; i++) {
+        Py_hash_t hash = hash_api_A(obj, d);
+        if (hash == 0) {  /* e.g. d == 0.0; raise instead of a bare NULL */
+            return PyErr_Format(PyExc_ValueError, "hash of d is 0");
+        }
+    }
+    _PyTime_t t2 = _PyTime_GetPerfCounter();
+
+    return PyFloat_FromDouble(_PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
+static PyObject *
+bench_api_B(PyObject *Py_UNUSED(module), PyObject *args)
+{   /* Return, as a float, the seconds spent in `loops` hash_api_B() calls. */
+    Py_ssize_t loops;
+    double d;
+    if (!PyArg_ParseTuple(args, "nd", &loops, &d)) {  /* (loops, d) */
+        return NULL;
+    }
+
+    _PyTime_t t1 = _PyTime_GetPerfCounter();
+    for (Py_ssize_t i=0; i < loops; i++) {
+        Py_hash_t hash;  /* always written by hash_api_B(), even for NaN */
+        (void)hash_api_B(d, &hash);
+        if (hash == 0) {  /* e.g. d == 0.0 or NaN; raise, not a bare NULL */
+            return PyErr_Format(PyExc_ValueError, "hash of d is 0");
+        }
+    }
+    _PyTime_t t2 = _PyTime_GetPerfCounter();
+
+    return PyFloat_FromDouble(_PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
+static PyObject *
+bench_api_C(PyObject *Py_UNUSED(module), PyObject *args)
+{   /* Return, as a float, the seconds spent in `loops` hash_api_C() calls. */
+    Py_ssize_t loops;
+    double d;
+    if (!PyArg_ParseTuple(args, "nd", &loops, &d)) {  /* (loops, d) */
+        return NULL;
+    }
+
+    _PyTime_t t1 = _PyTime_GetPerfCounter();
+    for (Py_ssize_t i=0; i < loops; i++) {
+        Py_hash_t hash = hash_api_C(d);
+        if (hash == 0) {  /* e.g. d == 0.0 or NaN; raise, not a bare NULL */
+            return PyErr_Format(PyExc_ValueError, "hash of d is 0");
+        }
+    }
+    _PyTime_t t2 = _PyTime_GetPerfCounter();
+
+    return PyFloat_FromDouble(_PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -1688,6 +1904,9 @@ static PyMethodDef module_functions[] = {
     {"restore_crossinterp_data", restore_crossinterp_data,       METH_VARARGS},
     _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
     {"get_type_module_name",    get_type_module_name,            METH_O},
+    {"bench_api_A", bench_api_A, METH_VARARGS},
+    {"bench_api_B", bench_api_B, METH_VARARGS},
+    {"bench_api_C", bench_api_C, METH_VARARGS},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/bench.py b/bench.py
new file mode 100644
index 00000000000000..e8e591f2075422
--- /dev/null
+++ b/bench.py
@@ -0,0 +1,7 @@
+import _testinternalcapi
+import pyperf
+# Time each PyHash_Double() API variant on the same finite, non-zero double.
+perf_runner = pyperf.Runner()
+sample = 1.0
+for api in ('bench_api_A', 'bench_api_B', 'bench_api_C'):
+    perf_runner.bench_time_func(api, getattr(_testinternalcapi, api), sample)
diff --git a/profile_task.py b/profile_task.py
new file mode 100644
index 00000000000000..02639a5e47c061
--- /dev/null
+++ b/profile_task.py
@@ -0,0 +1,7 @@
+from _testinternalcapi import bench_api_A, bench_api_B, bench_api_C
+# Exercise all three hash API variants equally (10 rounds of 2**23 calls).
+n_calls, sample = 1 << 23, 1.0
+for _round in range(10):
+    bench_api_A(n_calls, sample)
+    bench_api_B(n_calls, sample)
+    bench_api_C(n_calls, sample)
diff --git a/run.sh b/run.sh
new file mode 100755
index 00000000000000..677e0d9d655269
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,7 @@
+set -e -x  # abort on the first failing command; echo each command
+git clean -fdx  # WARNING: deletes every untracked and ignored file
+./configure --enable-optimizations PROFILE_TASK="profile_task.py"
+make
+./python -m venv env  # venv built from the freshly compiled interpreter
+env/bin/python -m pip install pyperf
+env/bin/python bench.py -o bench.json -v  # results are written to bench.json

From b24683355467e41f5f79f814c9eea9fba932265c Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 28 Nov 2023 00:48:10 +0100
Subject: [PATCH 2/2] Fix build: define _PyHASH_NAN

---
 Modules/_testinternalcapi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index b0a280465dae9b..31a4f0594e6f90 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1624,6 +1624,7 @@ get_type_module_name(PyObject *self, PyObject *type)
     return _PyType_GetModuleName((PyTypeObject *)type);
 }
 
+#define _PyHASH_NAN 0  /* NaN hash used by hash_api_B() and hash_api_C() */
 
 Py_NO_INLINE static Py_hash_t
 hash_api_A(PyObject *inst, double v)