From 832fb72b07c771abd8332462af0511def316901e Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Tue, 28 Nov 2023 00:26:13 +0100
Subject: [PATCH 1/2] gh-111545: Benchmark PyHash_Double()

---
Review notes (everything between "---" and the first "diff --git" is
ignored when the patch is applied):

* bench_api_A/B/C returned NULL without setting an exception when the
  measured hash was 0 (for example d = 0.0).  They now raise ValueError.
* run.sh is created with mode 100755 but had no interpreter line; it now
  starts with "#!/bin/sh" and warns about "git clean -fdx".
* Hunk line counts and the diffstat were updated for the added lines.
  NOTE(review): the post-image blob ids on the "index" lines of
  Modules/_testinternalcapi.c and run.sh were not regenerated.
* NOTE(review): the exact inter-token whitespace of context lines could
  not be recovered from the collapsed copy of this patch; if a hunk is
  rejected, retry with "git apply --ignore-whitespace".

 Modules/_testinternalcapi.c | 239 ++++++++++++++++++++++++++++++++++++
 bench.py                    |   7 ++
 profile_task.py             |   7 ++
 run.sh                      |  11 ++
 4 files changed, 264 insertions(+)
 create mode 100644 bench.py
 create mode 100644 profile_task.py
 create mode 100755 run.sh

diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 4607a3faf17f74..b0a280465dae9b 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1625,6 +1625,242 @@ get_type_module_name(PyObject *self, PyObject *type)
 }
 
 
+/* Variant A: return the hash of v.  A NaN is hashed from the `inst`
+   pointer using _Py_HashPointer(). */
+Py_NO_INLINE static Py_hash_t
+hash_api_A(PyObject *inst, double v)
+{
+    int e, sign;
+    double m;
+    Py_uhash_t x, y;
+
+    if (!Py_IS_FINITE(v)) {
+        if (Py_IS_INFINITY(v))
+            return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
+        else
+            return _Py_HashPointer(inst);
+    }
+
+    m = frexp(v, &e);
+
+    sign = 1;
+    if (m < 0) {
+        sign = -1;
+        m = -m;
+    }
+
+    /* process 28 bits at a time; this should work well both for binary
+       and hexadecimal floating point. */
+    x = 0;
+    while (m) {
+        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
+        m *= 268435456.0; /* 2**28 */
+        e -= 28;
+        y = (Py_uhash_t)m; /* pull out integer part */
+        m -= y;
+        x += y;
+        if (x >= _PyHASH_MODULUS)
+            x -= _PyHASH_MODULUS;
+    }
+
+    /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
+    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
+    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
+
+    x = x * sign;
+    if (x == (Py_uhash_t)-1)
+        x = (Py_uhash_t)-2;
+    return (Py_hash_t)x;
+}
+
+/* Variant B: store the hash of v in *result.  Return 1, or return 0 if
+   v is a NaN (*result is then set to _PyHASH_NAN). */
+Py_NO_INLINE static int
+hash_api_B(double v, Py_hash_t *result)
+{
+    int e, sign;
+    double m;
+    Py_uhash_t x, y;
+
+    if (!Py_IS_FINITE(v)) {
+        if (Py_IS_INFINITY(v)) {
+            *result = (v > 0 ? _PyHASH_INF : -_PyHASH_INF);
+            return 1;
+        }
+        else {
+            assert(Py_IS_NAN(v));
+            *result = _PyHASH_NAN;
+            return 0;
+        }
+    }
+
+    m = frexp(v, &e);
+
+    sign = 1;
+    if (m < 0) {
+        sign = -1;
+        m = -m;
+    }
+
+    /* process 28 bits at a time; this should work well both for binary
+       and hexadecimal floating point. */
+    x = 0;
+    while (m) {
+        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
+        m *= 268435456.0; /* 2**28 */
+        e -= 28;
+        y = (Py_uhash_t)m; /* pull out integer part */
+        m -= y;
+        x += y;
+        if (x >= _PyHASH_MODULUS)
+            x -= _PyHASH_MODULUS;
+    }
+
+    /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
+    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
+    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
+
+    x = x * sign;
+    if (x == (Py_uhash_t)-1)
+        x = (Py_uhash_t)-2;
+    *result = (Py_hash_t)x;
+    return 1;
+}
+
+/* Variant C: return the hash of v.  A NaN hashes to _PyHASH_NAN. */
+Py_NO_INLINE static Py_hash_t
+hash_api_C(double v)
+{
+    int e, sign;
+    double m;
+    Py_uhash_t x, y;
+
+    if (!Py_IS_FINITE(v)) {
+        if (Py_IS_INFINITY(v)) {
+            return (v > 0 ? _PyHASH_INF : -_PyHASH_INF);
+        }
+        else {
+            assert(Py_IS_NAN(v));
+            return _PyHASH_NAN;
+        }
+    }
+
+    m = frexp(v, &e);
+
+    sign = 1;
+    if (m < 0) {
+        sign = -1;
+        m = -m;
+    }
+
+    /* process 28 bits at a time; this should work well both for binary
+       and hexadecimal floating point. */
+    x = 0;
+    while (m) {
+        x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
+        m *= 268435456.0; /* 2**28 */
+        e -= 28;
+        y = (Py_uhash_t)m; /* pull out integer part */
+        m -= y;
+        x += y;
+        if (x >= _PyHASH_MODULUS)
+            x -= _PyHASH_MODULUS;
+    }
+
+    /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
+    e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
+    x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
+
+    x = x * sign;
+    if (x == (Py_uhash_t)-1)
+        x = (Py_uhash_t)-2;
+    return (Py_hash_t)x;
+}
+
+
+/* bench_api_A(loops, d): call hash_api_A() `loops` times and return the
+   elapsed time in seconds.  Raise ValueError if the hash is 0. */
+static PyObject *
+bench_api_A(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    Py_ssize_t loops;
+    double d;
+    if (!PyArg_ParseTuple(args, "nd", &loops, &d)) {
+        return NULL;
+    }
+    PyObject *obj = Py_None;
+
+    _PyTime_t t1 = _PyTime_GetPerfCounter();
+    for (Py_ssize_t i=0; i < loops; i++) {
+        Py_hash_t hash = hash_api_A(obj, d);
+        if (hash == 0) {
+            /* Never return NULL without an exception set. */
+            PyErr_SetString(PyExc_ValueError,
+                            "cannot benchmark a value whose hash is 0");
+            return NULL;
+        }
+    }
+    _PyTime_t t2 = _PyTime_GetPerfCounter();
+
+    return PyFloat_FromDouble(_PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
+/* bench_api_B(loops, d): call hash_api_B() `loops` times and return the
+   elapsed time in seconds.  Raise ValueError if the hash is 0. */
+static PyObject *
+bench_api_B(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    Py_ssize_t loops;
+    double d;
+    if (!PyArg_ParseTuple(args, "nd", &loops, &d)) {
+        return NULL;
+    }
+
+    _PyTime_t t1 = _PyTime_GetPerfCounter();
+    for (Py_ssize_t i=0; i < loops; i++) {
+        Py_hash_t hash;
+        (void)hash_api_B(d, &hash);
+        if (hash == 0) {
+            /* Never return NULL without an exception set. */
+            PyErr_SetString(PyExc_ValueError,
+                            "cannot benchmark a value whose hash is 0");
+            return NULL;
+        }
+    }
+    _PyTime_t t2 = _PyTime_GetPerfCounter();
+
+    return PyFloat_FromDouble(_PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
+/* bench_api_C(loops, d): call hash_api_C() `loops` times and return the
+   elapsed time in seconds.  Raise ValueError if the hash is 0. */
+static PyObject *
+bench_api_C(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    Py_ssize_t loops;
+    double d;
+    if (!PyArg_ParseTuple(args, "nd", &loops, &d)) {
+        return NULL;
+    }
+
+    _PyTime_t t1 = _PyTime_GetPerfCounter();
+    for (Py_ssize_t i=0; i < loops; i++) {
+        Py_hash_t hash = hash_api_C(d);
+        if (hash == 0) {
+            /* Never return NULL without an exception set. */
+            PyErr_SetString(PyExc_ValueError,
+                            "cannot benchmark a value whose hash is 0");
+            return NULL;
+        }
+    }
+    _PyTime_t t2 = _PyTime_GetPerfCounter();
+
+    return PyFloat_FromDouble(_PyTime_AsSecondsDouble(t2 - t1));
+}
+
+
 static PyMethodDef module_functions[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -1688,6 +1924,9 @@ static PyMethodDef module_functions[] = {
     {"restore_crossinterp_data", restore_crossinterp_data, METH_VARARGS},
     _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
     {"get_type_module_name", get_type_module_name, METH_O},
+    {"bench_api_A", bench_api_A, METH_VARARGS},
+    {"bench_api_B", bench_api_B, METH_VARARGS},
+    {"bench_api_C", bench_api_C, METH_VARARGS},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/bench.py b/bench.py
new file mode 100644
index 00000000000000..e8e591f2075422
--- /dev/null
+++ b/bench.py
@@ -0,0 +1,7 @@
+import pyperf
+import _testinternalcapi
+runner = pyperf.Runner()
+d = 1.0
+runner.bench_time_func('bench_api_A', _testinternalcapi.bench_api_A, d)
+runner.bench_time_func('bench_api_B', _testinternalcapi.bench_api_B, d)
+runner.bench_time_func('bench_api_C', _testinternalcapi.bench_api_C, d)
diff --git a/profile_task.py b/profile_task.py
new file mode 100644
index 00000000000000..02639a5e47c061
--- /dev/null
+++ b/profile_task.py
@@ -0,0 +1,7 @@
+import _testinternalcapi
+loops = 2**23
+d = 1.0
+for _ in range(10):
+    _testinternalcapi.bench_api_A(loops, d)
+    _testinternalcapi.bench_api_B(loops, d)
+    _testinternalcapi.bench_api_C(loops, d)
diff --git a/run.sh b/run.sh
new file mode 100755
index 00000000000000..677e0d9d655269
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Build CPython with --enable-optimizations using profile_task.py as the
+# PROFILE_TASK, then run bench.py with pyperf from a fresh venv.
+# WARNING: "git clean -fdx" deletes every untracked and ignored file.
+set -e -x
+git clean -fdx
+./configure --enable-optimizations PROFILE_TASK="profile_task.py"
+make
+./python -m venv env
+env/bin/python -m pip install pyperf
+env/bin/python bench.py -o bench.json -v
From b24683355467e41f5f79f814c9eea9fba932265c Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Tue, 28 Nov 2023 00:48:10 +0100
Subject: [PATCH 2/2] Fix build: define _PyHASH_NAN

---
Review notes (ignored when the patch is applied):

* The trailing context of the hunk below was updated to match the
  comment lines that patch 1/2 now adds above hash_api_A().
  NOTE(review): the blob ids on the "index" line were not regenerated.

 Modules/_testinternalcapi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index b0a280465dae9b..31a4f0594e6f90 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1624,6 +1624,7 @@ get_type_module_name(PyObject *self, PyObject *type)
     return _PyType_GetModuleName((PyTypeObject *)type);
 }
 
+#define _PyHASH_NAN 0
 
 /* Variant A: return the hash of v.  A NaN is hashed from the `inst`
    pointer using _Py_HashPointer(). */