From a3215e1ec2c9250996b9d435c192946b6a2994dc Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Wed, 15 Jan 2025 10:38:55 -0500 Subject: [PATCH 01/10] gh-128657: fix _hashopenssl ref/data race --- Modules/_hashopenssl.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 082929be3c77b7..755f5dff9a5f9e 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -26,6 +26,7 @@ #include "Python.h" #include "pycore_hashtable.h" #include "pycore_strhex.h" // _Py_strhex() +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_RELAXED #include "hashlib.h" /* EVP is the preferred interface to hashing in OpenSSL */ @@ -369,6 +370,7 @@ static PY_EVP_MD* py_digest_by_name(PyObject *module, const char *name, enum Py_hash_type py_ht) { PY_EVP_MD *digest = NULL; + PY_EVP_MD *other_digest = NULL; _hashlibstate *state = get_hashlib_state(module); py_hashentry_t *entry = (py_hashentry_t *)_Py_hashtable_get( state->hashtable, (const void*)name @@ -379,20 +381,28 @@ py_digest_by_name(PyObject *module, const char *name, enum Py_hash_type py_ht) case Py_ht_evp: case Py_ht_mac: case Py_ht_pbkdf2: - if (entry->evp == NULL) { - entry->evp = PY_EVP_MD_fetch(entry->ossl_name, NULL); + digest = FT_ATOMIC_LOAD_PTR_RELAXED(entry->evp); + if (digest == NULL) { + digest = PY_EVP_MD_fetch(entry->ossl_name, NULL); + // exchange just in case another thread did same thing at same time + other_digest = _Py_atomic_exchange_ptr(&entry->evp, digest); } - digest = entry->evp; break; case Py_ht_evp_nosecurity: - if (entry->evp_nosecurity == NULL) { - entry->evp_nosecurity = PY_EVP_MD_fetch(entry->ossl_name, "-fips"); + digest = FT_ATOMIC_LOAD_PTR_RELAXED(entry->evp_nosecurity); + if (digest == NULL) { + digest = PY_EVP_MD_fetch(entry->ossl_name, "-fips"); + // exchange just in case another thread did same thing at same time + other_digest = _Py_atomic_exchange_ptr(&entry->evp_nosecurity, digest); } - digest = entry->evp_nosecurity; break; } + // if another thread same thing at same time make sure we got same ptr + assert(other_digest == NULL || other_digest == digest); if (digest != NULL) { - PY_EVP_MD_up_ref(digest); + if (other_digest == NULL) { + PY_EVP_MD_up_ref(digest); + } } } else { // Fall back for looking up an unindexed OpenSSL specific name. From b91ba0b7549dd2c7054dfcf358d4a4cc0aa26341 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 15:45:22 +0000 Subject: [PATCH 02/10] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst diff --git a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst new file mode 100644 index 00000000000000..9c90a5c306179b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst @@ -0,0 +1 @@ +Fix a possible data and PY_EVP_MD refcount race in `_hashopenssl:py_digest_by_name()` under free-threading. From 519388df61557912108ec730fb9923d59fc205f8 Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Wed, 15 Jan 2025 10:52:20 -0500 Subject: [PATCH 03/10] Update 2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst --- .../next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst index 9c90a5c306179b..f9ef51598cbf1f 100644 --- a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst +++ b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst @@ -1 +1 @@ -Fix a possible data and PY_EVP_MD refcount race in `_hashopenssl:py_digest_by_name()` under free-threading. +Fix a possible data and PY_EVP_MD refcount race in _hashopenssl.c `py_digest_by_name()` under free-threading. From 2767d89faad3191ea2fd5862976efcde628ffdce Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Wed, 15 Jan 2025 10:53:43 -0500 Subject: [PATCH 04/10] Update 2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst --- .../next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst index f9ef51598cbf1f..ce3166ae2f356b 100644 --- a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst +++ b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst @@ -1 +1 @@ -Fix a possible data and PY_EVP_MD refcount race in _hashopenssl.c `py_digest_by_name()` under free-threading. +Fix a possible data and PY_EVP_MD refcount race in _hashopenssl.c py_digest_by_name() under free-threading. From 4563481955e8f48bd967bd35e4329099e36ceded Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Thu, 16 Jan 2025 08:04:55 -0500 Subject: [PATCH 05/10] ifdef Py_GIL_DISABLED, change news --- .../2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst | 2 +- Modules/_hashopenssl.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst index ce3166ae2f356b..2bb2d23d12ea9b 100644 --- a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst +++ b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst @@ -1 +1 @@ -Fix a possible data and PY_EVP_MD refcount race in _hashopenssl.c py_digest_by_name() under free-threading. +Fix possible extra reference when using objects returned by :func:`hashlib.sha256` under :term:`free threading`. \ No newline at end of file diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 755f5dff9a5f9e..c64d24748b6c5a 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -384,16 +384,24 @@ py_digest_by_name(PyObject *module, const char *name, enum Py_hash_type py_ht) digest = FT_ATOMIC_LOAD_PTR_RELAXED(entry->evp); if (digest == NULL) { digest = PY_EVP_MD_fetch(entry->ossl_name, NULL); +#ifdef Py_GIL_DISABLED // exchange just in case another thread did same thing at same time other_digest = _Py_atomic_exchange_ptr(&entry->evp, digest); +#else + entry->evp = digest; +#endif } break; case Py_ht_evp_nosecurity: digest = FT_ATOMIC_LOAD_PTR_RELAXED(entry->evp_nosecurity); if (digest == NULL) { digest = PY_EVP_MD_fetch(entry->ossl_name, "-fips"); +#ifdef Py_GIL_DISABLED // exchange just in case another thread did same thing at same time other_digest = _Py_atomic_exchange_ptr(&entry->evp_nosecurity, digest); +#else + entry->evp_nosecurity = digest; +#endif } break; } From ccc44dfb09120010eb5a675fd52d48e584d67fbf Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Thu, 16 Jan 2025 08:23:06 -0500 Subject: [PATCH 06/10] add newline to news entry --- .../next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst index 2bb2d23d12ea9b..3b08a9fba59620 100644 --- a/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst +++ b/Misc/NEWS.d/next/Library/2025-01-15-15-45-21.gh-issue-128657.P5LNQA.rst @@ -1 +1 @@ -Fix possible extra reference when using objects returned by :func:`hashlib.sha256` under :term:`free threading`. \ No newline at end of file +Fix possible extra reference when using objects returned by :func:`hashlib.sha256` under :term:`free threading`. From 01f498eb928532e0abae59790ee3311f009339be Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Thu, 16 Jan 2025 08:26:01 -0500 Subject: [PATCH 07/10] nit picked --- Modules/_hashopenssl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index c64d24748b6c5a..d7586feea3efcd 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -25,14 +25,14 @@ #include #include "Python.h" #include "pycore_hashtable.h" -#include "pycore_strhex.h" // _Py_strhex() +#include "pycore_strhex.h" // _Py_strhex() #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_RELAXED #include "hashlib.h" /* EVP is the preferred interface to hashing in OpenSSL */ #include #include -#include // FIPS_mode() +#include // FIPS_mode() /* We use the object interface to discover what hashes OpenSSL supports. */ #include #include From 35fd4ba04bc8c0d2d69bdbf32eedf5bfb3c5b402 Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Thu, 16 Jan 2025 12:03:17 -0500 Subject: [PATCH 08/10] added test and test_hashlib to list of tsan tests --- Lib/test/libregrtest/tsan.py | 1 + Lib/test/test_hashlib.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py index 00d5779d950e72..38915ebc12b345 100644 --- a/Lib/test/libregrtest/tsan.py +++ b/Lib/test/libregrtest/tsan.py @@ -8,6 +8,7 @@ 'test_code', 'test_enum', 'test_functools', + 'test_hashlib', 'test_httpservers', 'test_imaplib', 'test_importlib', diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 575b2cd0da7056..ed59b5220ece86 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1196,6 +1196,29 @@ def test_file_digest(self): with open(os_helper.TESTFN, "wb") as f: hashlib.file_digest(f, "sha256") + @unittest.skipUnless(support.check_sanitizer(thread=True), "only meaningful on free-threading") + def test_gh_128657(self): + def test_hashlib_sha256(): + hash_obj = hashlib.sha256() + + def closure(barrier): + barrier.wait() + test_hashlib_sha256() + + num_workers = 40 + num_runs = 20 + + for i in range(num_runs): + barrier = threading.Barrier(num_workers) + thrds = [] + + for i in range(num_workers): + thrds.append(thrd := threading.Thread(target=closure, args=(barrier,))) + thrd.start() + + for thrd in thrds: + thrd.join() + if __name__ == "__main__": unittest.main() From c7c497cb1c13c1b9ed6fc70cc5fcdac2eb27e10b Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Sat, 18 Jan 2025 13:28:37 -0500 Subject: [PATCH 09/10] requested changes --- Lib/test/test_hashlib.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index ed59b5220ece86..96307830003400 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1196,8 +1196,13 @@ def test_file_digest(self): with open(os_helper.TESTFN, "wb") as f: hashlib.file_digest(f, "sha256") - @unittest.skipUnless(support.check_sanitizer(thread=True), "only meaningful on free-threading") - def test_gh_128657(self): + # There was a data race in py_digest_by_name() which caused tsan to + # complain and might sometimes cause an extra refcount in `PY_EVP_MD` + # structure if two threads attempted to create a `sha256()` at the same + # time. See gh-128657. + @unittest.skipUnless(support.Py_GIL_DISABLED, + "only meaningful on free-threading") + def test_py_digest_by_name_data_race(self): def test_hashlib_sha256(): hash_obj = hashlib.sha256() @@ -1206,18 +1211,17 @@ def closure(barrier): test_hashlib_sha256() num_workers = 40 - num_runs = 20 + barrier = threading.Barrier(num_workers) - for i in range(num_runs): - barrier = threading.Barrier(num_workers) - thrds = [] + with threading_helper.catch_threading_exception() as cm: + threads = [threading.Thread(target=closure, args=(barrier,)) + for _ in range(num_workers)] - for i in range(num_workers): - thrds.append(thrd := threading.Thread(target=closure, args=(barrier,))) - thrd.start() + with threading_helper.start_threads(threads): + pass - for thrd in thrds: - thrd.join() + if cm.exc_value: + raise cm.exc_value if __name__ == "__main__": From 1bd904fec250c6cca2da0e91b47e03a5b4aeb27a Mon Sep 17 00:00:00 2001 From: Tomasz Pytel Date: Fri, 7 Feb 2025 17:35:16 -0500 Subject: [PATCH 10/10] revert tests --- Lib/test/libregrtest/tsan.py | 1 - Lib/test/test_hashlib.py | 27 --------------------------- 2 files changed, 28 deletions(-) diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py index 38915ebc12b345..00d5779d950e72 100644 --- a/Lib/test/libregrtest/tsan.py +++ b/Lib/test/libregrtest/tsan.py @@ -8,7 +8,6 @@ 'test_code', 'test_enum', 'test_functools', - 'test_hashlib', 'test_httpservers', 'test_imaplib', 'test_importlib', diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 96307830003400..575b2cd0da7056 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1196,33 +1196,6 @@ def test_file_digest(self): with open(os_helper.TESTFN, "wb") as f: hashlib.file_digest(f, "sha256") - # There was a data race in py_digest_by_name() which caused tsan to - # complain and might sometimes cause an extra refcount in `PY_EVP_MD` - # structure if two threads attempted to create a `sha256()` at the same - # time. See gh-128657. - @unittest.skipUnless(support.Py_GIL_DISABLED, - "only meaningful on free-threading") - def test_py_digest_by_name_data_race(self): - def test_hashlib_sha256(): - hash_obj = hashlib.sha256() - - def closure(barrier): - barrier.wait() - test_hashlib_sha256() - - num_workers = 40 - barrier = threading.Barrier(num_workers) - - with threading_helper.catch_threading_exception() as cm: - threads = [threading.Thread(target=closure, args=(barrier,)) - for _ in range(num_workers)] - - with threading_helper.start_threads(threads): - pass - - if cm.exc_value: - raise cm.exc_value - if __name__ == "__main__": unittest.main()