CI: only run tests that might use threads for TSAN run

numpy · ngoldbaum · Feb 5, 2025 · Feb 3, 2025 · Feb 4, 2025 · Feb 4, 2025
commit 59f815d7bc0764bedff1b9aa1f8bee0b90bb081e
diff --git a/.github/workflows/compiler_sanitizers.yml b/.github/workflows/compiler_sanitizers.yml
@@ -116,9 +116,8 @@ jobs:
         python -m spin build -j2 -- -Db_sanitize=thread
     - name: Test
       run: |
-        # pass -s to pytest to see TSAN errors and warnings, otherwise pytest captures them
+        # TSAN is slow, so only run test files that "import threading"; -s lets TSAN reports through pytest's capture
         TSAN_OPTIONS=allocator_may_return_null=1:halt_on_error=1 \
-        python -m spin test -- -v -s --timeout=600 --durations=10
-    - name: Setup tmate session
-      if: ${{ failure() }}
-      uses: mxschmitt/action-tmate@v3
+        python -m spin test \
+        `find numpy -name "test*.py" | xargs grep -l "import threading" | tr '\n' ' '` \
+        -- -v -s --timeout=600 --durations=10
diff --git a/numpy/_core/tests/test_indexing.py b/numpy/_core/tests/test_indexing.py
@@ -590,32 +590,6 @@ def test_too_many_advanced_indices(self, index, num, original_ndim):
         with pytest.raises(IndexError):
             arr[(index,) * num] = 1.
 
-    @pytest.mark.skipif(IS_WASM, reason="no threading")
-    def test_structured_advanced_indexing(self):
-        # Test that copyswap(n) used by integer array indexing is threadsafe
-        # for structured datatypes, see gh-15387. This test can behave randomly.
-        from concurrent.futures import ThreadPoolExecutor
-
-        # Create a deeply nested dtype to make a failure more likely:
-        dt = np.dtype([("", "f8")])
-        dt = np.dtype([("", dt)] * 2)
-        dt = np.dtype([("", dt)] * 2)
-        # The array should be large enough to likely run into threading issues
-        arr = np.random.uniform(size=(6000, 8)).view(dt)[:, 0]
-
-        rng = np.random.default_rng()
-
-        def func(arr):
-            indx = rng.integers(0, len(arr), size=6000, dtype=np.intp)
-            arr[indx]
-
-        tpe = ThreadPoolExecutor(max_workers=8)
-        futures = [tpe.submit(func, arr) for _ in range(10)]
-        for f in futures:
-            f.result()
-
-        assert arr.dtype is dt
-
     def test_nontuple_ndindex(self):
         a = np.arange(25).reshape((5, 5))
         assert_equal(a[[0, 1]], np.array([a[0], a[1]]))

diff --git a/numpy/_core/tests/test_multithreading.py b/numpy/_core/tests/test_multithreading.py
@@ -1,4 +1,6 @@
+import concurrent.futures
 import threading
+import string
 
 import numpy as np
 import pytest
@@ -165,3 +167,90 @@ def closure(b):
             x = np.repeat(x0, 2, axis=0)[::2]
 
     run_threaded(closure, max_workers=10, pass_barrier=True)
+
+
+def test_structured_advanced_indexing():
+    # Check that copyswap(n), used by integer array indexing, is threadsafe
+    # for structured datatypes, see gh-15387. Failures are nondeterministic.
+
+    # Create a deeply nested dtype to make a failure more likely:
+    dt = np.dtype([("", "f8")])
+    dt = np.dtype([("", dt)] * 2)
+    dt = np.dtype([("", dt)] * 2)
+    # The array should be large enough to likely run into threading issues
+    arr = np.random.uniform(size=(6000, 8)).view(dt)[:, 0]
+
+    rng = np.random.default_rng()
+
+    def func(arr):
+        indx = rng.integers(0, len(arr), size=6000, dtype=np.intp)
+        arr[indx]
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
+        futures = [tpe.submit(func, arr) for _ in range(10)]
+        for f in futures:
+            f.result()
+
+    assert arr.dtype is dt
+
+
+def test_structured_threadsafety2():
+    # Nonzero (and some other functions) should be threadsafe for
+    # structured datatypes, see gh-15387. Failures are nondeterministic, so
+    # a passing run does not prove thread safety.
+
+    # Create a deeply nested dtype to make a failure more likely:
+    dt = np.dtype([("", "f8")])
+    dt = np.dtype([("", dt)])
+    dt = np.dtype([("", dt)] * 2)
+    # The array should be large enough to likely run into threading issues
+    arr = np.random.uniform(size=(5000, 4)).view(dt)[:, 0]
+
+    def func(arr):
+        arr.nonzero()
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
+        futures = [tpe.submit(func, arr) for _ in range(10)]
+        for f in futures:
+            f.result()
+
+    assert arr.dtype is dt
+
+
+def test_stringdtype_multithreaded_access_and_mutation(
+        dtype, random_string_list):
+    # Stress test: random mutations from 8 threads; a threading bug shows up
+    # as a crash or deadlock. The fixture argument is replaced by seeded data.
+    rng = np.random.default_rng(0x4D3D3D3)
+
+    chars = list(string.ascii_letters + string.digits)
+    chars = np.array(chars, dtype="U1")
+    ret = rng.choice(chars, size=100 * 10, replace=True)
+    random_string_list = ret.view("U100")
+
+    def func(arr):
+        rnd = rng.random()
+        # pick one action per call: write one random element, run a ufunc
+        # (with or without out=arr), or re-initialize the whole array
+        if rnd < 0.25:
+            num = np.random.randint(0, arr.size)
+            arr[num] = arr[num] + "hello"
+        elif rnd < 0.5:
+            if rnd < 0.375:
+                np.add(arr, arr)
+            else:
+                np.add(arr, arr, out=arr)
+        elif rnd < 0.75:
+            if rnd < 0.625:  # midpoint of [0.5, 0.75) so both forms run
+                np.multiply(arr, np.int64(2))
+            else:
+                np.multiply(arr, np.int64(2), out=arr)
+        else:
+            arr[:] = random_string_list
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
+        arr = np.array(random_string_list, dtype=dtype)
+        futures = [tpe.submit(func, arr) for _ in range(500)]
+
+        for f in futures:
+            f.result()
diff --git a/numpy/_core/tests/test_nep50_promotions.py b/numpy/_core/tests/test_nep50_promotions.py
@@ -5,8 +5,6 @@
 """
 
 import operator
-import threading
-import warnings
 
 import numpy as np
 

diff --git a/numpy/_core/tests/test_numeric.py b/numpy/_core/tests/test_numeric.py
@@ -1956,29 +1956,6 @@ def __bool__(self):
         a = np.array([[ThrowsAfter(15)]] * 10)
         assert_raises(ValueError, np.nonzero, a)
 
-    @pytest.mark.skipif(IS_WASM, reason="wasm doesn't have threads")
-    def test_structured_threadsafety(self):
-        # Nonzero (and some other functions) should be threadsafe for
-        # structured datatypes, see gh-15387. This test can behave randomly.
-        from concurrent.futures import ThreadPoolExecutor
-
-        # Create a deeply nested dtype to make a failure more likely:
-        dt = np.dtype([("", "f8")])
-        dt = np.dtype([("", dt)])
-        dt = np.dtype([("", dt)] * 2)
-        # The array should be large enough to likely run into threading issues
-        arr = np.random.uniform(size=(5000, 4)).view(dt)[:, 0]
-
-        def func(arr):
-            arr.nonzero()
-
-        tpe = ThreadPoolExecutor(max_workers=8)
-        futures = [tpe.submit(func, arr) for _ in range(10)]
-        for f in futures:
-            f.result()
-
-        assert arr.dtype is dt
-
 
 class TestIndex:
     def test_boolean(self):

diff --git a/numpy/_core/tests/test_stringdtype.py b/numpy/_core/tests/test_stringdtype.py
@@ -1,4 +1,3 @@
-import concurrent.futures
 import itertools
 import os
 import pickle
@@ -11,48 +10,15 @@
 
 from numpy.dtypes import StringDType
 from numpy._core.tests._natype import pd_NA
-from numpy.testing import assert_array_equal, IS_WASM, IS_PYPY
+from numpy.testing import assert_array_equal, IS_PYPY
+from numpy.testing._private.utils import get_stringdtype_dtype as get_dtype
 
 
 @pytest.fixture
 def string_list():
     return ["abc", "def", "ghi" * 10, "A¢☃€ 😊" * 100, "Abc" * 1000, "DEF"]
 
 
-@pytest.fixture
-def random_string_list():
-    chars = list(string.ascii_letters + string.digits)
-    chars = np.array(chars, dtype="U1")
-    ret = np.random.choice(chars, size=100 * 10, replace=True)
-    return ret.view("U100")
-
-
-@pytest.fixture(params=[True, False])
-def coerce(request):
-    return request.param
-
-
-@pytest.fixture(
-    params=["unset", None, pd_NA, np.nan, float("nan"), "__nan__"],
-    ids=["unset", "None", "pandas.NA", "np.nan", "float('nan')", "string nan"],
-)
-def na_object(request):
-    return request.param
-
-
-def get_dtype(na_object, coerce=True):
-    # explicit is check for pd_NA because != with pd_NA returns pd_NA
-    if na_object is pd_NA or na_object != "unset":
-        return StringDType(na_object=na_object, coerce=coerce)
-    else:
-        return StringDType(coerce=coerce)
-
-
-@pytest.fixture()
-def dtype(na_object, coerce):
-    return get_dtype(na_object, coerce)
-
-
 # second copy for cast tests to do a cartesian product over dtypes
 @pytest.fixture(params=[True, False])
 def coerce2(request):
@@ -1208,40 +1174,6 @@ def test_growing_strings(dtype):
     assert_array_equal(arr, uarr)
 
 
-@pytest.mark.skipif(IS_WASM, reason="no threading support in wasm")
-def test_threaded_access_and_mutation(dtype, random_string_list):
-    # this test uses an RNG and may crash or cause deadlocks if there is a
-    # threading bug
-    rng = np.random.default_rng(0x4D3D3D3)
-
-    def func(arr):
-        rnd = rng.random()
-        # either write to random locations in the array, compute a ufunc, or
-        # re-initialize the array
-        if rnd < 0.25:
-            num = np.random.randint(0, arr.size)
-            arr[num] = arr[num] + "hello"
-        elif rnd < 0.5:
-            if rnd < 0.375:
-                np.add(arr, arr)
-            else:
-                np.add(arr, arr, out=arr)
-        elif rnd < 0.75:
-            if rnd < 0.875:
-                np.multiply(arr, np.int64(2))
-            else:
-                np.multiply(arr, np.int64(2), out=arr)
-        else:
-            arr[:] = random_string_list
-
-    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
-        arr = np.array(random_string_list, dtype=dtype)
-        futures = [tpe.submit(func, arr) for _ in range(500)]
-
-        for f in futures:
-            f.result()
-
-
 UFUNC_TEST_DATA = [
     "hello" * 10,
     "Ae¢☃€ 😊" * 20,

diff --git a/numpy/conftest.py b/numpy/conftest.py
@@ -2,6 +2,7 @@
 Pytest configuration and fixtures for the Numpy test suite.
 """
 import os
+import string
 import sys
 import tempfile
 from contextlib import contextmanager
@@ -10,9 +11,11 @@
 import hypothesis
 import pytest
 import numpy
+import numpy as np
 
 from numpy._core._multiarray_tests import get_fpu_mode
-from numpy.testing._private.utils import NOGIL_BUILD
+from numpy._core.tests._natype import pd_NA
+from numpy.testing._private.utils import NOGIL_BUILD, get_stringdtype_dtype
 
 try:
     from scipy_doctest.conftest import dt_config
@@ -231,3 +234,28 @@ def warnings_errors_and_rng(test=None):
         'numpy/f2py/_backends/_distutils.py',
     ]
 
+
+@pytest.fixture
+def random_string_list():
+    """Ten random 100-character alphanumeric strings as a "U100" array."""
+    alphabet = np.array(list(string.ascii_letters + string.digits), dtype="U1")
+    sample = np.random.choice(alphabet, size=100 * 10, replace=True)
+    return sample.view("U100")
+
+
+@pytest.fixture(params=[True, False])
+def coerce(request):
+    return request.param
+
+
+@pytest.fixture(
+    params=["unset", None, pd_NA, np.nan, float("nan"), "__nan__"],
+    ids=["unset", "None", "pandas.NA", "np.nan", "float('nan')", "string nan"],
+)
+def na_object(request):
+    return request.param
+
+
+@pytest.fixture()
+def dtype(na_object, coerce):
+    return get_stringdtype_dtype(na_object, coerce)
diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py
@@ -25,7 +25,7 @@
      intp, float32, empty, arange, array_repr, ndarray, isnat, array)
 from numpy import isfinite, isnan, isinf
 import numpy.linalg._umath_linalg
-from numpy._utils import _rename_parameter
+from numpy._core.tests._natype import pd_NA
 
 from io import StringIO
 
@@ -2706,3 +2706,11 @@ def run_threaded(func, max_workers=8, pass_count=False,
                 futures = [tpe.submit(func, *args) for _ in range(max_workers)]
             for f in futures:
                 f.result()
+
+
+def get_stringdtype_dtype(na_object, coerce=True):
+    """Build a StringDType, treating the string "unset" as "no na_object"."""
+    # identity check first: != against pd_NA returns pd_NA rather than a bool
+    if na_object is pd_NA or na_object != "unset":
+        return np.dtypes.StringDType(na_object=na_object, coerce=coerce)
+    return np.dtypes.StringDType(coerce=coerce)