CI: update sanitizer CI to use python compiled with ASAN and TSAN by ngoldbaum · Pull Request #28273 · numpy/numpy · GitHub
[go: up one dir, main page]

Skip to content

CI: update sanitizer CI to use python compiled with ASAN and TSAN #28273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
CI: only run tests that might use threads for TSAN run
  • Loading branch information
ngoldbaum committed Feb 4, 2025
commit 59f815d7bc0764bedff1b9aa1f8bee0b90bb081e
9 changes: 4 additions & 5 deletions 9 .github/workflows/compiler_sanitizers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,8 @@ jobs:
python -m spin build -j2 -- -Db_sanitize=thread
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this might also benefit from setting up ccache

- name: Test
run: |
# pass -s to pytest to see TSAN errors and warnings, otherwise pytest captures them
# These tests are slow, so only run tests in files that do "import threading" to make them count
TSAN_OPTIONS=allocator_may_return_null=1:halt_on_error=1 \
python -m spin test -- -v -s --timeout=600 --durations=10
- name: Setup tmate session
if: ${{ failure() }}
uses: mxschmitt/action-tmate@v3
python -m spin test \
`find numpy -name "test*.py" | xargs grep -l "import threading" | tr '\n' ' '` \
-- -v -s --timeout=600 --durations=10
26 changes: 0 additions & 26 deletions numpy/_core/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,32 +590,6 @@ def test_too_many_advanced_indices(self, index, num, original_ndim):
with pytest.raises(IndexError):
arr[(index,) * num] = 1.

@pytest.mark.skipif(IS_WASM, reason="no threading")
def test_structured_advanced_indexing(self):
    """Integer-array indexing of a structured array from several threads.

    Test that copyswap(n) used by integer array indexing is threadsafe
    for structured datatypes, see gh-15387. This test can behave randomly.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Create a deeply nested dtype to make a failure more likely:
    dt = np.dtype([("", "f8")])
    dt = np.dtype([("", dt)] * 2)
    dt = np.dtype([("", dt)] * 2)
    # The array should be large enough to likely run into threading issues
    arr = np.random.uniform(size=(6000, 8)).view(dt)[:, 0]

    rng = np.random.default_rng()

    def func(arr):
        indx = rng.integers(0, len(arr), size=6000, dtype=np.intp)
        arr[indx]

    # Use the executor as a context manager so its worker threads are
    # shut down when the block exits, including when a task raises.
    with ThreadPoolExecutor(max_workers=8) as tpe:
        futures = [tpe.submit(func, arr) for _ in range(10)]
        for f in futures:
            # result() re-raises any exception from the worker thread
            f.result()

    assert arr.dtype is dt

def test_nontuple_ndindex(self):
a = np.arange(25).reshape((5, 5))
assert_equal(a[[0, 1]], np.array([a[0], a[1]]))
Expand Down
89 changes: 89 additions & 0 deletions numpy/_core/tests/test_multithreading.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import concurrent.futures
import threading
import string

import numpy as np
import pytest
Expand Down Expand Up @@ -165,3 +167,90 @@ def closure(b):
x = np.repeat(x0, 2, axis=0)[::2]

run_threaded(closure, max_workers=10, pass_barrier=True)


def test_structured_advanced_indexing():
    """Integer-array indexing of a structured array from several threads.

    Test that copyswap(n) used by integer array indexing is threadsafe
    for structured datatypes, see gh-15387. This test can behave randomly.
    """
    # Create a deeply nested dtype to make a failure more likely:
    dt = np.dtype([("", "f8")])
    dt = np.dtype([("", dt)] * 2)
    dt = np.dtype([("", dt)] * 2)
    # The array should be large enough to likely run into threading issues
    arr = np.random.uniform(size=(6000, 8)).view(dt)[:, 0]

    rng = np.random.default_rng()

    def func(arr):
        indx = rng.integers(0, len(arr), size=6000, dtype=np.intp)
        arr[indx]

    # Use the executor as a context manager so its worker threads are
    # shut down when the block exits, including when a task raises.
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
        futures = [tpe.submit(func, arr) for _ in range(10)]
        for f in futures:
            # result() re-raises any exception from the worker thread
            f.result()

    assert arr.dtype is dt


def test_structured_threadsafety2():
    """Call ``nonzero`` on a structured array from several threads.

    Nonzero (and some other functions) should be threadsafe for
    structured datatypes, see gh-15387. This test can behave randomly.
    """
    # Create a deeply nested dtype to make a failure more likely:
    dt = np.dtype([("", "f8")])
    dt = np.dtype([("", dt)])
    dt = np.dtype([("", dt)] * 2)
    # The array should be large enough to likely run into threading issues
    arr = np.random.uniform(size=(5000, 4)).view(dt)[:, 0]

    def func(arr):
        arr.nonzero()

    # Use the executor as a context manager so its worker threads are
    # shut down when the block exits, including when a task raises.
    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
        futures = [tpe.submit(func, arr) for _ in range(10)]
        for f in futures:
            # result() re-raises any exception from the worker thread
            f.result()

    assert arr.dtype is dt


def test_stringdtype_multithreaded_access_and_mutation(
        dtype, random_string_list):
    """Concurrently read, mutate and re-initialize one StringDType array.

    This test uses an RNG and may crash or cause deadlocks if there is a
    threading bug.
    """
    rng = np.random.default_rng(0x4D3D3D3)

    # Rebuild the string data from the seeded generator; this deliberately
    # replaces the value of the ``random_string_list`` argument.
    chars = list(string.ascii_letters + string.digits)
    chars = np.array(chars, dtype="U1")
    ret = rng.choice(chars, size=100 * 10, replace=True)
    random_string_list = ret.view("U100")

    def func(arr):
        rnd = rng.random()
        # either write to random locations in the array, compute a ufunc, or
        # re-initialize the array
        if rnd < 0.25:
            num = np.random.randint(0, arr.size)
            arr[num] = arr[num] + "hello"
        elif rnd < 0.5:
            if rnd < 0.375:
                np.add(arr, arr)
            else:
                np.add(arr, arr, out=arr)
        elif rnd < 0.75:
            # Split [0.5, 0.75) at its midpoint so that both the
            # out-of-place and the in-place multiply are reachable.
            if rnd < 0.625:
                np.multiply(arr, np.int64(2))
            else:
                np.multiply(arr, np.int64(2), out=arr)
        else:
            arr[:] = random_string_list

    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
        arr = np.array(random_string_list, dtype=dtype)
        futures = [tpe.submit(func, arr) for _ in range(500)]

        for f in futures:
            # result() re-raises any exception from the worker thread
            f.result()
2 changes: 0 additions & 2 deletions numpy/_core/tests/test_nep50_promotions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
"""

import operator
import threading
import warnings

import numpy as np

Expand Down
23 changes: 0 additions & 23 deletions numpy/_core/tests/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1956,29 +1956,6 @@ def __bool__(self):
a = np.array([[ThrowsAfter(15)]] * 10)
assert_raises(ValueError, np.nonzero, a)

@pytest.mark.skipif(IS_WASM, reason="wasm doesn't have threads")
def test_structured_threadsafety(self):
    """Call ``nonzero`` on a structured array from several threads.

    Nonzero (and some other functions) should be threadsafe for
    structured datatypes, see gh-15387. This test can behave randomly.
    """
    from concurrent.futures import ThreadPoolExecutor

    # Create a deeply nested dtype to make a failure more likely:
    dt = np.dtype([("", "f8")])
    dt = np.dtype([("", dt)])
    dt = np.dtype([("", dt)] * 2)
    # The array should be large enough to likely run into threading issues
    arr = np.random.uniform(size=(5000, 4)).view(dt)[:, 0]

    def func(arr):
        arr.nonzero()

    # Use the executor as a context manager so its worker threads are
    # shut down when the block exits, including when a task raises.
    with ThreadPoolExecutor(max_workers=8) as tpe:
        futures = [tpe.submit(func, arr) for _ in range(10)]
        for f in futures:
            # result() re-raises any exception from the worker thread
            f.result()

    assert arr.dtype is dt


class TestIndex:
def test_boolean(self):
Expand Down
72 changes: 2 additions & 70 deletions numpy/_core/tests/test_stringdtype.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import concurrent.futures
import itertools
import os
import pickle
Expand All @@ -11,48 +10,15 @@

from numpy.dtypes import StringDType
from numpy._core.tests._natype import pd_NA
from numpy.testing import assert_array_equal, IS_WASM, IS_PYPY
from numpy.testing import assert_array_equal, IS_PYPY
from numpy.testing._private.utils import get_stringdtype_dtype as get_dtype


@pytest.fixture
def string_list():
    """Return a fixed list of short, long, ASCII and non-ASCII strings."""
    samples = [
        "abc",
        "def",
        "ghi" * 10,
        "A¢☃€ 😊" * 100,
        "Abc" * 1000,
        "DEF",
    ]
    return samples


@pytest.fixture
def random_string_list():
    """Return a "U100" array of ten random alphanumeric strings."""
    alphabet = np.array(
        list(string.ascii_letters + string.digits), dtype="U1"
    )
    # Draw 1000 single characters, then reinterpret the buffer as ten
    # strings of 100 characters each.
    picks = np.random.choice(alphabet, size=100 * 10, replace=True)
    return picks.view("U100")


@pytest.fixture(params=[True, False])
def coerce(request):
    """Parametrized fixture: yields ``True`` and then ``False``."""
    flag = request.param
    return flag


@pytest.fixture(
    params=["unset", None, pd_NA, np.nan, float("nan"), "__nan__"],
    ids=["unset", "None", "pandas.NA", "np.nan", "float('nan')", "string nan"],
)
def na_object(request):
    """Parametrized fixture: yields each candidate ``na_object`` value.

    The string "unset" is a placeholder; ``get_dtype`` maps it to a
    StringDType built without an ``na_object`` argument.
    """
    candidate = request.param
    return candidate


def get_dtype(na_object, coerce=True):
# explicit is check for pd_NA because != with pd_NA returns pd_NA
if na_object is pd_NA or na_object != "unset":
return StringDType(na_object=na_object, coerce=coerce)
else:
return StringDType(coerce=coerce)


@pytest.fixture()
def dtype(na_object, coerce):
    """Fixture: StringDType built from the ``na_object``/``coerce`` fixtures."""
    string_dtype = get_dtype(na_object, coerce)
    return string_dtype


# second copy for cast tests to do a cartesian product over dtypes
@pytest.fixture(params=[True, False])
def coerce2(request):
Expand Down Expand Up @@ -1208,40 +1174,6 @@ def test_growing_strings(dtype):
assert_array_equal(arr, uarr)


@pytest.mark.skipif(IS_WASM, reason="no threading support in wasm")
def test_threaded_access_and_mutation(dtype, random_string_list):
    """Concurrently read, mutate and re-initialize one StringDType array.

    This test uses an RNG and may crash or cause deadlocks if there is a
    threading bug.
    """
    rng = np.random.default_rng(0x4D3D3D3)

    def func(arr):
        rnd = rng.random()
        # either write to random locations in the array, compute a ufunc, or
        # re-initialize the array
        if rnd < 0.25:
            num = np.random.randint(0, arr.size)
            arr[num] = arr[num] + "hello"
        elif rnd < 0.5:
            if rnd < 0.375:
                np.add(arr, arr)
            else:
                np.add(arr, arr, out=arr)
        elif rnd < 0.75:
            # Split [0.5, 0.75) at its midpoint so that both the
            # out-of-place and the in-place multiply are reachable.
            if rnd < 0.625:
                np.multiply(arr, np.int64(2))
            else:
                np.multiply(arr, np.int64(2), out=arr)
        else:
            arr[:] = random_string_list

    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as tpe:
        arr = np.array(random_string_list, dtype=dtype)
        futures = [tpe.submit(func, arr) for _ in range(500)]

        for f in futures:
            # result() re-raises any exception from the worker thread
            f.result()


UFUNC_TEST_DATA = [
"hello" * 10,
"Ae¢☃€ 😊" * 20,
Expand Down
30 changes: 29 additions & 1 deletion numpy/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Pytest configuration and fixtures for the Numpy test suite.
"""
import os
import string
import sys
import tempfile
from contextlib import contextmanager
Expand All @@ -10,9 +11,11 @@
import hypothesis
import pytest
import numpy
import numpy as np

from numpy._core._multiarray_tests import get_fpu_mode
from numpy.testing._private.utils import NOGIL_BUILD
from numpy._core.tests._natype import pd_NA
from numpy.testing._private.utils import NOGIL_BUILD, get_stringdtype_dtype

try:
from scipy_doctest.conftest import dt_config
Expand Down Expand Up @@ -231,3 +234,28 @@ def warnings_errors_and_rng(test=None):
'numpy/f2py/_backends/_distutils.py',
]


@pytest.fixture
def random_string_list():
    """Return a "U100" array of ten random alphanumeric strings."""
    alphabet = np.array(
        list(string.ascii_letters + string.digits), dtype="U1"
    )
    # Draw 1000 single characters, then reinterpret the buffer as ten
    # strings of 100 characters each.
    picks = np.random.choice(alphabet, size=100 * 10, replace=True)
    return picks.view("U100")


@pytest.fixture(params=[True, False])
def coerce(request):
    """Parametrized fixture: yields ``True`` and then ``False``."""
    flag = request.param
    return flag


@pytest.fixture(
    params=["unset", None, pd_NA, np.nan, float("nan"), "__nan__"],
    ids=["unset", "None", "pandas.NA", "np.nan", "float('nan')", "string nan"],
)
def na_object(request):
    """Parametrized fixture: yields each candidate ``na_object`` value."""
    candidate = request.param
    return candidate


@pytest.fixture()
def dtype(na_object, coerce):
    """Fixture: StringDType built from the ``na_object``/``coerce`` fixtures."""
    string_dtype = get_stringdtype_dtype(na_object, coerce)
    return string_dtype
10 changes: 9 additions & 1 deletion numpy/testing/_private/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
intp, float32, empty, arange, array_repr, ndarray, isnat, array)
from numpy import isfinite, isnan, isinf
import numpy.linalg._umath_linalg
from numpy._utils import _rename_parameter
from numpy._core.tests._natype import pd_NA

from io import StringIO

Expand Down Expand Up @@ -2706,3 +2706,11 @@ def run_threaded(func, max_workers=8, pass_count=False,
futures = [tpe.submit(func, *args) for _ in range(max_workers)]
for f in futures:
f.result()


def get_stringdtype_dtype(na_object, coerce=True):
# explicit is check for pd_NA because != with pd_NA returns pd_NA
if na_object is pd_NA or na_object != "unset":
return np.dtypes.StringDType(na_object=na_object, coerce=coerce)
else:
return np.dtypes.StringDType(coerce=coerce)
Loading
0