From 2f4ace73ee323932f2e5b9e32da696b47e7bda51 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Sun, 15 Jun 2025 15:21:03 +0300 Subject: [PATCH 01/26] BUG: fix linting (#29210) --- numpy/_core/tests/test_multiarray.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index f66109a3d8b5..b164f1dada3b 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -7333,7 +7333,7 @@ def apply_mode(m, mode): (m.shape[0] * 2, m.shape[1] * 2), dtype=m.dtype, order=order )[::2, ::2] retval[...] = m - return retval + return retval is_complex = np.issubdtype(dtype, np.complexfloating) m1 = self.m1.astype(dtype) + (1j if is_complex else 0) @@ -7341,7 +7341,7 @@ def apply_mode(m, mode): dot_res = np.dot(m1, m2) mo = np.zeros_like(dot_res) - for mode in itertools.product(*[modes]*3): + for mode in itertools.product(*[modes] * 3): m1_, m2_, mo_ = [apply_mode(*x) for x in zip([m1, m2, mo], mode)] assert_equal(np.matmul(m1_, m2_, out=mo_), dot_res) From aea8869549c110c46afbe75ed2e162b702d3d106 Mon Sep 17 00:00:00 2001 From: Mugundanmcw Date: Tue, 17 Jun 2025 16:09:16 +0530 Subject: [PATCH 02/26] CI: Add WoA validation setup to windows.yml --- .github/workflows/windows.yml | 74 +++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index e760e37780a7..18d02081fd67 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -85,6 +85,80 @@ jobs: run: | spin test -- --timeout=600 --durations=10 + python64bit_openblas_winarm64: + name: arm64, LPARM64 OpenBLAS + runs-on: windows-11-arm + # To enable this job on a fork, comment out: + if: github.repository == 'numpy/numpy' + strategy: + fail-fast: false + matrix: + compiler-pyversion: + - ["MSVC", "3.11"] + - ["Clang-cl", "3.14t-dev"] + + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + submodules: recursive + fetch-tags: true + persist-credentials: false + + - name: Setup Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + with: + python-version: ${{ matrix.compiler-pyversion[1] }} + architecture: arm64 + + - name: Setup MSVC + if: matrix.compiler-pyversion[0] == 'MSVC' + uses: bus1/cabuild/action/msdevshell@e22aba57d6e74891d059d66501b6b5aed8123c4d # v1 + with: + architecture: arm64 + + - name: Install build dependencies from PyPI + run: | + pip install -r requirements/build_requirements.txt + + - name: Install pkg-config + run: | + choco install -y --stoponfirstfailure --checksum 6004DF17818F5A6DBF19CB335CC92702 pkgconfiglite + echo "PKG_CONFIG_PATH=${{ github.workspace }}/.openblas" >> $env:GITHUB_ENV + + - name: Install Clang-cl + if: matrix.compiler-pyversion[0] == 'Clang-cl' + run: | + uses: ./.github/windows_arm64_steps + + - name: Install NumPy (MSVC) + if: matrix.compiler-pyversion[0] == 'MSVC' + run: | + pip install -r requirements/ci_requirements.txt + spin build --with-scipy-openblas=32 -j2 -- --vsenv + + - name: Install NumPy (Clang-cl) + if: matrix.compiler-pyversion[0] == 'Clang-cl' + run: | + "[binaries]","c = 'clang-cl'","cpp = 'clang-cl'","ar = 'llvm-lib'","c_ld = 'lld-link'","cpp_ld = 'lld-link'" | Out-File $PWD/clang-cl-arm64.ini -Encoding ascii + pip install -r requirements/ci_requirements.txt + spin build --with-scipy-openblas=32 -j2 -- --vsenv --native-file=$PWD/clang-cl-arm64.ini + + - name: Meson Log + 
shell: bash + if: ${{ failure() }} + run: | + cat build/meson-logs/meson-log.txt + + - name: Install test dependencies + run: | + python -m pip install -r requirements/test_requirements.txt + python -m pip install threadpoolctl + + - name: Run test suite + run: | + spin test -- --timeout=600 --durations=10 + msvc_python_no_openblas: name: MSVC, ${{ matrix.architecture }} Python , no BLAS runs-on: ${{ matrix.os }} From d8c4f2ba8604fbb0e7f22a5df8ea3a355a53385e Mon Sep 17 00:00:00 2001 From: Mugundanmcw Date: Tue, 17 Jun 2025 16:12:41 +0530 Subject: [PATCH 03/26] CI: Create action.yml for LLVM Win-ARM64 as reusable blocks --- .github/windows_arm64_steps /action.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .github/windows_arm64_steps /action.yml diff --git a/.github/windows_arm64_steps /action.yml b/.github/windows_arm64_steps /action.yml new file mode 100644 index 000000000000..86517b6246e7 --- /dev/null +++ b/.github/windows_arm64_steps /action.yml @@ -0,0 +1,16 @@ +name: Build Dependencies(Win-ARM64) +description: "Setup LLVM for Win-ARM64 builds" + +runs: + using: "composite" + steps: + - name: Install LLVM + shell: pwsh + run: | + Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.6/LLVM-20.1.6-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe + Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait + echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append + echo "CC=clang-cl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + echo "CXX=clang-cl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + echo "FC=flang-new" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + From f02bb58cb634286739a1c5eb9dc35bc0e086efa7 Mon Sep 17 00:00:00 2001 From: Mugundanmcw Date: Tue, 17 Jun 2025 16:16:56 +0530 Subject: [PATCH 04/26] CI: Modify wheel.yml to use clang-cl for Win-ARM64 --- .github/workflows/wheels.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 68352eb1fc7c..ab6bbb618899 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -127,11 +127,9 @@ jobs: with: architecture: 'x86' - - name: Setup MSVC arm64 + - name: Setup LLVM for Windows ARM64 if: ${{ matrix.buildplat[1] == 'win_arm64' }} - uses: bus1/cabuild/action/msdevshell@e22aba57d6e74891d059d66501b6b5aed8123c4d # v1 - with: - architecture: 'arm64' + uses: ./.github/windows_arm64_steps - name: pkg-config-for-win run: | From fb0dff6061addfa3b7d2cba9ecc4dfedf5dad954 Mon Sep 17 00:00:00 2001 From: Mugundanmcw Date: Tue, 17 Jun 2025 16:32:03 +0530 Subject: [PATCH 05/26] CI: fix action.yml naming --- .github/{windows_arm64_steps => windows_arm64_steps}/action.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{windows_arm64_steps => windows_arm64_steps}/action.yml (100%) diff --git a/.github/windows_arm64_steps /action.yml b/.github/windows_arm64_steps/action.yml similarity index 100% rename from .github/windows_arm64_steps /action.yml rename to .github/windows_arm64_steps/action.yml From f36c0daa423244ca6887ab426e3c6149a13a2667 Mon Sep 17 00:00:00 2001 From: Mugundanmcw Date: Tue, 17 Jun 2025 16:40:39 +0530 Subject: [PATCH 06/26] CI: Fix reusable LLVM block --- .github/workflows/windows.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 18d02081fd67..e723b787a5de 100644 --- 
a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -128,8 +128,7 @@ jobs:
 
     - name: Install Clang-cl
       if: matrix.compiler-pyversion[0] == 'Clang-cl'
-      run: |
-        uses: ./.github/windows_arm64_steps
+      uses: ./.github/windows_arm64_steps
 
     - name: Install NumPy (MSVC)
       if: matrix.compiler-pyversion[0] == 'MSVC'

From 03d57303675386998ad005d28c7c3ca7177694d1 Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Tue, 17 Jun 2025 17:49:15 +0200
Subject: [PATCH 07/26] MAINT: Fix some undef warnings (#29216)

As noted by Chuck in gh-29138, there are some undef warnings that seem
not nice; this should fix them.

The fact that `NPY_LONG`, etc. are not defined at macro expansion time
is a bit of a trap; maybe it would be nice to have CI fail for this...
---
 numpy/_core/src/_simd/_simd.c                             | 2 +-
 numpy/_core/src/multiarray/descriptor.c                   | 2 +-
 numpy/_core/src/umath/_operand_flag_tests.c               | 2 +-
 numpy/_core/src/umath/_rational_tests.c                   | 2 +-
 numpy/_core/src/umath/_struct_ufunc_tests.c               | 2 +-
 numpy/_core/src/umath/_umath_tests.c.src                  | 2 +-
 .../_core/tests/examples/limited_api/limited_api_latest.c | 8 ++++----
 numpy/f2py/rules.py                                       | 2 +-
 numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c        | 2 +-
 9 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/numpy/_core/src/_simd/_simd.c b/numpy/_core/src/_simd/_simd.c
index 2f0a5df6375c..d25d7bbf1c38 100644
--- a/numpy/_core/src/_simd/_simd.c
+++ b/numpy/_core/src/_simd/_simd.c
@@ -88,7 +88,7 @@ PyMODINIT_FUNC PyInit__simd(void)
     NPY_MTARGETS_CONF_DISPATCH(NPY_CPU_HAVE, ATTACH_MODULE, MAKE_MSVC_HAPPY)
     NPY_MTARGETS_CONF_BASELINE(ATTACH_BASELINE_MODULE, MAKE_MSVC_HAPPY)
 
-#if Py_GIL_DISABLED
+#ifdef Py_GIL_DISABLED
     // signal this module supports running with the GIL disabled
     PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED);
 #endif
diff --git a/numpy/_core/src/multiarray/descriptor.c b/numpy/_core/src/multiarray/descriptor.c
index 5708e5c6ecb7..f520e3c4bceb 100644
--- a/numpy/_core/src/multiarray/descriptor.c
+++ b/numpy/_core/src/multiarray/descriptor.c
@@ -2095,7 +2095,7 @@ static PyMemberDef arraydescr_members[] = {
     {"alignment",
         T_PYSSIZET, offsetof(PyArray_Descr, alignment), READONLY, NULL},
     {"flags",
-#if NPY_ULONGLONG == NPY_UINT64
+#if NPY_SIZEOF_LONGLONG == 8
         T_ULONGLONG, offsetof(PyArray_Descr, flags), READONLY, NULL},
 #else
     #error Assuming long long is 64bit, if not replace with getter function.
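
For reference, a minimal demonstration of the warning class silenced by this
patch (the macro name is illustrative; any compiler with -Wundef enabled
behaves this way):

    /* cc -Wundef -c demo.c */
    #if SOME_FEATURE      /* warns: "SOME_FEATURE" is not defined,
                             evaluates to 0; easy to mistake for a real check */
    int feature_code;
    #endif

    #ifdef SOME_FEATURE   /* no warning: #ifdef only tests definedness */
    int other_feature_code;
    #endif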
diff --git a/numpy/_core/src/umath/_operand_flag_tests.c b/numpy/_core/src/umath/_operand_flag_tests.c index 9747b7946512..5cdff6220280 100644 --- a/numpy/_core/src/umath/_operand_flag_tests.c +++ b/numpy/_core/src/umath/_operand_flag_tests.c @@ -77,7 +77,7 @@ PyMODINIT_FUNC PyInit__operand_flag_tests(void) ((PyUFuncObject*)ufunc)->iter_flags = NPY_ITER_REDUCE_OK; PyModule_AddObject(m, "inplace_add", (PyObject*)ufunc); -#if Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED // signal this module supports running with the GIL disabled PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif diff --git a/numpy/_core/src/umath/_rational_tests.c b/numpy/_core/src/umath/_rational_tests.c index a95c89b373df..d257bc22d051 100644 --- a/numpy/_core/src/umath/_rational_tests.c +++ b/numpy/_core/src/umath/_rational_tests.c @@ -1355,7 +1355,7 @@ PyMODINIT_FUNC PyInit__rational_tests(void) { GCD_LCM_UFUNC(gcd,NPY_INT64,"greatest common denominator of two integers"); GCD_LCM_UFUNC(lcm,NPY_INT64,"least common multiple of two integers"); -#if Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED // signal this module supports running with the GIL disabled PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif diff --git a/numpy/_core/src/umath/_struct_ufunc_tests.c b/numpy/_core/src/umath/_struct_ufunc_tests.c index 8edbdc00b6f3..56c4be117e44 100644 --- a/numpy/_core/src/umath/_struct_ufunc_tests.c +++ b/numpy/_core/src/umath/_struct_ufunc_tests.c @@ -157,7 +157,7 @@ PyMODINIT_FUNC PyInit__struct_ufunc_tests(void) PyDict_SetItemString(d, "add_triplet", add_triplet); Py_DECREF(add_triplet); -#if Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED // signal this module supports running with the GIL disabled PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif diff --git a/numpy/_core/src/umath/_umath_tests.c.src b/numpy/_core/src/umath/_umath_tests.c.src index 9f2818d14526..845f51ebc94f 100644 --- a/numpy/_core/src/umath/_umath_tests.c.src +++ b/numpy/_core/src/umath/_umath_tests.c.src @@ -944,7 +944,7 @@ PyMODINIT_FUNC PyInit__umath_tests(void) { // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#if Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED // signal this module supports running with the GIL disabled PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif diff --git a/numpy/_core/tests/examples/limited_api/limited_api_latest.c b/numpy/_core/tests/examples/limited_api/limited_api_latest.c index 13668f2f0ebf..92d83ea977a1 100644 --- a/numpy/_core/tests/examples/limited_api/limited_api_latest.c +++ b/numpy/_core/tests/examples/limited_api/limited_api_latest.c @@ -1,11 +1,11 @@ -#if Py_LIMITED_API != PY_VERSION_HEX & 0xffff0000 - # error "Py_LIMITED_API not defined to Python major+minor version" -#endif - #include #include #include +#if Py_LIMITED_API != PY_VERSION_HEX & 0xffff0000 + # error "Py_LIMITED_API not defined to Python major+minor version" +#endif + static PyModuleDef moduledef = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "limited_api_latest" diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py index 667ef287f92b..68c49e60028e 100644 --- a/numpy/f2py/rules.py +++ b/numpy/f2py/rules.py @@ -286,7 +286,7 @@ #initcommonhooks# #interface_usercode# -#if Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED // signal whether this module supports running with the GIL disabled PyUnstable_Module_SetGIL(m , #gil_used#); #endif diff --git a/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c b/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c index b66672a43e21..25866f1a40ec 100644 --- 
a/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c +++ b/numpy/f2py/tests/src/array_from_pyobj/wrapmodule.c @@ -223,7 +223,7 @@ PyMODINIT_FUNC PyInit_test_array_from_pyobj_ext(void) { on_exit(f2py_report_on_exit,(void*)"array_from_pyobj.wrap.call"); #endif -#if Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED // signal whether this module supports running with the GIL disabled PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif From 3f99204758206967cce4af0f460de4946443f62d Mon Sep 17 00:00:00 2001 From: Joren Hammudoglu Date: Tue, 17 Jun 2025 19:30:32 +0200 Subject: [PATCH 08/26] MAINT: bump `mypy` to `1.16.1` (#29219) --- environment.yml | 2 +- requirements/test_requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/environment.yml b/environment.yml index 770a83218133..75c6626abaf8 100644 --- a/environment.yml +++ b/environment.yml @@ -25,7 +25,7 @@ dependencies: - hypothesis # For type annotations - typing_extensions>=4.5.0 - - mypy=1.16.0 + - mypy=1.16.1 - orjson # makes mypy faster # For building docs - sphinx>=4.5.0 diff --git a/requirements/test_requirements.txt b/requirements/test_requirements.txt index 17260753db4a..e50919ef4f7b 100644 --- a/requirements/test_requirements.txt +++ b/requirements/test_requirements.txt @@ -12,9 +12,8 @@ pytest-timeout # For testing types. Notes on the restrictions: # - Mypy relies on C API features not present in PyPy # NOTE: Keep mypy in sync with environment.yml -mypy==1.16.0; platform_python_implementation != "PyPy" +mypy==1.16.1; platform_python_implementation != "PyPy" typing_extensions>=4.5.0 # for optional f2py encoding detection charset-normalizer tzdata - From f6a17f099ba3ae2acabc288bf2297fd821725ffc Mon Sep 17 00:00:00 2001 From: Joren Hammudoglu Date: Tue, 17 Jun 2025 19:31:38 +0200 Subject: [PATCH 09/26] TYP: Workaround for a mypy issue in ``ndarray.__iter__`` (#29218) --- numpy/__init__.pyi | 12 ++++++++---- numpy/typing/tests/data/reveal/ndarray_misc.pyi | 10 ++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 272e52f88e83..ce879e208e49 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -807,6 +807,7 @@ _RealNumberT = TypeVar("_RealNumberT", bound=floating | integer) _FloatingT_co = TypeVar("_FloatingT_co", bound=floating, default=floating, covariant=True) _IntegerT = TypeVar("_IntegerT", bound=integer) _IntegerT_co = TypeVar("_IntegerT_co", bound=integer, default=integer, covariant=True) +_NonObjectScalarT = TypeVar("_NonObjectScalarT", bound=np.bool | number | flexible | datetime64 | timedelta64) _NBit = TypeVar("_NBit", bound=NBitBase, default=Any) # pyright: ignore[reportDeprecated] _NBit1 = TypeVar("_NBit1", bound=NBitBase, default=Any) # pyright: ignore[reportDeprecated] @@ -2572,10 +2573,13 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeT_co, _DTypeT_co]): def __len__(self) -> int: ... def __contains__(self, value: object, /) -> builtins.bool: ... - @overload # == 1-d & object_ - def __iter__(self: ndarray[tuple[int], dtype[object_]], /) -> Iterator[Any]: ... - @overload # == 1-d - def __iter__(self: ndarray[tuple[int], dtype[_ScalarT]], /) -> Iterator[_ScalarT]: ... + # NOTE: This weird `Never` tuple works around a strange mypy issue where it assigns + # `tuple[int]` to `tuple[Never]` or `tuple[int, int]` to `tuple[Never, Never]`. + # This way the bug only occurs for 9-D arrays, which are probably not very common. 
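+    # A conforming type checker can never actually select the `Never` overload
+    # below, since no concrete shape type is assignable to `Never`; only the
+    # mypy bug described above makes it match.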
+ @overload + def __iter__(self: ndarray[tuple[Never, Never, Never, Never, Never, Never, Never, Never, Never]], /) -> Iterator[Any]: ... + @overload # == 1-d & dtype[T \ object_] + def __iter__(self: ndarray[tuple[int], dtype[_NonObjectScalarT]], /) -> Iterator[_NonObjectScalarT]: ... @overload # >= 2-d def __iter__(self: ndarray[tuple[int, int, *tuple[int, ...]], dtype[_ScalarT]], /) -> Iterator[NDArray[_ScalarT]]: ... @overload # ?-d diff --git a/numpy/typing/tests/data/reveal/ndarray_misc.pyi b/numpy/typing/tests/data/reveal/ndarray_misc.pyi index 465ce7679b49..4cbb90621ca9 100644 --- a/numpy/typing/tests/data/reveal/ndarray_misc.pyi +++ b/numpy/typing/tests/data/reveal/ndarray_misc.pyi @@ -6,6 +6,7 @@ function-based counterpart in `../from_numeric.py`. """ +from collections.abc import Iterator import ctypes as ct import operator from types import ModuleType @@ -29,6 +30,10 @@ AR_m: npt.NDArray[np.timedelta64] AR_U: npt.NDArray[np.str_] AR_V: npt.NDArray[np.void] +AR_f8_1d: np.ndarray[tuple[int], np.dtype[np.float64]] +AR_f8_2d: np.ndarray[tuple[int, int], np.dtype[np.float64]] +AR_f8_3d: np.ndarray[tuple[int, int, int], np.dtype[np.float64]] + ctypes_obj = AR_f8.ctypes assert_type(AR_f8.__dlpack__(), CapsuleType) @@ -235,3 +240,8 @@ assert_type(AR_m.to_device("cpu"), npt.NDArray[np.timedelta64]) assert_type(f8.__array_namespace__(), ModuleType) assert_type(AR_f8.__array_namespace__(), ModuleType) + +assert_type(iter(AR_f8), Iterator[Any]) # any-D +assert_type(iter(AR_f8_1d), Iterator[np.float64]) # 1-D +assert_type(iter(AR_f8_2d), Iterator[npt.NDArray[np.float64]]) # 2-D +assert_type(iter(AR_f8_3d), Iterator[npt.NDArray[np.float64]]) # 3-D From 32f4afaad7cb035e1b46987285d34c27c57dbe57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Tue, 17 Jun 2025 19:34:26 +0200 Subject: [PATCH 10/26] ENH: improve Timsort with powersort merge-policy (#29208) Implement the improved merge policy for Timsort, as developed by Munro and Wild. Benchmarks show a significant improvement in performance. --- numpy/_core/src/npysort/timsort.cpp | 110 +++++++++++++--------------- 1 file changed, 51 insertions(+), 59 deletions(-) diff --git a/numpy/_core/src/npysort/timsort.cpp b/numpy/_core/src/npysort/timsort.cpp index 0f0f5721e7cf..9ecf88c0aeb9 100644 --- a/numpy/_core/src/npysort/timsort.cpp +++ b/numpy/_core/src/npysort/timsort.cpp @@ -39,8 +39,9 @@ #include #include -/* enough for 32 * 1.618 ** 128 elements */ -#define TIMSORT_STACK_SIZE 128 +/* enough for 32 * 1.618 ** 128 elements. + If powersort was used in all cases, 90 would suffice, as 32 * 2 ** 90 >= 32 * 1.618 ** 128 */ +#define RUN_STACK_SIZE 128 static npy_intp compute_min_run(npy_intp num) @@ -58,6 +59,7 @@ compute_min_run(npy_intp num) typedef struct { npy_intp s; /* start pointer */ npy_intp l; /* length */ + int power; /* node "level" for powersort merge strategy */ } run; /* buffer for argsort. Declared here to avoid multiple declarations. */ @@ -383,60 +385,51 @@ merge_at_(type *arr, const run *stack, const npy_intp at, buffer_ *buffer) return 0; } -template +/* See https://github.com/python/cpython/blob/ea23c897cd25702e72a04e06664f6864f07a7c5d/Objects/listsort.txt +* for a detailed explanation. +* In CPython, *num* is called *n*, but we changed it for consistency with the NumPy implementation. 
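+* In short: each boundary between two adjacent runs is assigned a "power",
+* the depth of that boundary in a nearly-optimal binary merge tree, computed
+* from the two runs' midpoints interpreted as binary fractions of num. Runs
+* are merged while the previous boundary's power exceeds the new one.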
+*/ static int -try_collapse_(type *arr, run *stack, npy_intp *stack_ptr, buffer_ *buffer) +powerloop(npy_intp s1, npy_intp n1, npy_intp n2, npy_intp num) { - int ret; - npy_intp A, B, C, top; - top = *stack_ptr; - - while (1 < top) { - B = stack[top - 2].l; - C = stack[top - 1].l; - - if ((2 < top && stack[top - 3].l <= B + C) || - (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) { - A = stack[top - 3].l; - - if (A <= C) { - ret = merge_at_(arr, stack, top - 3, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 3].l += B; - stack[top - 2] = stack[top - 1]; - --top; - } - else { - ret = merge_at_(arr, stack, top - 2, buffer); - - if (NPY_UNLIKELY(ret < 0)) { - return ret; - } - - stack[top - 2].l += C; - --top; - } + int result = 0; + npy_intp a = 2 * s1 + n1; /* 2*a */ + npy_intp b = a + n1 + n2; /* 2*b */ + for (;;) { + ++result; + if (a >= num) { /* both quotient bits are 1 */ + a -= num; + b -= num; } - else if (1 < top && B <= C) { - ret = merge_at_(arr, stack, top - 2, buffer); + else if (b >= num) { /* a/num bit is 0, b/num bit is 1 */ + break; + } + a <<= 1; + b <<= 1; + } + return result; +} +template +static int +found_new_run_(type *arr, run *stack, npy_intp *stack_ptr, npy_intp n2, + npy_intp num, buffer_ *buffer) +{ + int ret; + if (*stack_ptr > 0) { + npy_intp s1 = stack[*stack_ptr - 1].s; + npy_intp n1 = stack[*stack_ptr - 1].l; + int power = powerloop(s1, n1, n2, num); + while (*stack_ptr > 1 && stack[*stack_ptr - 2].power > power) { + ret = merge_at_(arr, stack, *stack_ptr - 2, buffer); if (NPY_UNLIKELY(ret < 0)) { return ret; } - - stack[top - 2].l += C; - --top; - } - else { - break; + stack[*stack_ptr - 2].l += stack[*stack_ptr - 1].l; + --(*stack_ptr); } + stack[*stack_ptr - 1].power = power; } - - *stack_ptr = top; return 0; } @@ -491,7 +484,7 @@ timsort_(void *start, npy_intp num) int ret; npy_intp l, n, stack_ptr, minrun; buffer_ buffer; - run stack[TIMSORT_STACK_SIZE]; + run stack[RUN_STACK_SIZE]; buffer.pw = NULL; buffer.size = 0; stack_ptr = 0; @@ -499,15 +492,14 @@ timsort_(void *start, npy_intp num) for (l = 0; l < num;) { n = count_run_((type *)start, l, num, minrun); + ret = found_new_run_((type *)start, stack, &stack_ptr, n, num, &buffer); + if (NPY_UNLIKELY(ret < 0)) + goto cleanup; + + // Push the new run onto the stack. 
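+        // (Any merges required by the powersort policy were already
+        // performed by found_new_run_ above.)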
stack[stack_ptr].s = l; stack[stack_ptr].l = n; ++stack_ptr; - ret = try_collapse_((type *)start, stack, &stack_ptr, &buffer); - - if (NPY_UNLIKELY(ret < 0)) { - goto cleanup; - } - l += n; } @@ -897,7 +889,7 @@ atimsort_(void *v, npy_intp *tosort, npy_intp num) int ret; npy_intp l, n, stack_ptr, minrun; buffer_intp buffer; - run stack[TIMSORT_STACK_SIZE]; + run stack[RUN_STACK_SIZE]; buffer.pw = NULL; buffer.size = 0; stack_ptr = 0; @@ -1371,7 +1363,7 @@ string_timsort_(void *start, npy_intp num, void *varr) size_t len = elsize / sizeof(type); int ret; npy_intp l, n, stack_ptr, minrun; - run stack[TIMSORT_STACK_SIZE]; + run stack[RUN_STACK_SIZE]; string_buffer_ buffer; /* Items that have zero size don't make sense to sort */ @@ -1800,7 +1792,7 @@ string_atimsort_(void *start, npy_intp *tosort, npy_intp num, void *varr) size_t len = elsize / sizeof(type); int ret; npy_intp l, n, stack_ptr, minrun; - run stack[TIMSORT_STACK_SIZE]; + run stack[RUN_STACK_SIZE]; buffer_intp buffer; /* Items that have zero size don't make sense to sort */ @@ -2253,7 +2245,7 @@ npy_timsort(void *start, npy_intp num, void *varr) PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; int ret; npy_intp l, n, stack_ptr, minrun; - run stack[TIMSORT_STACK_SIZE]; + run stack[RUN_STACK_SIZE]; buffer_char buffer; /* Items that have zero size don't make sense to sort */ @@ -2689,7 +2681,7 @@ npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr) PyArray_CompareFunc *cmp = PyDataType_GetArrFuncs(PyArray_DESCR(arr))->compare; int ret; npy_intp l, n, stack_ptr, minrun; - run stack[TIMSORT_STACK_SIZE]; + run stack[RUN_STACK_SIZE]; buffer_intp buffer; /* Items that have zero size don't make sense to sort */ From d1c67573569885087a253120c1a9f2caf3ccf084 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Tue, 17 Jun 2025 14:56:21 -0400 Subject: [PATCH 11/26] ENH: Detect CPU features on OpenBSD ARM and PowerPC64 Also while looking at this I noticed due to a compiler warning that npy__cpu_init_features_linux() was not enabled on FreeBSD with the original commit c47e9621ebf76f8085ff5ec8b01c07921d14f6a7 thus the code was doing nothing on FreeBSD ARM. 
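
For context, a minimal sketch of the BSD side of this (helper name
illustrative): FreeBSD and OpenBSD expose the ELF auxiliary vector through
elf_aux_info() rather than glibc's getauxval(); elf_aux_info() fills a
caller-provided buffer and returns 0 on success:

    #include <sys/auxv.h>   /* AT_HWCAP; elf_aux_info() on FreeBSD/OpenBSD */

    static unsigned long
    hwcap_from_elf_aux_info(void)
    {
        unsigned long hwcap = 0;
        /* elf_aux_info() returns 0 on success, an errno value otherwise */
        if (elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap)) != 0) {
            return 0;   /* entry not available: report no optional features */
        }
        return hwcap;
    }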
--- numpy/_core/src/common/npy_cpu_features.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/numpy/_core/src/common/npy_cpu_features.c b/numpy/_core/src/common/npy_cpu_features.c index f15f636cdb1e..617225ec9eff 100644 --- a/numpy/_core/src/common/npy_cpu_features.c +++ b/numpy/_core/src/common/npy_cpu_features.c @@ -562,7 +562,7 @@ npy__cpu_init_features(void) #elif defined(NPY_CPU_PPC64) || defined(NPY_CPU_PPC64LE) -#if defined(__linux__) || defined(__FreeBSD__) +#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) #ifdef __FreeBSD__ #include // defines PPC_FEATURE_HAS_VSX #endif @@ -585,7 +585,7 @@ static void npy__cpu_init_features(void) { memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX); -#if defined(__linux__) || defined(__FreeBSD__) +#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) #ifdef __linux__ unsigned int hwcap = getauxval(AT_HWCAP); if ((hwcap & PPC_FEATURE_HAS_VSX) == 0) @@ -612,7 +612,7 @@ npy__cpu_init_features(void) npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0; npy__cpu_have[NPY_CPU_FEATURE_VSX3] = (hwcap & PPC_FEATURE2_ARCH_3_00) != 0; npy__cpu_have[NPY_CPU_FEATURE_VSX4] = (hwcap & PPC_FEATURE2_ARCH_3_1) != 0; -// TODO: AIX, OpenBSD +// TODO: AIX #else npy__cpu_have[NPY_CPU_FEATURE_VSX] = 1; #if defined(NPY_CPU_PPC64LE) || defined(NPY_HAVE_VSX2) @@ -698,7 +698,7 @@ npy__cpu_init_features_arm8(void) npy__cpu_have[NPY_CPU_FEATURE_ASIMD] = 1; } -#if defined(__linux__) || defined(__FreeBSD__) +#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) /* * we aren't sure of what kind kernel or clib we deal with * so we play it safe @@ -709,7 +709,7 @@ npy__cpu_init_features_arm8(void) #if defined(__linux__) __attribute__((weak)) unsigned long getauxval(unsigned long); // linker should handle it #endif -#ifdef __FreeBSD__ +#if defined(__FreeBSD__) || defined(__OpenBSD__) __attribute__((weak)) int elf_aux_info(int, void *, int); // linker should handle it static unsigned long getauxval(unsigned long k) @@ -807,7 +807,7 @@ static void npy__cpu_init_features(void) { memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX); -#ifdef __linux__ +#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) if (npy__cpu_init_features_linux()) return; #endif From b6a6740e106277f4b1081210bbd158fed3e0d20c Mon Sep 17 00:00:00 2001 From: Mugundanmcw Date: Wed, 18 Jun 2025 18:16:30 +0530 Subject: [PATCH 12/26] CI: Add conditions to check hash of LLVM package --- .github/windows_arm64_steps/action.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/windows_arm64_steps/action.yml b/.github/windows_arm64_steps/action.yml index 86517b6246e7..8ecb3b8a0cdd 100644 --- a/.github/windows_arm64_steps/action.yml +++ b/.github/windows_arm64_steps/action.yml @@ -4,10 +4,16 @@ description: "Setup LLVM for Win-ARM64 builds" runs: using: "composite" steps: - - name: Install LLVM + - name: Install LLVM with checksum verification shell: pwsh run: | Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.6/LLVM-20.1.6-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe + $expectedHash = "92f69a1134e32e54b07d51c6e24d9594852f6476f32c3d70471ae00fffc2d462" + $fileHash = (Get-FileHash -Path "LLVM-woa64.exe" -Algorithm SHA256).Hash + if ($fileHash -ne $expectedHash) { + Write-Error "Checksum verification failed. The downloaded file may be corrupted or tampered with." 
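+          # Fail the step here so the workflow never runs an installer that
+          # did not pass checksum verification.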
+ exit 1 + } Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append echo "CC=clang-cl" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append From d52b36ee0b8bf3e7ca9eec19d8c4b41ff2c04f59 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 18 Jun 2025 19:32:32 +0200 Subject: [PATCH 13/26] strides comparison performance fix, compare discussion #29179 (#29188) --- numpy/_core/src/umath/matmul.c.src | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/numpy/_core/src/umath/matmul.c.src b/numpy/_core/src/umath/matmul.c.src index 02c4fde56bf2..6f54aeb4d968 100644 --- a/numpy/_core/src/umath/matmul.c.src +++ b/numpy/_core/src/umath/matmul.c.src @@ -27,6 +27,8 @@ ***************************************************************************** */ +#define ABS(x) ((x) < 0 ? -(x) : (x)) + #if defined(HAVE_CBLAS) /* * -1 to be conservative, in case blas internally uses a for loop with an @@ -554,9 +556,9 @@ NPY_NO_EXPORT void } else { /* matrix @ matrix * copy if not blasable, see gh-12365 & gh-23588 */ - npy_bool i1_transpose = is1_m < is1_n, - i2_transpose = is2_n < is2_p, - o_transpose = os_m < os_p; + npy_bool i1_transpose = ABS(is1_m) < ABS(is1_n), + i2_transpose = ABS(is2_n) < ABS(is2_p), + o_transpose = ABS(os_m) < ABS(os_p); npy_intp tmp_is1_m = i1_transpose ? sz : sz*dn, tmp_is1_n = i1_transpose ? sz*dm : sz, From 2b0eda38f099203bb72572a8b640b37165fc7dc5 Mon Sep 17 00:00:00 2001 From: jorenham Date: Tue, 17 Jun 2025 17:49:17 +0200 Subject: [PATCH 14/26] TYP: Support iteration of ``StringDType`` arrays --- numpy/__init__.pyi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index ce879e208e49..bcbe95accee6 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -2577,11 +2577,15 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeT_co, _DTypeT_co]): # `tuple[int]` to `tuple[Never]` or `tuple[int, int]` to `tuple[Never, Never]`. # This way the bug only occurs for 9-D arrays, which are probably not very common. @overload - def __iter__(self: ndarray[tuple[Never, Never, Never, Never, Never, Never, Never, Never, Never]], /) -> Iterator[Any]: ... + def __iter__( + self: ndarray[tuple[Never, Never, Never, Never, Never, Never, Never, Never, Never], Any], / + ) -> Iterator[Any]: ... @overload # == 1-d & dtype[T \ object_] def __iter__(self: ndarray[tuple[int], dtype[_NonObjectScalarT]], /) -> Iterator[_NonObjectScalarT]: ... + @overload # == 1-d & StringDType + def __iter__(self: ndarray[tuple[int], dtypes.StringDType], /) -> Iterator[str]: ... @overload # >= 2-d - def __iter__(self: ndarray[tuple[int, int, *tuple[int, ...]], dtype[_ScalarT]], /) -> Iterator[NDArray[_ScalarT]]: ... + def __iter__(self: ndarray[tuple[int, int, *tuple[int, ...]], _DTypeT], /) -> Iterator[ndarray[_AnyShape, _DTypeT]]: ... @overload # ?-d def __iter__(self, /) -> Iterator[Any]: ... From f15a116ee12ec539c7ed1fafb3a0abfb82005e0f Mon Sep 17 00:00:00 2001 From: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> Date: Wed, 18 Jun 2025 13:00:27 -0700 Subject: [PATCH 15/26] BUG: Address interaction between SME and FPSR (#29223) * BUG: Address interaction between SME and FPSR This is intended to resolve https://github.com/numpy/numpy/issues/28687 The root cause is an interaction between Arm Scalable Matrix Extension (SME) and the floating point status register (FPSR). 
As noted in Arm docs for FPSR, "On entry to or exit from Streaming SVE mode, FPSR.{IOC, DZC, OFC, UFC, IXC, IDC, QC} are set to 1 and the remaining bits are set to 0". This means that floating point status flags are all raised when SME is used, regardless of values or operations performed. These are manifesting now because Apple Silicon M4 supports SME and macOS 15.4 enables SME codepaths for Accelerate BLAS / LAPACK. However, SME / FPSR behavior is not specific to Apple Silicon M4 and will occur on non-Apple chips using SME as well. Changes add compile and runtime checks to determine whether BLAS / LAPACK might use SME (macOS / Accelerate only at the moment). If so, special handling of floating-point error (FPE) is added, which includes: - clearing FPE after some BLAS calls - short-circuiting FPE read after some BLAS calls All tests pass Performance is similar Another approach would have been to wrap all BLAS / LAPACK calls with save / restore FPE. However, it added a lot of overhead for the inner loops that utilize BLAS / LAPACK. Some benchmarks were 8x slower. * add blas_supports_fpe and ifdef check Address the linker & linter failures --- numpy/_core/meson.build | 1 + numpy/_core/src/common/blas_utils.c | 134 ++++++++++++++++++ numpy/_core/src/common/blas_utils.h | 30 ++++ numpy/_core/src/common/cblasfuncs.c | 3 +- numpy/_core/src/multiarray/multiarraymodule.c | 5 + numpy/_core/src/umath/matmul.c.src | 36 ++++- numpy/_core/tests/test_multiarray.py | 6 + numpy/testing/_private/utils.py | 10 ++ 8 files changed, 217 insertions(+), 8 deletions(-) create mode 100644 numpy/_core/src/common/blas_utils.c create mode 100644 numpy/_core/src/common/blas_utils.h diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index a4d2050122c6..cd46a20b0246 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -1117,6 +1117,7 @@ src_multiarray_umath_common = [ ] if have_blas src_multiarray_umath_common += [ + 'src/common/blas_utils.c', 'src/common/cblasfuncs.c', 'src/common/python_xerbla.c', ] diff --git a/numpy/_core/src/common/blas_utils.c b/numpy/_core/src/common/blas_utils.c new file mode 100644 index 000000000000..aaf976ed70e4 --- /dev/null +++ b/numpy/_core/src/common/blas_utils.c @@ -0,0 +1,134 @@ +#include +#include +#include + +#ifdef __APPLE__ +#include +#endif + +#include "numpy/numpyconfig.h" // NPY_VISIBILITY_HIDDEN +#include "numpy/npy_math.h" // npy_get_floatstatus_barrier +#include "blas_utils.h" + +#if NPY_BLAS_CHECK_FPE_SUPPORT + +/* Return whether we're running on macOS 15.4 or later + */ +static inline bool +is_macOS_version_15_4_or_later(void){ +#if !defined(__APPLE__) + return false; +#else + char *osProductVersion = NULL; + size_t size = 0; + bool ret = false; + + // Query how large OS version string should be + if(-1 == sysctlbyname("kern.osproductversion", NULL, &size, NULL, 0)){ + goto cleanup; + } + + osProductVersion = malloc(size + 1); + + // Get the OS version string + if(-1 == sysctlbyname("kern.osproductversion", osProductVersion, &size, NULL, 0)){ + goto cleanup; + } + + osProductVersion[size] = '\0'; + + // Parse the version string + int major = 0, minor = 0; + if(2 > sscanf(osProductVersion, "%d.%d", &major, &minor)) { + goto cleanup; + } + + if(major >= 15 && minor >= 4){ + ret = true; + } + +cleanup: + if(osProductVersion){ + free(osProductVersion); + } + + return ret; +#endif +} + +/* ARM Scalable Matrix Extension (SME) raises all floating-point error flags + * when it's used regardless of values or operations. 
As a consequence, + * when SME is used, all FPE state is lost and special handling is needed. + * + * For NumPy, SME is not currently used directly, but can be used via + * BLAS / LAPACK libraries. This function does a runtime check for whether + * BLAS / LAPACK can use SME and special handling around FPE is required. + */ +static inline bool +BLAS_can_use_ARM_SME(void) +{ +#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK) + // ARM SME can be used by Apple's Accelerate framework for BLAS / LAPACK + // - macOS 15.4+ + // - Apple silicon M4+ + + // Does OS / Accelerate support ARM SME? + if(!is_macOS_version_15_4_or_later()){ + return false; + } + + // Does hardware support SME? + int has_SME = 0; + size_t size = sizeof(has_SME); + if(-1 == sysctlbyname("hw.optional.arm.FEAT_SME", &has_SME, &size, NULL, 0)){ + return false; + } + + if(has_SME){ + return true; + } +#endif + + // default assume SME is not used + return false; +} + +/* Static variable to cache runtime check of BLAS FPE support. + */ +static bool blas_supports_fpe = true; + +#endif // NPY_BLAS_CHECK_FPE_SUPPORT + + +NPY_VISIBILITY_HIDDEN bool +npy_blas_supports_fpe(void) +{ +#if NPY_BLAS_CHECK_FPE_SUPPORT + return blas_supports_fpe; +#else + return true; +#endif +} + +NPY_VISIBILITY_HIDDEN void +npy_blas_init(void) +{ +#if NPY_BLAS_CHECK_FPE_SUPPORT + blas_supports_fpe = !BLAS_can_use_ARM_SME(); +#endif +} + +NPY_VISIBILITY_HIDDEN int +npy_get_floatstatus_after_blas(void) +{ +#if NPY_BLAS_CHECK_FPE_SUPPORT + if(!blas_supports_fpe){ + // BLAS does not support FPE and we need to return FPE state. + // Instead of clearing and then grabbing state, just return + // that no flags are set. + return 0; + } +#endif + char *param = NULL; + return npy_get_floatstatus_barrier(param); +} diff --git a/numpy/_core/src/common/blas_utils.h b/numpy/_core/src/common/blas_utils.h new file mode 100644 index 000000000000..8c1437f88899 --- /dev/null +++ b/numpy/_core/src/common/blas_utils.h @@ -0,0 +1,30 @@ +#include + +#include "numpy/numpyconfig.h" // for NPY_VISIBILITY_HIDDEN + +/* NPY_BLAS_CHECK_FPE_SUPPORT controls whether we need a runtime check + * for floating-point error (FPE) support in BLAS. + */ +#if defined(__APPLE__) && defined(__aarch64__) && defined(ACCELERATE_NEW_LAPACK) +#define NPY_BLAS_CHECK_FPE_SUPPORT 1 +#else +#define NPY_BLAS_CHECK_FPE_SUPPORT 0 +#endif + +/* Initialize BLAS environment, if needed + */ +NPY_VISIBILITY_HIDDEN void +npy_blas_init(void); + +/* Runtime check if BLAS supports floating-point errors. + * true - BLAS supports FPE and one can rely on them to indicate errors + * false - BLAS does not support FPE. Special handling needed for FPE state + */ +NPY_VISIBILITY_HIDDEN bool +npy_blas_supports_fpe(void); + +/* If BLAS supports FPE, exactly the same as npy_get_floatstatus_barrier(). + * Otherwise, we can't rely on FPE state and need special handling. 
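+ * In that case the flags are indeterminate (SME, for instance, raises all
+ * of them unconditionally), so this helper reports "no flags set" rather
+ * than surfacing garbage to NumPy's floating-point error handling.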
+ */ +NPY_VISIBILITY_HIDDEN int +npy_get_floatstatus_after_blas(void); diff --git a/numpy/_core/src/common/cblasfuncs.c b/numpy/_core/src/common/cblasfuncs.c index f9d683d812d4..66a215dfeb64 100644 --- a/numpy/_core/src/common/cblasfuncs.c +++ b/numpy/_core/src/common/cblasfuncs.c @@ -12,6 +12,7 @@ #include "numpy/arrayobject.h" #include "numpy/npy_math.h" #include "numpy/ufuncobject.h" +#include "blas_utils.h" #include "npy_cblas.h" #include "arraytypes.h" #include "common.h" @@ -693,7 +694,7 @@ cblas_matrixproduct(int typenum, PyArrayObject *ap1, PyArrayObject *ap2, NPY_END_ALLOW_THREADS; } - int fpes = npy_get_floatstatus_barrier((char *) result); + int fpes = npy_get_floatstatus_after_blas(); if (fpes && PyUFunc_GiveFloatingpointErrors("dot", fpes) < 0) { goto fail; } diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index 7724756ba351..dcfb1226a0ab 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -43,6 +43,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "arraytypes.h" #include "arrayobject.h" #include "array_converter.h" +#include "blas_utils.h" #include "hashdescr.h" #include "descriptor.h" #include "dragon4.h" @@ -4781,6 +4782,10 @@ _multiarray_umath_exec(PyObject *m) { return -1; } +#if NPY_BLAS_CHECK_FPE_SUPPORT + npy_blas_init(); +#endif + #if defined(MS_WIN64) && defined(__GNUC__) PyErr_WarnEx(PyExc_Warning, "Numpy built with MINGW-W64 on Windows 64 bits is experimental, " \ diff --git a/numpy/_core/src/umath/matmul.c.src b/numpy/_core/src/umath/matmul.c.src index 6f54aeb4d968..11e014acec7f 100644 --- a/numpy/_core/src/umath/matmul.c.src +++ b/numpy/_core/src/umath/matmul.c.src @@ -16,6 +16,7 @@ +#include "blas_utils.h" #include "npy_cblas.h" #include "arraytypes.h" /* For TYPE_dot functions */ @@ -122,7 +123,7 @@ static inline void } } -NPY_NO_EXPORT void +static void @name@_gemv(void *ip1, npy_intp is1_m, npy_intp is1_n, void *ip2, npy_intp is2_n, void *op, npy_intp op_m, @@ -158,7 +159,7 @@ NPY_NO_EXPORT void is2_n / sizeof(@typ@), @step0@, op, op_m / sizeof(@typ@)); } -NPY_NO_EXPORT void +static void @name@_matmul_matrixmatrix(void *ip1, npy_intp is1_m, npy_intp is1_n, void *ip2, npy_intp is2_n, npy_intp is2_p, void *op, npy_intp os_m, npy_intp os_p, @@ -262,7 +263,7 @@ NPY_NO_EXPORT void * #IS_HALF = 0, 0, 0, 1, 0*13# */ -NPY_NO_EXPORT void +static void @TYPE@_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n, void *_ip2, npy_intp is2_n, npy_intp is2_p, void *_op, npy_intp os_m, npy_intp os_p, @@ -320,7 +321,7 @@ NPY_NO_EXPORT void } /**end repeat**/ -NPY_NO_EXPORT void +static void BOOL_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n, void *_ip2, npy_intp is2_n, npy_intp is2_p, void *_op, npy_intp os_m, npy_intp os_p, @@ -359,7 +360,7 @@ BOOL_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n, } } -NPY_NO_EXPORT void +static void OBJECT_matmul_inner_noblas(void *_ip1, npy_intp is1_m, npy_intp is1_n, void *_ip2, npy_intp is2_n, npy_intp is2_p, void *_op, npy_intp os_m, npy_intp os_p, @@ -631,6 +632,11 @@ NPY_NO_EXPORT void #endif } #if @USEBLAS@ && defined(HAVE_CBLAS) +#if NPY_BLAS_CHECK_FPE_SUPPORT + if (!npy_blas_supports_fpe()) { + npy_clear_floatstatus_barrier((char*)args); + } +#endif if (allocate_buffer) free(tmp_ip12op); #endif } @@ -655,7 +661,7 @@ NPY_NO_EXPORT void * #prefix = c, z, 0# * #USE_BLAS = 1, 1, 0# */ -NPY_NO_EXPORT void +static void @name@_dotc(char *ip1, npy_intp is1, char *ip2, npy_intp 
is2, char *op, npy_intp n, void *NPY_UNUSED(ignore)) { @@ -751,6 +757,7 @@ OBJECT_dotc(char *ip1, npy_intp is1, char *ip2, npy_intp is2, char *op, npy_intp * CFLOAT, CDOUBLE, CLONGDOUBLE, OBJECT# * #DOT = dot*15, dotc*4# * #CHECK_PYERR = 0*18, 1# + * #CHECK_BLAS = 1*2, 0*13, 1*2, 0*2# */ NPY_NO_EXPORT void @TYPE@_vecdot(char **args, npy_intp const *dimensions, npy_intp const *steps, @@ -774,6 +781,11 @@ NPY_NO_EXPORT void } #endif } +#if @CHECK_BLAS@ && NPY_BLAS_CHECK_FPE_SUPPORT + if (!npy_blas_supports_fpe()) { + npy_clear_floatstatus_barrier((char*)args); + } +#endif } /**end repeat**/ @@ -789,7 +801,7 @@ NPY_NO_EXPORT void * #step1 = &oneF, &oneD# * #step0 = &zeroF, &zeroD# */ -NPY_NO_EXPORT void +static void @name@_vecmat_via_gemm(void *ip1, npy_intp is1_n, void *ip2, npy_intp is2_n, npy_intp is2_m, void *op, npy_intp os_m, @@ -880,6 +892,11 @@ NPY_NO_EXPORT void #endif } } +#if @USEBLAS@ && NPY_BLAS_CHECK_FPE_SUPPORT + if (!npy_blas_supports_fpe()) { + npy_clear_floatstatus_barrier((char*)args); + } +#endif } /**end repeat**/ @@ -945,5 +962,10 @@ NPY_NO_EXPORT void #endif } } +#if @USEBLAS@ && NPY_BLAS_CHECK_FPE_SUPPORT + if (!npy_blas_supports_fpe()) { + npy_clear_floatstatus_barrier((char*)args); + } +#endif } /**end repeat**/ diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index b164f1dada3b..81e55deb3daf 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -31,6 +31,7 @@ from numpy.exceptions import AxisError, ComplexWarning from numpy.lib.recfunctions import repack_fields from numpy.testing import ( + BLAS_SUPPORTS_FPE, HAS_REFCOUNT, IS_64BIT, IS_PYPY, @@ -3363,6 +3364,11 @@ def test_dot(self): @pytest.mark.parametrize("dtype", [np.half, np.double, np.longdouble]) @pytest.mark.skipif(IS_WASM, reason="no wasm fp exception support") def test_dot_errstate(self, dtype): + # Some dtypes use BLAS for 'dot' operation and + # not all BLAS support floating-point errors. + if not BLAS_SUPPORTS_FPE and dtype == np.double: + pytest.skip("BLAS does not support FPE") + a = np.array([1, 1], dtype=dtype) b = np.array([-np.inf, np.inf], dtype=dtype) diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index d7ceaeab72cc..65f4059f98fd 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -42,6 +42,7 @@ 'assert_no_gc_cycles', 'break_cycles', 'HAS_LAPACK64', 'IS_PYSTON', 'IS_MUSL', 'check_support_sve', 'NOGIL_BUILD', 'IS_EDITABLE', 'IS_INSTALLED', 'NUMPY_ROOT', 'run_threaded', 'IS_64BIT', + 'BLAS_SUPPORTS_FPE', ] @@ -89,6 +90,15 @@ class KnownFailureException(Exception): IS_PYPY = sys.implementation.name == 'pypy' IS_PYSTON = hasattr(sys, "pyston_version_info") HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None and not IS_PYSTON +BLAS_SUPPORTS_FPE = True +if platform.system() == 'Darwin' or platform.machine() == 'arm64': + try: + blas = np.__config__.CONFIG['Build Dependencies']['blas'] + if blas['name'] == 'accelerate': + BLAS_SUPPORTS_FPE = False + except KeyError: + pass + HAS_LAPACK64 = numpy.linalg._umath_linalg._ilp64 IS_MUSL = False From 5cbc0bcebbe031140b63d2ffba06b73e471b43b2 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Tue, 17 Jun 2025 19:57:18 -0400 Subject: [PATCH 16/26] ENH: Detect CPU features on FreeBSD / OpenBSD RISC-V64. 
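
For context, the vector bit is derived from the extension's ISA letter; a
minimal sketch (helper name illustrative) of the check this enables on all
three systems:

    /* The 'V' (vector) extension is advertised as bit ('V' - 'A') of
     * AT_HWCAP on riscv64. */
    #define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))

    static int
    riscv64_has_rvv(unsigned long hwcap)
    {
        return (hwcap & COMPAT_HWCAP_ISA_V) != 0;
    }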
---
 numpy/_core/src/common/npy_cpu_features.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/numpy/_core/src/common/npy_cpu_features.c b/numpy/_core/src/common/npy_cpu_features.c
index f15f636cdb1e..06c82fe41e27 100644
--- a/numpy/_core/src/common/npy_cpu_features.c
+++ b/numpy/_core/src/common/npy_cpu_features.c
@@ -846,22 +846,30 @@ npy__cpu_init_features(void)
 
 #elif defined(__riscv) && __riscv_xlen == 64
 
-#include <sys/auxv.h>
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__)
+    #include <sys/auxv.h>
 
-#ifndef HWCAP_RVV
-    // https://github.com/torvalds/linux/blob/v6.8/arch/riscv/include/uapi/asm/hwcap.h#L24
-    #define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))
+    #ifndef HWCAP_RVV
+        // https://github.com/torvalds/linux/blob/v6.8/arch/riscv/include/uapi/asm/hwcap.h#L24
+        #define COMPAT_HWCAP_ISA_V (1 << ('V' - 'A'))
+    #endif
 #endif
 
 static void
 npy__cpu_init_features(void)
 {
     memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
-
+#if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__)
+#ifdef __linux__
     unsigned int hwcap = getauxval(AT_HWCAP);
+#else
+    unsigned long hwcap;
+    elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
+#endif
     if (hwcap & COMPAT_HWCAP_ISA_V) {
         npy__cpu_have[NPY_CPU_FEATURE_RVV] = 1;
     }
+#endif
 }
 
 /*********** Unsupported ARCH ***********/

From 7a74e12558789ecf0252dad56a77560db7411d13 Mon Sep 17 00:00:00 2001
From: Nathan Goldbaum
Date: Thu, 19 Jun 2025 01:18:27 -0600
Subject: [PATCH 17/26] BUG: avoid negating unsigned integers in resize implementation (#29230)

The negation of an unsigned int underflows and creates a large positive
repeats, which leads to allocation failures and/or swapping.
---
 numpy/_core/fromnumeric.py        | 3 ++-
 numpy/_core/tests/test_numeric.py | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/numpy/_core/fromnumeric.py b/numpy/_core/fromnumeric.py
index 73dcd1ddc11d..e20d774d014d 100644
--- a/numpy/_core/fromnumeric.py
+++ b/numpy/_core/fromnumeric.py
@@ -1607,7 +1607,8 @@ def resize(a, new_shape):
         # First case must zero fill. The second would have repeats == 0.
         return np.zeros_like(a, shape=new_shape)
 
-    repeats = -(-new_size // a.size)  # ceil division
+    # ceiling division without negating new_size
+    repeats = (new_size + a.size - 1) // a.size
     a = concatenate((a,) * repeats)[:new_size]
 
     return reshape(a, new_shape)
diff --git a/numpy/_core/tests/test_numeric.py b/numpy/_core/tests/test_numeric.py
index 8a72e4bfa65d..65da65ddc9f9 100644
--- a/numpy/_core/tests/test_numeric.py
+++ b/numpy/_core/tests/test_numeric.py
@@ -79,6 +79,13 @@ def test_negative_resize(self):
         with pytest.raises(ValueError, match=r"negative"):
             np.resize(A, new_shape=new_shape)
 
+    def test_unsigned_resize(self):
+        # ensure unsigned integer sizes don't lead to underflows
+        for dt_pair in [(np.int32, np.uint32), (np.int64, np.uint64)]:
+            arr = np.array([[23, 95], [66, 37]])
+            assert_array_equal(np.resize(arr, dt_pair[0](1)),
+                               np.resize(arr, dt_pair[1](1)))
+
     def test_subclass(self):
         class MyArray(np.ndarray):
             __array_priority__ = 1.

From 35079afa808ea0fc49bb13c878150ef20c4f64fa Mon Sep 17 00:00:00 2001
From: Sebastian Berg
Date: Thu, 19 Jun 2025 14:21:05 +0200
Subject: [PATCH 18/26] TST: Fix test that uses uninitialized memory (#29232)

Tests should avoid this generally; this one is worse, since it can even
fail due to warnings.
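
For context, a minimal sketch of the difference (class name illustrative):

    import numpy as np

    class Sub(np.ndarray):
        __array_priority__ = -1000

    # Sub(2) goes through ndarray.__new__, which, like np.empty, returns
    # *uninitialized* memory; the values, and any floating-point warnings
    # they happen to trigger, are unpredictable.
    x = Sub(2)

    # Viewing initialized data as the subclass keeps the test deterministic.
    y = np.ones(2).view(Sub)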
--- numpy/_core/tests/test_ufunc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numpy/_core/tests/test_ufunc.py b/numpy/_core/tests/test_ufunc.py index 21ebc02c2625..a1cd63aec523 100644 --- a/numpy/_core/tests/test_ufunc.py +++ b/numpy/_core/tests/test_ufunc.py @@ -2123,9 +2123,9 @@ class ArrayPriorityMinus1000b(ArrayPriorityBase): class ArrayPriorityMinus2000(ArrayPriorityBase): __array_priority__ = -2000 - x = ArrayPriorityMinus1000(2) - xb = ArrayPriorityMinus1000b(2) - y = ArrayPriorityMinus2000(2) + x = np.ones(2).view(ArrayPriorityMinus1000) + xb = np.ones(2).view(ArrayPriorityMinus1000b) + y = np.ones(2).view(ArrayPriorityMinus2000) assert np.add(x, y) is ArrayPriorityMinus1000 assert np.add(y, x) is ArrayPriorityMinus1000 From 4575abf725d9f14af73ee0442b05c01d0d46a7c1 Mon Sep 17 00:00:00 2001 From: Joren Hammudoglu Date: Thu, 19 Jun 2025 16:43:25 +0200 Subject: [PATCH 19/26] MAINT: bump ``ruff`` to ``0.12.0`` (#29220) --- environment.yml | 2 +- requirements/linter_requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 75c6626abaf8..17de8d3eeb5e 100644 --- a/environment.yml +++ b/environment.yml @@ -45,7 +45,7 @@ dependencies: # NOTE: breathe 4.33.0 collides with sphinx.ext.graphviz - breathe>4.33.0 # For linting - - ruff=0.11.13 + - ruff=0.12.0 - gitpython # Used in some tests - cffi diff --git a/requirements/linter_requirements.txt b/requirements/linter_requirements.txt index 45319571b561..05319a9bdb8a 100644 --- a/requirements/linter_requirements.txt +++ b/requirements/linter_requirements.txt @@ -1,3 +1,3 @@ # keep in sync with `environment.yml` -ruff==0.11.13 +ruff==0.12.0 GitPython>=3.1.30 From 7b30ce7432d0b42bb805fbf5575b0d0ba5be98ab Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Thu, 19 Jun 2025 16:47:49 +0200 Subject: [PATCH 20/26] BUG: Enforce integer limitation in concatenate (#29231) * BUG: Enforce integer limitation in concatenate Concatenate internals only deal with integer many arrays, that should be fine in practice, but a SystemError (or in principle maybe also a harder crash?) is not really. 
* skip 32bit systems --- numpy/_core/src/multiarray/multiarraymodule.c | 11 +++++++++-- numpy/_core/tests/test_shape_base.py | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index dcfb1226a0ab..d4766b5af7b4 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -669,10 +669,17 @@ PyArray_ConcatenateInto(PyObject *op, } /* Convert the input list into arrays */ - narrays = PySequence_Size(op); - if (narrays < 0) { + Py_ssize_t narrays_true = PySequence_Size(op); + if (narrays_true < 0) { return NULL; } + else if (narrays_true > NPY_MAX_INT) { + PyErr_Format(PyExc_ValueError, + "concatenate() only supports up to %d arrays but got %zd.", + NPY_MAX_INT, narrays_true); + return NULL; + } + narrays = (int)narrays_true; arrays = PyArray_malloc(narrays * sizeof(arrays[0])); if (arrays == NULL) { PyErr_NoMemory(); diff --git a/numpy/_core/tests/test_shape_base.py b/numpy/_core/tests/test_shape_base.py index f7b944be08b7..8de24278fc5d 100644 --- a/numpy/_core/tests/test_shape_base.py +++ b/numpy/_core/tests/test_shape_base.py @@ -1,3 +1,5 @@ +import sys + import pytest import numpy as np @@ -29,6 +31,7 @@ assert_raises, assert_raises_regex, ) +from numpy.testing._private.utils import requires_memory class TestAtleast1d: @@ -290,6 +293,17 @@ def test_exceptions(self): # No arrays to concatenate raises ValueError assert_raises(ValueError, concatenate, ()) + @pytest.mark.slow + @pytest.mark.skipif(sys.maxsize < 2**32, reason="only problematic on 64bit platforms") + @requires_memory(2 * np.iinfo(np.intc).max) + def test_huge_list_error(self): + a = np.array([1]) + max_int = np.iinfo(np.intc).max + arrs = (a,) * (max_int + 1) + msg = fr"concatenate\(\) only supports up to {max_int} arrays but got {max_int + 1}." + with pytest.raises(ValueError, match=msg): + np.concatenate(arrs) + def test_concatenate_axis_None(self): a = np.arange(4, dtype=np.float64).reshape((2, 2)) b = list(range(3)) From 1da34ea0daaedb94d2b673c70a47f4a57883a713 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 20 Jun 2025 07:43:13 +0200 Subject: [PATCH 21/26] DEP: Deprecate setting the strides attribute of a numpy array (#28925) Deprecate setting strides (mutating) on an array. --------- Co-authored-by: Charles Harris Co-authored-by: Sebastian Berg Co-authored-by: Joren Hammudoglu --- .../upcoming_changes/28925.deprecation.rst | 9 +++++ numpy/__init__.pyi | 3 +- numpy/_core/src/multiarray/getset.c | 22 ++++++++---- numpy/_core/tests/test_api.py | 3 +- numpy/_core/tests/test_deprecations.py | 7 ++++ numpy/_core/tests/test_half.py | 6 ++-- numpy/_core/tests/test_multiarray.py | 36 ++++++++++++------- numpy/_core/tests/test_nditer.py | 4 +-- numpy/_core/tests/test_regression.py | 23 ++++++------ numpy/lib/_npyio_impl.py | 2 +- 10 files changed, 75 insertions(+), 40 deletions(-) create mode 100644 doc/release/upcoming_changes/28925.deprecation.rst diff --git a/doc/release/upcoming_changes/28925.deprecation.rst b/doc/release/upcoming_changes/28925.deprecation.rst new file mode 100644 index 000000000000..a421839394fa --- /dev/null +++ b/doc/release/upcoming_changes/28925.deprecation.rst @@ -0,0 +1,9 @@ +Setting the ``strides`` attribute is deprecated +----------------------------------------------- +Setting the strides attribute is now deprecated since mutating +an array is unsafe if an array is shared, especially by multiple +threads. 
As an alternative, you can create a new view (no copy) via:
+* `np.lib.stride_tricks.sliding_window_view` if applicable,
+* `np.lib.stride_tricks.as_strided` for the general case,
+* or the `np.ndarray` constructor (``buffer`` is the original array) for a light-weight version.
+
diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index bcbe95accee6..2e9dea06ce6a 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -217,7 +217,7 @@ from typing import (
 # library include `typing_extensions` stubs:
 #     https://github.com/python/typeshed/blob/main/stdlib/typing_extensions.pyi
 from _typeshed import Incomplete, StrOrBytesPath, SupportsFlush, SupportsLenAndGetItem, SupportsWrite
-from typing_extensions import CapsuleType, TypeVar
+from typing_extensions import CapsuleType, TypeVar, deprecated
 
 from numpy import (
     char,
@@ -2169,7 +2169,8 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeT_co, _DTypeT_co]):
     def shape(self, value: _ShapeLike) -> None: ...
     @property
     def strides(self) -> _Shape: ...
+    @deprecated("Setting the strides on a NumPy array has been deprecated in NumPy 2.4")
     @strides.setter
     def strides(self, value: _ShapeLike) -> None: ...
     def byteswap(self, inplace: builtins.bool = ...) -> Self: ...
diff --git a/numpy/_core/src/multiarray/getset.c b/numpy/_core/src/multiarray/getset.c
index 8482b6006e3e..48da52dd3178 100644
--- a/numpy/_core/src/multiarray/getset.c
+++ b/numpy/_core/src/multiarray/getset.c
@@ -85,7 +85,7 @@ array_shape_set(PyArrayObject *self, PyObject *val, void* NPY_UNUSED(ignored))
         /* Free old dimensions and strides */
         npy_free_cache_dim_array(self);
         ((PyArrayObject_fields *)self)->nd = nd;
-        ((PyArrayObject_fields *)self)->dimensions = _dimensions;
+        ((PyArrayObject_fields *)self)->dimensions = _dimensions;
         ((PyArrayObject_fields *)self)->strides = _dimensions + nd;
 
         if (nd) {
@@ -95,7 +95,7 @@ array_shape_set(PyArrayObject *self, PyObject *val, void* NPY_UNUSED(ignored))
     }
     else {
         /* Free old dimensions and strides */
-        npy_free_cache_dim_array(self);
+        npy_free_cache_dim_array(self);
         ((PyArrayObject_fields *)self)->nd = 0;
         ((PyArrayObject_fields *)self)->dimensions = NULL;
         ((PyArrayObject_fields *)self)->strides = NULL;
@@ -116,6 +116,19 @@ array_strides_get(PyArrayObject *self, void *NPY_UNUSED(ignored))
 static int
 array_strides_set(PyArrayObject *self, PyObject *obj, void *NPY_UNUSED(ignored))
 {
+    if (obj == NULL) {
+        PyErr_SetString(PyExc_AttributeError,
+                "Cannot delete array strides");
+        return -1;
+    }
+
+    /* Deprecated NumPy 2.4, 2025-05-11 */
+    if (DEPRECATE("Setting the strides on a NumPy array has been deprecated in NumPy 2.4.\n"
+                  "As an alternative, you can create a new view using np.lib.stride_tricks.as_strided."
+ ) < 0 ) { + return -1; + } + PyArray_Dims newstrides = {NULL, -1}; PyArrayObject *new; npy_intp numbytes = 0; @@ -124,11 +137,6 @@ array_strides_set(PyArrayObject *self, PyObject *obj, void *NPY_UNUSED(ignored)) npy_intp upper_offset = 0; Py_buffer view; - if (obj == NULL) { - PyErr_SetString(PyExc_AttributeError, - "Cannot delete array strides"); - return -1; - } if (!PyArray_OptionalIntpConverter(obj, &newstrides) || newstrides.len == -1) { PyErr_SetString(PyExc_TypeError, "invalid strides"); diff --git a/numpy/_core/tests/test_api.py b/numpy/_core/tests/test_api.py index d427ac0399a2..bb21d79c472d 100644 --- a/numpy/_core/tests/test_api.py +++ b/numpy/_core/tests/test_api.py @@ -5,6 +5,7 @@ import numpy as np import numpy._core.umath as ncu from numpy._core._rational_tests import rational +from numpy.lib import stride_tricks from numpy.testing import ( HAS_REFCOUNT, assert_, @@ -558,7 +559,7 @@ def check_copy_result(x, y, ccontig, fcontig, strides=False): def test_contiguous_flags(): a = np.ones((4, 4, 1))[::2, :, :] - a.strides = a.strides[:2] + (-123,) + a = stride_tricks.as_strided(a, strides=a.strides[:2] + (-123,)) b = np.ones((2, 2, 1, 2, 2)).swapaxes(3, 4) def check_contig(a, ccontig, fcontig): diff --git a/numpy/_core/tests/test_deprecations.py b/numpy/_core/tests/test_deprecations.py index c4acbf9d2d69..7d4875d6d149 100644 --- a/numpy/_core/tests/test_deprecations.py +++ b/numpy/_core/tests/test_deprecations.py @@ -406,6 +406,13 @@ def __array_wrap__(self, arr): self.assert_deprecated(lambda: np.negative(test2)) assert test2.called +class TestDeprecatedArrayAttributeSetting(_DeprecationTestCase): + message = "Setting the .*on a NumPy array has been deprecated.*" + + def test_deprecated_strides_set(self): + x = np.eye(2) + self.assert_deprecated(setattr, args=(x, 'strides', x.strides)) + class TestDeprecatedDTypeParenthesizedRepeatCount(_DeprecationTestCase): message = "Passing in a parenthesized single number" diff --git a/numpy/_core/tests/test_half.py b/numpy/_core/tests/test_half.py index e2d6e6796db4..711c13655b7a 100644 --- a/numpy/_core/tests/test_half.py +++ b/numpy/_core/tests/test_half.py @@ -21,7 +21,7 @@ class TestHalf: def setup_method(self): # An array of all possible float16 values self.all_f16 = np.arange(0x10000, dtype=uint16) - self.all_f16.dtype = float16 + self.all_f16 = self.all_f16.view(float16) # NaN value can cause an invalid FP exception if HW is being used with np.errstate(invalid='ignore'): @@ -32,7 +32,7 @@ def setup_method(self): self.nonan_f16 = np.concatenate( (np.arange(0xfc00, 0x7fff, -1, dtype=uint16), np.arange(0x0000, 0x7c01, 1, dtype=uint16))) - self.nonan_f16.dtype = float16 + self.nonan_f16 = self.nonan_f16.view(float16) self.nonan_f32 = np.array(self.nonan_f16, dtype=float32) self.nonan_f64 = np.array(self.nonan_f16, dtype=float64) @@ -218,7 +218,7 @@ def test_half_values(self): 0x0001, 0x8001, 0x0000, 0x8000, 0x7c00, 0xfc00], dtype=uint16) - b.dtype = float16 + b = b.view(dtype=float16) assert_equal(a, b) def test_half_rounding(self): diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index 81e55deb3daf..0faf35c64b98 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -29,6 +29,7 @@ from numpy._core.multiarray import _get_ndarray_c_version, dot from numpy._core.tests._locales import CommaDecimalPointLocale from numpy.exceptions import AxisError, ComplexWarning +from numpy.lib import stride_tricks from numpy.lib.recfunctions import repack_fields from 
numpy.testing import ( BLAS_SUPPORTS_FPE, @@ -382,7 +383,8 @@ def make_array(size, offset, strides): offset=offset * x.itemsize) except Exception as e: raise RuntimeError(e) - r.strides = strides = strides * x.itemsize + with pytest.warns(DeprecationWarning): + r.strides = strides * x.itemsize return r assert_equal(make_array(4, 4, -1), np.array([4, 3, 2, 1])) @@ -392,24 +394,28 @@ def make_array(size, offset, strides): assert_raises(RuntimeError, make_array, 8, 3, 1) # Check that the true extent of the array is used. # Test relies on as_strided base not exposing a buffer. - x = np.lib.stride_tricks.as_strided(np.arange(1), (10, 10), (0, 0)) + x = stride_tricks.as_strided(np.arange(1), (10, 10), (0, 0)) def set_strides(arr, strides): - arr.strides = strides + with pytest.warns(DeprecationWarning): + arr.strides = strides assert_raises(ValueError, set_strides, x, (10 * x.itemsize, x.itemsize)) # Test for offset calculations: - x = np.lib.stride_tricks.as_strided(np.arange(10, dtype=np.int8)[-1], + x = stride_tricks.as_strided(np.arange(10, dtype=np.int8)[-1], shape=(10,), strides=(-1,)) assert_raises(ValueError, set_strides, x[::-1], -1) a = x[::-1] - a.strides = 1 - a[::2].strides = 2 + with pytest.warns(DeprecationWarning): + a.strides = 1 + with pytest.warns(DeprecationWarning): + a[::2].strides = 2 # test 0d arr_0d = np.array(0) - arr_0d.strides = () + with pytest.warns(DeprecationWarning): + arr_0d.strides = () assert_raises(TypeError, set_strides, arr_0d, None) def test_fill(self): @@ -3635,7 +3641,7 @@ def test_ravel(self): a = a.reshape(2, 1, 2, 2).swapaxes(-1, -2) strides = list(a.strides) strides[1] = 123 - a.strides = strides + a = stride_tricks.as_strided(a, strides=strides) assert_(a.ravel(order='K').flags.owndata) assert_equal(a.ravel('K'), np.arange(0, 15, 2)) @@ -3644,7 +3650,7 @@ def test_ravel(self): a = a.reshape(2, 1, 2, 2).swapaxes(-1, -2) strides = list(a.strides) strides[1] = 123 - a.strides = strides + a = stride_tricks.as_strided(a, strides=strides) assert_(np.may_share_memory(a.ravel(order='K'), a)) assert_equal(a.ravel(order='K'), np.arange(2**3)) @@ -3657,7 +3663,7 @@ def test_ravel(self): # 1-element tidy strides test: a = np.array([[1]]) - a.strides = (123, 432) + a = stride_tricks.as_strided(a, strides=(123, 432)) if np.ones(1).strides == (8,): assert_(np.may_share_memory(a.ravel('K'), a)) assert_equal(a.ravel('K').strides, (a.dtype.itemsize,)) @@ -4546,7 +4552,8 @@ def test_datetime64_byteorder(self): original = np.array([['2015-02-24T00:00:00.000000000']], dtype='datetime64[ns]') original_byte_reversed = original.copy(order='K') - original_byte_reversed.dtype = original_byte_reversed.dtype.newbyteorder('S') + new_dtype = original_byte_reversed.dtype.newbyteorder('S') + original_byte_reversed = original_byte_reversed.view(dtype=new_dtype) original_byte_reversed.byteswap(inplace=True) new = pickle.loads(pickle.dumps(original_byte_reversed)) @@ -8366,10 +8373,13 @@ def test_padded_struct_array(self): self._check_roundtrip(x3) @pytest.mark.valgrind_error(reason="leaks buffer info cache temporarily.") - def test_relaxed_strides(self, c=np.ones((1, 10, 10), dtype='i8')): # noqa: B008 + def test_relaxed_strides(self, c=stride_tricks.as_strided( # noqa: B008 + np.ones((1, 10, 10), dtype='i8'), # noqa: B008 + strides=(-1, 80, 8) + ) + ): # Note: c defined as parameter so that it is persistent and leak # checks will notice gh-16934 (buffer info cache leak). 
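+        # The fixed strides are baked into the as_strided call in the default
+        # argument above, since assigning to .strides is deprecated.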
- c.strides = (-1, 80, 8) # strides need to be fixed at export assert_(memoryview(c).strides == (800, 80, 8)) diff --git a/numpy/_core/tests/test_nditer.py b/numpy/_core/tests/test_nditer.py index a29a49bfb71a..f71130f16331 100644 --- a/numpy/_core/tests/test_nditer.py +++ b/numpy/_core/tests/test_nditer.py @@ -858,7 +858,7 @@ def test_iter_nbo_align_contig(): # Unaligned input a = np.zeros((6 * 4 + 1,), dtype='i1')[1:] - a.dtype = 'f4' + a = a.view('f4') a[:] = np.arange(6, dtype='f4') assert_(not a.flags.aligned) # Without 'aligned', shouldn't copy @@ -1803,7 +1803,7 @@ def test_iter_buffering(): arrays.append(np.arange(10, dtype='f4')) # Unaligned array a = np.zeros((4 * 16 + 1,), dtype='i1')[1:] - a.dtype = 'i4' + a = a.view('i4') a[:] = np.arange(16, dtype='i4') arrays.append(a) # 4-D F-order array diff --git a/numpy/_core/tests/test_regression.py b/numpy/_core/tests/test_regression.py index fbfa9311a1dc..3d44728aafaa 100644 --- a/numpy/_core/tests/test_regression.py +++ b/numpy/_core/tests/test_regression.py @@ -12,6 +12,7 @@ import numpy as np from numpy._utils import asbytes, asunicode from numpy.exceptions import AxisError, ComplexWarning +from numpy.lib.stride_tricks import as_strided from numpy.testing import ( HAS_REFCOUNT, IS_64BIT, @@ -208,7 +209,7 @@ def test_mem_dot(self): # Dummy array to detect bad memory access: _z = np.ones(10) _dummy = np.empty((0, 10)) - z = np.lib.stride_tricks.as_strided(_z, _dummy.shape, _dummy.strides) + z = as_strided(_z, _dummy.shape, _dummy.strides) np.dot(x, np.transpose(y), out=z) assert_equal(_z, np.ones(10)) # Do the same for the built-in dot: @@ -438,19 +439,16 @@ def test_lexsort_zerolen_custom_strides(self): xs = np.array([], dtype='i8') assert np.lexsort((xs,)).shape[0] == 0 # Works - xs.strides = (16,) + xs = as_strided(xs, strides=(16,)) assert np.lexsort((xs,)).shape[0] == 0 # Was: MemoryError def test_lexsort_zerolen_custom_strides_2d(self): xs = np.array([], dtype='i8') + xt = as_strided(xs, shape=(0, 2), strides=(16, 16)) + assert np.lexsort((xt,), axis=0).shape[0] == 0 - xs.shape = (0, 2) - xs.strides = (16, 16) - assert np.lexsort((xs,), axis=0).shape[0] == 0 - - xs.shape = (2, 0) - xs.strides = (16, 16) - assert np.lexsort((xs,), axis=0).shape[0] == 2 + xt = as_strided(xs, shape=(2, 0), strides=(16, 16)) + assert np.lexsort((xt,), axis=0).shape[0] == 2 def test_lexsort_invalid_axis(self): assert_raises(AxisError, np.lexsort, (np.arange(1),), axis=2) @@ -644,7 +642,7 @@ def test_reshape_order(self): def test_reshape_zero_strides(self): # Issue #380, test reshaping of zero strided arrays a = np.ones(1) - a = np.lib.stride_tricks.as_strided(a, shape=(5,), strides=(0,)) + a = as_strided(a, shape=(5,), strides=(0,)) assert_(a.reshape(5, 1).strides[0] == 0) def test_reshape_zero_size(self): @@ -1654,7 +1652,7 @@ def test_eq_string_and_object_array(self): def test_nonzero_byteswap(self): a = np.array([0x80000000, 0x00000080, 0], dtype=np.uint32) - a.dtype = np.float32 + a = a.view(np.float32) assert_equal(a.nonzero()[0], [1]) a = a.byteswap() a = a.view(a.dtype.newbyteorder()) @@ -1878,7 +1876,8 @@ def test_alignment_update(self): # Check that alignment flag is updated on stride setting a = np.arange(10) assert_(a.flags.aligned) - a.strides = 3 + with pytest.warns(DeprecationWarning): + a.strides = 3 assert_(not a.flags.aligned) def test_ticket_1770(self): diff --git a/numpy/lib/_npyio_impl.py b/numpy/lib/_npyio_impl.py index f284eeb74834..36ead97a1aae 100644 --- a/numpy/lib/_npyio_impl.py +++ b/numpy/lib/_npyio_impl.py @@ -1730,7 
+1730,7 @@ def fromregex(file, regexp, dtype, encoding=None): # re-interpret as a single-field structured array. newdtype = np.dtype(dtype[dtype.names[0]]) output = np.array(seq, dtype=newdtype) - output.dtype = dtype + output = output.view(dtype) else: output = np.array(seq, dtype=dtype) From 20d034fff6931b97780e9f22172309d81a5e8322 Mon Sep 17 00:00:00 2001 From: Koki Watanabe <56009584+math-hiyoko@users.noreply.github.com> Date: Fri, 20 Jun 2025 22:56:31 +0900 Subject: [PATCH 22/26] ENH: np.unique: support hash based unique for string dtype (#28767) * Support NPY_STRING, NPY_UNICODE * unique for NPY_STRING and NPY_UNICODE * fix construct array * remove unnecessary include * refactor * refactoring * comment * feature: unique for NPY_VSTRING * refactoring * remove unnecessary include * add test * add error message * linter * linter * reserve bucket * remove emoji from testcase * fix testcase * remove error * fix testcase * fix testcase name * use basic_string * fix testcase * add ValueError * fix testcase * fix memory error * remove multibyte char * refactoring * add multibyte char * refactoring * fix memory error * fix GIL * fix strlen * remove PyArray_GETPTR1 * refactoring * refactoring * use optional * refactoring * refactoring * refactoring * refactoring * fix comment * linter * add doc * DOC: fix * DOC: fix format * MNT: refactoring * MNT: refactoring * ENH: Store pointers to strings in the set instead of the strings themselves. * FIX: length in memcmp * ENH: refactoring * DOC: 49sec -> 34sec * Update numpy/lib/_arraysetops_impl.py Co-authored-by: Nathan Goldbaum * DOC: Mention that hash-based np.unique returns unsorted strings * ENH: support medium and long vstrings * FIX: comment * ENH: use RAII wrapper * FIX: error handling of string packing * FIX: error handling of string packing * FIX: change default bucket size * FIX: include * FIX: cast * ENH: support equal_nan=False * FIX: function equal * FIX: check the case if pack_status doesn't return NULL * FIX: check the case if pack_status doesn't return NULL * FIX: stderr * ENH: METH_VARARGS -> METH_FASTCALL * FIX: log * FIX: release allocator * FIX: comment * FIX: delete log * ENH: implemented FNV-1a as hash function * bool -> npy_bool * FIX: cast * 34sec -> 35.1sec * fix: lint * fix: cast using const void * * fix: fix fnv1a hash * fix: lint * 35.1sec -> 33.5sec * enh: define macro HASH_TABLE_INITIAL_BUCKETS * enh: error handling of NpyString_load * enh: delete comments on GIL * fix: PyErr_SetString when NpyString_load failed * fix: PyErr_SetString -> npy_gil_error --------- Co-authored-by: Nathan Goldbaum --- doc/release/upcoming_changes/28767.change.rst | 10 + .../upcoming_changes/28767.performance.rst | 10 + numpy/_core/meson.build | 1 + numpy/_core/src/multiarray/fnv.c | 85 ++++ numpy/_core/src/multiarray/fnv.h | 26 ++ numpy/_core/src/multiarray/multiarraymodule.c | 2 +- numpy/_core/src/multiarray/unique.cpp | 368 ++++++++++++++---- numpy/_core/src/multiarray/unique.h | 3 +- numpy/lib/_arraysetops_impl.py | 3 +- numpy/lib/tests/test_arraysetops.py | 186 ++++++++- 10 files changed, 605 insertions(+), 89 deletions(-) create mode 100644 doc/release/upcoming_changes/28767.change.rst create mode 100644 doc/release/upcoming_changes/28767.performance.rst create mode 100644 numpy/_core/src/multiarray/fnv.c create mode 100644 numpy/_core/src/multiarray/fnv.h diff --git a/doc/release/upcoming_changes/28767.change.rst b/doc/release/upcoming_changes/28767.change.rst new file mode 100644 index 000000000000..ec173c3672b0 --- /dev/null +++ 
b/doc/release/upcoming_changes/28767.change.rst @@ -0,0 +1,10 @@ +``unique_values`` for string dtypes may return unsorted data +------------------------------------------------------------ +np.unique now supports hash-based duplicate removal for string dtypes. +This enhancement extends the hash-table algorithm to byte strings ('S'), +Unicode strings ('U'), and the experimental string dtype ('T', StringDType). +As a result, calling np.unique() on an array of strings will use +the faster hash-based method to obtain unique values. +Note that this hash-based method does not guarantee that the returned unique values will be sorted. +This also works for StringDType arrays containing None (missing values) +when using equal_nan=True (treating missing values as equal). diff --git a/doc/release/upcoming_changes/28767.performance.rst b/doc/release/upcoming_changes/28767.performance.rst new file mode 100644 index 000000000000..ef8ac1c3a45d --- /dev/null +++ b/doc/release/upcoming_changes/28767.performance.rst @@ -0,0 +1,10 @@ +Performance improvements to ``np.unique`` for string dtypes +----------------------------------------------------------- +The hash-based algorithm for unique extraction provides +an order-of-magnitude speedup on large string arrays. +In an internal benchmark with about 1 billion string elements, +the hash-based np.unique completed in roughly 33.5 seconds, +compared to 498 seconds with the sort-based method +– about 15× faster for unsorted unique operations on strings. +This improvement greatly reduces the time to find unique values +in very large string datasets. diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index cd46a20b0246..6098986618e4 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -1207,6 +1207,7 @@ src_multiarray = multiarray_gen_headers + [ # Remove this `arm64_exports.c` file once scipy macos arm64 build correctly # links to the arm64 npymath library, see gh-22673 'src/npymath/arm64_exports.c', + 'src/multiarray/fnv.c', ] src_umath = umath_gen_headers + [ diff --git a/numpy/_core/src/multiarray/fnv.c b/numpy/_core/src/multiarray/fnv.c new file mode 100644 index 000000000000..2b7848519e61 --- /dev/null +++ b/numpy/_core/src/multiarray/fnv.c @@ -0,0 +1,85 @@ +/* + FNV-1a hash algorithm implementation + Based on the implementation from: + https://github.com/lcn2/fnv +*/ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#include <Python.h> +#include "numpy/npy_common.h" +#include "fnv.h" + + +#define FNV1A_32_INIT ((npy_uint32)0x811c9dc5) +#define FNV1A_64_INIT ((npy_uint64)0xcbf29ce484222325ULL) + +/* + Compute a 32-bit FNV-1a hash of buffer + original implementation from: + https://github.com/lcn2/fnv/blob/b7fcbee95538ee6a15744e756e7e7f1c02862cb0/hash_32a.c +*/ +npy_uint32 +npy_fnv1a_32(const void *buf, size_t len, npy_uint32 hval) +{ + const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */ + const unsigned char *be = bp + len; /* beyond end of buffer */ + + /* + FNV-1a hash each octet in the buffer + */ + while (bp < be) { + + /* xor the bottom with the current octet */ + hval ^= (npy_uint32)*bp++; + + /* multiply by the 32 bit FNV magic prime */ + /* hval *= 0x01000193; */ + hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); + } + + return hval; +} + +/* + Compute a 64-bit FNV-1a hash of the given data + original implementation from: + https://github.com/lcn2/fnv/blob/b7fcbee95538ee6a15744e756e7e7f1c02862cb0/hash_64a.c +*/ +npy_uint64 +npy_fnv1a_64(const void *buf, size_t
len, npy_uint64 hval) +{ + const unsigned char *bp = (const unsigned char *)buf; /* start of buffer */ + const unsigned char *be = bp + len; /* beyond end of buffer */ + + /* + FNV-1a hash each octet in the buffer + */ + while (bp < be) { + + /* xor the bottom with the current octet */ + hval ^= (npy_uint64)*bp++; + + /* multiply by the 64 bit FNV magic prime */ + /* hval *= 0x100000001b3ULL; */ + hval += (hval << 1) + (hval << 4) + (hval << 5) + + (hval << 7) + (hval << 8) + (hval << 40); + } + + return hval; +} + +/* + * Compute a size_t FNV-1a hash of the given data + * This will use 32-bit or 64-bit hash depending on the size of size_t + */ +size_t +npy_fnv1a(const void *buf, size_t len) +{ +#if NPY_SIZEOF_SIZE_T == 8 + return (size_t)npy_fnv1a_64(buf, len, FNV1A_64_INIT); +#else /* NPY_SIZEOF_SIZE_T == 4 */ + return (size_t)npy_fnv1a_32(buf, len, FNV1A_32_INIT); +#endif +} diff --git a/numpy/_core/src/multiarray/fnv.h b/numpy/_core/src/multiarray/fnv.h new file mode 100644 index 000000000000..c76f54a645b9 --- /dev/null +++ b/numpy/_core/src/multiarray/fnv.h @@ -0,0 +1,26 @@ +/* + FNV-1a hash algorithm implementation + Based on the implementation from: + https://github.com/lcn2/fnv +*/ + +#ifndef NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_FNV_H_ +#define NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_FNV_H_ + + +/* + Compute a size_t FNV-1a hash of the given data + This will use 32-bit or 64-bit hash depending on the size of size_t + + Parameters: + ----------- + buf - pointer to the data to be hashed + len - length of the data in bytes + + Returns: + ----------- + size_t hash value +*/ +size_t npy_fnv1a(const void *buf, size_t len); + +#endif // NUMPY_CORE_INCLUDE_NUMPY_MULTIARRAY_FNV_H_ diff --git a/numpy/_core/src/multiarray/multiarraymodule.c b/numpy/_core/src/multiarray/multiarraymodule.c index d4766b5af7b4..e80c6c0cd45c 100644 --- a/numpy/_core/src/multiarray/multiarraymodule.c +++ b/numpy/_core/src/multiarray/multiarraymodule.c @@ -4579,7 +4579,7 @@ static struct PyMethodDef array_module_methods[] = { {"from_dlpack", (PyCFunction)from_dlpack, METH_FASTCALL | METH_KEYWORDS, NULL}, {"_unique_hash", (PyCFunction)array__unique_hash, - METH_O, "Collect unique values via a hash map."}, + METH_FASTCALL | METH_KEYWORDS, "Collect unique values via a hash map."}, {NULL, NULL, 0, NULL} /* sentinel */ }; diff --git a/numpy/_core/src/multiarray/unique.cpp b/numpy/_core/src/multiarray/unique.cpp index f36acfdef49a..636f1ef0137c 100644 --- a/numpy/_core/src/multiarray/unique.cpp +++ b/numpy/_core/src/multiarray/unique.cpp @@ -1,13 +1,21 @@ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #define _MULTIARRAYMODULE +#define HASH_TABLE_INITIAL_BUCKETS 1024 #include -#include +#include +#include #include +#include #include #include "numpy/arrayobject.h" +#include "gil_utils.h" extern "C" { + #include "fnv.h" + #include "npy_argparse.h" } // This is to use RAII pattern to handle cpp exceptions while avoiding memory leaks. // Adapted from https://stackoverflow.com/a/25510879/2536294 @@ -18,77 +26,128 @@ struct FinalAction { private: F clean_; }; - template <typename F> FinalAction<F> finally(F f) { return FinalAction<F>(f); } +template <typename F> FinalAction<F> finally(F f) { return FinalAction<F>(f); } -template <typename T> +template <typename T> static PyObject* -unique(PyArrayObject *self) +unique_integer(PyArrayObject *self, npy_bool equal_nan) { - /* This function takes a numpy array and returns a numpy array containing - the unique values. - - It assumes the numpy array includes data that can be viewed as unsigned integers - of a certain size (sizeof(T)).
- - It doesn't need to know the actual type, since it needs to find unique values - among binary representations of the input data. This means it won't apply to - custom or complicated dtypes or string values. + */ NPY_ALLOW_C_API_DEF; - std::unordered_set<T> hashset; - - NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY | - NPY_ITER_EXTERNAL_LOOP | - NPY_ITER_REFS_OK | - NPY_ITER_ZEROSIZE_OK | - NPY_ITER_GROWINNER, - NPY_KEEPORDER, NPY_NO_CASTING, - NULL); - // Making sure the iterator is deallocated when the function returns, with - // or w/o an exception - auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); }); - if (iter == NULL) { - return NULL; + NPY_ALLOW_C_API; + PyArray_Descr *descr = PyArray_DESCR(self); + Py_INCREF(descr); + NPY_DISABLE_C_API; + + PyThreadState *_save1 = PyEval_SaveThread(); + + // number of elements in the input array + npy_intp isize = PyArray_SIZE(self); + + // Reserve hashset capacity in advance to minimize reallocations and collisions. + // We use min(isize, HASH_TABLE_INITIAL_BUCKETS) as the initial bucket count: + // - Reserving for all elements (isize) may over-allocate when there are few unique values. + // - Using a moderate upper bound HASH_TABLE_INITIAL_BUCKETS(1024) keeps memory usage reasonable (4 KiB for pointers). + // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631 + std::unordered_set<T> hashset(std::min(isize, (npy_intp)HASH_TABLE_INITIAL_BUCKETS)); + + // Input array is one-dimensional, enabling efficient iteration using strides. + char *idata = PyArray_BYTES(self); + npy_intp istride = PyArray_STRIDES(self)[0]; + for (npy_intp i = 0; i < isize; i++, idata += istride) { + hashset.insert(*(T *)idata); } - NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL); - if (iternext == NULL) { + npy_intp length = hashset.size(); + + PyEval_RestoreThread(_save1); + NPY_ALLOW_C_API; + PyObject *res_obj = PyArray_NewFromDescr( + &PyArray_Type, + descr, + 1, // ndim + &length, // shape + NULL, // strides + NULL, // data + // This flag is needed to be able to call .sort on it. + NPY_ARRAY_WRITEABLE, // flags + NULL // obj + ); + + if (res_obj == NULL) { return NULL; } - char **dataptr = NpyIter_GetDataPtrArray(iter); - npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter); - npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter); - - // release the GIL - PyThreadState *_save; - _save = PyEval_SaveThread(); - // Making sure the GIL is re-acquired when the function returns, with - // or w/o an exception - auto grab_gil = finally([&]() { PyEval_RestoreThread(_save); }); - // first we put the data in a hash map - - if (NpyIter_GetIterSize(iter) > 0) { - do { - char* data = *dataptr; - npy_intp stride = *strideptr; - npy_intp count = *innersizeptr; - - while (count--) { - hashset.insert(*((T *) data)); - data += stride; - } - } while (iternext(iter)); + NPY_DISABLE_C_API; + PyThreadState *_save2 = PyEval_SaveThread(); + auto save2_dealloc = finally([&]() { + PyEval_RestoreThread(_save2); + }); + + char *odata = PyArray_BYTES((PyArrayObject *)res_obj); + npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0]; + // Output array is one-dimensional, enabling efficient iteration using strides.
+ for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) { + *(T *)odata = *it; } - npy_intp length = hashset.size(); + return res_obj; +} +template <typename T> +static PyObject* +unique_string(PyArrayObject *self, npy_bool equal_nan) +{ + /* + * Returns a new NumPy array containing the unique values of the input array of fixed size strings. + * This function uses hashing to identify uniqueness efficiently. + */ + NPY_ALLOW_C_API_DEF; NPY_ALLOW_C_API; PyArray_Descr *descr = PyArray_DESCR(self); Py_INCREF(descr); + NPY_DISABLE_C_API; + + PyThreadState *_save1 = PyEval_SaveThread(); + + // number of elements in the input array + npy_intp isize = PyArray_SIZE(self); + + // variables for the string + npy_intp itemsize = descr->elsize; + npy_intp num_chars = itemsize / sizeof(T); + auto hash = [num_chars](const T *value) -> size_t { + return npy_fnv1a(value, num_chars * sizeof(T)); + }; + auto equal = [itemsize](const T *lhs, const T *rhs) -> bool { + return std::memcmp(lhs, rhs, itemsize) == 0; + }; + + // Reserve hashset capacity in advance to minimize reallocations and collisions. + // We use min(isize, HASH_TABLE_INITIAL_BUCKETS) as the initial bucket count: + // - Reserving for all elements (isize) may over-allocate when there are few unique values. + // - Using a moderate upper bound HASH_TABLE_INITIAL_BUCKETS(1024) keeps memory usage reasonable (4 KiB for pointers). + // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631 + std::unordered_set<T *, decltype(hash), decltype(equal)> hashset( + std::min(isize, (npy_intp)HASH_TABLE_INITIAL_BUCKETS), hash, equal + ); + + // Input array is one-dimensional, enabling efficient iteration using strides. + char *idata = PyArray_BYTES(self); + npy_intp istride = PyArray_STRIDES(self)[0]; + for (npy_intp i = 0; i < isize; i++, idata += istride) { + hashset.insert((T *)idata); + } + + npy_intp length = hashset.size(); + + PyEval_RestoreThread(_save1); + NPY_ALLOW_C_API; PyObject *res_obj = PyArray_NewFromDescr( &PyArray_Type, descr, @@ -100,18 +159,147 @@ unique(PyArrayObject *self) NPY_ARRAY_WRITEABLE, // flags NULL // obj ); + + if (res_obj == NULL) { + return NULL; + } NPY_DISABLE_C_API; + PyThreadState *_save2 = PyEval_SaveThread(); + auto save2_dealloc = finally([&]() { + PyEval_RestoreThread(_save2); + }); + + char *odata = PyArray_BYTES((PyArrayObject *)res_obj); + npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0]; + // Output array is one-dimensional, enabling efficient iteration using strides. + for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) { + std::memcpy(odata, *it, itemsize); + } + + return res_obj; +} + +static PyObject* +unique_vstring(PyArrayObject *self, npy_bool equal_nan) +{ + /* + * Returns a new NumPy array containing the unique values of the input array. + * This function uses hashing to identify uniqueness efficiently.
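+ * Missing values (NULL strings) hash and compare as equal when equal_nan is true, so at most one missing value appears in the result; when it is false, every missing value is kept as a distinct entry.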
+ */ + NPY_ALLOW_C_API_DEF; + NPY_ALLOW_C_API; + PyArray_Descr *descr = PyArray_DESCR(self); + Py_INCREF(descr); + NPY_DISABLE_C_API; + + PyThreadState *_save1 = PyEval_SaveThread(); + + // number of elements in the input array + npy_intp isize = PyArray_SIZE(self); + + // variables for the vstring + npy_string_allocator *in_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)descr); + auto hash = [equal_nan](const npy_static_string *value) -> size_t { + if (value->buf == NULL) { + if (equal_nan) { + return 0; + } else { + return std::hash<const npy_static_string *>{}(value); + } + } + return npy_fnv1a(value->buf, value->size * sizeof(char)); + }; + auto equal = [equal_nan](const npy_static_string *lhs, const npy_static_string *rhs) -> bool { + if (lhs->buf == NULL && rhs->buf == NULL) { + if (equal_nan) { + return true; + } else { + return lhs == rhs; + } + } + if (lhs->buf == NULL || rhs->buf == NULL) { + return false; + } + if (lhs->size != rhs->size) { + return false; + } + return std::memcmp(lhs->buf, rhs->buf, lhs->size) == 0; + }; + // Reserve hashset capacity in advance to minimize reallocations and collisions. + // We use min(isize, HASH_TABLE_INITIAL_BUCKETS) as the initial bucket count: + // - Reserving for all elements (isize) may over-allocate when there are few unique values. + // - Using a moderate upper bound HASH_TABLE_INITIAL_BUCKETS(1024) keeps memory usage reasonable (4 KiB for pointers). + // See discussion: https://github.com/numpy/numpy/pull/28767#discussion_r2064267631 + std::unordered_set<npy_static_string *, decltype(hash), decltype(equal)> hashset( + std::min(isize, (npy_intp)HASH_TABLE_INITIAL_BUCKETS), hash, equal + ); + + // Input array is one-dimensional, enabling efficient iteration using strides. + char *idata = PyArray_BYTES(self); + npy_intp istride = PyArray_STRIDES(self)[0]; + // unpacked_strings need to be allocated outside of the loop because of the lifetime problem. + std::vector<npy_static_string> unpacked_strings(isize, {0, NULL}); + for (npy_intp i = 0; i < isize; i++, idata += istride) { + npy_packed_static_string *packed_string = (npy_packed_static_string *)idata; + int is_null = NpyString_load(in_allocator, packed_string, &unpacked_strings[i]); + if (is_null == -1) { + npy_gil_error(PyExc_RuntimeError, + "Failed to load string from packed static string. "); + return NULL; + } + hashset.insert(&unpacked_strings[i]); + } + + NpyString_release_allocator(in_allocator); + + npy_intp length = hashset.size(); + + PyEval_RestoreThread(_save1); + NPY_ALLOW_C_API; + PyObject *res_obj = PyArray_NewFromDescr( + &PyArray_Type, + descr, + 1, // ndim + &length, // shape + NULL, // strides + NULL, // data + // This flag is needed to be able to call .sort on it.
+ NPY_ARRAY_WRITEABLE, // flags + NULL // obj + ); if (res_obj == NULL) { return NULL; } + PyArray_Descr *res_descr = PyArray_DESCR((PyArrayObject *)res_obj); + Py_INCREF(res_descr); + NPY_DISABLE_C_API; + + PyThreadState *_save2 = PyEval_SaveThread(); + auto save2_dealloc = finally([&]() { + PyEval_RestoreThread(_save2); + }); + + npy_string_allocator *out_allocator = NpyString_acquire_allocator((PyArray_StringDTypeObject *)res_descr); + auto out_allocator_dealloc = finally([&]() { + NpyString_release_allocator(out_allocator); + }); - // then we iterate through the map's keys to get the unique values - T* data = (T *)PyArray_DATA((PyArrayObject *)res_obj); - auto it = hashset.begin(); - size_t i = 0; - for (; it != hashset.end(); it++, i++) { - data[i] = *it; + char *odata = PyArray_BYTES((PyArrayObject *)res_obj); + npy_intp ostride = PyArray_STRIDES((PyArrayObject *)res_obj)[0]; + // Output array is one-dimensional, enabling efficient iteration using strides. + for (auto it = hashset.begin(); it != hashset.end(); it++, odata += ostride) { + npy_packed_static_string *packed_string = (npy_packed_static_string *)odata; + int pack_status = 0; + if ((*it)->buf == NULL) { + pack_status = NpyString_pack_null(out_allocator, packed_string); + } else { + pack_status = NpyString_pack(out_allocator, packed_string, (*it)->buf, (*it)->size); + } + if (pack_status == -1) { + // string packing failed + return NULL; + } } return res_obj; @@ -119,27 +307,30 @@ unique(PyArrayObject *self) // this map contains the functions used for each item size. -typedef std::function<PyObject *(PyArrayObject *)> function_type; +typedef std::function<PyObject *(PyArrayObject *, npy_bool)> function_type; std::unordered_map<int, function_type> unique_funcs = { - {NPY_BYTE, unique<npy_byte>}, - {NPY_UBYTE, unique<npy_ubyte>}, - {NPY_SHORT, unique<npy_short>}, - {NPY_USHORT, unique<npy_ushort>}, - {NPY_INT, unique<npy_int>}, - {NPY_UINT, unique<npy_uint>}, - {NPY_LONG, unique<npy_long>}, - {NPY_ULONG, unique<npy_ulong>}, - {NPY_LONGLONG, unique<npy_longlong>}, - {NPY_ULONGLONG, unique<npy_ulonglong>}, - {NPY_INT8, unique<npy_int8>}, - {NPY_INT16, unique<npy_int16>}, - {NPY_INT32, unique<npy_int32>}, - {NPY_INT64, unique<npy_int64>}, - {NPY_UINT8, unique<npy_uint8>}, - {NPY_UINT16, unique<npy_uint16>}, - {NPY_UINT32, unique<npy_uint32>}, - {NPY_UINT64, unique<npy_uint64>}, - {NPY_DATETIME, unique<npy_uint64>}, + {NPY_BYTE, unique_integer<npy_byte>}, + {NPY_UBYTE, unique_integer<npy_ubyte>}, + {NPY_SHORT, unique_integer<npy_short>}, + {NPY_USHORT, unique_integer<npy_ushort>}, + {NPY_INT, unique_integer<npy_int>}, + {NPY_UINT, unique_integer<npy_uint>}, + {NPY_LONG, unique_integer<npy_long>}, + {NPY_ULONG, unique_integer<npy_ulong>}, + {NPY_LONGLONG, unique_integer<npy_longlong>}, + {NPY_ULONGLONG, unique_integer<npy_ulonglong>}, + {NPY_INT8, unique_integer<npy_int8>}, + {NPY_INT16, unique_integer<npy_int16>}, + {NPY_INT32, unique_integer<npy_int32>}, + {NPY_INT64, unique_integer<npy_int64>}, + {NPY_UINT8, unique_integer<npy_uint8>}, + {NPY_UINT16, unique_integer<npy_uint16>}, + {NPY_UINT32, unique_integer<npy_uint32>}, + {NPY_UINT64, unique_integer<npy_uint64>}, + {NPY_DATETIME, unique_integer<npy_uint64>}, + {NPY_STRING, unique_string<npy_byte>}, + {NPY_UNICODE, unique_string<npy_ucs4>}, + {NPY_VSTRING, unique_vstring}, }; @@ -154,14 +345,21 @@ std::unordered_map<int, function_type> unique_funcs = { * type is unsupported or `NULL` with an error set.
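+ * equal_nan: if true (the default), missing values (StringDType NA) compare equal and are collapsed into a single entry; if false, each missing value is kept.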
*/ extern "C" NPY_NO_EXPORT PyObject * -array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj) +array__unique_hash(PyObject *NPY_UNUSED(module), + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) { - if (!PyArray_Check(arr_obj)) { - PyErr_SetString(PyExc_TypeError, - "_unique_hash() requires a NumPy array input."); + PyArrayObject *arr = NULL; + npy_bool equal_nan = NPY_TRUE; // default to True + + NPY_PREPARE_ARGPARSER; + if (npy_parse_arguments("_unique_hash", args, len_args, kwnames, + "arr", &PyArray_Converter, &arr, + "|equal_nan", &PyArray_BoolConverter, &equal_nan, + NULL, NULL, NULL + ) < 0 + ) { return NULL; } - PyArrayObject *arr = (PyArrayObject *)arr_obj; try { auto type = PyArray_TYPE(arr); @@ -170,7 +368,7 @@ array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj) Py_RETURN_NOTIMPLEMENTED; } - return unique_funcs[type](arr); + return unique_funcs[type](arr, equal_nan); } catch (const std::bad_alloc &e) { PyErr_NoMemory(); diff --git a/numpy/_core/src/multiarray/unique.h b/numpy/_core/src/multiarray/unique.h index 3e258405e8f4..7b3fb143ada4 100644 --- a/numpy/_core/src/multiarray/unique.h +++ b/numpy/_core/src/multiarray/unique.h @@ -5,7 +5,8 @@ extern "C" { #endif -PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), PyObject *args); +PyObject* array__unique_hash(PyObject *NPY_UNUSED(dummy), + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames); #ifdef __cplusplus } diff --git a/numpy/lib/_arraysetops_impl.py b/numpy/lib/_arraysetops_impl.py index ef0739ba486f..c4788385b924 100644 --- a/numpy/lib/_arraysetops_impl.py +++ b/numpy/lib/_arraysetops_impl.py @@ -368,7 +368,8 @@ def _unique1d(ar, return_index=False, return_inverse=False, conv = _array_converter(ar) ar_, = conv - if (hash_unique := _unique_hash(ar_)) is not NotImplemented: + if (hash_unique := _unique_hash(ar_, equal_nan=equal_nan)) \ + is not NotImplemented: if sorted: hash_unique.sort() # We wrap the result back in case it was a subclass of numpy.ndarray. 
diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 7865e1b16ee9..b3e2bfa279b0 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -5,6 +5,7 @@ import numpy as np from numpy import ediff1d, intersect1d, isin, setdiff1d, setxor1d, union1d, unique +from numpy.dtypes import StringDType from numpy.exceptions import AxisError from numpy.testing import ( assert_array_equal, @@ -813,7 +814,9 @@ def test_unique_1d(self): def test_unique_zero_sized(self): # test for zero-sized arrays - for dt in self.get_types(): + types = self.get_types() + types.extend('SU') + for dt in types: a = np.array([], dt) b = np.array([], dt) i1 = np.array([], np.int64) @@ -838,6 +841,187 @@ class Subclass(np.ndarray): bb = Subclass(b.shape, dtype=dt, buffer=b) self.check_all(aa, bb, i1, i2, c, dt) + def test_unique_byte_string_hash_based(self): + # test for byte string arrays + arr = ['apple', 'banana', 'apple', 'cherry', 'date', 'banana', 'fig', 'grape'] + unq_sorted = ['apple', 'banana', 'cherry', 'date', 'fig', 'grape'] + + a1 = unique(arr, sorted=False) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + assert_array_equal(sorted(a1.tolist()), unq_sorted) + + def test_unique_unicode_string_hash_based(self): + # test for unicode string arrays + arr = [ + 'café', 'cafe', 'café', 'naïve', 'naive', + 'résumé', 'naïve', 'resume', 'résumé', + ] + unq_sorted = ['cafe', 'café', 'naive', 'naïve', 'resume', 'résumé'] + + a1 = unique(arr, sorted=False) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + assert_array_equal(sorted(a1.tolist()), unq_sorted) + + def test_unique_vstring_hash_based_equal_nan(self): + # test for unicode and nullable string arrays (equal_nan=True) + a = np.array([ + # short strings + 'straße', + None, + 'strasse', + 'straße', + None, + 'niño', + 'nino', + 'élève', + 'eleve', + 'niño', + 'élève', + # medium strings + 'b' * 20, + 'ß' * 30, + None, + 'é' * 30, + 'e' * 20, + 'ß' * 30, + 'n' * 30, + 'ñ' * 20, + None, + 'e' * 20, + 'ñ' * 20, + # long strings + 'b' * 300, + 'ß' * 400, + None, + 'é' * 400, + 'e' * 300, + 'ß' * 400, + 'n' * 400, + 'ñ' * 300, + None, + 'e' * 300, + 'ñ' * 300, + ], + dtype=StringDType(na_object=None) + ) + unq_sorted_wo_none = [ + 'b' * 20, + 'b' * 300, + 'e' * 20, + 'e' * 300, + 'eleve', + 'nino', + 'niño', + 'n' * 30, + 'n' * 400, + 'strasse', + 'straße', + 'ß' * 30, + 'ß' * 400, + 'élève', + 'é' * 30, + 'é' * 400, + 'ñ' * 20, + 'ñ' * 300, + ] + + a1 = unique(a, sorted=False, equal_nan=True) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + + # a1 should have exactly one None + count_none = sum(x is None for x in a1) + assert_equal(count_none, 1) + + a1_wo_none = sorted(x for x in a1 if x is not None) + assert_array_equal(a1_wo_none, unq_sorted_wo_none) + + def test_unique_vstring_hash_based_not_equal_nan(self): + # test for unicode and nullable string arrays (equal_nan=False) + a = np.array([ + # short strings + 'straße', + None, + 'strasse', + 'straße', + None, + 'niño', + 'nino', + 'élève', + 'eleve', + 'niño', + 'élève', + # medium strings + 'b' * 20, + 'ß' * 30, + None, + 'é' * 30, + 'e' * 20, + 'ß' * 30, + 'n' * 30, + 'ñ' * 20, + None, + 'e' * 20, + 'ñ' * 20, + # long strings + 'b' * 300, + 'ß' * 400, + None, + 'é' * 400, + 'e' * 300, + 'ß' * 400, + 'n' * 400, + 'ñ' * 300, + None, + 'e' * 300, + 'ñ' * 300, + ], + 
dtype=StringDType(na_object=None) + ) + unq_sorted_wo_none = [ + 'b' * 20, + 'b' * 300, + 'e' * 20, + 'e' * 300, + 'eleve', + 'nino', + 'niño', + 'n' * 30, + 'n' * 400, + 'strasse', + 'straße', + 'ß' * 30, + 'ß' * 400, + 'élève', + 'é' * 30, + 'é' * 400, + 'ñ' * 20, + 'ñ' * 300, + ] + + a1 = unique(a, sorted=False, equal_nan=False) + # the result varies depending on the impl of std::unordered_set, + # so we check them by sorting + + # a1 should have exactly six Nones + count_none = sum(x is None for x in a1) + assert_equal(count_none, 6) + + a1_wo_none = sorted(x for x in a1 if x is not None) + assert_array_equal(a1_wo_none, unq_sorted_wo_none) + + def test_unique_vstring_errors(self): + a = np.array( + [ + 'apple', 'banana', 'apple', None, 'cherry', + 'date', 'banana', 'fig', None, 'grape', + ] * 2, + dtype=StringDType(na_object=None) + ) + assert_raises(ValueError, unique, a, equal_nan=False) + @pytest.mark.parametrize("arg", ["return_index", "return_inverse", "return_counts"]) def test_unsupported_hash_based(self, arg): """These currently never use the hash-based solution. However, From 7c91551b85650c5465d21956acf66a47d9d0d02b Mon Sep 17 00:00:00 2001 From: specsy Date: Fri, 20 Jun 2025 20:29:10 +0530 Subject: [PATCH 23/26] DOC: Update CONTRIBUTING.rst (#28158) * Update CONTRIBUTING.rst fixes #19778 Updating the contribution section so that contributors avoid making mistakes while asking questions, and instead focus on contributing and working on the project right away. * Update CONTRIBUTING.rst Shortened the length of the sentence. * Update CONTRIBUTING.rst --- CONTRIBUTING.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 6e019983a0a2..0919790c65d1 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -7,8 +7,9 @@ Whether you're new to open source or experienced, your contributions help us grow. Pull requests (PRs) are always welcome, but making a PR is just the -start. Please respond to comments and requests for changes to help -move the process forward. Please follow our +start. Please respond to comments and requests for changes to help move the process forward. +Skip asking for an issue to be assigned to you on GitHub; send in your PR, explain what you did, and ask for a review. It makes collaboration and support much easier. +Please follow our `Code of Conduct <https://numpy.org/code-of-conduct/>`__, which applies to all interactions, including issues and PRs. From 1b3a0d9385aa1ded43885d966bb8733da839762f Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Sat, 21 Jun 2025 00:32:00 +0000 Subject: [PATCH 24/26] DOC: Fix some markup errors --- doc/source/f2py/buildtools/distutils-to-meson.rst | 2 +- doc/source/reference/random/multithreading.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/f2py/buildtools/distutils-to-meson.rst b/doc/source/f2py/buildtools/distutils-to-meson.rst index bf5da973e9fa..b24638e62239 100644 --- a/doc/source/f2py/buildtools/distutils-to-meson.rst +++ b/doc/source/f2py/buildtools/distutils-to-meson.rst @@ -117,7 +117,7 @@ sample is included below.
+------------------------------------+-------------------------------+ | LDFLAGS | Linker options | +------------------------------------+-------------------------------+ - | LD\ :sub:`LIBRARY`\ \ :sub:`PATH`\ | Library file locations (Unix) | + | LD\_LIBRARY\_PATH | Library file locations (Unix) | +------------------------------------+-------------------------------+ | LIBS | Libraries to link against | +------------------------------------+-------------------------------+ diff --git a/doc/source/reference/random/multithreading.rst b/doc/source/reference/random/multithreading.rst index 17c6a515cdbc..73d2fc9ee5ad 100644 --- a/doc/source/reference/random/multithreading.rst +++ b/doc/source/reference/random/multithreading.rst @@ -9,7 +9,7 @@ well-behaved (writable and aligned). Under normal circumstances, arrays created using the common constructors such as :meth:`numpy.empty` will satisfy these requirements. -This example makes use of:mod:`concurrent.futures` to fill an array using +This example makes use of :mod:`concurrent.futures` to fill an array using multiple threads. Threads are long-lived so that repeated calls do not require any additional overheads from thread creation. From 124ac8d41f2d913cbac1a05452cdae3d80d7e789 Mon Sep 17 00:00:00 2001 From: Charles Harris Date: Sat, 21 Jun 2025 07:40:15 -0600 Subject: [PATCH 25/26] MAINT: Update main after 2.3.1 release. - Add 2.3.1-notes.rst - Add 2.3.1-changelog.rst - Update release.rst [skip cirrus] [skip azp] [skip actions] --- doc/changelog/2.3.1-changelog.rst | 34 +++++++++++++++++++ doc/source/release.rst | 1 + doc/source/release/2.3.1-notes.rst | 53 ++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) create mode 100644 doc/changelog/2.3.1-changelog.rst create mode 100644 doc/source/release/2.3.1-notes.rst diff --git a/doc/changelog/2.3.1-changelog.rst b/doc/changelog/2.3.1-changelog.rst new file mode 100644 index 000000000000..a1c840f8beda --- /dev/null +++ b/doc/changelog/2.3.1-changelog.rst @@ -0,0 +1,34 @@ + +Contributors +============ + +A total of 9 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Brad Smith + +* Charles Harris +* Developer-Ecosystem-Engineering +* François Rozet +* Joren Hammudoglu +* Matti Picus +* Mugundan Selvanayagam +* Nathan Goldbaum +* Sebastian Berg + +Pull requests merged +==================== + +A total of 12 pull requests were merged for this release. + +* `#29140 <https://github.com/numpy/numpy/pull/29140>`__: MAINT: Prepare 2.3.x for further development +* `#29191 <https://github.com/numpy/numpy/pull/29191>`__: BUG: fix matmul with transposed out arg (#29179) +* `#29192 <https://github.com/numpy/numpy/pull/29192>`__: TYP: Backport typing fixes and improvements. +* `#29205 <https://github.com/numpy/numpy/pull/29205>`__: BUG: Revert ``np.vectorize`` casting to legacy behavior (#29196) +* `#29222 <https://github.com/numpy/numpy/pull/29222>`__: TYP: Backport typing fixes +* `#29233 <https://github.com/numpy/numpy/pull/29233>`__: BUG: avoid negating unsigned integers in resize implementation... +* `#29234 <https://github.com/numpy/numpy/pull/29234>`__: TST: Fix test that uses uninitialized memory (#29232) +* `#29235 <https://github.com/numpy/numpy/pull/29235>`__: BUG: Address interaction between SME and FPSR (#29223) +* `#29237 <https://github.com/numpy/numpy/pull/29237>`__: BUG: Enforce integer limitation in concatenate (#29231) +* `#29238 <https://github.com/numpy/numpy/pull/29238>`__: CI: Add support for building NumPy with LLVM for Win-ARM64 +* `#29241 <https://github.com/numpy/numpy/pull/29241>`__: ENH: Detect CPU features on OpenBSD ARM and PowerPC64 +* `#29242 <https://github.com/numpy/numpy/pull/29242>`__: ENH: Detect CPU features on FreeBSD / OpenBSD RISC-V64.
diff --git a/doc/source/release.rst b/doc/source/release.rst index 6c6a853b06f5..59e6dd07b002 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -6,6 +6,7 @@ Release notes :maxdepth: 2 2.4.0 + 2.3.1 2.3.0 2.2.6 2.2.5 diff --git a/doc/source/release/2.3.1-notes.rst b/doc/source/release/2.3.1-notes.rst new file mode 100644 index 000000000000..d8193f07671c --- /dev/null +++ b/doc/source/release/2.3.1-notes.rst @@ -0,0 +1,53 @@ +.. currentmodule:: numpy + +========================= +NumPy 2.3.1 Release Notes +========================= + +The NumPy 2.3.1 release is a patch release with several bug fixes, annotation +improvements, and better support for OpenBSD. Highlights are: + +- Fix bug in ``matmul`` for non-contiguous out kwarg parameter +- Fix for Accelerate runtime warnings on M4 hardware +- Fix new in NumPy 2.3.0 ``np.vectorize`` casting errors +- Improved support of CPU features for FreeBSD and OpenBSD + +This release supports Python versions 3.11-3.13; Python 3.14 will be supported +when it is released. + + +Contributors +============ + +A total of 9 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. + +* Brad Smith + +* Charles Harris +* Developer-Ecosystem-Engineering +* François Rozet +* Joren Hammudoglu +* Matti Picus +* Mugundan Selvanayagam +* Nathan Goldbaum +* Sebastian Berg + + +Pull requests merged +==================== + +A total of 12 pull requests were merged for this release. + +* `#29140 <https://github.com/numpy/numpy/pull/29140>`__: MAINT: Prepare 2.3.x for further development +* `#29191 <https://github.com/numpy/numpy/pull/29191>`__: BUG: fix matmul with transposed out arg (#29179) +* `#29192 <https://github.com/numpy/numpy/pull/29192>`__: TYP: Backport typing fixes and improvements. +* `#29205 <https://github.com/numpy/numpy/pull/29205>`__: BUG: Revert ``np.vectorize`` casting to legacy behavior (#29196) +* `#29222 <https://github.com/numpy/numpy/pull/29222>`__: TYP: Backport typing fixes +* `#29233 <https://github.com/numpy/numpy/pull/29233>`__: BUG: avoid negating unsigned integers in resize implementation... +* `#29234 <https://github.com/numpy/numpy/pull/29234>`__: TST: Fix test that uses uninitialized memory (#29232) +* `#29235 <https://github.com/numpy/numpy/pull/29235>`__: BUG: Address interaction between SME and FPSR (#29223) +* `#29237 <https://github.com/numpy/numpy/pull/29237>`__: BUG: Enforce integer limitation in concatenate (#29231) +* `#29238 <https://github.com/numpy/numpy/pull/29238>`__: CI: Add support for building NumPy with LLVM for Win-ARM64 +* `#29241 <https://github.com/numpy/numpy/pull/29241>`__: ENH: Detect CPU features on OpenBSD ARM and PowerPC64 +* `#29242 <https://github.com/numpy/numpy/pull/29242>`__: ENH: Detect CPU features on FreeBSD / OpenBSD RISC-V64. + From 1fefc5c6b767e86c2642641e7ba7a22ab0cf554b Mon Sep 17 00:00:00 2001 From: Mohammed Abdul Rahman <130785777+that-ar-guy@users.noreply.github.com> Date: Mon, 23 Jun 2025 23:26:14 +0530 Subject: [PATCH 26/26] DOC: Clarify dtype argument for __array__ in custom container guide (#29254) * DOC: Clarify dtype argument for __array__ in custom container guide --- doc/source/user/basics.dispatch.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/doc/source/user/basics.dispatch.rst b/doc/source/user/basics.dispatch.rst index ae53995a3917..117d60f85467 100644 --- a/doc/source/user/basics.dispatch.rst +++ b/doc/source/user/basics.dispatch.rst @@ -46,6 +46,21 @@ array([[1., 0., 0., 0., 0.], [0., 0., 0., 1., 0.], [0., 0., 0., 0., 1.]]) +The ``__array__`` method can optionally accept a `dtype` argument. If provided, +this argument specifies the desired data type for the resulting NumPy array. +Your implementation should attempt to convert the data to this `dtype` +if possible. If the conversion is not supported, it's generally best +to fall back to a default type or raise a `TypeError` or `ValueError`.
+ +Here's an example demonstrating its use with `dtype` specification: + +>>> np.asarray(arr, dtype=np.float32) +array([[1., 0., 0., 0., 0.], + [0., 1., 0., 0., 0.], + [0., 0., 1., 0., 0.], + [0., 0., 0., 1., 0.], + [0., 0., 0., 0., 1.]], dtype=float32) + If we operate on ``arr`` with a numpy function, numpy will again use the ``__array__`` interface to convert it to an array and then apply the function in the usual way.
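A minimal sketch of an ``__array__`` implementation that honors the ``dtype`` request, using the ``DiagonalArray`` container from earlier in this guide (the optional ``copy`` parameter is assumed per the NumPy 2 protocol):

>>> class DiagonalArray:
...     def __init__(self, N, value):
...         self._N = N
...         self._i = value
...     def __array__(self, dtype=None, copy=None):
...         # np.eye allocates a fresh array, so the requested dtype can be
...         # honored directly; dtype=None keeps the default float64.
...         return self._i * np.eye(self._N, dtype=dtype)
>>> np.asarray(DiagonalArray(2, 3), dtype=np.int8)
array([[3, 0],
       [0, 3]], dtype=int8)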