diff --git a/.github/workflows/linux_simd.yml b/.github/workflows/linux_simd.yml index 4cd87ab37dd4..a9f065e25cc0 100644 --- a/.github/workflows/linux_simd.yml +++ b/.github/workflows/linux_simd.yml @@ -212,7 +212,7 @@ jobs: python -m pip install pytest pytest-xdist hypothesis typing_extensions - name: Build - run: CC=gcc-13 CXX=g++-13 spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_skx -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR' + run: CC=gcc-13 CXX=g++-13 spin build -- -Denable-openmp=true -Dallow-noblas=true -Dcpu-baseline=avx512_skx -Dtest-simd='BASELINE,AVX512_KNL,AVX512_KNM,AVX512_SKX,AVX512_CLX,AVX512_CNL,AVX512_ICL,AVX512_SPR' - name: Meson Log if: always() @@ -263,7 +263,7 @@ jobs: python -m pip install pytest pytest-xdist hypothesis typing_extensions - name: Build - run: CC=gcc-13 CXX=g++-13 spin build -- -Dallow-noblas=true -Dcpu-baseline=avx512_spr + run: CC=gcc-13 CXX=g++-13 spin build -- -Denable-openmp=true -Dallow-noblas=true -Dcpu-baseline=avx512_spr - name: Meson Log if: always() diff --git a/doc/release/upcoming_changes/28619.highlight.rst b/doc/release/upcoming_changes/28619.highlight.rst new file mode 100644 index 000000000000..6c296b92899e --- /dev/null +++ b/doc/release/upcoming_changes/28619.highlight.rst @@ -0,0 +1,6 @@ +Building NumPy with OpenMP Parallelization +------------------------------------------- +NumPy now supports OpenMP parallel processing capabilities when built with the +``-Denable-openmp=true`` Meson build flag. This feature is disabled by default. +When enabled, ``np.sort`` and ``np.argsort`` functions can utilize OpenMP for +parallel thread execution, improving performance for these operations. 
diff --git a/doc/release/upcoming_changes/28619.performance.rst b/doc/release/upcoming_changes/28619.performance.rst new file mode 100644 index 000000000000..904decbe0ba6 --- /dev/null +++ b/doc/release/upcoming_changes/28619.performance.rst @@ -0,0 +1,7 @@ +Performance improvements to ``np.sort`` and ``np.argsort`` +---------------------------------------------------------- +``np.sort`` and ``np.argsort`` functions now can leverage OpenMP for parallel +thread execution, resulting in up to 3.5x speedups on x86 architectures with +AVX2 or AVX-512 instructions. This opt-in feature requires NumPy to be built +with the ``-Denable-openmp=true`` Meson flag. Users can control the number of threads +used by setting the OMP_NUM_THREADS environment variable. diff --git a/meson.options b/meson.options index 1be05d324756..b09992fe9b91 100644 --- a/meson.options +++ b/meson.options @@ -22,6 +22,8 @@ option('disable-intel-sort', type: 'boolean', value: false, description: 'Disables SIMD-optimized operations related to Intel x86-simd-sort') option('disable-threading', type: 'boolean', value: false, description: 'Disable threading support (see `NPY_ALLOW_THREADS` docs)') +option('enable-openmp', type: 'boolean', value: false, + description: 'Enable building NumPy with openmp support') option('disable-optimization', type: 'boolean', value: false, description: 'Disable CPU optimized code (dispatch,simd,unroll...)') option('cpu-baseline', type: 'string', value: 'min', diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build index 3bffed752474..a4d2050122c6 100644 --- a/numpy/_core/meson.build +++ b/numpy/_core/meson.build @@ -128,6 +128,21 @@ if use_intel_sort and not fs.exists('src/npysort/x86-simd-sort/README.md') error('Missing the `x86-simd-sort` git submodule! 
Run `git submodule update --init` to fix this.') endif +# openMP related settings: +if get_option('disable-threading') and get_option('enable-openmp') + error('Build options `disable-threading` and `enable-openmp` are conflicting. Please set at most one to true.') +endif + +use_openmp = get_option('enable-openmp') and not get_option('disable-threading') + +# Setup openmp flags for x86-simd-sort: +omp = [] +omp_dep = [] +if use_intel_sort and use_openmp + omp = dependency('openmp', required : true) + omp_dep = declare_dependency(dependencies: omp, compile_args: ['-DXSS_USE_OPENMP']) +endif + if not fs.exists('src/common/pythoncapi-compat') error('Missing the `pythoncapi-compat` git submodule! ' + 'Run `git submodule update --init` to fix this.') @@ -867,12 +882,15 @@ foreach gen_mtargets : [ ] : [] ], ] + + + mtargets = mod_features.multi_targets( gen_mtargets[0], multiarray_gen_headers + gen_mtargets[1], dispatch: gen_mtargets[2], # baseline: CPU_BASELINE, it doesn't provide baseline fallback prefix: 'NPY_', - dependencies: [py_dep, np_core_dep], + dependencies: [py_dep, np_core_dep, omp_dep], c_args: c_args_common + max_opt, cpp_args: cpp_args_common + max_opt, include_directories: [ @@ -1286,7 +1304,7 @@ py.extension_module('_multiarray_umath', 'src/umath', 'src/highway' ], - dependencies: [blas_dep], + dependencies: [blas_dep, omp], link_with: [ npymath_lib, unique_hash_so, diff --git a/numpy/_core/src/npysort/x86-simd-sort b/numpy/_core/src/npysort/x86-simd-sort index 9a1b616d5cd4..c306ac581a59 160000 --- a/numpy/_core/src/npysort/x86-simd-sort +++ b/numpy/_core/src/npysort/x86-simd-sort @@ -1 +1 @@ -Subproject commit 9a1b616d5cd4eaf49f7664fb86ccc1d18bad2b8d +Subproject commit c306ac581a59f89585d778254c4ed7197e64ba2d diff --git a/numpy/_core/tests/test_multiarray.py b/numpy/_core/tests/test_multiarray.py index 0a62cb6945f0..d58471247682 100644 --- a/numpy/_core/tests/test_multiarray.py +++ b/numpy/_core/tests/test_multiarray.py @@ -10292,6 +10292,21 @@ def 
test_argsort_int(N, dtype): arr[N - 1] = maxv assert_arg_sorted(arr, np.argsort(arr, kind='quick')) +# Test large arrays that leverage openMP implementations from x86-simd-sort: +@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64]) +def test_sort_largearrays(dtype): + N = 1000000 + rnd = np.random.RandomState(1100710816) + arr = -0.5 + rnd.random(N).astype(dtype) + assert_equal(np.sort(arr, kind='quick'), np.sort(arr, kind='heap')) + +# Test large arrays that leverage openMP implementations from x86-simd-sort: +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_argsort_largearrays(dtype): + N = 1000000 + rnd = np.random.RandomState(1100710816) + arr = -0.5 + rnd.random(N).astype(dtype) + assert_arg_sorted(arr, np.argsort(arr, kind='quick')) @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") def test_gh_22683():