BLD, SIMD: The meson CPU dispatcher implementation (#23096) · numpy/numpy@4ec0182 · GitHub
[go: up one dir, main page]

Skip to content

Commit 4ec0182

Browse files
authored
BLD, SIMD: The meson CPU dispatcher implementation (#23096)
This provides almost the same functionality as Distutils/CCompiler Opt, with a few changes to the way the targets are specified. It also abandons the idea of wrapping the dispatchable sources; instead, it relies on static libraries to enable different paths and flags.
1 parent 5697016 commit 4ec0182

File tree

18 files changed

+1421
-126
lines changed

18 files changed

+1421
-126
lines changed

.github/meson_actions/action.yml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: MesonBuildTest
2+
description: "checkout repo, build, and test numpy"
3+
runs:
4+
using: composite
5+
steps:
6+
- name: Install dependencies
7+
shell: bash
8+
run: pip install -r build_requirements.txt
9+
- name: Build
10+
shell: 'script -q -e -c "bash --noprofile --norc -eo pipefail {0}"'
11+
env:
12+
TERM: xterm-256color
13+
run:
14+
spin build -- ${MESON_ARGS[@]}
15+
- name: Check build-internal dependencies
16+
shell: bash
17+
run:
18+
ninja -C build -t missingdeps
19+
- name: Check installed test and stub files
20+
shell: bash
21+
run:
22+
python tools/check_installed_files.py $(find ./build-install -path '*/site-packages/numpy')
23+
- name: Test
24+
shell: 'script -q -e -c "bash --noprofile --norc -eo pipefail {0}"'
25+
env:
26+
TERM: xterm-256color
27+
run: |
28+
pip install pytest pytest-xdist hypothesis typing_extensions
29+
spin test -j auto

.github/workflows/build_test.yml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
if: "github.repository == 'numpy/numpy'"
5050
runs-on: ubuntu-latest
5151
env:
52-
WITHOUT_SIMD: 1
52+
MESON_ARGS: "-Dallow-noblas=true -Dcpu-baseline=none -Dcpu-dispatch=none"
5353
steps:
5454
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
5555
with:
@@ -58,7 +58,7 @@ jobs:
5858
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
5959
with:
6060
python-version: ${{ env.PYTHON_VERSION }}
61-
- uses: ./.github/actions
61+
- uses: ./.github/meson_actions
6262

6363
basic:
6464
needs: [smoke_test]
@@ -122,7 +122,7 @@ jobs:
122122
runs-on: ubuntu-latest
123123
if: github.event_name != 'push'
124124
env:
125-
WITHOUT_OPTIMIZATIONS: 1
125+
MESON_ARGS: "-Dallow-noblas=true -Ddisable-optimization=true"
126126
steps:
127127
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
128128
with:
@@ -131,14 +131,14 @@ jobs:
131131
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
132132
with:
133133
python-version: ${{ env.PYTHON_VERSION }}
134-
- uses: ./.github/actions
134+
- uses: ./.github/meson_actions
135135

136136
with_baseline_only:
137137
needs: [smoke_test]
138138
runs-on: ubuntu-latest
139139
if: github.event_name != 'push'
140140
env:
141-
CPU_DISPATCH: "none"
141+
MESON_ARGS: "-Dallow-noblas=true -Dcpu-dispatch=none"
142142
steps:
143143
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
144144
with:
@@ -147,14 +147,14 @@ jobs:
147147
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
148148
with:
149149
python-version: ${{ env.PYTHON_VERSION }}
150-
- uses: ./.github/actions
150+
- uses: ./.github/meson_actions
151151

152152
without_avx512:
153153
needs: [smoke_test]
154154
runs-on: ubuntu-latest
155155
if: github.event_name != 'push'
156156
env:
157-
CPU_DISPATCH: "max -xop -fma4 -avx512f -avx512cd -avx512_knl -avx512_knm -avx512_skx -avx512_clx -avx512_cnl -avx512_icl"
157+
MESON_ARGS: "-Dallow-noblas=true -Dcpu-dispatch=SSSE3,SSE41,POPCNT,SSE42,AVX,F16C,AVX2,FMA3"
158158
steps:
159159
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
160160
with:
@@ -163,14 +163,14 @@ jobs:
163163
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
164164
with:
165165
python-version: ${{ env.PYTHON_VERSION }}
166-
- uses: ./.github/actions
166+
- uses: ./.github/meson_actions
167167

168168
without_avx512_avx2_fma3:
169169
needs: [smoke_test]
170170
runs-on: ubuntu-latest
171171
if: github.event_name != 'push'
172172
env:
173-
CPU_DISPATCH: "SSSE3 SSE41 POPCNT SSE42 AVX F16C"
173+
MESON_ARGS: "-Dallow-noblas=true -Dcpu-dispatch=SSSE3,SSE41,POPCNT,SSE42,AVX,F16C"
174174
steps:
175175
- uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
176176
with:
@@ -179,7 +179,7 @@ jobs:
179179
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
180180
with:
181181
python-version: ${{ env.PYTHON_VERSION }}
182-
- uses: ./.github/actions
182+
- uses: ./.github/meson_actions
183183

184184
debug:
185185
needs: [smoke_test]

MANIFEST.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ recursive-include numpy/random *.pyx *.pxd *.pyx.in *.pxd.in
2121
include numpy/py.typed
2222
include numpy/random/include/*
2323
include numpy/*.pxd
24+
# Meson CPU Dispatcher
25+
recursive-include meson_cpu *.build *.in
2426
# Add build support that should go in sdist, but not go in bdist/be installed
2527
# Note that sub-directories that don't have __init__ are apparently not
2628
# included by 'recursive-include', so list those separately

build_requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
meson-python>=0.10.0
2-
Cython
1+
meson-python>=0.13.1
2+
Cython>=3.0
33
wheel==0.38.1
44
ninja
55
spin==0.4

doc/source/user/quickstart.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ and other Python sequences.
517517
>>> for i in a:
518518
... print(i**(1 / 3.))
519519
...
520-
9.999999999999998
520+
9.999999999999998 # may vary
521521
1.0
522522
9.999999999999998
523523
3.0

meson.build

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ project(
66
# See `numpy/__init__.py`
77
version: '2.0.0.dev0',
88
license: 'BSD-3',
9-
meson_version: '>= 1.1.0',
9+
meson_version: '>=1.2.99', # version in vendored-meson is 1.2.99
1010
default_options: [
1111
'buildtype=debugoptimized',
1212
'b_ndebug=if-release',
@@ -80,4 +80,5 @@ else
8080
meson.add_dist_script(py, versioneer, '-o', '_version_meson.py')
8181
endif
8282

83+
subdir('meson_cpu')
8384
subdir('numpy')

meson_cpu/arm/meson.build

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
source_root = meson.project_source_root()
2+
mod_features = import('features')
3+
NEON = mod_features.new(
4+
'NEON', 1,
5+
test_code: files(source_root + '/numpy/distutils/checks/cpu_neon.c')[0]
6+
)
7+
NEON_FP16 = mod_features.new(
8+
'NEON_FP16', 2, implies: NEON,
9+
test_code: files(source_root + '/numpy/distutils/checks/cpu_neon_fp16.c')[0]
10+
)
11+
# FMA
12+
NEON_VFPV4 = mod_features.new(
13+
'NEON_VFPV4', 3, implies: NEON_FP16,
14+
test_code: files(source_root + '/numpy/distutils/checks/cpu_neon_vfpv4.c')[0]
15+
)
16+
# Advanced SIMD
17+
ASIMD = mod_features.new(
18+
'ASIMD', 4, implies: NEON_VFPV4, detect: {'val': 'ASIMD', 'match': 'NEON.*'},
19+
test_code: files(source_root + '/numpy/distutils/checks/cpu_asimd.c')[0]
20+
)
21+
cpu_family = host_machine.cpu_family()
22+
if cpu_family == 'aarch64'
23+
# hardware baseline
24+
NEON.update(implies: [NEON_FP16, NEON_VFPV4, ASIMD])
25+
NEON_FP16.update(implies: [NEON, NEON_VFPV4, ASIMD])
26+
NEON_VFPV4.update(implies: [NEON, NEON_FP16, ASIMD])
27+
elif cpu_family == 'arm'
28+
NEON.update(args: '-mfpu=neon')
29+
NEON_FP16.update(args: ['-mfp16-format=ieee', {'val': '-mfpu=neon-fp16', 'match': '-mfpu=.*'}])
30+
NEON_VFPV4.update(args: [{'val': '-mfpu=neon-vfpv4', 'match': '-mfpu=.*'}])
31+
ASIMD.update(args: [
32+
{'val': '-mfpu=neon-fp-armv8', 'match': '-mfpu=.*'},
33+
'-march=armv8-a+simd'
34+
])
35+
endif
36+
# ARMv8.2 half-precision & vector arithmetic
37+
ASIMDHP = mod_features.new(
38+
'ASIMDHP', 5, implies: ASIMD,
39+
args: {'val': '-march=armv8.2-a+fp16', 'match': '-march=.*', 'mfilter': '\+.*'},
40+
test_code: files(source_root + '/numpy/distutils/checks/cpu_asimdhp.c')[0]
41+
)
42+
## ARMv8.2 dot product
43+
ASIMDDP = mod_features.new(
44+
'ASIMDDP', 6, implies: ASIMD,
45+
args: {'val': '-march=armv8.2-a+dotprod', 'match': '-march=.*', 'mfilter': '\+.*'},
46+
test_code: files(source_root + '/numpy/distutils/checks/cpu_asimddp.c')[0]
47+
)
48+
## ARMv8.2 Single & half-precision Multiply
49+
ASIMDFHM = mod_features.new(
50+
'ASIMDFHM', 7, implies: ASIMDHP,
51+
args: {'val': '-march=armv8.2-a+fp16fml', 'match': '-march=.*', 'mfilter': '\+.*'},
52+
test_code: files(source_root + '/numpy/distutils/checks/cpu_asimdfhm.c')[0]
53+
)
54+
# TODO: Add support for MSVC
55+
ARM_FEATURES = {  # maps each feature name to the feature object defined in this file
56+
'NEON': NEON, 'NEON_FP16': NEON_FP16, 'NEON_VFPV4': NEON_VFPV4,
57+
'ASIMD': ASIMD, 'ASIMDHP': ASIMDHP, 'ASIMDDP': ASIMDDP, 'ASIMDFHM': ASIMDFHM  # ASIMDDP was defined above but previously left out of this table
58+
}

0 commit comments

Comments
 (0)
0