|
# ARM SIMD feature definitions.
#
# Each feature object is created through the `features` module and is given a
# name, an integer, the feature(s) it implies, optional compiler arguments and
# a C test file from numpy/distutils/checks. The objects are collected into
# the ARM_FEATURES dictionary at the end of this file.
#
# NOTE(review): the integer passed as the second positional argument grows
# along the chain (1..7); presumably a priority/ordering value -- confirm
# against the `features` module documentation.
# NOTE(review): in the dictionaries passed to `args:` / `detect:`, 'match' and
# 'mfilter' look like regular expressions applied to previously collected
# values -- confirm their exact semantics in the `features` module.
source_root = meson.project_source_root()
mod_features = import('features')

NEON = mod_features.new(
  'NEON', 1,
  test_code: files(source_root + '/numpy/distutils/checks/cpu_neon.c')[0]
)
NEON_FP16 = mod_features.new(
  'NEON_FP16', 2, implies: NEON,
  test_code: files(source_root + '/numpy/distutils/checks/cpu_neon_fp16.c')[0]
)
# FMA
NEON_VFPV4 = mod_features.new(
  'NEON_VFPV4', 3, implies: NEON_FP16,
  test_code: files(source_root + '/numpy/distutils/checks/cpu_neon_vfpv4.c')[0]
)
# Advanced SIMD
ASIMD = mod_features.new(
  'ASIMD', 4, implies: NEON_VFPV4, detect: {'val': 'ASIMD', 'match': 'NEON.*'},
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimd.c')[0]
)

cpu_family = host_machine.cpu_family()
if cpu_family == 'aarch64'
  # Hardware baseline: NEON, NEON_FP16 and NEON_VFPV4 are each updated to
  # imply the other two plus ASIMD.
  NEON.update(implies: [NEON_FP16, NEON_VFPV4, ASIMD])
  NEON_FP16.update(implies: [NEON, NEON_VFPV4, ASIMD])
  NEON_VFPV4.update(implies: [NEON, NEON_FP16, ASIMD])
elif cpu_family == 'arm'
  # cpu_family 'arm': attach the compiler flags each feature needs.
  NEON.update(args: '-mfpu=neon')
  NEON_FP16.update(args: ['-mfp16-format=ieee', {'val': '-mfpu=neon-fp16', 'match': '-mfpu=.*'}])
  NEON_VFPV4.update(args: [{'val': '-mfpu=neon-vfpv4', 'match': '-mfpu=.*'}])
  ASIMD.update(args: [
    {'val': '-mfpu=neon-fp-armv8', 'match': '-mfpu=.*'},
    '-march=armv8-a+simd'
  ])
endif

# ARMv8.2 half-precision & vector arithmetic
ASIMDHP = mod_features.new(
  'ASIMDHP', 5, implies: ASIMD,
  args: {'val': '-march=armv8.2-a+fp16', 'match': '-march=.*', 'mfilter': '\+.*'},
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimdhp.c')[0]
)
## ARMv8.2 dot product
ASIMDDP = mod_features.new(
  'ASIMDDP', 6, implies: ASIMD,
  args: {'val': '-march=armv8.2-a+dotprod', 'match': '-march=.*', 'mfilter': '\+.*'},
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimddp.c')[0]
)
## ARMv8.2 Single & half-precision Multiply
ASIMDFHM = mod_features.new(
  'ASIMDFHM', 7, implies: ASIMDHP,
  args: {'val': '-march=armv8.2-a+fp16fml', 'match': '-march=.*', 'mfilter': '\+.*'},
  test_code: files(source_root + '/numpy/distutils/checks/cpu_asimdfhm.c')[0]
)

# TODO: Add support for MSVC
# Name -> feature object for every ARM feature defined in this file.
# ASIMDDP is listed here too; it was previously defined but never exported.
ARM_FEATURES = {
  'NEON': NEON, 'NEON_FP16': NEON_FP16, 'NEON_VFPV4': NEON_VFPV4,
  'ASIMD': ASIMD, 'ASIMDHP': ASIMDHP, 'ASIMDDP': ASIMDDP,
  'ASIMDFHM': ASIMDFHM
}
0 commit comments