From 7d5b0d9146428c54bc0651b811a2bf859511ca5c Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Thu, 20 Jun 2024 16:22:57 +0000 Subject: [PATCH] [CpuInductor] Enable NEON ISA detection on Linux ARM (#129075) Also, clean up the code a bit to use `x in [y, z]` instead of `x == y or x == z`. And do not redefine `at_align`, but instead use `alignas(64)`, as was suggested in https://github.com/pytorch/pytorch/pull/128686/files#r1639365978 Test plan: `python3 -c "import torch._inductor.codecache as cc; isa = cc.valid_vec_isa_list()[0];print(str(isa), bool(isa))"` Pull Request resolved: https://github.com/pytorch/pytorch/pull/129075 Approved by: https://github.com/jansel (cherry picked from commit b2a9b8d48534ae76d9952f1d6d5fd628106b78f1) --- torch/_inductor/codecache.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py index ae8453660813..8b9503a1c70a 100644 --- a/torch/_inductor/codecache.py +++ b/torch/_inductor/codecache.py @@ -1275,7 +1275,7 @@ class VecISA: #include #endif -__attribute__((aligned(64))) float in_out_ptr0[16] = {0.0}; +alignas(64) float in_out_ptr0[16] = {0.0}; extern "C" void __avx_chk_kernel() { auto tmp0 = at::vec::Vectorized(1); @@ -1470,12 +1470,12 @@ def _check_and_append_supported_isa( # we only cache some key isa information. 
@functools.lru_cache(None) def valid_vec_isa_list() -> List[VecISA]: + isa_list: List[VecISA] = [] if sys.platform == "darwin" and platform.processor() == "arm": - return [VecNEON()] + isa_list.append(VecNEON()) - cur_os = sys.platform - if cur_os != "linux" and cur_os != "win32": - return [] + if sys.platform not in ["linux", "win32"]: + return isa_list if platform.machine() == "s390x": with open("/proc/cpuinfo") as _cpu_info: @@ -1488,8 +1488,18 @@ def valid_vec_isa_list() -> List[VecISA]: if featuresmatch: for group in featuresmatch.groups(): if re.search(r"[\^ ]+vxe[\$ ]+", group): - return [VecZVECTOR()] - return [] + isa_list.append(VecZVECTOR()) + break + elif arch == "aarch64": + isa_list.append(VecNEON()) + elif arch in ["x86_64", "AMD64"]: + """ + arch value is x86_64 on Linux, and the value is AMD64 on Windows. + """ + _cpu_supported_x86_isa = x86_isa_checker() + for isa in supported_vec_isa_list: + if str(isa) in _cpu_supported_x86_isa and isa: + isa_list.append(isa) isa_list = [] _cpu_supported_isa = x86_isa_checker()