Update · pytorch/pytorch@e1f7982 · GitHub
[go: up one dir, main page]

Skip to content

Commit e1f7982

Browse files
committed
Update
[ghstack-poisoned]
2 parents 8d70545 + d4685e6 commit e1f7982

File tree

8 files changed

+236
-229
lines changed

8 files changed

+236
-229
lines changed

test/inductor/test_cpu_repro.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2063,6 +2063,52 @@ def fn(x: torch.Tensor, y: torch.Tensor):
20632063
self.common(_fn, _args)
20642064
check_metrics_vec_kernel_count(1)
20652065

2066+
@requires_vectorization
2067+
def test_vec_bitwise(self):
2068+
for dtype in [
2069+
torch.bool,
2070+
torch.uint8,
2071+
torch.int8,
2072+
torch.int32,
2073+
torch.int64,
2074+
]:
2075+
x = torch.randn(64, dtype=torch.float32)
2076+
y = torch.randn(64, dtype=torch.float32)
2077+
if dtype == torch.bool:
2078+
x = x > 0
2079+
y = y > 0
2080+
else:
2081+
x = x.to(dtype)
2082+
y = y.to(dtype)
2083+
bitwise_fns = [
2084+
torch.bitwise_and,
2085+
torch.bitwise_not,
2086+
torch.bitwise_or,
2087+
torch.bitwise_xor,
2088+
torch.bitwise_left_shift,
2089+
torch.bitwise_right_shift,
2090+
]
2091+
for bitwise_fn in bitwise_fns:
2092+
if (
2093+
bitwise_fn
2094+
in [
2095+
torch.bitwise_left_shift,
2096+
torch.bitwise_right_shift,
2097+
]
2098+
and dtype == torch.bool
2099+
):
2100+
# Eager doesn't support bool
2101+
# https://pytorch.org/docs/stable/generated/torch.bitwise_left_shift.html
2102+
continue
2103+
torch._dynamo.reset()
2104+
metrics.reset()
2105+
if bitwise_fn == torch.bitwise_not:
2106+
_args = (x,)
2107+
else:
2108+
_args = (x, y)
2109+
self.common(bitwise_fn, _args)
2110+
check_metrics_vec_kernel_count(1)
2111+
20662112
@requires_vectorization
20672113
@patch("torch.cuda.is_available", lambda: False)
20682114
def test_vec_compare_op_cpu_only(self):

torch/_inductor/codecache.py

Lines changed: 33 additions & 162 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,20 @@
5656
rocm_compile_command,
5757
rocm_compiler,
5858
)
59-
from torch._inductor.cpu_vec_isa import (
59+
60+
"""
61+
codecache.py, cpp_builder.py and cpu_vec_isa.py import rule:
62+
https://github.com/pytorch/pytorch/issues/124245#issuecomment-2197778902
63+
"""
64+
from torch._inductor.cpp_builder import (
65+
_set_gpu_runtime_env,
66+
_transform_cuda_paths,
67+
CppBuilder,
68+
CppOptions,
69+
CppTorchCudaOptions,
6070
get_compiler_version_info,
61-
invalid_vec_isa,
62-
pick_vec_isa,
63-
VecISA,
6471
)
72+
from torch._inductor.cpu_vec_isa import invalid_vec_isa, pick_vec_isa, VecISA
6573
from torch._inductor.runtime.compile_tasks import (
6674
_module_to_triton_kernel,
6775
_reload_python_module,
@@ -1178,99 +1186,23 @@ def __call__(self, inputs: List[Any]) -> Any:
11781186
return self.current_callable(inputs)
11791187

11801188

1181-
def cpp_compiler() -> str:
1182-
if config.is_fbcode():
1183-
return build_paths.cc() if torch.version.hip is None else build_paths.clang()
1184-
if isinstance(config.cpp.cxx, (list, tuple)):
1185-
search = tuple(config.cpp.cxx)
1186-
else:
1187-
search = (config.cpp.cxx,)
1188-
return cpp_compiler_search(search)
1189-
1190-
1191-
@functools.lru_cache(1)
1192-
def cpp_compiler_search(search: str) -> str:
1193-
for cxx in search:
1194-
try:
1195-
if cxx is None:
1196-
# gxx package is only available for Linux
1197-
# according to https://anaconda.org/conda-forge/gxx/
1198-
if sys.platform != "linux":
1199-
continue
1200-
# Do not install GXX by default
1201-
if not os.getenv("TORCH_INDUCTOR_INSTALL_GXX"):
1202-
continue
1203-
from filelock import FileLock
1204-
1205-
lock_dir = get_lock_dir()
1206-
lock = FileLock(
1207-
os.path.join(lock_dir, "g++.lock"), timeout=LOCK_TIMEOUT
1208-
)
1209-
with lock:
1210-
cxx = install_gcc_via_conda()
1211-
subprocess.check_output([cxx, "--version"])
1212-
return cxx
1213-
except (subprocess.SubprocessError, FileNotFoundError, ImportError):
1214-
continue
1215-
raise exc.InvalidCxxCompiler
1216-
1217-
1218-
def install_gcc_via_conda() -> str:
1219-
"""On older systems, this is a quick way to get a modern compiler"""
1220-
prefix = os.path.join(cache_dir(), "gcc")
1221-
cxx_path = os.path.join(prefix, "bin", "g++")
1222-
if not os.path.exists(cxx_path):
1223-
log.info("Downloading GCC via conda")
1224-
conda = os.environ.get("CONDA_EXE", "conda")
1225-
if conda is None:
1226-
conda = shutil.which("conda")
1227-
if conda is not None:
1228-
subprocess.check_call(
1229-
[
1230-
conda,
1231-
"create",
1232-
f"--prefix={prefix}",
1233-
"--channel=conda-forge",
1234-
"--quiet",
1235-
"-y",
1236-
"python=3.8",
1237-
"gxx",
1238-
],
1239-
stdout=subprocess.PIPE,
1240-
)
1241-
return cxx_path
1242-
1243-
1244-
def is_gcc() -> bool:
1245-
if sys.platform == "darwin" and is_apple_clang():
1246-
return False
1247-
return bool(re.search(r"(gcc|g\+\+)", cpp_compiler()))
1248-
1249-
1250-
@functools.lru_cache(None)
1251-
def is_apple_clang() -> bool:
1252-
cxx = cpp_compiler()
1253-
version_string = subprocess.check_output([cxx, "--version"]).decode("utf8")
1254-
return "Apple" in version_string.splitlines()[0]
1255-
1256-
1257-
def is_clang() -> bool:
1258-
# Mac OS apple clang maybe named as gcc, need check compiler info.
1259-
if sys.platform == "darwin":
1260-
return is_apple_clang()
1261-
return bool(re.search(r"(clang|clang\+\+)", cpp_compiler()))
1189+
"""
1190+
TODO: will remove old cpp builder when we switch to the new one.
1191+
"""
12621192

12631193

12641194
def get_compile_only(compile_only: bool = True) -> str:
12651195
return "-c" if compile_only else ""
12661196

12671197

12681198
def get_shared(shared: bool = True, compile_only: bool = False) -> str:
1199+
from .cpp_builder import get_cpp_compiler
1200+
12691201
if not shared:
12701202
return ""
12711203
if compile_only:
12721204
return "-fPIC"
1273-
if platform.system() == "Darwin" and "clang" in cpp_compiler():
1205+
if platform.system() == "Darwin" and "clang" in get_cpp_compiler():
12741206
# This causes undefined symbols to behave the same as linux
12751207
return "-shared -fPIC -undefined dynamic_lookup"
12761208
else:
@@ -1286,6 +1218,8 @@ def get_glibcxx_abi_build_flags() -> str:
12861218

12871219

12881220
def cpp_flags() -> str:
1221+
from .cpp_builder import is_clang
1222+
12891223
flags = ["-std=c++17", "-Wno-unused-variable", "-Wno-unknown-pragmas"]
12901224
if is_clang():
12911225
flags.append("-Werror=ignored-optimization-argument")
@@ -1356,82 +1290,19 @@ def use_standard_sys_dir_headers() -> str:
13561290
return ""
13571291

13581292

1359-
@functools.lru_cache(None)
1360-
def is_conda_llvm_openmp_installed() -> bool:
1361-
try:
1362-
command = "conda list llvm-openmp --json"
1363-
output = subprocess.check_output(command.split()).decode("utf8")
1364-
return len(json.loads(output)) > 0
1365-
except subprocess.SubprocessError:
1366-
return False
1367-
1368-
1369-
@functools.lru_cache(None)
1370-
def homebrew_libomp() -> Tuple[bool, str]:
1371-
try:
1372-
# check if `brew` is installed
1373-
subprocess.check_output(["which", "brew"])
1374-
# get the location of `libomp` if it is installed
1375-
# this is the location that `libomp` **would** be installed
1376-
# see https://github.com/Homebrew/brew/issues/10261#issuecomment-756563567 for details
1377-
libomp_path = (
1378-
subprocess.check_output(["brew", "--prefix", "libomp"])
1379-
.decode("utf8")
1380-
.strip()
1381-
)
1382-
# check if `libomp` is installed
1383-
omp_available = os.path.exists(libomp_path)
1384-
return omp_available, libomp_path
1385-
except subprocess.SubprocessError:
1386-
return False, ""
1387-
1388-
1389-
def _set_gpu_runtime_env() -> None:
1390-
if (
1391-
config.is_fbcode()
1392-
and torch.version.hip is None
1393-
and "CUDA_HOME" not in os.environ
1394-
and "CUDA_PATH" not in os.environ
1395-
):
1396-
os.environ["CUDA_HOME"] = build_paths.cuda()
1397-
1398-
1399-
def _get_python_include_dirs():
1400-
include_dir = Path(sysconfig.get_path("include"))
1401-
# On Darwin Python executable from a framework can return
1402-
# non-existing /Library/Python/... include path, in which case
1403-
# one should use Headers folder from the framework
1404-
if not include_dir.exists() and platform.system() == "Darwin":
1405-
std_lib = Path(sysconfig.get_path("stdlib"))
1406-
include_dir = (std_lib.parent.parent / "Headers").absolute()
1407-
if not (include_dir / "Python.h").exists():
1408-
warnings.warn(f"Can't find Python.h in {str(include_dir)}")
1409-
return [str(include_dir)]
1410-
1411-
1412-
def _transform_cuda_paths(lpaths):
1413-
# This handles two cases:
1414-
# 1. Meta internal cuda-12 where libs are in lib/cuda-12 and lib/cuda-12/stubs
1415-
# 2. Linux machines may have CUDA installed under either lib64/ or lib/
1416-
for i, path in enumerate(lpaths):
1417-
if (
1418-
"CUDA_HOME" in os.environ
1419-
and path.startswith(os.environ["CUDA_HOME"])
1420-
and not os.path.exists(f"{path}/libcudart_static.a")
1421-
):
1422-
for root, dirs, files in os.walk(path):
1423-
if "libcudart_static.a" in files:
1424-
lpaths[i] = os.path.join(path, root)
1425-
lpaths.append(os.path.join(lpaths[i], "stubs"))
1426-
break
1427-
1428-
14291293
def get_include_and_linking_paths(
14301294
include_pytorch: bool = False,
14311295
vec_isa: VecISA = invalid_vec_isa,
14321296
cuda: bool = False,
14331297
aot_mode: bool = False,
14341298
) -> Tuple[List[str], str, str, str, str]:
1299+
from .cpp_builder import (
1300+
_get_python_include_dirs,
1301+
homebrew_libomp,
1302+
is_apple_clang,
1303+
is_conda_llvm_openmp_installed,
1304+
)
1305+
14351306
_set_gpu_runtime_env()
14361307
from torch.utils import cpp_extension
14371308

@@ -1616,6 +1487,8 @@ def cpp_compile_command(
16161487
use_mmap_weights: bool = False,
16171488
extra_flags: Sequence[str] = (),
16181489
) -> str:
1490+
from .cpp_builder import get_cpp_compiler, is_clang
1491+
16191492
ipaths, lpaths, libs, macros, build_arch_flags = get_include_and_linking_paths(
16201493
include_pytorch, vec_isa, cuda, aot_mode
16211494
)
@@ -1654,7 +1527,7 @@ def cpp_compile_command(
16541527
r"[ \n]+",
16551528
" ",
16561529
f"""
1657-
{cpp_compiler()} {inp_name_str} {get_shared(shared, compile_only)}
1530+
{get_cpp_compiler()} {inp_name_str} {get_shared(shared, compile_only)}
16581531
{get_warning_all_flag(warning_all)} {cpp_flags()}
16591532
{get_glibcxx_abi_build_flags()}
16601533
{ipaths_str} {lpaths} {libs} {build_arch_flags}
@@ -1726,6 +1599,8 @@ def compile(
17261599
serialized_extern_kernel_nodes: Optional[str],
17271600
cuda: bool,
17281601
) -> str:
1602+
from .cpp_builder import get_cpp_compiler
1603+
17291604
picked_vec_isa = pick_vec_isa()
17301605
cpp_command = repr(
17311606
cpp_compile_command(
@@ -1870,7 +1745,7 @@ def _compile_consts_darwin(consts: bytes) -> str:
18701745
specified_dir=specified_output_path,
18711746
)
18721747
consts_o = os.path.splitext(consts_path)[0] + ".o"
1873-
cmd = f"{cpp_compiler()} -c -o {consts_o} {consts_path}"
1748+
cmd = f"{get_cpp_compiler()} -c -o {consts_o} {consts_path}"
18741749
run_command_and_check(cmd)
18751750
if is_large_consts:
18761751
with open(consts_o, "r+b") as f:
@@ -2520,8 +2395,6 @@ def _do_validate_cpp_commands(
25202395
extra_flags=extra_flags,
25212396
).split(" ")
25222397

2523-
from torch._inductor.cpp_builder import CppBuilder, CppTorchCudaOptions
2524-
25252398
dummy_build_option = CppTorchCudaOptions(
25262399
vec_isa=picked_isa,
25272400
include_pytorch=include_pytorch,
@@ -2718,8 +2591,6 @@ def _codegen_glue(cls, meta, headerfile):
27182591
@classmethod
27192592
@functools.lru_cache(None)
27202593
def config_hash(cls):
2721-
from torch._inductor.cpp_builder import CppBuilder, CppOptions
2722-
27232594
command_gen = CppBuilder(
27242595
name="O",
27252596
sources="I",

torch/_inductor/codegen/common.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,6 @@ def boolean_ops():
251251
return (
252252
"is_inf",
253253
"is_nan",
254-
"bitwise_xor",
255254
"logical_not",
256255
"signbit",
257256
"le",

0 commit comments

Comments
 (0)
0