[Intel GPU] allow_tf32 for oneDNN backend - XPU part by ZhiweiYan-96 · Pull Request #137570 · pytorch/pytorch · GitHub

[Intel GPU] allow_tf32 for oneDNN backend - XPU part #137570


Status: Closed (wants to merge 40 commits)

Commits (40)
7cb460c  Update  ZhiweiYan-96  Oct 9, 2024
aa6eb63  Update  ZhiweiYan-96  Oct 12, 2024
ac3000e  Update  ZhiweiYan-96  Oct 15, 2024
7c7f5bd  Update  ZhiweiYan-96  Oct 16, 2024
a7ecf30  Update  ZhiweiYan-96  Oct 17, 2024
e14a980  Update  ZhiweiYan-96  Oct 17, 2024
48d0530  Update  ZhiweiYan-96  Oct 17, 2024
d9ab777  Update  ZhiweiYan-96  Oct 21, 2024
b6e6e4e  Update  ZhiweiYan-96  Oct 26, 2024
5a6f59a  Update  ZhiweiYan-96  Oct 28, 2024
60fb9f7  Update  ZhiweiYan-96  Oct 29, 2024
3bea915  Update  ZhiweiYan-96  Nov 13, 2024
06d1680  Update  ZhiweiYan-96  Nov 13, 2024
bf3c110  Update  ZhiweiYan-96  Nov 13, 2024
8e74b7c  Update  ZhiweiYan-96  Nov 14, 2024
2e0f344  Update  ZhiweiYan-96  Nov 20, 2024
4bc77f8  Update  ZhiweiYan-96  Nov 27, 2024
2c667ac  Update  ZhiweiYan-96  Nov 27, 2024
bc921a5  Update  ZhiweiYan-96  Nov 27, 2024
f0d657e  Update  ZhiweiYan-96  Nov 28, 2024
ab586cd  Update  ZhiweiYan-96  Dec 4, 2024
b91414a  Update  ZhiweiYan-96  Dec 5, 2024
6ce003d  Update  ZhiweiYan-96  Dec 5, 2024
c159f6b  Update  ZhiweiYan-96  Dec 5, 2024
0112cb7  Update  ZhiweiYan-96  Dec 6, 2024
4a6ac2d  Update  ZhiweiYan-96  Dec 11, 2024
ea9e69b  Update  ZhiweiYan-96  Dec 12, 2024
6c87ae6  Update  ZhiweiYan-96  Dec 17, 2024
198cc01  Update  ZhiweiYan-96  Dec 18, 2024
ea08036  Update  ZhiweiYan-96  Dec 20, 2024
913e675  Update  ZhiweiYan-96  Dec 23, 2024
5c6fac3  Update  ZhiweiYan-96  Dec 25, 2024
f95ada4  Update  ZhiweiYan-96  Dec 30, 2024
096fd09  Update  ZhiweiYan-96  Jan 6, 2025
689dadf  Update  ZhiweiYan-96  Jan 7, 2025
5f7d0e7  Update  ZhiweiYan-96  Jan 9, 2025
fb2a6c4  Update  ZhiweiYan-96  Jan 20, 2025
80ea731  Update  ZhiweiYan-96  Feb 12, 2025
bbad26b  Update  guangyey  Feb 15, 2025
cc3eefe  Update  ZhiweiYan-96  Feb 16, 2025

Changes from all commits

12 changes: 12 additions & 0 deletions aten/src/ATen/Context.cpp
@@ -137,6 +137,18 @@ std::array<at::SDPBackend, at::num_sdp_backends> Context::sDPPriorityOrder() {
return sdp_priority_order;
}

bool Context::allowTF32OneDNN() const {
return allow_tf32_onednn;
}

void Context::setAllowTF32OneDNN(bool b) {
#ifdef USE_XPU
allow_tf32_onednn = b;
#else
TORCH_WARN("TF32 acceleration on top of oneDNN is available for Intel GPUs. The current Torch version does not have Intel GPU support.");
#endif
}

bool Context::userEnabledFlashSDP() const {
return enabled_flashSDP;
}
3 changes: 3 additions & 0 deletions aten/src/ATen/Context.h
@@ -333,6 +333,8 @@ class TORCH_API Context {
void setFloat32MatmulPrecision(const std::string& s);
bool allowTF32CuDNN() const;
void setAllowTF32CuDNN(bool);
bool allowTF32OneDNN() const;
void setAllowTF32OneDNN(bool);
bool allowTF32CuBLAS() const;
void setAllowTF32CuBLAS(bool);
Float32MatmulPrecision float32MatmulPrecision() const;
@@ -422,6 +424,7 @@ class TORCH_API Context {
bool allow_bf16_reduction_cublas = true;
bool allow_fp16_accumulation_cublas = false;
bool enabled_mkldnn = true;
bool allow_tf32_onednn = false;
bool enabled_nnpack = true;
at::LinalgBackend linalg_preferred_backend =
c10::utils::check_env("TORCH_LINALG_PREFER_CUSOLVER") == true
7 changes: 7 additions & 0 deletions aten/src/ATen/native/mkldnn/xpu/detail/Conv.cpp
@@ -120,6 +120,8 @@ sycl::event convolution(
}
#endif

at::native::onednn::apply_tf32_if_allowed(pattr);

auto conv_fwd_pd = dnnl::convolution_forward::primitive_desc(
engine,
dnnl::prop_kind::forward,
@@ -211,6 +213,8 @@ sycl::event convolution_backward_weights(
}
#endif

at::native::onednn::apply_tf32_if_allowed(pattr);

pattr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
auto conv_fwd_pd = dnnl::convolution_forward::primitive_desc(
engine,
@@ -319,6 +323,9 @@ sycl::event convolution_backward_data(
dnnl::memory::dims _padding_back_bottom_right =
padding_back_bottom_right.vec();
dnnl::memory::dims _dilation = compatible_dilation(dilation);

at::native::onednn::apply_tf32_if_allowed(pattr);

auto conv_forward_pd = dnnl::convolution_forward::primitive_desc(
engine,
dnnl::prop_kind::forward,
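With apply_tf32_if_allowed setting dnnl::fpmath_mode::tf32 on the primitive attributes, fp32 convolutions may execute with TF32 math on supported Intel GPUs. A minimal end-to-end sketch of opting in from Python, assuming a PyTorch build with XPU support and an available "xpu" device (on other builds the setter only warns and the flag stays False):

import torch
import torch.nn.functional as F

# Assumes an XPU-enabled build with an "xpu" device available.
x = torch.randn(2, 3, 32, 32, device="xpu")
w = torch.randn(8, 3, 3, 3, device="xpu")

# allow_tf32=True lets oneDNN downconvert fp32 convolution math to TF32
# inside this region; None leaves the other two flags untouched.
with torch.backends.mkldnn.flags(enabled=None, deterministic=None, allow_tf32=True):
    out = F.conv2d(x, w)  # forward conv; the backward paths apply the same attribute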
11 changes: 11 additions & 0 deletions aten/src/ATen/native/mkldnn/xpu/detail/Utils.cpp
@@ -1,5 +1,8 @@
#include <ATen/Context.h>
#include <ATen/native/ConvUtils.h>
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
#include <dnnl.hpp>
#include <dnnl_common.hpp>
Review thread on the dnnl_common.hpp include:

Collaborator: @ZhiweiYan-96 this file, dnnl_common.hpp, is in the oneapi/dnnl subdirectory of intel/oneapi/dnnl/latest/include/, which is not on the default oneAPI include paths. On Windows this is causing a problem. Can we update the path here to include the oneapi/dnnl prefix? Do you want me to file an issue?

EikanWang (Collaborator), Feb 26, 2025: @alexbaden, XPU does use the oneAPI oneDNN; XPU builds its oneDNN from source code. Do you mean it may conflict with the oneAPI bundle, or that it cannot pass the Windows build?

Collaborator: It cannot pass the Windows build. We think this is because the include is not recursive. It could be that the proper path needs to be added when oneDNN is built from source for XPU.

namespace at::native::onednn {

@@ -487,4 +490,12 @@ dnnl::memory::format_tag conv_weight_fmt(
}
}

void apply_tf32_if_allowed(dnnl::primitive_attr& pattr) {
auto& ctx = at::globalContext();
bool allow_tf32 = ctx.allowTF32OneDNN();
if (allow_tf32) {
pattr.set_fpmath_mode(dnnl::fpmath_mode::tf32);
}
}

} // namespace at::native::onednn
2 changes: 2 additions & 0 deletions aten/src/ATen/native/mkldnn/xpu/detail/Utils.h
@@ -49,6 +49,8 @@ bool is_broadcast_from_other_to_self(

at::MemoryFormat get_cl_tag_by_ndim(const int64_t ndim);

void apply_tf32_if_allowed(dnnl::primitive_attr& primitive_attr);

bool binary_valid(
const at::Tensor& self,
const at::Tensor& other,
11 changes: 11 additions & 0 deletions test/xpu/test_conv.py
@@ -1258,6 +1258,17 @@ def test_channels_last_ouput_stride(self, device, dtype):
# input NHWC, output NHWC
assert_size_stride(out, (2, 512, 7, 7), (25088, 1, 3584, 512))

@onlyXPU
def test_onednn_allow_tf32_get_set(self):
with torch.backends.mkldnn.flags(
enabled=None, deterministic=None, allow_tf32=False
):
self.assertFalse(torch.backends.mkldnn.allow_tf32)
with torch.backends.mkldnn.flags(
enabled=None, deterministic=None, allow_tf32=True
):
self.assertTrue(torch.backends.mkldnn.allow_tf32)


instantiate_device_type_tests(
TestConvolutionNNDeviceType, globals(), only_for="xpu", allow_xpu=True
2 changes: 2 additions & 0 deletions torch/_C/__init__.pyi.in
@@ -1181,6 +1181,8 @@ def _get_cudnn_deterministic() -> _bool: ... # THPModule_deterministicCuDNN
def _set_cudnn_deterministic(arg: _bool) -> None: ... # THPModule_setDeterministicCuDNN
def _get_mkldnn_deterministic() -> _bool: ... # THPModule_deterministicMkldnn
def _set_mkldnn_deterministic(arg: _bool) -> None: ... # THPModule_setDeterministicMkldnn
def _get_onednn_allow_tf32() -> _bool: ... # THPModule_allowTF32OneDNN
def _set_onednn_allow_tf32(arg: _bool) -> None: ... # THPModule_setAllowTF32OneDNN
def _get_deterministic_algorithms() -> _bool: ... # THPModule_deterministicAlgorithms
def _get_deterministic_algorithms_warn_only() -> _bool: ... # THPModule_deterministicAlgorithmsWarnOnly
def _set_deterministic_algorithms(
21 changes: 16 additions & 5 deletions torch/backends/mkldnn/__init__.py
@@ -64,18 +64,25 @@ def __exit__(self, exc_type, exc_val, exc_tb):
return False


def set_flags(_enabled, _deterministic=None):
orig_flags = (torch._C._get_mkldnn_enabled(), torch._C._get_mkldnn_deterministic())
torch._C._set_mkldnn_enabled(_enabled)
def set_flags(_enabled=None, _deterministic=None, _allow_tf32=None):
orig_flags = (
torch._C._get_mkldnn_enabled(),
torch._C._get_mkldnn_deterministic(),
torch._C._get_onednn_allow_tf32(),
)
if _enabled is not None:
torch._C._set_mkldnn_enabled(_enabled)
if _deterministic is not None:
torch._C._set_mkldnn_deterministic(_deterministic)
if _allow_tf32 is not None:
torch._C._set_onednn_allow_tf32(_allow_tf32)
return orig_flags


@contextmanager
def flags(enabled=False, deterministic=False):
def flags(enabled=False, deterministic=False, allow_tf32=True):
with __allow_nonbracketed_mutation():
orig_flags = set_flags(enabled, deterministic)
orig_flags = set_flags(enabled, deterministic, allow_tf32)
try:
yield
finally:
@@ -91,10 +98,14 @@ def __init__(self, m, name):
deterministic = ContextProp(
torch._C._get_mkldnn_deterministic, torch._C._set_mkldnn_deterministic
)
allow_tf32 = ContextProp(
torch._C._get_onednn_allow_tf32, torch._C._set_onednn_allow_tf32
)


if TYPE_CHECKING:
enabled: ContextProp
deterministic: ContextProp
allow_tf32: ContextProp

sys.modules[__name__] = MkldnnModule(sys.modules[__name__], __name__)
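
Taken together, set_flags only touches flags passed as non-None values, and the flags context manager snapshots all three flags on entry and restores them on exit. A small sketch of the resulting semantics, assuming an XPU build (on other builds the TF32 setter warns and the flag stays False):

import torch

before = torch.backends.mkldnn.allow_tf32

# Flip only the TF32 flag; enabled/deterministic are left untouched via None.
with torch.backends.mkldnn.flags(enabled=None, deterministic=None, allow_tf32=not before):
    assert torch.backends.mkldnn.allow_tf32 == (not before)

# On exit, set_flags(*orig_flags) restores the snapshot taken on entry.
assert torch.backends.mkldnn.allow_tf32 == before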
25 changes: 25 additions & 0 deletions torch/csrc/Module.cpp
@@ -947,6 +947,29 @@ static PyObject* THPModule_setDeterministicAlgorithms(
END_HANDLE_TH_ERRORS
}

static PyObject* THPModule_setAllowTF32OneDNN(
PyObject* _unused,
PyObject* arg) {
HANDLE_TH_ERRORS
TORCH_CHECK(
PyBool_Check(arg),
"_set_onednn_allow_tf32 expects a bool, "
"but got ",
THPUtils_typename(arg));
at::globalContext().setAllowTF32OneDNN(arg == Py_True);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}

static PyObject* THPModule_allowTF32OneDNN(
PyObject* _unused,
PyObject* noargs) {
if (at::globalContext().allowTF32OneDNN())
Py_RETURN_TRUE;
else
Py_RETURN_FALSE;
}

static PyObject* THPModule_deterministicAlgorithms(
PyObject* _unused,
PyObject* noargs) {
@@ -1527,6 +1550,8 @@ static std::initializer_list<PyMethodDef> TorchMethods = {
{"_set_mkldnn_enabled", THPModule_setUserEnabledMkldnn, METH_O, nullptr},
{"_get_cudnn_allow_tf32", THPModule_allowTF32CuDNN, METH_NOARGS, nullptr},
{"_set_cudnn_allow_tf32", THPModule_setAllowTF32CuDNN, METH_O, nullptr},
{"_get_onednn_allow_tf32", THPModule_allowTF32OneDNN, METH_NOARGS, nullptr},
{"_set_onednn_allow_tf32", THPModule_setAllowTF32OneDNN, METH_O, nullptr},
{"_get_cudnn_benchmark", THPModule_benchmarkCuDNN, METH_NOARGS, nullptr},
{"_set_cudnn_benchmark", THPModule_setBenchmarkCuDNN, METH_O, nullptr},
{"_get_cudnn_deterministic",
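These are the raw private bindings that the torch.backends.mkldnn.allow_tf32 property wraps. A quick sketch of their behavior, including the bool type check enforced by the TORCH_CHECK above (TORCH_CHECK failures surface in Python as RuntimeError):

import torch

torch._C._set_onednn_allow_tf32(True)  # warns and is a no-op on non-XPU builds
print(torch._C._get_onednn_allow_tf32())

try:
    torch._C._set_onednn_allow_tf32(1)  # an int, not a bool: rejected
except RuntimeError as err:
    print(err)  # e.g. "_set_onednn_allow_tf32 expects a bool, but got int"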