[Intel GPU] allow_tf32 context at XPU backend · pytorch/pytorch@e4ba41a · GitHub

Commit e4ba41a

[Intel GPU] allow_tf32 context at XPU backend
ghstack-source-id: 7c4a906
Pull Request resolved: #137570
1 parent ac8954d commit e4ba41a
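
This commit threads a single allow_tf32 flag from the global ATen Context, through new torch._C bindings, up to the torch.backends.mkldnn module, and finally into the oneDNN convolution primitives used by the XPU backend. A minimal sketch of the resulting user-facing control (assumes a PyTorch build that includes this commit and an Intel GPU runtime):

import torch

# Allow oneDNN to execute fp32 convolutions in tf32 on Intel GPUs
torch.backends.mkldnn.allow_tf32 = True

# ...or scope the change; the previous flags are restored on exit
with torch.backends.mkldnn.flags(enabled=None, deterministic=None, allow_tf32=False):
    pass  # convolutions in this block run in full fp32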

File tree

7 files changed: +80 -5 lines changed

aten/src/ATen/Context.cpp

Lines changed: 8 additions & 0 deletions

@@ -121,6 +121,14 @@ void Context::setAllowTF32CuDNN(bool b) {
   allow_tf32_cudnn = b;
 }
 
+bool Context::allowTF32Mkldnn() const {
+  return allow_tf32_mkldnn;
+}
+
+void Context::setAllowTF32Mkldnn(bool b) {
+  allow_tf32_mkldnn = b;
+}
+
 bool Context::userEnabledFlashSDP() const {
   return enabled_flashSDP;
 }

aten/src/ATen/Context.h

Lines changed: 3 additions & 0 deletions

@@ -312,6 +312,8 @@ class TORCH_API Context {
   void setFloat32MatmulPrecision(const std::string& s);
   bool allowTF32CuDNN() const;
   void setAllowTF32CuDNN(bool);
+  bool allowTF32Mkldnn() const;
+  void setAllowTF32Mkldnn(bool);
   bool allowTF32CuBLAS() const;
   void setAllowTF32CuBLAS(bool);
   Float32MatmulPrecision float32MatmulPrecision() const;

@@ -369,6 +371,7 @@ class TORCH_API Context {
   bool allow_fp16_reduction_cublas = true;
   bool allow_bf16_reduction_cublas = true;
   bool enabled_mkldnn = true;
+  bool allow_tf32_mkldnn = true;
   bool enabled_nnpack = true;
   at::LinalgBackend linalg_preferred_backend =
       c10::utils::check_env("TORCH_LINALG_PREFER_CUSOLVER") == true
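
The new Context members mirror the existing CuDNN TF32 pair, and allow_tf32_mkldnn defaults to true, so TF32 is permitted unless a user turns it off. A quick way to confirm the default from Python (a sketch, assuming this commit is built):

import torch

# allow_tf32_mkldnn is initialized to true in Context.h above,
# so the getter reports True in a fresh process
assert torch.backends.mkldnn.allow_tf32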

aten/src/ATen/native/mkldnn/xpu/detail/Conv.cpp

Lines changed: 19 additions & 0 deletions

@@ -201,6 +201,12 @@ sycl::event convolution(
   }
 #endif
 
+  auto& ctx = at::globalContext();
+  bool allow_tf32 = ctx.allowTF32Mkldnn();
+  if (allow_tf32) {
+    pattr.set_fpmath_mode(dnnl::fpmath_mode::tf32);
+  }
+
   auto conv_fwd_pd = dnnl::convolution_forward::primitive_desc(
       engine,
       dnnl::prop_kind::forward,

@@ -288,6 +294,12 @@ sycl::event convolution_backward_weights(
   }
 #endif
 
+  auto& ctx = at::globalContext();
+  bool allow_tf32 = ctx.allowTF32Mkldnn();
+  if (allow_tf32) {
+    pattr.set_fpmath_mode(dnnl::fpmath_mode::tf32);
+  }
+
   pattr.set_scratchpad_mode(dnnl::scratchpad_mode::user);
   auto conv_fwd_pd = dnnl::convolution_forward::primitive_desc(
       engine,

@@ -390,6 +402,13 @@ sycl::event convolution_backward_data(
   dnnl::memory::dims _padding_front_top_left = padding_front_top_left.vec();
   dnnl::memory::dims _padding_back_bottom_right = padding_back_bottom_right.vec();
   dnnl::memory::dims _dilation = compatible_dilation(dilation);
+
+  auto& ctx = at::globalContext();
+  bool allow_tf32 = ctx.allowTF32Mkldnn();
+  if (allow_tf32) {
+    pattr.set_fpmath_mode(dnnl::fpmath_mode::tf32);
+  }
+
   auto conv_forward_pd = dnnl::convolution_forward::primitive_desc(
       engine,
       dnnl::prop_kind::forward,
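
In each of the three convolution paths, the flag is read from the global context and, when set, the primitive attributes request dnnl::fpmath_mode::tf32, letting oneDNN compute fp32 convolutions in tf32 (fp32 range, roughly 10 mantissa bits). A hedged sketch of the observable effect at the Python level (illustrative, not from the commit; assumes an XPU build on hardware with tf32 support):

import torch
import torch.nn.functional as F

x = torch.randn(8, 64, 56, 56, device="xpu")
w = torch.randn(128, 64, 3, 3, device="xpu")

torch.backends.mkldnn.allow_tf32 = False
ref = F.conv2d(x, w)   # full fp32 math

torch.backends.mkldnn.allow_tf32 = True
fast = F.conv2d(x, w)  # oneDNN may now use tf32 internally

# tf32 trims mantissa bits, so expect small, bounded differences
print((ref - fast).abs().max())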

test/xpu/test_conv.py

Lines changed: 11 additions & 0 deletions

@@ -1264,6 +1264,17 @@ def test_channels_last_ouput_stride(self, device, dtype):
         # input NHWC, output NHWC
         assert_size_stride(out, (2, 512, 7, 7), (25088, 1, 3584, 512))
 
+    @onlyXPU
+    def test_mkldnn_allow_tf32_get_set(self, device):
+        with torch.backends.mkldnn.flags(
+            enabled=None, deterministic=None, allow_tf32=False
+        ):
+            self.assertFalse(torch.backends.mkldnn.allow_tf32)
+        with torch.backends.mkldnn.flags(
+            enabled=None, deterministic=None, allow_tf32=True
+        ):
+            self.assertTrue(torch.backends.mkldnn.allow_tf32)
+
 
 instantiate_device_type_tests(
     TestConvolutionNNDeviceType, globals(), only_for="xpu", allow_xpu=True
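
The same round-trip can be checked outside the device-type test harness in a plain script (a minimal sketch mirroring the test above):

import torch

with torch.backends.mkldnn.flags(enabled=None, deterministic=None, allow_tf32=False):
    assert not torch.backends.mkldnn.allow_tf32
with torch.backends.mkldnn.flags(enabled=None, deterministic=None, allow_tf32=True):
    assert torch.backends.mkldnn.allow_tf32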

torch/_C/__init__.pyi.in

Lines changed: 2 additions & 0 deletions

@@ -1170,6 +1170,8 @@ def _get_cudnn_deterministic() -> _bool: ...  # THPModule_deterministicCuDNN
 def _set_cudnn_deterministic(arg: _bool) -> None: ...  # THPModule_setDeterministicCuDNN
 def _get_mkldnn_deterministic() -> _bool: ...  # THPModule_deterministicMkldnn
 def _set_mkldnn_deterministic(arg: _bool) -> None: ...  # THPModule_setDeterministicMkldnn
+def _get_mkldnn_allow_tf32() -> _bool: ...  # THPModule_allowTF32Mkldnn
+def _set_mkldnn_allow_tf32(arg: _bool) -> None: ...  # THPModule_setAllowTF32Mkldnn
 def _get_deterministic_algorithms() -> _bool: ...  # THPModule_deterministicAlgorithms
 def _get_deterministic_algorithms_warn_only() -> _bool: ...  # THPModule_deterministicAlgorithmsWarnOnly
 def _set_deterministic_algorithms(
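
These stubs describe the raw bindings registered in torch/csrc/Module.cpp below. They can be exercised directly, though torch.backends.mkldnn is the intended surface (a sketch):

import torch

prev = torch._C._get_mkldnn_allow_tf32()
torch._C._set_mkldnn_allow_tf32(False)
assert torch._C._get_mkldnn_allow_tf32() is False
torch._C._set_mkldnn_allow_tf32(prev)  # restore the previous value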

torch/backends/mkldnn/__init__.py

Lines changed: 16 additions & 5 deletions

@@ -64,18 +64,25 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         return False
 
 
-def set_flags(_enabled, _deterministic=None):
-    orig_flags = (torch._C._get_mkldnn_enabled(), torch._C._get_mkldnn_deterministic())
-    torch._C._set_mkldnn_enabled(_enabled)
+def set_flags(_enabled=None, _deterministic=None, _allow_tf32=None):
+    orig_flags = (
+        torch._C._get_mkldnn_enabled(),
+        torch._C._get_mkldnn_deterministic(),
+        torch._C._get_mkldnn_allow_tf32(),
+    )
+    if _enabled is not None:
+        torch._C._set_mkldnn_enabled(_enabled)
     if _deterministic is not None:
         torch._C._set_mkldnn_deterministic(_deterministic)
+    if _allow_tf32 is not None:
+        torch._C._set_mkldnn_allow_tf32(_allow_tf32)
     return orig_flags
 
 
 @contextmanager
-def flags(enabled=False, deterministic=False):
+def flags(enabled=False, deterministic=False, allow_tf32=True):
     with __allow_nonbracketed_mutation():
-        orig_flags = set_flags(enabled, deterministic)
+        orig_flags = set_flags(enabled, deterministic, allow_tf32)
     try:
         yield
     finally:

@@ -91,10 +98,14 @@ def __init__(self, m, name):
     deterministic = ContextProp(
         torch._C._get_mkldnn_deterministic, torch._C._set_mkldnn_deterministic
     )
+    allow_tf32 = ContextProp(
+        torch._C._get_mkldnn_allow_tf32, torch._C._set_mkldnn_allow_tf32
+    )
 
 
 if TYPE_CHECKING:
     enabled: ContextProp
     deterministic: ContextProp
+    allow_tf32: ContextProp
 
 sys.modules[__name__] = MkldnnModule(sys.modules[__name__], __name__)
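
With the module wrapper in place, the flag is exposed both as an attribute (via ContextProp) and through the flags() context manager; set_flags() now treats None as "leave unchanged" for every flag and returns the previous triple so the context manager can restore it. A short usage sketch:

import torch

# Process-wide toggle through the new ContextProp attribute
torch.backends.mkldnn.allow_tf32 = False
assert not torch.backends.mkldnn.allow_tf32

# Scoped override: set_flags() snapshots (enabled, deterministic, allow_tf32)
# and the snapshot is restored when the block exits
with torch.backends.mkldnn.flags(enabled=None, deterministic=None, allow_tf32=True):
    assert torch.backends.mkldnn.allow_tf32
assert not torch.backends.mkldnn.allow_tf32  # restored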

torch/csrc/Module.cpp

Lines changed: 21 additions & 0 deletions

@@ -888,6 +888,25 @@ PyObject* THPModule_setDeterministicAlgorithms(
   END_HANDLE_TH_ERRORS
 }
 
+PyObject* THPModule_setAllowTF32Mkldnn(PyObject* _unused, PyObject* arg) {
+  HANDLE_TH_ERRORS
+  TORCH_CHECK(
+      PyBool_Check(arg),
+      "set_allow_tf32_mkldnn expects a bool, "
+      "but got ",
+      THPUtils_typename(arg));
+  at::globalContext().setAllowTF32Mkldnn(arg == Py_True);
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+PyObject* THPModule_allowTF32Mkldnn(PyObject* _unused, PyObject* noargs) {
+  if (at::globalContext().allowTF32Mkldnn())
+    Py_RETURN_TRUE;
+  else
+    Py_RETURN_FALSE;
+}
+
 PyObject* THPModule_deterministicAlgorithms(
     PyObject* _unused,
     PyObject* noargs) {

@@ -1410,6 +1429,8 @@ static PyMethodDef TorchMethods[] = { // NOLINT
     {"_set_mkldnn_enabled", THPModule_setUserEnabledMkldnn, METH_O, nullptr},
     {"_get_cudnn_allow_tf32", THPModule_allowTF32CuDNN, METH_NOARGS, nullptr},
     {"_set_cudnn_allow_tf32", THPModule_setAllowTF32CuDNN, METH_O, nullptr},
+    {"_get_mkldnn_allow_tf32", THPModule_allowTF32Mkldnn, METH_NOARGS, nullptr},
+    {"_set_mkldnn_allow_tf32", THPModule_setAllowTF32Mkldnn, METH_O, nullptr},
     {"_get_cudnn_benchmark", THPModule_benchmarkCuDNN, METH_NOARGS, nullptr},
     {"_set_cudnn_benchmark", THPModule_setBenchmarkCuDNN, METH_O, nullptr},
     {"_get_cudnn_deterministic",
