8000 Add XPU device to nested_layer_norm (#148593) · pytorch/pytorch@215f856 · GitHub
[go: up one dir, main page]

Skip to content

Commit 215f856

Browse files
min-jean-cho authored and pytorchmergebot
committed
Add XPU device to nested_layer_norm (#148593)
Work with intel/torch-xpu-ops#1416 . Pull Request resolved: #148593 Approved by: https://github.com/guangyey, https://github.com/jbschlosser
1 parent 66300d3 commit 215f856

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

aten/src/ATen/native/nested/NestedTensorMath.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -172,14 +172,14 @@ std::tuple<Tensor, Tensor, Tensor> nested_layer_norm(
172172
std::nullopt /* pin_memory */,
173173
at::MemoryFormat::Contiguous);
174174
auto options = input_buffer.options();
175-
if (input_buffer.is_cuda()) {
176-
auto acc_type = at::toAccumulateType(input_buffer.scalar_type(), true);
175+
if (input_buffer.is_cuda() || input_buffer.is_xpu()) {
176+
auto acc_type = at::toAccumulateType(input_buffer.scalar_type(), input_buffer.device().type());
177177
options = options.dtype(acc_type);
178178
}
179179
Tensor mean = at::empty({M}, options);
180180
Tensor rstd = at::empty({M}, options);
181181
LayerNormKernel(
182-
input_buffer.is_cuda() ? kCUDA : kCPU,
182+
input_buffer.device().type(),
183183
input_buffer,
184184
*weight_contig,
185185
*bias_contig,

0 commit comments

Comments (0)
0